Spaces:
Runtime error
Runtime error
| import os # For filesystem operations | |
| import shutil # For directory cleanup | |
| import zipfile # For extracting model archives | |
| import pathlib # For path manipulations | |
| import pandas # For tabular data handling | |
| import gradio # For interactive UI | |
| import huggingface_hub # For downloading model assets | |
| import autogluon.tabular # For loading and running AutoGluon predictors | |
# ---------------------------------------------------------------------------
# Settings: where the model archive lives and where it is unpacked locally
# ---------------------------------------------------------------------------
MODEL_REPO_ID = "ccm/2024-24679-tabular-autolguon-predictor"
ZIP_FILENAME = "autogluon_predictor_dir.zip"
CACHE_DIR = pathlib.Path("hf_assets")
EXTRACT_DIR = CACHE_DIR.joinpath("predictor_native")

# ---------------------------------------------------------------------------
# Feature schema (must match training); order matters because the inference
# code addresses these columns by position.
# ---------------------------------------------------------------------------
FEATURE_COLS = [
    "About how many hours per week do you spend listening to music?",
    "Approximately how many songs are in your music library?",
    "Approximately how many playlists have you created yourself?",
    "How often do you share music with others?",
    "Which decade of music do you listen to most?",
    "How often do you attend live music events?",
    "Do you prefer songs with lyrics or instrumental music?",
]
TARGET_COL = "Do you usually listen to music alone or with others?"

# ---------------------------------------------------------------------------
# Encodings (aligned to survey UI): each label maps to its list position
# ---------------------------------------------------------------------------
LIKERT5_LABELS = ["Never", "Rarely", "Sometimes", "Often", "Very Often"]
LIKERT5_MAP = dict(zip(LIKERT5_LABELS, range(len(LIKERT5_LABELS))))

DECADE_LABELS = ["1970s and before", "1980s", "1990s", "2000s", "2010s", "2020s"]
DECADE_MAP = dict(zip(DECADE_LABELS, range(len(DECADE_LABELS))))

LYRICS_LABELS = ["Lyrics", "Instrumental", "Both equally"]
LYRICS_MAP = dict(zip(LYRICS_LABELS, range(len(LYRICS_LABELS))))

# ---------------------------------------------------------------------------
# Outcome label mapping: predicted class code -> text shown to the user
# ---------------------------------------------------------------------------
OUTCOME_LABELS = {0: "Mostly Alone", 1: "Mostly With Others"}
def _prepare_predictor_dir() -> str:
    """Download the zipped predictor and unpack it into a clean directory.

    The archive named ``ZIP_FILENAME`` is fetched from the ``MODEL_REPO_ID``
    model repository into ``CACHE_DIR``. ``EXTRACT_DIR`` is deleted and
    recreated on every call so files from an earlier extraction never mix
    with the new one.

    Returns:
        The directory to load the predictor from, as a string: the single
        top-level folder of the archive when the extraction produced exactly
        one directory, otherwise ``EXTRACT_DIR`` itself.
    """
    CACHE_DIR.mkdir(parents=True, exist_ok=True)

    # ``local_dir_use_symlinks`` is deliberately not passed: huggingface_hub
    # has deprecated that keyword and ignores it, and newer releases may
    # reject it outright. The returned path is only opened for reading, so
    # whether it is a real file or a link into the cache does not matter here.
    local_zip = huggingface_hub.hf_hub_download(
        repo_id=MODEL_REPO_ID,
        filename=ZIP_FILENAME,
        repo_type="model",
        local_dir=str(CACHE_DIR),
    )

    # Start from an empty extraction directory.
    if EXTRACT_DIR.exists():
        shutil.rmtree(EXTRACT_DIR)
    EXTRACT_DIR.mkdir(parents=True, exist_ok=True)

    with zipfile.ZipFile(local_zip, "r") as zf:
        zf.extractall(str(EXTRACT_DIR))

    # Archives that wrap everything in one folder: descend into that folder.
    contents = list(EXTRACT_DIR.iterdir())
    if len(contents) == 1 and contents[0].is_dir():
        return str(contents[0])
    return str(EXTRACT_DIR)
# Fetch/unpack the model once at import time and keep the loaded predictor as
# a module-level singleton shared by every inference call.
PREDICTOR_DIR = _prepare_predictor_dir()
PREDICTOR = autogluon.tabular.TabularPredictor.load(
    PREDICTOR_DIR,
    # Allow loading even when the running Python version differs from the
    # one the predictor was saved with.
    require_py_version_match=False,
)
# Class-to-label mapper
def _human_label(c):
    """Translate a predicted class value into its display text.

    The value is first coerced to ``int`` and looked up in ``OUTCOME_LABELS``;
    if coercion fails or the integer is not a known class, the raw value is
    looked up as-is. Anything still unmatched is returned as ``str(c)``.
    """
    try:
        as_int = int(c)
    except Exception:
        # Best effort: values that cannot be coerced fall through to the raw lookup.
        as_int = None

    if as_int is not None and as_int in OUTCOME_LABELS:
        return OUTCOME_LABELS[as_int]
    if c in OUTCOME_LABELS:
        return OUTCOME_LABELS[c]
    return str(c)
# Inference
def do_predict(hours_per_week, num_songs, num_playlists, share_label, decade_label, live_events_label, lyrics_label):
    """Run the predictor on one set of survey answers.

    Args:
        hours_per_week: Weekly listening hours; coerced with ``float``.
        num_songs: Library size; coerced with ``int``.
        num_playlists: Number of playlists; coerced with ``int``.
        share_label: A key of ``LIKERT5_MAP``.
        decade_label: A key of ``DECADE_MAP``.
        live_events_label: A key of ``LIKERT5_MAP``.
        lyrics_label: A key of ``LYRICS_MAP``.

    Returns:
        A 3-tuple ``(markdown, probabilities, table)``: a markdown summary
        string, a dict of display label -> probability sorted in descending
        order (``None`` when probabilities could not be obtained), and a
        one-row DataFrame with the predicted outcome and its confidence.

    Raises:
        KeyError: If any label is not present in its encoding map.
    """
    # Label lookups happen before any numeric coercion.
    encoded_share = LIKERT5_MAP[share_label]
    encoded_decade = DECADE_MAP[decade_label]
    encoded_live = LIKERT5_MAP[live_events_label]
    encoded_lyrics = LYRICS_MAP[lyrics_label]

    # Values are listed in the same order as FEATURE_COLS.
    values = (
        float(hours_per_week),
        int(num_songs),
        int(num_playlists),
        int(encoded_share),
        int(encoded_decade),
        int(encoded_live),
        int(encoded_lyrics),
    )
    features = pandas.DataFrame([dict(zip(FEATURE_COLS, values))], columns=FEATURE_COLS)

    raw_pred = PREDICTOR.predict(features).iloc[0]

    # Probabilities are optional: any failure here degrades to "no probabilities".
    try:
        proba = PREDICTOR.predict_proba(features)
        if isinstance(proba, pandas.Series):
            proba = proba.to_frame().T
    except Exception:
        proba = None

    pred_label = _human_label(raw_pred)

    if proba is None:
        proba_dict = None
    else:
        # Classes that share a display label have their probabilities summed.
        merged = {}
        for cls, val in proba.iloc[0].items():
            name = _human_label(cls)
            merged[name] = float(val) + float(merged.get(name, 0.0))
        proba_dict = dict(sorted(merged.items(), key=lambda item: item[1], reverse=True))

    # The table reports 100% when no probability is available for the label.
    table_confidence = proba_dict.get(pred_label, 1.0) if proba_dict else 1.0
    df_out = pandas.DataFrame(
        [
            {
                "Predicted outcome": pred_label,
                "Confidence (%)": round(table_confidence * 100, 2),
            }
        ]
    )

    md = f"**Prediction:** {pred_label}"
    if proba_dict:
        md += f" \n**Confidence:** {round(proba_dict.get(pred_label, 0.0) * 100, 2)}%"

    return md, proba_dict, df_out
# Representative examples.
# Each row lists, in order: hours per week, songs in library, playlists
# created, sharing frequency, favourite decade, live-event frequency, and
# lyrics preference.
EXAMPLES = [
    [5.0, 300, 3, "Rarely", "2010s", "Rarely", "Lyrics"],
    [18.0, 1500, 25, "Often", "2000s", "Often", "Both equally"],
    [12.0, 8000, 40, "Sometimes", "1990s", "Sometimes", "Instrumental"],
    [4.0, 120, 1, "Never", "1970s and before", "Rarely", "Lyrics"],
    [22.0, 500, 10, "Very Often", "2020s", "Very Often", "Lyrics"],
]
# Gradio UI
# Layout: three rows of inputs, then the three outputs produced by
# ``do_predict`` (markdown summary, class probabilities, compact table).
with gradio.Blocks() as demo:
    with gradio.Row():
        hours_per_week = gradio.Slider(0, 80, step=0.5, value=5.0, label=FEATURE_COLS[0])
        num_songs = gradio.Number(value=200, precision=0, label=FEATURE_COLS[1])
        num_playlists = gradio.Number(value=5, precision=0, label=FEATURE_COLS[2])
    with gradio.Row():
        share_label = gradio.Radio(choices=LIKERT5_LABELS, value="Sometimes", label="How often do you share music with others?")
        live_events_label = gradio.Radio(choices=LIKERT5_LABELS, value="Rarely", label="How often do you attend live music events?")
    with gradio.Row():
        decade_label = gradio.Radio(choices=DECADE_LABELS, value="2010s", label="Which decade of music do you listen to most?")
        lyrics_label = gradio.Radio(choices=LYRICS_LABELS, value="Lyrics", label="Do you prefer songs with lyrics or instrumental music?")

    # ``do_predict`` returns (markdown, probabilities, table). The markdown
    # component was previously missing, so the returned values were bound to
    # the wrong output components.
    pred_markdown = gradio.Markdown()
    proba_pretty = gradio.Label(num_top_classes=5, label="Class probabilities")
    pred_table = gradio.Dataframe(headers=["Predicted outcome", "Confidence (%)"], label="Prediction (compact)", interactive=False)

    # Input order must match the positional parameters of ``do_predict``;
    # output order must match its return tuple.
    inputs = [hours_per_week, num_songs, num_playlists, share_label, decade_label, live_events_label, lyrics_label]
    outputs = [pred_markdown, proba_pretty, pred_table]

    # Re-run the prediction whenever any input changes.
    for comp in inputs:
        comp.change(fn=do_predict, inputs=inputs, outputs=outputs)

    gradio.Examples(
        examples=EXAMPLES,
        inputs=inputs,
        label="Representative examples",
        examples_per_page=5,
        cache_examples=False,
    )

if __name__ == "__main__":
    demo.launch()