naonaowyh commited on
Commit
fc80ff8
·
verified ·
1 Parent(s): c40fe64

initial leaderboard

Browse files
Large Language Model Scientific Capability.csv ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,Type,Parameters,Knowl. Und.,Code Gen.,Symbolic Reason.,Hypoth. Gen.,Overall
2
+ Claude 4.5 Sonnet,Close,,60.67 ,21.73 ,40.36 ,56.10 ,44.72
3
+ Claude4-1-Opus,Close,,60.87 ,25.32 ,38.69 ,29.47 ,38.58
4
+ GPT-4o,Close,,60.84 ,17.67 ,32.09 ,33.04 ,35.91
5
+ GPT-5,Close,,74.05 ,29.21 ,39.91 ,45.67 ,47.21
6
+ GPT-o3,Close,,76.05 ,25.26 ,38.14 ,34.14 ,43.40
7
+ Gemini-2.5-Flash,Close,,50.46 ,18.28 ,32.07 ,40.86 ,35.42
8
+ Gemini-2.5-Pro,Close,,59.34 ,24.77 ,34.96 ,50.73 ,42.45
9
+ Grok-2-vision-1212,Close,,50.14 ,20.60 ,28.21 ,49.63 ,37.14
10
+ Ling-flash-2.0,Open,100B,53.39 ,25.60 ,37.98 ,50.29 ,41.81
11
+ Seed1.6-vision,Close,,65.78 ,21.49 ,39.24 ,45.00 ,42.88
12
+ DeepSeek-R1,Open,685B,45.17 ,0.06 ,20.00 ,49.73 ,28.74
13
+ GLM-4.5V,Open,106B,52.78 ,3.24 ,13.43 ,42.23 ,27.92
14
+ InternS1,Open,241B,66.14 ,17.08 ,31.62 ,37.45 ,38.07
15
+ Kimi-k2,Open,1040B,62.49 ,20.86 ,38.59 ,42.28 ,41.06
16
+ Llama 4 Maverick,Open,400B,57.22 ,18.26 ,38.97 ,38.31 ,38.19
17
+ Qwen3-VL-235B-A22B,Open,235B,65.98 ,18.00 ,49.93 ,40.62 ,43.63
18
+ Qwen3-Max,Open,1000B,63.14 ,43.97 ,41.04 ,42.12 ,47.57
19
+ GPT-5.1,Close,,69.23 ,25.63 ,32.44 ,41.45 ,42.19
20
+ Gemini-3-Pro,Close,,66.06 ,29.57 ,45.19 ,61.51 ,50.58
Multimodal Model Disciplinary Leaderboard.csv ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,Type,Parameters,Life Sci. ,Astronomy,Earth Sci. ,Chemistry,Mat. Sci. ,Physics,Overall
2
+ Claude 4.5 Sonnet,Close,,43.86 ,34.23 ,64.66 ,71.27 ,83.23 ,40.36 ,56.27
3
+ Claude4-1-Opus,Close,,42.49 ,39.87 ,67.47 ,71.94 ,83.23 ,38.69 ,57.28
4
+ GPT-4o,Close,,54.46 ,30.73 ,63.08 ,72.37 ,61.54 ,32.09 ,52.38
5
+ GPT-5,Close,,59.49 ,44.57 ,74.43 ,81.62 ,93.54 ,39.91 ,65.59
6
+ GPT-o3,Close,,61.57 ,42.82 ,74.15 ,81.77 ,93.85 ,38.14 ,65.38
7
+ Gemini-2.5-Flash,Close,,35.61 ,31.94 ,68.21 ,75.12 ,62.15 ,32.07 ,50.85
8
+ Gemini-2.5-Pro,Close,,34.85 ,43.40 ,70.52 ,78.29 ,83.23 ,34.96 ,57.54
9
+ Grok-2-vision-1212,Close,,43.44 ,33.51 ,58.80 ,63.04 ,49.08 ,28.21 ,46.01
10
+ Ling-flash-2.0,Open,100B,28.83 ,48.12 ,69.55 ,66.80 ,63.69 ,37.98 ,52.49
11
+ Seed1.6-vision,Close,,58.53 ,32.21 ,69.46 ,75.48 ,68.92 ,39.24 ,57.31
12
+ DeepSeek-R1,Open,685B,27.15 ,0.11 ,63.14 ,77.28 ,44.46 ,20.00 ,38.69
13
+ GLM-4.5V,Open,106B,54.11 ,0.32 ,52.68 ,71.38 ,57.08 ,13.43 ,41.50
14
+ InternS1,Open,241B,53.70 ,29.55 ,68.52 ,75.28 ,81.69 ,31.62 ,56.73
15
+ Kimi-k2,Open,1040B,41.10 ,35.57 ,77.77 ,77.04 ,71.38 ,38.59 ,56.91
16
+ Llama 4 Maverick,Open,400B,42.92 ,31.91 ,59.82 ,68.06 ,80.77 ,38.97 ,53.74
17
+ Qwen3-VL-235B-A22B,Open,235B,56.69 ,29.84 ,67.81 ,77.39 ,81.85 ,49.93 ,60.58
18
+ Qwen3-Max,Open,1000B,38.95 ,75.64 ,67.38 ,76.38 ,69.54 ,41.04 ,61.49
19
+ GPT-5.1,Close,,60.54 ,42.02 ,73.88 ,76.45 ,61.08 ,32.44 ,57.73
20
+ Gemini-3-Pro,Close,,49.20 ,42.23 ,72.49 ,83.08 ,91.23 ,45.19 ,63.90
Multimodal Model Scientific Capability.csv ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,Type,Parameters,Sci.MM-Percep.,Sci.Img-Und.,Sci.MM-Reason.,Overall,
2
+ Claude 4.5 Sonnet,Close,,57.87 ,43.64 ,56.11 ,52.54 ,
3
+ Claude4-1-Opus,Close,,58.25 ,45.19 ,58.66 ,54.03 ,
4
+ GPT-4o,Close,,52.78 ,25.93 ,57.97 ,45.56 ,
5
+ GPT-5,Close,,59.94 ,42.44 ,61.46 ,54.61 ,
6
+ GPT-o3,Close,,55.23 ,32.84 ,59.27 ,49.11 ,
7
+ Gemini-2.5-Flash,Close,,55.98 ,38.20 ,57.22 ,50.47 ,
8
+ Gemini-2.5-Pro,Close,,52.12 ,43.76 ,61.28 ,52.39 ,
9
+ Grok-2-vision-1212,Close,,64.00 ,25.04 ,51.76 ,46.93 ,
10
+ Seed1.6-vision,Close,,65.79 ,44.75 ,57.11 ,55.88 ,
11
+ GLM-4.5V,Open,106B,59.10 ,38.57 ,51.04 ,49.57 ,
12
+ InternS1,Open,241B,60.89 ,45.73 ,56.47 ,54.36 ,
13
+ Llama 4 Maverick,Open,400B,56.74 ,36.83 ,55.39 ,49.65 ,
14
+ Qwen3-VL-235B-A22B,Open,235B,72.29 ,38.35 ,50.83 ,53.82 ,
15
+ Qwen3-Max,Open,1000B,24.51 ,20.40 ,49.86 ,31.59 ,
16
+ GPT-5.1,Close,,54.10 ,33.05 ,58.73 ,48.63 ,
17
+ Gemini-3-Pro,Close,,66.54 ,55.62 ,66.49 ,62.88 ,
app.py CHANGED
@@ -1,204 +1,463 @@
 
 
 
1
  import gradio as gr
2
- from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
3
  import pandas as pd
4
- from apscheduler.schedulers.background import BackgroundScheduler
5
- from huggingface_hub import snapshot_download
6
-
7
- from src.about import (
8
- CITATION_BUTTON_LABEL,
9
- CITATION_BUTTON_TEXT,
10
- EVALUATION_QUEUE_TEXT,
11
- INTRODUCTION_TEXT,
12
- LLM_BENCHMARKS_TEXT,
13
- TITLE,
14
- )
15
- from src.display.css_html_js import custom_css
16
- from src.display.utils import (
17
- BENCHMARK_COLS,
18
- COLS,
19
- EVAL_COLS,
20
- EVAL_TYPES,
21
- AutoEvalColumn,
22
- ModelType,
23
- fields,
24
- WeightType,
25
- Precision
26
- )
27
- from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
28
- from src.populate import get_evaluation_queue_df, get_leaderboard_df
29
- from src.submission.submit import add_new_eval
30
-
31
-
32
- def restart_space():
33
- API.restart_space(repo_id=REPO_ID)
34
-
35
- ### Space initialisation
36
- try:
37
- print(EVAL_REQUESTS_PATH)
38
- snapshot_download(
39
- repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
40
- )
41
- except Exception:
42
- restart_space()
43
- try:
44
- print(EVAL_RESULTS_PATH)
45
- snapshot_download(
46
- repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
47
- )
48
- except Exception:
49
- restart_space()
50
-
51
-
52
- LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
53
-
54
- (
55
- finished_eval_queue_df,
56
- running_eval_queue_df,
57
- pending_eval_queue_df,
58
- ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
59
-
60
- def init_leaderboard(dataframe):
61
- if dataframe is None or dataframe.empty:
62
- raise ValueError("Leaderboard DataFrame is empty or None.")
63
- return Leaderboard(
64
- value=dataframe,
65
- datatype=[c.type for c in fields(AutoEvalColumn)],
66
- select_columns=SelectColumns(
67
- default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
68
- cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
69
- label="Select Columns to Display:",
70
- ),
71
- search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
72
- hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
73
- filter_columns=[
74
- ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
75
- ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
76
- ColumnFilter(
77
- AutoEvalColumn.params.name,
78
- type="slider",
79
- min=0.01,
80
- max=150,
81
- label="Select the number of parameters (B)",
82
- ),
83
- ColumnFilter(
84
- AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
85
- ),
86
- ],
87
- bool_checkboxgroup_label="Hide models",
88
- interactive=False,
89
- )
90
-
91
-
92
- demo = gr.Blocks(css=custom_css)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  with demo:
94
  gr.HTML(TITLE)
95
- gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
96
-
97
- with gr.Tabs(elem_classes="tab-buttons") as tabs:
98
- with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
99
- leaderboard = init_leaderboard(LEADERBOARD_DF)
100
-
101
- with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
102
- gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
103
-
104
- with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
105
- with gr.Column():
106
- with gr.Row():
107
- gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
108
-
109
- with gr.Column():
110
- with gr.Accordion(
111
- f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
112
- open=False,
113
- ):
114
- with gr.Row():
115
- finished_eval_table = gr.components.Dataframe(
116
- value=finished_eval_queue_df,
117
- headers=EVAL_COLS,
118
- datatype=EVAL_TYPES,
119
- row_count=5,
120
- )
121
- with gr.Accordion(
122
- f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
123
- open=False,
124
- ):
125
- with gr.Row():
126
- running_eval_table = gr.components.Dataframe(
127
- value=running_eval_queue_df,
128
- headers=EVAL_COLS,
129
- datatype=EVAL_TYPES,
130
- row_count=5,
131
- )
132
-
133
- with gr.Accordion(
134
- f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
135
- open=False,
136
- ):
137
- with gr.Row():
138
- pending_eval_table = gr.components.Dataframe(
139
- value=pending_eval_queue_df,
140
- headers=EVAL_COLS,
141
- datatype=EVAL_TYPES,
142
- row_count=5,
143
- )
144
- with gr.Row():
145
- gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
146
-
147
- with gr.Row():
148
- with gr.Column():
149
- model_name_textbox = gr.Textbox(label="Model name")
150
- revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
151
- model_type = gr.Dropdown(
152
- choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
153
- label="Model type",
154
- multiselect=False,
155
- value=None,
156
- interactive=True,
157
- )
158
-
159
- with gr.Column():
160
- precision = gr.Dropdown(
161
- choices=[i.value.name for i in Precision if i != Precision.Unknown],
162
- label="Precision",
163
- multiselect=False,
164
- value="float16",
165
- interactive=True,
166
- )
167
- weight_type = gr.Dropdown(
168
- choices=[i.value.name for i in WeightType],
169
- label="Weights type",
170
- multiselect=False,
171
- value="Original",
172
- interactive=True,
173
- )
174
- base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
175
-
176
- submit_button = gr.Button("Submit Eval")
177
- submission_result = gr.Markdown()
178
- submit_button.click(
179
- add_new_eval,
180
- [
181
- model_name_textbox,
182
- base_model_name_textbox,
183
- revision_name_textbox,
184
- precision,
185
- weight_type,
186
- model_type,
187
- ],
188
- submission_result,
189
  )
 
190
 
191
  with gr.Row():
192
- with gr.Accordion("📙 Citation", open=False):
193
- citation_button = gr.Textbox(
194
- value=CITATION_BUTTON_TEXT,
195
- label=CITATION_BUTTON_LABEL,
196
- lines=20,
197
- elem_id="citation-button",
198
- show_copy_button=True,
199
- )
 
 
200
 
201
- scheduler = BackgroundScheduler()
202
- scheduler.add_job(restart_space, "interval", seconds=1800)
203
- scheduler.start()
204
- demo.queue(default_concurrency_limit=40).launch()
 
1
+ from pathlib import Path
2
+ from typing import Optional
3
+
4
  import gradio as gr
5
+ from gradio_leaderboard import ColumnFilter, Leaderboard, SelectColumns
6
  import pandas as pd
7
+ import re
8
+
9
def _slugify(title: str) -> str:
    """Turn a leaderboard title into a lowercase, hyphen-separated element id."""
    # Collapse each run of non-alphanumeric characters into one hyphen, then
    # trim hyphens left dangling at either end.
    collapsed = re.sub(r'[^a-z0-9]+', '-', title.lower())
    return collapsed.strip('-')
11
+
12
+ # 🎨 增强后的自定义 CSS
13
+ custom_css = """
14
+ /* 全局设置:简洁、高级的字体和背景 */
15
+ :root {
16
+ --color-background-primary: #f8f8f8; /* 浅米白色背景 */
17
+ --color-background-secondary: #ffffff; /* 卡片背景 */
18
+ --color-text-primary: #333333;
19
+ --color-accent: #8e80ff; /* 浅紫色强调色 (Primary) */
20
+ --color-accent-light: #a99dff; /* 浅紫色悬停色 */
21
+ --shadow-medium: 0 4px 12px rgba(0, 0, 0, 0.08);
22
+ }
23
+
24
+ /* 全局字体:强制使用 Arial */
25
+ html, body, .gradio-container, .gradio-container * {
26
+ font-family: Arial, "Helvetica Neue", Helvetica, "Noto Sans", "PingFang SC", "Microsoft YaHei", sans-serif !important;
27
+ }
28
+
29
+ body {
30
+ background-color: var(--color-background-primary) !important;
31
+ }
32
+
33
+ /* 增加容器最大宽度以展示完整表格 */
34
+ .gradio-container {
35
+ max-width: 1400px; /* 宽度从 1800px 调窄到 1400px */
36
+ margin: 0 auto;
37
+ padding: 20px;
38
+ }
39
+
40
+ /* 标题样式 */
41
+ #space-title {
42
+ color: var(--color-text-primary);
43
+ font-size: 3em;
44
+ font-weight: 700;
45
+ margin-bottom: 0.5em;
46
+ padding-top: 20px;
47
+ }
48
+
49
+ /* Group/Block 组件的卡片样式 */
50
+ .gr-group, .gr-block {
51
+ background-color: var(--color-background-secondary);
52
+ border-radius: 12px;
53
+ box-shadow: var(--shadow-medium);
54
+ transition: box-shadow 0.3s ease;
55
+ padding: 15px;
56
+ margin-bottom: 20px;
57
+ }
58
+
59
+ .gr-group:hover, .gr-block:hover {
60
+ box-shadow: 0 6px 18px rgba(0, 0, 0, 0.12);
61
+ }
62
+
63
+ /* Leaderboard 容器:调整内部布局的关键 */
64
+ [id^="leaderboard-"] {
65
+ padding: 0 !important;
66
+ }
67
+
68
+ /* 搜索栏布局调整 (第一行) */
69
+ .leaderboard_root > div:nth-child(1) {
70
+ padding: 0 15px 15px 15px;
71
+ }
72
+
73
+ /* 过滤器和列选择布局调整 (第二行) */
74
+ .leaderboard_root > div:nth-child(2) {
75
+ display: flex;
76
+ padding: 0 15px 15px 15px;
77
+ }
78
+
79
+ .leaderboard_root .gr-form {
80
+ border: none;
81
+ }
82
+
83
+ /* Search Bar */
84
+ #search-bar-table-box {
85
+ width: 100%;
86
+ margin-bottom: 10px;
87
+ }
88
+ #search-bar-table-box > div:first-child {
89
+ background: none;
90
+ border: none;
91
+ }
92
+
93
+ /* === Select Columns to Display: 强制单行展示 === */
94
+ /* 定位 SelectColumns 的内部复选框容器 */
95
+ .leaderboard-filter-column:first-child .gr-form-checkbox-group {
96
+ /* 使用 flex 容器 */
97
+ display: flex !important;
98
+ flex-wrap: nowrap !important; /* 强制不换行 */
99
+ overflow-x: auto !important; /* 允许水平滚动 */
100
+ gap: 10px;
101
+ padding-bottom: 5px;
102
+ }
103
+
104
+ /* 确保每个复选框标签保持内联块级元素 */
105
+ .leaderboard-filter-column:first-child .gr-form-checkbox-group label {
106
+ flex-shrink: 0 !important; /* 防止选项被压缩 */
107
+ display: inline-block !important; /* 确保每个选项占据其自然宽度 */
108
+ margin: 0;
109
+ white-space: nowrap; /* 确保文字也不换行 */
110
+ }
111
+
112
+ #leaderboard-table, #leaderboard-table-lite {
113
+ margin-top: 15px;
114
+ border-radius: 8px;
115
+ overflow: hidden;
116
+ }
117
+
118
+ #leaderboard-table th {
119
+ background-color: var(--color-accent);
120
+ color: white;
121
+ font-weight: 600;
122
+ text-transform: uppercase;
123
+ border-bottom: 2px solid var(--color-accent-light);
124
+ }
125
+
126
+ #leaderboard-table tr:hover {
127
+ background-color: #f0f0f0;
128
+ cursor: pointer;
129
+ transition: background-color 0.2s ease;
130
+ }
131
+
132
+ #leaderboard-table td:nth-child(2),
133
+ #leaderboard-table th:nth-child(2) {
134
+ max-width: 400px;
135
+ overflow: auto;
136
+ white-space: nowrap;
137
+ }
138
+
139
+ #leaderboard-table td:nth-child(3) {
140
+ font-weight: bold;
141
+ color: var(--color-accent);
142
+ }
143
+
144
+ /* Citation 区域 */
145
+ #citation-group {
146
+ padding: 20px;
147
+ margin-top: 10px;
148
+ }
149
+
150
+ #citation-button {
151
+ margin-top: 0;
152
+ padding: 0;
153
+ }
154
+
155
+ /* 修复 Citation 复制图标重叠问题 */
156
+ #citation-button label {
157
+ display: block;
158
+ position: relative;
159
+ }
160
+
161
+ #citation-button textarea {
162
+ font-family: Arial, "Helvetica Neue", Helvetica, "Noto Sans", "PingFang SC", "Microsoft YaHei", sans-serif !important;
163
+ background-color: #f1f1f1;
164
+ border: 1px solid #cccccc;
165
+ border-radius: 6px;
166
+ padding: 10px;
167
+ padding-right: 40px !important; /* 为复制按钮腾出空间 */
168
+ font-size: 14px !important;
169
+ width: 100% !important;
170
+ box-sizing: border-box;
171
+ }
172
+
173
+ /* 调整复制按钮的位置 */
174
+ #citation-button > label > button {
175
+ position: absolute;
176
+ top: 10px;
177
+ right: 10px;
178
+ margin: 0;
179
+ transform: scale(1.1);
180
+ transition: transform 0.2s ease;
181
+ background-color: var(--color-accent) !important;
182
+ color: white !important;
183
+ border: none !important;
184
+ border-radius: 6px;
185
+ z-index: 10;
186
+ }
187
+
188
+ #citation-button > label > button:hover {
189
+ transform: scale(1.2);
190
+ background-color: var(--color-accent-light) !important;
191
+ }
192
+
193
+ /* Leaderboard 内部过滤/选择组件微调 */
194
+ .leaderboard_root .leaderboard-filter-column:last-child {
195
+ flex-grow: 1;
196
+ max-width: 50%;
197
+ }
198
+
199
+ .leaderboard_root .leaderboard-filter-column:first-child {
200
+ max-width: 50%;
201
+ padding-right: 20px;
202
+ }
203
+
204
+ /* 其他 Gradio 元素的简洁化 */
205
+ .wrap-inner input[type="text"], .wrap-inner input[type="number"] {
206
+ border-radius: 6px;
207
+ border: 1px solid #cccccc;
208
+ padding: 8px 12px;
209
+ }
210
+
211
+ /* ==== Score bar cells ==== */
212
+ .leaderboard-cell-bar {
213
+ position: relative;
214
+ display: block;
215
+ width: 100%;
216
+ height: 28px;
217
+ line-height: 28px;
218
+ background: #f5f3ff; /* light purple background */
219
+ border-radius: 8px;
220
+ overflow: hidden;
221
+ padding-left: 38px; /* leave room for dot */
222
+ color: #1d1b84; /* dark purple text */
223
+ font-weight: 600;
224
+ }
225
+ .leaderboard-cell-bar .bar-fill {
226
+ position: absolute;
227
+ left: 0;
228
+ top: 0;
229
+ height: 100%;
230
+ width: var(--w, 0%);
231
+ background: linear-gradient(90deg, #6c5ce7 0%, #a29bfe 100%);
232
+ opacity: 0.25;
233
+ }
234
+ .leaderboard-cell-bar .bar-dot {
235
+ position: absolute;
236
+ left: 10px;
237
+ top: 50%;
238
+ transform: translateY(-50%);
239
+ width: 12px;
240
+ height: 12px;
241
+ border-radius: 50%;
242
+ background: #3c1be3;
243
+ box-shadow: 0 0 0 4px rgba(60, 27, 227, 0.08);
244
+ }
245
+ .leaderboard-cell-bar .bar-text {
246
+ position: relative;
247
+ z-index: 1;
248
+ padding-right: 10px;
249
+ }
250
+ """
251
+
252
+ TITLE = """<h1 align="center" id="space-title">SciEval Leaderboards 🏆</h1>"""
253
+ INFO = """<p align="center">
254
+ <a href="https://huggingface.co/datasets/InternScience/SciEval"><b>HuggingFace</b></a> ·
255
+ <a href="https://github.com/InternScience/SciEvalKit"><b>GitHub</b></a>
256
+ </p>"""
257
+
258
+ CITATION_BUTTON_LABEL = "📖 Citation"
259
+ CITATION_BUTTON_TEXT = r"""
260
+ @article{scieval2025,
261
+ title={SciEvalKit: An Open-source Evaluation Toolkit for Scientific General Intelligence},
262
+ author={SciPrismaX Team},
263
+ journal={arXiv preprint},
264
+ year={2025}
265
+ }
266
+ """
267
+
268
+ LEADERBOARD_FILES = [
269
+ ("Large Language Model Scientific Capability", "Large Language Model Scientific Capability.csv"),
270
+ ("Multimodal Model Scientific Capability", "Multimodal Model Scientific Capability.csv"),
271
+ ("Multimodal Model Disciplinary Leaderboard", "Multimodal Model Disciplinary Leaderboard.csv"),
272
+ ]
273
+
274
+
275
def strip_auxiliary_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Drop index columns produced by spreadsheet exports (e.g. 'Unnamed: 0')."""
    keep = [col for col in df.columns if not str(col).startswith("Unnamed")]
    return df.loc[:, keep]
278
+
279
+
280
def find_sort_column(df: pd.DataFrame) -> Optional[str]:
    """Pick the default sort column: a preferred score name, else the first numeric column, else None."""
    preferred = {"overall", "score", "avg", "average"}
    numeric = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
    for candidate in numeric:
        if candidate.lower() in preferred:
            return candidate
    return numeric[0] if numeric else None
288
+
289
+
290
def _percent_widths(series: pd.Series) -> pd.Series:
    """Map a numeric series onto 0-100 bar widths.

    Fractional scores (0-1) are scaled up to percentages, values already in
    0-100 pass through unchanged, and anything else is min-max scaled.

    The fractional check must run BEFORE the percentage check: [0, 1] is a
    subset of [0, 100], so the previous ordering made the 0-1 branch
    unreachable and rendered fractional scores as near-zero bars.
    """
    s = series.astype(float)
    # Values look like fractions (e.g. 0.85) -> express as percentages.
    if s.min() >= 0 and s.max() <= 1.0:
        return s * 100.0
    # Values already look like percentages.
    if s.min() >= 0 and s.max() <= 100:
        return s
    # General min-max scaling for arbitrary ranges.
    rng = s.max() - s.min()
    if rng == 0:
        # All values identical: draw every bar at half width.
        return pd.Series([50.0] * len(s), index=s.index)
    return (s - s.min()) / rng * 100.0


def add_bar_cells(df: pd.DataFrame, exclude: Optional[list[str]] = None) -> tuple[pd.DataFrame, set[str]]:
    """Replace numeric score columns with HTML bar-cell markup.

    Parameters:
        df: source leaderboard table; not mutated, a copy is returned.
        exclude: extra column names to leave untouched, in addition to the
            identity columns ("Model", "Type", "Parameters") that are never
            bar-rendered.

    Returns:
        The converted copy and the set of column names that now hold HTML.
    """
    skip = {"Model", "Type", "Parameters"} | set(exclude or [])
    out = df.copy()
    converted: set[str] = set()
    for col in out.columns:
        if col in skip or not pd.api.types.is_numeric_dtype(out[col]):
            continue
        widths = _percent_widths(out[col])
        cells = []
        for val, w in zip(out[col], widths):
            try:
                disp = f"{float(val):.2f}"
            except Exception:
                # Values that can't be coerced to float fall back to str().
                disp = str(val)
            # Clamp so CSS never receives a width outside 0-100%.
            clamped = max(0.0, min(100.0, float(w)))
            cells.append(
                f'<div class="leaderboard-cell-bar" style="--w:{clamped:.2f}%">'
                f'<span class="bar-fill"></span>'
                f'<span class="bar-dot"></span>'
                f'<span class="bar-text">{disp}</span>'
                f"</div>"
            )
        out[col] = cells
        converted.add(col)
    return out, converted
339
+
340
+
341
def load_leaderboard_csv(path: Path) -> pd.DataFrame:
    """Read one leaderboard CSV, then clean, round, and sort it.

    Cleaning drops exported index columns and trims whitespace from headers;
    numeric scores are rounded to two decimals and rows are sorted descending
    by the best-guess score column.
    """
    frame = strip_auxiliary_columns(pd.read_csv(path))
    frame.columns = [name.strip() for name in frame.columns]

    for name in frame.columns:
        if pd.api.types.is_numeric_dtype(frame[name]):
            frame[name] = frame[name].round(2)

    sort_col = find_sort_column(frame)
    if sort_col:
        frame = frame.sort_values(by=sort_col, ascending=False)

    return frame.reset_index(drop=True)
356
+
357
+
358
def safe_load(title: str, path: Path) -> tuple[str, pd.DataFrame]:
    """Load one leaderboard, degrading to a placeholder table on any failure.

    Every exception is caught so a missing or malformed CSV never takes the
    whole app down; the placeholder row tells maintainers which file to upload.
    """
    try:
        return title, load_leaderboard_csv(path)
    except Exception as exc:
        print(f"[leaderboard] Failed to load {path}: {exc}")
        message = (
            f"Upload a CSV named '{path.name}' to populate the '{title}' leaderboard. "
            f"Error: {exc}"
        )
        return title, pd.DataFrame({"Status": [message]})
373
+
374
+
375
def build_datatypes(df: pd.DataFrame, html_cols: Optional[set[str]] = None) -> list[str]:
    """Build the per-column datatype list for gradio_leaderboard.

    Bar-rendered columns become "markdown" so their inline HTML is rendered;
    everything else is "number" or "str" based on dtype.
    """
    marked = html_cols or set()
    types: list[str] = []
    for name in df.columns:
        if name in marked:
            types.append("markdown")
        elif pd.api.types.is_numeric_dtype(df[name]):
            types.append("number")
        else:
            types.append("str")
    return types
388
+
389
+
390
def discover_leaderboards(config: list[tuple[str, str]]) -> list[tuple[str, pd.DataFrame]]:
    """Load every configured leaderboard, then sweep up stray CSVs.

    Configured files load first, in config order. Any other *.csv in the
    working directory is appended afterwards (sorted by name) so a renamed
    upload still gets a board instead of silently disappearing.
    """
    known_names = {Path(filename).name for _, filename in config}

    boards: list[tuple[str, pd.DataFrame]] = []
    for title, filename in config:
        boards.append(safe_load(title, Path(filename)))

    for candidate in sorted(Path(".").glob("*.csv")):
        if candidate.name not in known_names:
            boards.append(safe_load(candidate.stem, candidate))

    return boards
408
+
409
+
410
# Load all leaderboards once at import time; the UI below renders this list.
leaderboards = discover_leaderboards(LEADERBOARD_FILES)

# Markdown bullet list of the CSV filenames the app expects (useful for
# help/error text). Fixed: the f-string previously contained no placeholder,
# so every bullet rendered the same literal text instead of each filename.
required_filenames_md = "\n".join(f" - `{filename}`" for _, filename in LEADERBOARD_FILES)
413
+
414
demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft())
with demo:
    gr.HTML(TITLE)
    gr.HTML(INFO)

    # Titles rendered as centered <h2> headings; any extra CSVs discovered at
    # runtime fall back to a plain markdown heading. Hoisted out of the loop
    # below because the set is loop-invariant.
    centered_titles = {
        "Large Language Model Scientific Capability",
        "Multimodal Model Scientific Capability",
        "Multimodal Model Disciplinary Leaderboard",
    }

    # Render each leaderboard as its own independent card (no tabs).
    for lb_title, df in leaderboards:
        with gr.Group():
            if lb_title.strip() in centered_titles:
                gr.HTML(f'<h2 style="text-align:center; font-weight:700; margin: 0.2em 0;">{lb_title}</h2>')
            else:
                gr.Markdown(f"## {lb_title}")
            # Bar-style HTML rendering for numeric score columns; those columns
            # must then use the "markdown" datatype so the HTML is rendered.
            df_render, html_cols = add_bar_cells(df)
            Leaderboard(
                value=df_render,
                elem_id=f"leaderboard-{_slugify(lb_title)}",
                datatype=build_datatypes(df_render, html_cols),
                select_columns=SelectColumns(
                    default_selection=list(df_render.columns),
                    cant_deselect=[c for c in ("Model", "Type") if c in df_render.columns],
                    label="Select columns to display:",
                ),
                # Fallback to the first column when a table has no "Model"
                # column (e.g. the placeholder "Status" frame from safe_load).
                search_columns=["Model"] if "Model" in df_render.columns else [df_render.columns[0]],
                filter_columns=(
                    [ColumnFilter("Type", type="checkboxgroup", label="Model Types:")]
                    if "Type" in df_render.columns else []
                ),
                interactive=False,
            )
        gr.Markdown("---")

    # Citation block with a copy button (styled via #citation-* CSS rules).
    with gr.Row():
        with gr.Column():
            with gr.Group(elem_id="citation-group"):
                gr.Textbox(
                    value=CITATION_BUTTON_TEXT,
                    label=CITATION_BUTTON_LABEL,
                    lines=CITATION_BUTTON_TEXT.count("\n") + 1,
                    elem_id="citation-button",
                    show_copy_button=True,
                    interactive=False,
                )

demo.queue(default_concurrency_limit=40).launch()