naonaowyh commited on
Commit
fc80ff8
·
verified ·
1 Parent(s): c40fe64

initial leaderboard

Browse files
Large Language Model Scientific Capability.csv ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,Type,Parameters,Knowl. Und.,Code Gen.,Symbolic Reason.,Hypoth. Gen.,Overall
2
+ Claude 4.5 Sonnet,Close,,60.67 ,21.73 ,40.36 ,56.10 ,44.72
3
+ Claude4-1-Opus,Close,,60.87 ,25.32 ,38.69 ,29.47 ,38.58
4
+ GPT-4o,Close,,60.84 ,17.67 ,32.09 ,33.04 ,35.91
5
+ GPT-5,Close,,74.05 ,29.21 ,39.91 ,45.67 ,47.21
6
+ GPT-o3,Close,,76.05 ,25.26 ,38.14 ,34.14 ,43.40
7
+ Gemini-2.5-Flash,Close,,50.46 ,18.28 ,32.07 ,40.86 ,35.42
8
+ Gemini-2.5-Pro,Close,,59.34 ,24.77 ,34.96 ,50.73 ,42.45
9
+ Grok-2-vision-1212,Close,,50.14 ,20.60 ,28.21 ,49.63 ,37.14
10
+ Ling-flash-2.0,Open,100B,53.39 ,25.60 ,37.98 ,50.29 ,41.81
11
+ Seed1.6-vision,Close,,65.78 ,21.49 ,39.24 ,45.00 ,42.88
12
+ DeepSeek-R1,Open,685B,45.17 ,0.06 ,20.00 ,49.73 ,28.74
13
+ GLM-4.5V,Open,106B,52.78 ,3.24 ,13.43 ,42.23 ,27.92
14
+ InternS1,Open,241B,66.14 ,17.08 ,31.62 ,37.45 ,38.07
15
+ Kimi-k2,Open,1040B,62.49 ,20.86 ,38.59 ,42.28 ,41.06
16
+ Llama 4 Maverick,Open,400B,57.22 ,18.26 ,38.97 ,38.31 ,38.19
17
+ Qwen3-VL-235B-A22B,Open,235B,65.98 ,18.00 ,49.93 ,40.62 ,43.63
18
+ Qwen3-Max,Open,1000B,63.14 ,43.97 ,41.04 ,42.12 ,47.57
19
+ GPT-5.1,Close,,69.23 ,25.63 ,32.44 ,41.45 ,42.19
20
+ Gemini-3-Pro,Close,,66.06 ,29.57 ,45.19 ,61.51 ,50.58
Multimodal Model Disciplinary Leaderboard.csv ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,Type,Parameters,Life Sci. ,Astronomy,Earth Sci. ,Chemistry,Mat. Sci. ,Physics,Overall
2
+ Claude 4.5 Sonnet,Close,,43.86 ,34.23 ,64.66 ,71.27 ,83.23 ,40.36 ,56.27
3
+ Claude4-1-Opus,Close,,42.49 ,39.87 ,67.47 ,71.94 ,83.23 ,38.69 ,57.28
4
+ GPT-4o,Close,,54.46 ,30.73 ,63.08 ,72.37 ,61.54 ,32.09 ,52.38
5
+ GPT-5,Close,,59.49 ,44.57 ,74.43 ,81.62 ,93.54 ,39.91 ,65.59
6
+ GPT-o3,Close,,61.57 ,42.82 ,74.15 ,81.77 ,93.85 ,38.14 ,65.38
7
+ Gemini-2.5-Flash,Close,,35.61 ,31.94 ,68.21 ,75.12 ,62.15 ,32.07 ,50.85
8
+ Gemini-2.5-Pro,Close,,34.85 ,43.40 ,70.52 ,78.29 ,83.23 ,34.96 ,57.54
9
+ Grok-2-vision-1212,Close,,43.44 ,33.51 ,58.80 ,63.04 ,49.08 ,28.21 ,46.01
10
+ Ling-flash-2.0,Open,100B,28.83 ,48.12 ,69.55 ,66.80 ,63.69 ,37.98 ,52.49
11
+ Seed1.6-vision,Close,,58.53 ,32.21 ,69.46 ,75.48 ,68.92 ,39.24 ,57.31
12
+ DeepSeek-R1,Open,685B,27.15 ,0.11 ,63.14 ,77.28 ,44.46 ,20.00 ,38.69
13
+ GLM-4.5V,Open,106B,54.11 ,0.32 ,52.68 ,71.38 ,57.08 ,13.43 ,41.50
14
+ InternS1,Open,241B,53.70 ,29.55 ,68.52 ,75.28 ,81.69 ,31.62 ,56.73
15
+ Kimi-k2,Open,1040B,41.10 ,35.57 ,77.77 ,77.04 ,71.38 ,38.59 ,56.91
16
+ Llama 4 Maverick,Open,400B,42.92 ,31.91 ,59.82 ,68.06 ,80.77 ,38.97 ,53.74
17
+ Qwen3-VL-235B-A22B,Open,235B,56.69 ,29.84 ,67.81 ,77.39 ,81.85 ,49.93 ,60.58
18
+ Qwen3-Max,Open,1000B,38.95 ,75.64 ,67.38 ,76.38 ,69.54 ,41.04 ,61.49
19
+ GPT-5.1,Close,,60.54 ,42.02 ,73.88 ,76.45 ,61.08 ,32.44 ,57.73
20
+ Gemini-3-Pro,Close,,49.20 ,42.23 ,72.49 ,83.08 ,91.23 ,45.19 ,63.90
Multimodal Model Scientific Capability.csv ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model,Type,Parameters,Sci.MM-Percep.,Sci.Img-Und.,Sci.MM-Reason.,Overall,
2
+ Claude 4.5 Sonnet,Close,,57.87 ,43.64 ,56.11 ,52.54 ,
3
+ Claude4-1-Opus,Close,,58.25 ,45.19 ,58.66 ,54.03 ,
4
+ GPT-4o,Close,,52.78 ,25.93 ,57.97 ,45.56 ,
5
+ GPT-5,Close,,59.94 ,42.44 ,61.46 ,54.61 ,
6
+ GPT-o3,Close,,55.23 ,32.84 ,59.27 ,49.11 ,
7
+ Gemini-2.5-Flash,Close,,55.98 ,38.20 ,57.22 ,50.47 ,
8
+ Gemini-2.5-Pro,Close,,52.12 ,43.76 ,61.28 ,52.39 ,
9
+ Grok-2-vision-1212,Close,,64.00 ,25.04 ,51.76 ,46.93 ,
10
+ Seed1.6-vision,Close,,65.79 ,44.75 ,57.11 ,55.88 ,
11
+ GLM-4.5V,Open,106B,59.10 ,38.57 ,51.04 ,49.57 ,
12
+ InternS1,Open,241B,60.89 ,45.73 ,56.47 ,54.36 ,
13
+ Llama 4 Maverick,Open,400B,56.74 ,36.83 ,55.39 ,49.65 ,
14
+ Qwen3-VL-235B-A22B,Open,235B,72.29 ,38.35 ,50.83 ,53.82 ,
15
+ Qwen3-Max,Open,1000B,24.51 ,20.40 ,49.86 ,31.59 ,
16
+ GPT-5.1,Close,,54.10 ,33.05 ,58.73 ,48.63 ,
17
+ Gemini-3-Pro,Close,,66.54 ,55.62 ,66.49 ,62.88 ,
app.py CHANGED
@@ -1,204 +1,463 @@
 
 
 
1
  import gradio as gr
2
- from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
3
  import pandas as pd
4
- from apscheduler.schedulers.background import BackgroundScheduler
5
- from huggingface_hub import snapshot_download
6
-
7
- from src.about import (
8
- CITATION_BUTTON_LABEL,
9
- CITATION_BUTTON_TEXT,
10
- EVALUATION_QUEUE_TEXT,
11
- INTRODUCTION_TEXT,
12
- LLM_BENCHMARKS_TEXT,
13
- TITLE,
14
- )
15
- from src.display.css_html_js import custom_css
16
- from src.display.utils import (
17
- BENCHMARK_COLS,
18
- COLS,
19
- EVAL_COLS,
20
- EVAL_TYPES,
21
- AutoEvalColumn,
22
- ModelType,
23
- fields,
24
- WeightType,
25
- Precision
26
- )
27
- from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
28
- from src.populate import get_evaluation_queue_df, get_leaderboard_df
29
- from src.submission.submit import add_new_eval
30
-
31
-
32
- def restart_space():
33
- API.restart_space(repo_id=REPO_ID)
34
-
35
- ### Space initialisation
36
- try:
37
- print(EVAL_REQUESTS_PATH)
38
- snapshot_download(
39
- repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
40
- )
41
- except Exception:
42
- restart_space()
43
- try:
44
- print(EVAL_RESULTS_PATH)
45
- snapshot_download(
46
- repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
47
- )
48
- except Exception:
49
- restart_space()
50
-
51
-
52
- LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
53
-
54
- (
55
- finished_eval_queue_df,
56
- running_eval_queue_df,
57
- pending_eval_queue_df,
58
- ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
59
-
60
- def init_leaderboard(dataframe):
61
- if dataframe is None or dataframe.empty:
62
- raise ValueError("Leaderboard DataFrame is empty or None.")
63
- return Leaderboard(
64
- value=dataframe,
65
- datatype=[c.type for c in fields(AutoEvalColumn)],
66
- select_columns=SelectColumns(
67
- default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
68
- cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
69
- label="Select Columns to Display:",
70
- ),
71
- search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
72
- hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
73
- filter_columns=[
74
- ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
75
- ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
76
- ColumnFilter(
77
- AutoEvalColumn.params.name,
78
- type="slider",
79
- min=0.01,
80
- max=150,
81
- label="Select the number of parameters (B)",
82
- ),
83
- ColumnFilter(
84
- AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
85
- ),
86
- ],
87
- bool_checkboxgroup_label="Hide models",
88
- interactive=False,
89
- )
90
-
91
-
92
- demo = gr.Blocks(css=custom_css)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  with demo:
94
  gr.HTML(TITLE)
95
- gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
96
-
97
- with gr.Tabs(elem_classes="tab-buttons") as tabs:
98
- with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
99
- leaderboard = init_leaderboard(LEADERBOARD_DF)
100
-
101
- with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
102
- gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
103
-
104
- with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
105
- with gr.Column():
106
- with gr.Row():
107
- gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
108
-
109
- with gr.Column():
110
- with gr.Accordion(
111
- f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
112
- open=False,
113
- ):
114
- with gr.Row():
115
- finished_eval_table = gr.components.Dataframe(
116
- value=finished_eval_queue_df,
117
- headers=EVAL_COLS,
118
- datatype=EVAL_TYPES,
119
- row_count=5,
120
- )
121
- with gr.Accordion(
122
- f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
123
- open=False,
124
- ):
125
- with gr.Row():
126
- running_eval_table = gr.components.Dataframe(
127
- value=running_eval_queue_df,
128
- headers=EVAL_COLS,
129
- datatype=EVAL_TYPES,
130
- row_count=5,
131
- )
132
-
133
- with gr.Accordion(
134
- f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
135
- open=False,
136
- ):
137
- with gr.Row():
138
- pending_eval_table = gr.components.Dataframe(
139
- value=pending_eval_queue_df,
140
- headers=EVAL_COLS,
141
- datatype=EVAL_TYPES,
142
- row_count=5,
143
- )
144
- with gr.Row():
145
- gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
146
-
147
- with gr.Row():
148
- with gr.Column():
149
- model_name_textbox = gr.Textbox(label="Model name")
150
- revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
151
- model_type = gr.Dropdown(
152
- choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
153
- label="Model type",
154
- multiselect=False,
155
- value=None,
156
- interactive=True,
157
- )
158
-
159
- with gr.Column():
160
- precision = gr.Dropdown(
161
- choices=[i.value.name for i in Precision if i != Precision.Unknown],
162
- label="Precision",
163
- multiselect=False,
164
- value="float16",
165
- interactive=True,
166
- )
167
- weight_type = gr.Dropdown(
168
- choices=[i.value.name for i in WeightType],
169
- label="Weights type",
170
- multiselect=False,
171
- value="Original",
172
- interactive=True,
173
- )
174
- base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
175
-
176
- submit_button = gr.Button("Submit Eval")
177
- submission_result = gr.Markdown()
178
- submit_button.click(
179
- add_new_eval,
180
- [
181
- model_name_textbox,
182
- base_model_name_textbox,
183
- revision_name_textbox,
184
- precision,
185
- weight_type,
186
- model_type,
187
- ],
188
- submission_result,
189
  )
 
190
 
191
  with gr.Row():
192
- with gr.Accordion("📙 Citation", open=False):
193
- citation_button = gr.Textbox(
194
- value=CITATION_BUTTON_TEXT,
195
- label=CITATION_BUTTON_LABEL,
196
- lines=20,
197
- elem_id="citation-button",
198
- show_copy_button=True,
199
- )
 
 
200
 
201
- scheduler = BackgroundScheduler()
202
- scheduler.add_job(restart_space, "interval", seconds=1800)
203
- scheduler.start()
204
- demo.queue(default_concurrency_limit=40).launch()
 
1
+ from pathlib import Path
2
+ from typing import Optional
3
+
4
  import gradio as gr
5
+ from gradio_leaderboard import ColumnFilter, Leaderboard, SelectColumns
6
  import pandas as pd
7
+ import re
8
+
9
def _slugify(title: str) -> str:
    """Turn a leaderboard title into a lowercase, hyphen-separated element id."""
    # Collapse each run of non-alphanumeric characters into one hyphen, then
    # trim hyphens left dangling at either end.
    collapsed = re.sub(r'[^a-z0-9]+', '-', title.lower())
    return collapsed.strip('-')
11
+
12
+ # 🎨 增强后的自定义 CSS
13
+ custom_css = """
14
+ /* 全局设置:简洁、高级的字体和背景 */
15
+ :root {
16
+ --color-background-primary: #f8f8f8; /* 浅米白色背景 */
17
+ --color-background-secondary: #ffffff; /* 卡片背景 */
18
+ --color-text-primary: #333333;
19
+ --color-accent: #8e80ff; /* 浅紫色强调色 (Primary) */
20
+ --color-accent-light: #a99dff; /* 浅紫色悬停色 */
21
+ --shadow-medium: 0 4px 12px rgba(0, 0, 0, 0.08);
22
+ }
23
+
24
+ /* 全局字体:强制使用 Arial */
25
+ html, body, .gradio-container, .gradio-container * {
26
+ font-family: Arial, "Helvetica Neue", Helvetica, "Noto Sans", "PingFang SC", "Microsoft YaHei", sans-serif !important;
27
+ }
28
+
29
+ body {
30
+ background-color: var(--color-background-primary) !important;
31
+ }
32
+
33
+ /* 增加容器最大宽度以展示完整表格 */
34
+ .gradio-container {
35
+ max-width: 1400px; /* 宽度从 1800px 调窄到 1400px */
36
+ margin: 0 auto;
37
+ padding: 20px;
38
+ }
39
+
40
+ /* 标题样式 */
41
+ #space-title {
42
+ color: var(--color-text-primary);
43
+ font-size: 3em;
44
+ font-weight: 700;
45
+ margin-bottom: 0.5em;
46
+ padding-top: 20px;
47
+ }
48
+
49
+ /* Group/Block 组件的卡片样式 */
50
+ .gr-group, .gr-block {
51
+ background-color: var(--color-background-secondary);
52
+ border-radius: 12px;
53
+ box-shadow: var(--shadow-medium);
54
+ transition: box-shadow 0.3s ease;
55
+ padding: 15px;
56
+ margin-bottom: 20px;
57
+ }
58
+
59
+ .gr-group:hover, .gr-block:hover {
60
+ box-shadow: 0 6px 18px rgba(0, 0, 0, 0.12);
61
+ }
62
+
63
+ /* Leaderboard 容器:调整内部布局的关键 */
64
+ [id^="leaderboard-"] {
65
+ padding: 0 !important;
66
+ }
67
+
68
+ /* 搜索栏布局调整 (第一行) */
69
+ .leaderboard_root > div:nth-child(1) {
70
+ padding: 0 15px 15px 15px;
71
+ }
72
+
73
+ /* 过滤器和列选择布局调整 (第二行) */
74
+ .leaderboard_root > div:nth-child(2) {
75
+ display: flex;
76
+ padding: 0 15px 15px 15px;
77
+ }
78
+
79
+ .leaderboard_root .gr-form {
80
+ border: none;
81
+ }
82
+
83
+ /* Search Bar */
84
+ #search-bar-table-box {
85
+ width: 100%;
86
+ margin-bottom: 10px;
87
+ }
88
+ #search-bar-table-box > div:first-child {
89
+ background: none;
90
+ border: none;
91
+ }
92
+
93
+ /* === Select Columns to Display: 强制单行展示 === */
94
+ /* 定位 SelectColumns 的内部复选框容器 */
95
+ .leaderboard-filter-column:first-child .gr-form-checkbox-group {
96
+ /* 使用 flex 容器 */
97
+ display: flex !important;
98
+ flex-wrap: nowrap !important; /* 强制不换行 */
99
+ overflow-x: auto !important; /* 允许水平滚动 */
100
+ gap: 10px;
101
+ padding-bottom: 5px;
102
+ }
103
+
104
+ /* 确保每个复选框标签保持内联块级元素 */
105
+ .leaderboard-filter-column:first-child .gr-form-checkbox-group label {
106
+ flex-shrink: 0 !important; /* 防止选项被压缩 */
107
+ display: inline-block !important; /* 确保每个选项占据其自然宽度 */
108
+ margin: 0;
109
+ white-space: nowrap; /* 确保文字也不换行 */
110
+ }
111
+
112
+ #leaderboard-table, #leaderboard-table-lite {
113
+ margin-top: 15px;
114
+ border-radius: 8px;
115
+ overflow: hidden;
116
+ }
117
+
118
+ #leaderboard-table th {
119
+ background-color: var(--color-accent);
120
+ color: white;
121
+ font-weight: 600;
122
+ text-transform: uppercase;
123
+ border-bottom: 2px solid var(--color-accent-light);
124
+ }
125
+
126
+ #leaderboard-table tr:hover {
127
+ background-color: #f0f0f0;
128
+ cursor: pointer;
129
+ transition: background-color 0.2s ease;
130
+ }
131
+
132
+ #leaderboard-table td:nth-child(2),
133
+ #leaderboard-table th:nth-child(2) {
134
+ max-width: 400px;
135
+ overflow: auto;
136
+ white-space: nowrap;
137
+ }
138
+
139
+ #leaderboard-table td:nth-child(3) {
140
+ font-weight: bold;
141
+ color: var(--color-accent);
142
+ }
143
+
144
+ /* Citation 区域 */
145
+ #citation-group {
146
+ padding: 20px;
147
+ margin-top: 10px;
148
+ }
149
+
150
+ #citation-button {
151
+ margin-top: 0;
152
+ padding: 0;
153
+ }
154
+
155
+ /* 修复 Citation 复制图标重叠问题 */
156
+ #citation-button label {
157
+ display: block;
158
+ position: relative;
159
+ }
160
+
161
+ #citation-button textarea {
162
+ font-family: Arial, "Helvetica Neue", Helvetica, "Noto Sans", "PingFang SC", "Microsoft YaHei", sans-serif !important;
163
+ background-color: #f1f1f1;
164
+ border: 1px solid #cccccc;
165
+ border-radius: 6px;
166
+ padding: 10px;
167
+ padding-right: 40px !important; /* 为复制按钮腾出空间 */
168
+ font-size: 14px !important;
169
+ width: 100% !important;
170
+ box-sizing: border-box;
171
+ }
172
+
173
+ /* 调整复制按钮的位置 */
174
+ #citation-button > label > button {
175
+ position: absolute;
176
+ top: 10px;
177
+ right: 10px;
178
+ margin: 0;
179
+ transform: scale(1.1);
180
+ transition: transform 0.2s ease;
181
+ background-color: var(--color-accent) !important;
182
+ color: white !important;
183
+ border: none !important;
184
+ border-radius: 6px;
185
+ z-index: 10;
186
+ }
187
+
188
+ #citation-button > label > button:hover {
189
+ transform: scale(1.2);
190
+ background-color: var(--color-accent-light) !important;
191
+ }
192
+
193
+ /* Leaderboard 内部过滤/选择组件微调 */
194
+ .leaderboard_root .leaderboard-filter-column:last-child {
195
+ flex-grow: 1;
196
+ max-width: 50%;
197
+ }
198
+
199
+ .leaderboard_root .leaderboard-filter-column:first-child {
200
+ max-width: 50%;
201
+ padding-right: 20px;
202
+ }
203
+
204
+ /* 其他 Gradio 元素的简洁化 */
205
+ .wrap-inner input[type="text"], .wrap-inner input[type="number"] {
206
+ border-radius: 6px;
207
+ border: 1px solid #cccccc;
208
+ padding: 8px 12px;
209
+ }
210
+
211
+ /* ==== Score bar cells ==== */
212
+ .leaderboard-cell-bar {
213
+ position: relative;
214
+ display: block;
215
+ width: 100%;
216
+ height: 28px;
217
+ line-height: 28px;
218
+ background: #f5f3ff; /* light purple background */
219
+ border-radius: 8px;
220
+ overflow: hidden;
221
+ padding-left: 38px; /* leave room for dot */
222
+ color: #1d1b84; /* dark purple text */
223
+ font-weight: 600;
224
+ }
225
+ .leaderboard-cell-bar .bar-fill {
226
+ position: absolute;
227
+ left: 0;
228
+ top: 0;
229
+ height: 100%;
230
+ width: var(--w, 0%);
231
+ background: linear-gradient(90deg, #6c5ce7 0%, #a29bfe 100%);
232
+ opacity: 0.25;
233
+ }
234
+ .leaderboard-cell-bar .bar-dot {
235
+ position: absolute;
236
+ left: 10px;
237
+ top: 50%;
238
+ transform: translateY(-50%);
239
+ width: 12px;
240
+ height: 12px;
241
+ border-radius: 50%;
242
+ background: #3c1be3;
243
+ box-shadow: 0 0 0 4px rgba(60, 27, 227, 0.08);
244
+ }
245
+ .leaderboard-cell-bar .bar-text {
246
+ position: relative;
247
+ z-index: 1;
248
+ padding-right: 10px;
249
+ }
250
+ """
251
+
252
+ TITLE = """<h1 align="center" id="space-title">SciEval Leaderboards 🏆</h1>"""
253
+ INFO = """<p align="center">
254
+ <a href="https://huggingface.co/datasets/InternScience/SciEval"><b>HuggingFace</b></a> ·
255
+ <a href="https://github.com/InternScience/SciEvalKit"><b>GitHub</b></a>
256
+ </p>"""
257
+
258
+ CITATION_BUTTON_LABEL = "📖 Citation"
259
+ CITATION_BUTTON_TEXT = r"""
260
+ @article{scieval2025,
261
+ title={SciEvalKit: An Open-source Evaluation Toolkit for Scientific General Intelligence},
262
+ author={SciPrismaX Team},
263
+ journal={arXiv preprint},
264
+ year={2025}
265
+ }
266
+ """
267
+
268
+ LEADERBOARD_FILES = [
269
+ ("Large Language Model Scientific Capability", "Large Language Model Scientific Capability.csv"),
270
+ ("Multimodal Model Scientific Capability", "Multimodal Model Scientific Capability.csv"),
271
+ ("Multimodal Model Disciplinary Leaderboard", "Multimodal Model Disciplinary Leaderboard.csv"),
272
+ ]
273
+
274
+
275
def strip_auxiliary_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Drop index columns produced by spreadsheet exports (e.g. 'Unnamed: 0')."""
    keep = [col for col in df.columns if not str(col).startswith("Unnamed")]
    return df.loc[:, keep]
278
+
279
+
280
def find_sort_column(df: pd.DataFrame) -> Optional[str]:
    """Pick the default sort column: a preferred score name, else the first numeric column, else None."""
    preferred = {"overall", "score", "avg", "average"}
    numeric = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
    for candidate in numeric:
        if candidate.lower() in preferred:
            return candidate
    return numeric[0] if numeric else None
288
+
289
+
290
def _percent_widths(series: pd.Series) -> pd.Series:
    """Map a numeric series onto 0-100 bar widths.

    Fractional scores (0-1) are scaled up to percentages, values already in
    0-100 pass through unchanged, and anything else is min-max scaled.

    The fractional check must run BEFORE the percentage check: [0, 1] is a
    subset of [0, 100], so the previous ordering made the 0-1 branch
    unreachable and rendered fractional scores as near-zero bars.
    """
    s = series.astype(float)
    # Values look like fractions (e.g. 0.85) -> express as percentages.
    if s.min() >= 0 and s.max() <= 1.0:
        return s * 100.0
    # Values already look like percentages.
    if s.min() >= 0 and s.max() <= 100:
        return s
    # General min-max scaling for arbitrary ranges.
    rng = s.max() - s.min()
    if rng == 0:
        # All values identical: draw every bar at half width.
        return pd.Series([50.0] * len(s), index=s.index)
    return (s - s.min()) / rng * 100.0


def add_bar_cells(df: pd.DataFrame, exclude: Optional[list[str]] = None) -> tuple[pd.DataFrame, set[str]]:
    """Replace numeric score columns with HTML bar-cell markup.

    Parameters:
        df: source leaderboard table; not mutated, a copy is returned.
        exclude: extra column names to leave untouched, in addition to the
            identity columns ("Model", "Type", "Parameters") that are never
            bar-rendered.

    Returns:
        The converted copy and the set of column names that now hold HTML.
    """
    skip = {"Model", "Type", "Parameters"} | set(exclude or [])
    out = df.copy()
    converted: set[str] = set()
    for col in out.columns:
        if col in skip or not pd.api.types.is_numeric_dtype(out[col]):
            continue
        widths = _percent_widths(out[col])
        cells = []
        for val, w in zip(out[col], widths):
            try:
                disp = f"{float(val):.2f}"
            except Exception:
                # Values that can't be coerced to float fall back to str().
                disp = str(val)
            # Clamp so CSS never receives a width outside 0-100%.
            clamped = max(0.0, min(100.0, float(w)))
            cells.append(
                f'<div class="leaderboard-cell-bar" style="--w:{clamped:.2f}%">'
                f'<span class="bar-fill"></span>'
                f'<span class="bar-dot"></span>'
                f'<span class="bar-text">{disp}</span>'
                f"</div>"
            )
        out[col] = cells
        converted.add(col)
    return out, converted
339
+
340
+
341
def load_leaderboard_csv(path: Path) -> pd.DataFrame:
    """Read one leaderboard CSV, then clean, round, and sort it.

    Cleaning drops exported index columns and trims whitespace from headers;
    numeric scores are rounded to two decimals and rows are sorted descending
    by the best-guess score column.
    """
    frame = strip_auxiliary_columns(pd.read_csv(path))
    frame.columns = [name.strip() for name in frame.columns]

    for name in frame.columns:
        if pd.api.types.is_numeric_dtype(frame[name]):
            frame[name] = frame[name].round(2)

    sort_col = find_sort_column(frame)
    if sort_col:
        frame = frame.sort_values(by=sort_col, ascending=False)

    return frame.reset_index(drop=True)
356
+
357
+
358
def safe_load(title: str, path: Path) -> tuple[str, pd.DataFrame]:
    """Load one leaderboard, degrading to a placeholder table on any failure.

    Every exception is caught so a missing or malformed CSV never takes the
    whole app down; the placeholder row tells maintainers which file to upload.
    """
    try:
        return title, load_leaderboard_csv(path)
    except Exception as exc:
        print(f"[leaderboard] Failed to load {path}: {exc}")
        message = (
            f"Upload a CSV named '{path.name}' to populate the '{title}' leaderboard. "
            f"Error: {exc}"
        )
        return title, pd.DataFrame({"Status": [message]})
373
+
374
+
375
def build_datatypes(df: pd.DataFrame, html_cols: Optional[set[str]] = None) -> list[str]:
    """Build the per-column datatype list for gradio_leaderboard.

    Bar-rendered columns become "markdown" so their inline HTML is rendered;
    everything else is "number" or "str" based on dtype.
    """
    marked = html_cols or set()
    types: list[str] = []
    for name in df.columns:
        if name in marked:
            types.append("markdown")
        elif pd.api.types.is_numeric_dtype(df[name]):
            types.append("number")
        else:
            types.append("str")
    return types
388
+
389
+
390
def discover_leaderboards(config: list[tuple[str, str]]) -> list[tuple[str, pd.DataFrame]]:
    """Load every configured leaderboard, then sweep up stray CSVs.

    Configured files load first, in config order. Any other *.csv in the
    working directory is appended afterwards (sorted by name) so a renamed
    upload still gets a board instead of silently disappearing.
    """
    known_names = {Path(filename).name for _, filename in config}

    boards: list[tuple[str, pd.DataFrame]] = []
    for title, filename in config:
        boards.append(safe_load(title, Path(filename)))

    for candidate in sorted(Path(".").glob("*.csv")):
        if candidate.name not in known_names:
            boards.append(safe_load(candidate.stem, candidate))

    return boards
408
+
409
+
410
# Load all leaderboards once at import time; the UI below renders this list.
leaderboards = discover_leaderboards(LEADERBOARD_FILES)

# Markdown bullet list of the CSV filenames the app expects (useful for
# help/error text). Fixed: the f-string previously contained no placeholder,
# so every bullet rendered the same literal text instead of each filename.
required_filenames_md = "\n".join(f" - `{filename}`" for _, filename in LEADERBOARD_FILES)
413
+
414
demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft())
with demo:
    gr.HTML(TITLE)
    gr.HTML(INFO)

    # Titles rendered as centered <h2> headings; any extra CSVs discovered at
    # runtime fall back to a plain markdown heading. Hoisted out of the loop
    # below because the set is loop-invariant.
    centered_titles = {
        "Large Language Model Scientific Capability",
        "Multimodal Model Scientific Capability",
        "Multimodal Model Disciplinary Leaderboard",
    }

    # Render each leaderboard as its own independent card (no tabs).
    for lb_title, df in leaderboards:
        with gr.Group():
            if lb_title.strip() in centered_titles:
                gr.HTML(f'<h2 style="text-align:center; font-weight:700; margin: 0.2em 0;">{lb_title}</h2>')
            else:
                gr.Markdown(f"## {lb_title}")
            # Bar-style HTML rendering for numeric score columns; those columns
            # must then use the "markdown" datatype so the HTML is rendered.
            df_render, html_cols = add_bar_cells(df)
            Leaderboard(
                value=df_render,
                elem_id=f"leaderboard-{_slugify(lb_title)}",
                datatype=build_datatypes(df_render, html_cols),
                select_columns=SelectColumns(
                    default_selection=list(df_render.columns),
                    cant_deselect=[c for c in ("Model", "Type") if c in df_render.columns],
                    label="Select columns to display:",
                ),
                # Fallback to the first column when a table has no "Model"
                # column (e.g. the placeholder "Status" frame from safe_load).
                search_columns=["Model"] if "Model" in df_render.columns else [df_render.columns[0]],
                filter_columns=(
                    [ColumnFilter("Type", type="checkboxgroup", label="Model Types:")]
                    if "Type" in df_render.columns else []
                ),
                interactive=False,
            )
        gr.Markdown("---")

    # Citation block with a copy button (styled via #citation-* CSS rules).
    with gr.Row():
        with gr.Column():
            with gr.Group(elem_id="citation-group"):
                gr.Textbox(
                    value=CITATION_BUTTON_TEXT,
                    label=CITATION_BUTTON_LABEL,
                    lines=CITATION_BUTTON_TEXT.count("\n") + 1,
                    elem_id="citation-button",
                    show_copy_button=True,
                    interactive=False,
                )

demo.queue(default_concurrency_limit=40).launch()