update with gemini3 results

#14
data/community_results/New Results - June2025.csv CHANGED
@@ -78,3 +78,10 @@ GPT-5 (Aug),afrimmlu,1.0,90.6,85.8,82.8,84.6,82.0,82.6,82.0,86.4,83.0,85.2,90.0,
78
  GPT-5 (Aug),injongointent,4.0,88.3,90.9,95.8,89.2,79.8,87.3,87.8,83.8,94.7,77.2,93.3,89.4,80.6,87.5,85.0,87.3
79
  GPT-5 (Aug),sib,3.0,91.2,89.2,90.2,90.2,89.7,88.2,85.8,88.2,88.7,90.2,91.2,90.7,79.9,87.3,89.7,88.5
80
  GPT-5 (Aug),belebele,5.0,89.2,90.3,82.3,78.3,86.9,81.2,77.6,81.3,85.6,86.4,94.6,87.3,69.1,80.4,85.3,83.3
 
 
 
 
 
 
 
 
78
  GPT-5 (Aug),injongointent,4.0,88.3,90.9,95.8,89.2,79.8,87.3,87.8,83.8,94.7,77.2,93.3,89.4,80.6,87.5,85.0,87.3
79
  GPT-5 (Aug),sib,3.0,91.2,89.2,90.2,90.2,89.7,88.2,85.8,88.2,88.7,90.2,91.2,90.7,79.9,87.3,89.7,88.5
80
  GPT-5 (Aug),belebele,5.0,89.2,90.3,82.3,78.3,86.9,81.2,77.6,81.3,85.6,86.4,94.6,87.3,69.1,80.4,85.3,83.3
81
+ Gemini 3 Pro,afrixnli,3.0,90.8,84.3,81.8,83.8,78.3,31.0,81.3,84.0,83.2,80.2,80.3,84.3,71.8,78.2,81.2,77.4
82
+ Gemini 3 Pro,afrimgsm,2.0,84.4,82.8,75.2,80.8,71.6,63.6,66.4,83.6,76.4,68.4,88.0,66.4,56.4,82.4,66.0,73.4
83
+ Gemini 3 Pro,flores - en_xx,3.0,69.8,42.1,50.8,44.1,52.6,49.8,44.0,45.4,48.3,53.5,61.5,53.0,32.9,28.6,56.1,47.3
84
+ Gemini 3 Pro,afrimmlu,1.0,88.4,89.6,82.8,88.4,83.6,85.0,85.6,88.0,86.8,82.8,89.0,89.4,77.0,88.0,88.8,86.1
85
+ Gemini 3 Pro,injongointent,4.0,88.1,92.0,95.9,92.2,81.1,91.9,87.7,86.7,94.5,78.0,91.4,89.1,87.5,90.5,85.3,88.8
86
+ Gemini 3 Pro,sib,3.0,87.3,87.7,88.2,87.7,89.7,88.7,85.3,89.2,87.7,89.2,89.2,87.7,84.8,86.8,88.2,87.9
87
+ Gemini 3 Pro,belebele,5.0,76.3,76.8,54.3,63.1,75.4,71.3,74.7,68.9,73.8,61.9,74.9,78.2,72.1,73.3,77.6,71.2
data/leaderboard_json/afrobench_lite.json CHANGED
@@ -23,7 +23,8 @@
23
  "Claude 4.5 Sonnet": 69.9,
24
  "Gemini-2.5 Flash": 69.3,
25
  "Gemini-2.5 Pro": 72.5,
26
- "GPT-5 (Aug)": 83.3
 
27
  }
28
  },
29
  "Intent": {
@@ -50,7 +51,8 @@
50
  "Claude 4.5 Sonnet": 79.3,
51
  "Gemini-2.5 Flash": 87.4,
52
  "Gemini-2.5 Pro": 88.0,
53
- "GPT-5 (Aug)": 87.3
 
54
  }
55
  },
56
  "MT(en/fr-xx)": {
@@ -77,7 +79,8 @@
77
  "Claude 4.5 Sonnet": 45.2,
78
  "Gemini-2.5 Flash": 45.3,
79
  "Gemini-2.5 Pro": 46.3,
80
- "GPT-5 (Aug)": 44.8
 
81
  }
82
  },
83
  "MMLU": {
@@ -104,7 +107,8 @@
104
  "Claude 4.5 Sonnet": 74.0,
105
  "Gemini-2.5 Flash": 67.3,
106
  "Gemini-2.5 Pro": 77.4,
107
- "GPT-5 (Aug)": 83.3
 
108
  }
109
  },
110
  "Math": {
@@ -131,7 +135,8 @@
131
  "Claude 4.5 Sonnet": 69.7,
132
  "Gemini-2.5 Flash": 69.3,
133
  "Gemini-2.5 Pro": 73.2,
134
- "GPT-5 (Aug)": 73.7
 
135
  }
136
  },
137
  "Topic": {
@@ -158,7 +163,8 @@
158
  "Claude 4.5 Sonnet": 83.3,
159
  "Gemini-2.5 Flash": 86.8,
160
  "Gemini-2.5 Pro": 87.9,
161
- "GPT-5 (Aug)": 88.5
 
162
  }
163
  },
164
  "RC": {
@@ -185,7 +191,8 @@
185
  "Claude 4.5 Sonnet": 72.8,
186
  "Gemini-2.5 Flash": 41.6,
187
  "Gemini-2.5 Pro": 76.4,
188
- "GPT-5 (Aug)": 83.3
 
189
  }
190
  }
191
  }
 
23
  "Claude 4.5 Sonnet": 69.9,
24
  "Gemini-2.5 Flash": 69.3,
25
  "Gemini-2.5 Pro": 72.5,
26
+ "GPT-5 (Aug)": 83.3,
27
+ "Gemini 3 Pro": 77.4
28
  }
29
  },
30
  "Intent": {
 
51
  "Claude 4.5 Sonnet": 79.3,
52
  "Gemini-2.5 Flash": 87.4,
53
  "Gemini-2.5 Pro": 88.0,
54
+ "GPT-5 (Aug)": 87.3,
55
+ "Gemini 3 Pro": 88.8
56
  }
57
  },
58
  "MT(en/fr-xx)": {
 
79
  "Claude 4.5 Sonnet": 45.2,
80
  "Gemini-2.5 Flash": 45.3,
81
  "Gemini-2.5 Pro": 46.3,
82
+ "GPT-5 (Aug)": 44.8,
83
+ "Gemini 3 Pro": 47.3
84
  }
85
  },
86
  "MMLU": {
 
107
  "Claude 4.5 Sonnet": 74.0,
108
  "Gemini-2.5 Flash": 67.3,
109
  "Gemini-2.5 Pro": 77.4,
110
+ "GPT-5 (Aug)": 83.3,
111
+ "Gemini 3 Pro": 86.1
112
  }
113
  },
114
  "Math": {
 
135
  "Claude 4.5 Sonnet": 69.7,
136
  "Gemini-2.5 Flash": 69.3,
137
  "Gemini-2.5 Pro": 73.2,
138
+ "GPT-5 (Aug)": 73.7,
139
+ "Gemini 3 Pro": 73.4
140
  }
141
  },
142
  "Topic": {
 
163
  "Claude 4.5 Sonnet": 83.3,
164
  "Gemini-2.5 Flash": 86.8,
165
  "Gemini-2.5 Pro": 87.9,
166
+ "GPT-5 (Aug)": 88.5,
167
+ "Gemini 3 Pro": 87.9
168
  }
169
  },
170
  "RC": {
 
191
  "Claude 4.5 Sonnet": 72.8,
192
  "Gemini-2.5 Flash": 41.6,
193
  "Gemini-2.5 Pro": 76.4,
194
+ "GPT-5 (Aug)": 83.3,
195
+ "Gemini 3 Pro": 71.2
196
  }
197
  }
198
  }
data/leaderboard_json/lite_language_scores.json CHANGED
@@ -366,5 +366,21 @@
366
  "wol": 63.4,
367
  "yor": 75.4,
368
  "zul": 80.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369
  }
370
  }
 
366
  "wol": 63.4,
367
  "yor": 75.4,
368
  "zul": 80.0
369
+ },
370
+ "Gemini 3 Pro": {
371
+ "amh": 79.3,
372
+ "hau": 75.6,
373
+ "ibo": 77.2,
374
+ "kin": 76.0,
375
+ "lin": 68.8,
376
+ "lug": 75.0,
377
+ "orm": 78.0,
378
+ "sna": 78.7,
379
+ "sot": 73.4,
380
+ "swa": 82.0,
381
+ "xho": 78.3,
382
+ "wol": 68.9,
383
+ "yor": 75.4,
384
+ "zul": 77.6
385
  }
386
  }