tangtang
committed on
Commit
·
c7ca903
1
Parent(s):
23c8313
Update space1
Browse files
- src/about.py +5 -4
- src/display/utils.py +1 -1
- src/populate.py +3 -3
src/about.py
CHANGED
|
@@ -10,9 +10,9 @@ class Task:
|
|
| 10 |
|
| 11 |
class Tasks(Enum):
|
| 12 |
# task_key in the json file, metric_key in the json file, name to display in the leaderboard
|
| 13 |
-
Reference_Generation_Precision = Task("Reference_Generation", "Precision","Precision (%)")
|
| 14 |
-
Title_search_rate = Task("Reference Generation",
|
| 15 |
-
|
| 16 |
# Overlap_rate = Task("Reference Generation",
|
| 17 |
# "Overlap_rate", "Overlap_rate (%)")
|
| 18 |
# Similarity = Task("Abstract Writing",
|
|
@@ -42,7 +42,8 @@ class Tasks(Enum):
|
|
| 42 |
# "ROUGE-2", "ROUGE-2↑")
|
| 43 |
# ROUGE_L = Task("Review Composition",
|
| 44 |
# "ROUGE-L", "ROUGE-L↑")
|
| 45 |
-
|
|
|
|
| 46 |
# factual_consistency_rate = Task("factual_consistency_rate", "factual_consistency_rate", "Factual Consistency Rate (%)")
|
| 47 |
# answer_rate = Task("answer_rate", "answer_rate", "Answer Rate (%)")
|
| 48 |
# average_summary_length = Task("average_summary_length",
|
|
|
|
| 10 |
|
| 11 |
class Tasks(Enum):
|
| 12 |
# task_key in the json file, metric_key in the json file, name to display in the leaderboard
|
| 13 |
+
# Reference_Generation_Precision = Task("Reference_Generation", "Precision","Precision (%)")
|
| 14 |
+
# Title_search_rate = Task("Reference Generation",
|
| 15 |
+
# "Title_search_rate", "Title search rate (%)")
|
| 16 |
# Overlap_rate = Task("Reference Generation",
|
| 17 |
# "Overlap_rate", "Overlap_rate (%)")
|
| 18 |
# Similarity = Task("Abstract Writing",
|
|
|
|
| 42 |
# "ROUGE-2", "ROUGE-2↑")
|
| 43 |
# ROUGE_L = Task("Review Composition",
|
| 44 |
# "ROUGE-L", "ROUGE-L↑")
|
| 45 |
+
hallucination_rate = Task("hallucination_rate", "hallucination_rate", "Hallucination Rate (%)")
|
| 46 |
+
# reference_validity_rate = Task("reference_validity_rate", "reference_validity_rate",
|
| 47 |
# factual_consistency_rate = Task("factual_consistency_rate", "factual_consistency_rate", "Factual Consistency Rate (%)")
|
| 48 |
# answer_rate = Task("answer_rate", "answer_rate", "Answer Rate (%)")
|
| 49 |
# average_summary_length = Task("average_summary_length",
|
src/display/utils.py
CHANGED
|
@@ -26,7 +26,7 @@ auto_eval_column_dict = []
|
|
| 26 |
auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
|
| 27 |
auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
|
| 28 |
#Scores
|
| 29 |
-
|
| 30 |
for task in Tasks:
|
| 31 |
auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
|
| 32 |
# Model information
|
|
|
|
| 26 |
auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
|
| 27 |
auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
|
| 28 |
#Scores
|
| 29 |
+
auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
|
| 30 |
for task in Tasks:
|
| 31 |
auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
|
| 32 |
# Model information
|
src/populate.py
CHANGED
|
@@ -18,11 +18,11 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
|
|
| 18 |
# print(df.head(10))
|
| 19 |
|
| 20 |
# 将数组转标量,空数组变为 0
|
| 21 |
-
df["Precision (%)"] = df["Precision (%)"].apply(lambda x: x[0] if len(x) > 0 else 0)
|
| 22 |
-
df["Title search rate (%)"] = df["Title search rate (%)"].apply(lambda x: x[0] if len(x) > 0 else 0)
|
| 23 |
|
| 24 |
# 平均值列
|
| 25 |
-
df["Average ⬆️"] = df[["Precision (%)", "Title search rate (%)"]].mean(axis=1)
|
| 26 |
|
| 27 |
# 排序
|
| 28 |
df = df.sort_values(by=["Average ⬆️"], ascending=False)
|
|
|
|
| 18 |
# print(df.head(10))
|
| 19 |
|
| 20 |
# 将数组转标量,空数组变为 0
|
| 21 |
+
# df["Precision (%)"] = df["Precision (%)"].apply(lambda x: x[0] if len(x) > 0 else 0)
|
| 22 |
+
# df["Title search rate (%)"] = df["Title search rate (%)"].apply(lambda x: x[0] if len(x) > 0 else 0)
|
| 23 |
|
| 24 |
# 平均值列
|
| 25 |
+
# df["Average ⬆️"] = df[["Precision (%)", "Title search rate (%)"]].mean(axis=1)
|
| 26 |
|
| 27 |
# 排序
|
| 28 |
df = df.sort_values(by=["Average ⬆️"], ascending=False)
|