# Copyright contributors to the geobench project
# modified from geobench (https://github.com/ServiceNow/geo-bench/blob/main/geobench/plot_tools.py)
import json
import os

import numpy as np
import pandas as pd
from scipy.stats import sem, trim_mean
from scipy.stats.mstats import trim

from utils.constants import NORMALIZER_DIR

np.random.seed(100)


def biqm(scores):
    """Return one bootstrap sample of the IQM (resample the scores with replacement)."""
    b_scores = np.random.choice(scores, size=len(scores), replace=True)
    return trim_mean(b_scores, proportiontocut=0.25, axis=None)


def trimmed_sem(scores):
    """Standard error of the mean of the scores kept after trimming 25% from each tail."""
    scores = trim(scores, limits=(0.25, 0.25), relative=True)
    scores = scores.compressed()  # keep only the untrimmed (unmasked) values
    return sem(scores)


def iqm(scores):
    """Interquartile mean: the mean of the scores kept after trimming 25% from each tail."""
    return trim_mean(scores, proportiontocut=0.25, axis=None)
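
# Illustrative note: trim_mean drops floor(0.25 * n) values from each end of
# the sorted scores, so iqm(np.arange(10)) averages [2, 3, 4, 5, 6, 7] and
# returns 4.5.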


def bootstrap_iqm(
    df, group_keys=("model", "dataset"), metric="test_metric", repeat=100
):
    """Bootstrap over seeds for every model and dataset to compute the IQM score distribution."""
    df_list = []
    for _ in range(repeat):
        series = df.groupby(list(group_keys))[metric].apply(biqm)
        df_list.append(series.to_frame().reset_index())
    return pd.concat(df_list)


def bootstrap_iqm_aggregate(df, metric="test_metric", repeat=100):
    """Stratified bootstrap (by dataset) over all seeds to compute the IQM score distribution for each backbone."""
group = df.groupby(["backbone", "dataset"])
df_list = []
for i in range(repeat):
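        # Resample rows within each (backbone, dataset) group so every
        # dataset keeps its weight in the resample (stratified bootstrap).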
new_df = group.sample(frac=1, replace=True, random_state=100+i)
series = new_df.groupby(["backbone"])[metric].apply(iqm)
df_list.append(series.to_frame().reset_index())
new_df = pd.concat(df_list)
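    # Tag the pooled rows as "aggregated" since they span all datasets.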
new_df.loc[:, "dataset"] = "aggregated"
return new_df


def bootstrap_mean_aggregate(df, metric="test_metric", repeat=100):
    """Stratified bootstrap (by dataset) over all seeds to compute the mean score distribution for each backbone."""
group = df.groupby(["backbone", "dataset"])
df_list = []
for i in range(repeat):
new_df = group.sample(frac=1, replace=True, random_state=100+i)
series = new_df.groupby(["backbone"])[metric].apply(np.mean)
df_list.append(series.to_frame().reset_index())
new_df = pd.concat(df_list)
new_df.loc[:, "dataset"] = "aggregated"
return new_df


def average_seeds(df, group_keys=("model", "dataset"), metric="test metric"):
    """Average over seeds for every model and dataset."""
df_avg = df.groupby(list(group_keys))[metric].mean()
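    # Pivot datasets into columns: one row per model, one column per dataset.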
df_avg = df_avg.unstack(level="dataset")
df_avg = df_avg.round(3)
return df_avg


def extract_1x_data(df_all):
    """Extract only the results trained on 100% of the data."""
return df_all[
(df_all["partition name"] == "1.00x train") | (df_all["partition name"] == "default")
].copy()


class Normalizer:
    """Normalize results between the min and max observed for each dataset."""

    def __init__(self, range_dict):
        """Initialize with a dict mapping dataset name to (min, max)."""
        self.range_dict = range_dict

    def __call__(self, ds_name, values, scale_only=False):
        """Normalize values using the (min, max) range recorded for ds_name."""
        mn, mx = self.range_dict[ds_name]
        value_range = mx - mn
        if scale_only:
            return values / value_range
        return (values - mn) / value_range

    def from_row(self, row, scale_only=False):
        """Normalize every (dataset, value) entry of a row."""
        return [self(ds_name, val, scale_only=scale_only) for ds_name, val in row.items()]

    def normalize_data_frame(self, df, metric):
        """Add a normalized copy of `metric` to df and return the new column name."""
        new_metric = f"normalized {metric}"
        df[new_metric] = df.apply(lambda row: self(row["dataset"], row[metric]), axis=1)
        return new_metric

    def save(self, benchmark_name):
        """Save the normalizer's ranges to a JSON file."""
        out_dir = f"{NORMALIZER_DIR}/{benchmark_name}"
        os.makedirs(out_dir, exist_ok=True)
        with open(f"{out_dir}/normalizer.json", "w") as f:
            json.dump(self.range_dict, f, indent=2)


def load_normalizer(benchmark_name):
    """Load a normalizer from its JSON file."""
with open(f"{NORMALIZER_DIR}/{benchmark_name}/normalizer.json", "r") as f:
range_dict = json.load(f)
return Normalizer(range_dict)


def make_normalizer(data_frame, metrics=("test metric",), benchmark_name="leaderboard_combined"):
    """Extract the min and max of each dataset from data_frame to build a Normalizer."""
datasets = data_frame["dataset"].unique()
range_dict = {}
for dataset in datasets:
sub_df = data_frame[data_frame["dataset"] == dataset]
data = []
for metric in metrics:
data.append(sub_df[metric].to_numpy())
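        # Pool the min/max across every requested metric for this dataset.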
range_dict[dataset] = (np.min(data), np.max(data))
normalizer = Normalizer(range_dict)
if benchmark_name:
normalizer.save(benchmark_name)
return normalizer
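

if __name__ == "__main__":
    # Minimal usage sketch on synthetic results. The column names follow the
    # functions above, but the backbone/dataset names and scores are made up
    # purely for illustration.
    rng = np.random.default_rng(0)
    demo = pd.DataFrame(
        {
            "backbone": np.repeat(["vit", "resnet"], 20),
            "dataset": np.tile(np.repeat(["ds_a", "ds_b"], 10), 2),
            "test_metric": rng.uniform(0.5, 0.9, size=40),
        }
    )
    # benchmark_name=None skips writing normalizer.json under NORMALIZER_DIR.
    normalizer = make_normalizer(demo, metrics=("test_metric",), benchmark_name=None)
    norm_metric = normalizer.normalize_data_frame(demo, "test_metric")
    boot = bootstrap_iqm_aggregate(demo, metric=norm_metric, repeat=10)
    print(boot.groupby("backbone")[norm_metric].mean())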