import time

import pandas as pd
import gradio as gr
from gradio.themes.utils import sizes
from gradio_leaderboard import Leaderboard
from dotenv import load_dotenv

load_dotenv()  # Load environment variables from .env before the project imports below, which may read them at import time

from about import ABOUT_INTRO, ABOUT_TEXT, FAQS, SUBMIT_INSTRUCTIONS, WEBSITE_HEADER
from constants import (
    ASSAY_RENAME,  # noqa: F401 -- referenced via @ inside a pandas query string below
    SECOND_DEADLINE,
    SEQUENCES_FILE_DICT,
    LEADERBOARD_DISPLAY_COLUMNS,
    ABOUT_TAB_NAME,
    FAQ_TAB_NAME,
    TERMS_URL,
    LEADERBOARD_COLUMNS_RENAME,
    LEADERBOARD_COLUMNS_RENAME_LIST,
    SUBMIT_TAB_NAME,
    SLACK_URL,
)
from submit import make_submission
from utils import fetch_hf_results, show_output_box, periodic_data_fetch


def format_leaderboard_table(df_results: pd.DataFrame, assay: str | None = None):
    """
    Format the dataframe for display on the leaderboard. The dataframe comes from utils.fetch_hf_results().
    """
    df = df_results.query("assay.isin(@ASSAY_RENAME.keys())").copy()
    if assay is not None:
        df = df[df["assay"] == assay]
    df = df[LEADERBOARD_DISPLAY_COLUMNS]
    df = df.sort_values(by="spearman", ascending=False)
    # After sorting, annotate rows where heldout test set scores are missing
    # Note: We could instead just add a text box at the bottom of the leaderboard: "Note: Results for the Heldout Test Set are only evaluated at competition close"
    # Convert the spearman column to strings so that text can be assigned into it below without dtype incompatibility
    df["spearman"] = df["spearman"].apply(lambda x: f"{x:.3f}")
    # Cast submission_time to datetime
    df["submission_time"] = pd.to_datetime(df["submission_time"], errors="coerce")
    # For submissions made before the second deadline: replace missing heldout scores with the reason they could not be scored
    for metric in ["spearman", "top_10_recall"]:
        if metric not in df.columns:
            continue
        df.loc[
            (df["dataset"] == "Heldout Test Set")
            & (df[metric] == "nan")
            & (df["submission_time"] <= SECOND_DEADLINE),
            metric,
        ] = "Error: All predictions have a constant value"

    # Finally, rename columns for readability
    df = df.rename(columns=LEADERBOARD_COLUMNS_RENAME)
    return df


def get_leaderboard_object(assay: str | None = None):
    filter_columns = ["dataset"]
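    # On the overall view (no single assay selected), also allow filtering by property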
    if assay is None:
        filter_columns.append("property")
    # Bug: Can't leave search_columns empty because then it says "Column None not found in headers"
    # Note(Lood): Would be nice to make it clear that the Search Column is searching on model name
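    # Local results cache written by the background fetch loop (see periodic_data_fetch)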
    current_dataframe = pd.read_csv("debug-current-results.csv")
    lb = Leaderboard(
        value=format_leaderboard_table(df_results=current_dataframe, assay=assay),
        datatype=["str", "str", "str", "number", "str"],
        select_columns=LEADERBOARD_COLUMNS_RENAME_LIST(
            ["model", "property", "spearman", "dataset", "user"]
        ),
        search_columns=["Model Name"],
        filter_columns=LEADERBOARD_COLUMNS_RENAME_LIST(filter_columns),
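        # every=15: presumably re-polls the displayed value every 15 seconds (the gr.Timer below also drives refreshes)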
        every=15,
        render=True,
        height=500,  # Set a fixed height to make it scrollable
    )
    return lb


def refresh_overall_leaderboard():
    # debug-current-results.csv is updated by the outer thread
    current_dataframe = pd.read_csv("debug-current-results.csv")
    return format_leaderboard_table(df_results=current_dataframe)


# Initialize global dataframe
fetch_hf_results()
time.sleep(2)  # Give the outer thread time to create the file at the start
current_dataframe = pd.read_csv("debug-current-results.csv")
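# Note: this module-level dataframe is not referenced below; presumably the read doubles as a check that the results file exists before the UI is built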

# Make font size bigger using gradio theme
with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
    timer = gr.Timer(3)  # Run every 3 seconds when page is focused

    ## Header

    with gr.Row():
        with gr.Column(scale=6):  # bigger text area
            gr.Markdown(WEBSITE_HEADER)
        with gr.Column(scale=2):  # smaller side column for logo
            gr.Image(
                value="./assets/competition_logo.jpg",
                show_label=False,
                show_download_button=False,
                show_share_button=False,
                show_fullscreen_button=False,
                width="25vw",  # Take up the width of the column (2/8 = 1/4)
            )

    with gr.Tabs(elem_classes="tab-buttons"):
        with gr.TabItem(ABOUT_TAB_NAME, elem_id="abdev-benchmark-tab-table"):
            gr.Markdown(ABOUT_INTRO)
            gr.Image(
                value="./assets/prediction_explainer_v3.png",
                show_label=False,
                show_download_button=False,
                show_share_button=False,
                show_fullscreen_button=False,
                width="30vw",
            )
            gr.Markdown(ABOUT_TEXT)

            # Sequence download buttons
            gr.Markdown(
                """### ๐Ÿ“ฅ Download Sequences
            The GDPa1 dataset (with assay data and sequences) is available on Hugging Face [here](https://huggingface.co/datasets/ginkgo-datapoints/GDPa1),
            but we provide this and the private test set for convenience."""
            )
            with gr.Row():
                with gr.Column():
                    download_button_cv_about = gr.DownloadButton(
                        label="๐Ÿ“ฅ Download GDPa1 sequences",
                        value=SEQUENCES_FILE_DICT["GDPa1_cross_validation"],
                        variant="secondary",
                    )
                with gr.Column():
                    download_button_test_about = gr.DownloadButton(
                        label="๐Ÿ“ฅ Download Private Test Set sequences",
                        value=SEQUENCES_FILE_DICT["Heldout Test Set"],
                        variant="secondary",
                    )

        with gr.TabItem(
            "๐Ÿ† Leaderboard", elem_id="abdev-benchmark-tab-table"
        ) as leaderboard_tab:
            gr.Markdown(
                """
                # Overall Leaderboard (filter below by property)
                Each property has its own prize, and participants can submit models for any combination of properties.

                **Note**: It is *easy to overfit* the public GDPa1 dataset, which results in artificially high Spearman correlations.
                We suggest training with cross-validation to get a better indication of the model's performance on the eventual private test set.
                """
            )
            lb = get_leaderboard_object()
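            # Refresh the leaderboard on each timer tick and once when the page loads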
            timer.tick(fn=refresh_overall_leaderboard, outputs=lb)
            demo.load(fn=refresh_overall_leaderboard, outputs=lb)
        with gr.Tab(FAQ_TAB_NAME):
            gr.Markdown("# Frequently Asked Questions")
            for i, (question, answer) in enumerate(FAQS.items()):
                # Would love to make questions bold but accordion doesn't support it
                question = f"{i+1}. {question}"
                with gr.Accordion(question, open=False):
                    if isinstance(answer, list):
                        # Italicize each line
                        italicized_answer = "  \n".join(f"*{item}*" for item in answer)
                        gr.Markdown(italicized_answer)
                    else:
                        gr.Markdown(f"*{answer}*")  # Italics for answers

    # Footnote
    gr.Markdown(
        f"""
        <div style="text-align: center; font-size: 14px; color: gray; margin-top: 2em;">
        📬 For questions or feedback, contact <a href="mailto:[email protected]">[email protected]</a> or discuss on the <a href="{SLACK_URL}">Slack community</a> co-hosted by Bits in Bio.<br>
        Visit the <a href="https://datapoints.ginkgo.bio/ai-competitions/2025-abdev-competition">Competition Registration page</a> to register and sign up for updates, and see the Terms <a href="{TERMS_URL}">here</a>.
        </div>
        """,
        elem_id="contact-footer",
    )

if __name__ == "__main__":
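    # periodic_data_fetch (from utils) is passed as the server's lifespan hook,
    # presumably running the background loop that keeps debug-current-results.csv fresh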
    demo.launch(
        ssr_mode=False,
        app_kwargs={"lifespan": periodic_data_fetch},
    )