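"""Gradio Space for scoring CBT-QA Task 1 predictions.

Downloads the ground-truth test split from the Psychotherapy-LLM/CBT-QA dataset
and reports accuracy for an uploaded JSON file of predictions.
"""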
import json
import os

import gradio as gr
from huggingface_hub import hf_hub_download
# Load ground truth data from the Hub
file_path = hf_hub_download(
    repo_id="Psychotherapy-LLM/CBT-QA",
    filename="qa_test.json",
    repo_type="dataset",
    token=os.environ.get("HF_TOKEN"),
)

with open(file_path, "r") as f:
    ground_truth = json.load(f)
# Create ID-to-answer mapping (keys cast to str to match the lookup in calculate_accuracy)
id_to_answer = {str(item["id"]): item["answer"][0] for item in ground_truth}
def calculate_accuracy(uploaded_file):
    """Calculate Task 1 accuracy from uploaded predictions."""
    try:
        # Read the uploaded file, which may arrive as a file-like object or a path
        if hasattr(uploaded_file, "read"):
            # File-like object
            content = uploaded_file.read()
            if isinstance(content, bytes):
                content = content.decode("utf-8")
        else:
            # NamedString or file path
            with open(uploaded_file, "r") as f:
                content = f.read()

        predictions = json.loads(content)

        # Calculate accuracy over predictions whose IDs appear in the test set
        correct = 0
        total = 0
        for item in predictions:
            if "id" in item and "prediction" in item:
                item_id = str(item["id"])
                if item_id in id_to_answer:
                    # Only the first character of the prediction is compared,
                    # e.g. "a" from "a) some option text"
                    pred = item["prediction"].strip()[:1]
                    answer = id_to_answer[item_id]
                    if pred == answer:
                        correct += 1
                    total += 1

        if total == 0:
            return "Error: No valid predictions found"

        accuracy = correct / total
        return f"Accuracy: {correct}/{total} = {accuracy:.4f} ({accuracy*100:.2f}%)"
    except Exception as e:
        return f"Error: {e}"
# Create a simple interface: upload a JSON file, get an accuracy string back
demo = gr.Interface(
    fn=calculate_accuracy,
    inputs=gr.File(label="Upload Predictions JSON", file_types=[".json"]),
    outputs=gr.Textbox(label="Results"),
    title="CBT-QA Task 1 Metrics Calculator",
    description="Upload a JSON file with predictions (format: [{'id': '123', 'prediction': 'a'}, ...])",
)
if __name__ == "__main__":
    demo.launch()
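
# A minimal sketch of how a predictions file for this app could be produced,
# assuming a hypothetical `choose_letter(question)` function that returns a
# single letter such as "a" (not part of this repo):
#
#     with open("qa_test.json", "r") as f:
#         questions = json.load(f)
#     predictions = [{"id": str(q["id"]), "prediction": choose_letter(q)}
#                    for q in questions]
#     with open("predictions.json", "w") as f:
#         json.dump(predictions, f, indent=2)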