"""Gradio app that scores an uploaded predictions JSON against the CBT-QA test set."""

import json
import os

import gradio as gr
from huggingface_hub import hf_hub_download

# Load ground truth data (HF_TOKEN, if set, is forwarded to the Hub download).
file_path = hf_hub_download(
    repo_id="Psychotherapy-LLM/CBT-QA",
    filename="qa_test.json",
    repo_type="dataset",
    token=os.environ.get("HF_TOKEN"),
)

with open(file_path, 'r', encoding='utf-8') as f:
    ground_truth = json.load(f)

# Create ID to answer mapping. Keys are normalised to str because
# calculate_accuracy looks predictions up by str(id); without this, integer
# IDs in the ground truth would never match any uploaded prediction.
id_to_answer = {str(item['id']): item['answer'][0] for item in ground_truth}


def _read_upload(uploaded_file):
    """Return the text of an upload given as a file-like object or a path."""
    if hasattr(uploaded_file, 'read'):
        # File-like object
        content = uploaded_file.read()
        if isinstance(content, bytes):
            # utf-8-sig also accepts plain UTF-8 and drops a leading BOM,
            # which json.loads would otherwise reject.
            content = content.decode('utf-8-sig')
        return content
    # NamedString or file path
    with open(uploaded_file, 'r', encoding='utf-8-sig') as f:
        return f.read()


def _first_char(value):
    """Return the first non-whitespace character of a string, else ""."""
    if not isinstance(value, str):
        # e.g. a null prediction: scored as incorrect instead of aborting the run.
        return ""
    stripped = value.strip()
    return stripped[0] if stripped else ""


def calculate_accuracy(uploaded_file):
    """Calculate Task 1 accuracy from uploaded predictions.

    Args:
        uploaded_file: The upload handed over by the Gradio File input; either
            an object with a ``read()`` method or a path to the JSON file.
            ``None`` (nothing uploaded) is reported as an error.

    Returns:
        A human-readable string: either ``"Accuracy: correct/total = ..."`` or
        a message starting with ``"Error: "``. The function does not raise.

    An entry is scored only if it is a JSON object with both ``'id'`` and
    ``'prediction'`` keys and its id (compared as a string) exists in the
    ground truth. The first non-whitespace character of the prediction is
    compared, case-sensitively, with the stored answer.
    """
    if uploaded_file is None:
        return "Error: No file uploaded"

    # UI boundary: any read/parse failure is reported as text, never raised.
    try:
        predictions = json.loads(_read_upload(uploaded_file))
    except Exception as e:
        return f"Error: {str(e)}"

    if not isinstance(predictions, list):
        return "Error: Expected a JSON list of prediction objects"

    # NOTE(review): the denominator is the number of scored uploaded entries,
    # not the size of the test set, and duplicate ids are counted each time.
    # Confirm this is the intended metric.
    correct = 0
    total = 0
    for item in predictions:
        if not isinstance(item, dict):
            continue
        if 'id' not in item or 'prediction' not in item:
            continue
        item_id = str(item['id'])
        if item_id not in id_to_answer:
            continue
        if _first_char(item['prediction']) == id_to_answer[item_id]:
            correct += 1
        total += 1

    if total == 0:
        return "Error: No valid predictions found"

    accuracy = correct / total
    return f"Accuracy: {correct}/{total} = {accuracy:.4f} ({accuracy*100:.2f}%)"


# Create simple interface
demo = gr.Interface(
    fn=calculate_accuracy,
    inputs=gr.File(label="Upload Predictions JSON", file_types=[".json"]),
    outputs=gr.Textbox(label="Results"),
    title="CBT-QA Task 1 Metrics Calculator",
    description="Upload a JSON file with predictions (format: [{'id': '123', 'prediction': 'a'}, ...])",
)

if __name__ == "__main__":
    demo.launch()