Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import json | |
| import os | |
| from huggingface_hub import hf_hub_download | |
# Load the ground-truth split once at import time. HF_TOKEN is read from the
# environment and passed as the download token (None when the variable is unset).
file_path = hf_hub_download(
    repo_id="Psychotherapy-LLM/CBT-QA",
    filename="qa_test.json",
    repo_type="dataset",
    token=os.environ.get("HF_TOKEN"),
)

# Decode explicitly as UTF-8 rather than relying on the platform's locale
# default, which is not UTF-8 everywhere.
with open(file_path, "r", encoding="utf-8") as f:
    ground_truth = json.load(f)

# Map each question id to the first element of its 'answer' field.
# Keys are stringified because calculate_accuracy looks predictions up via
# str(item['id']); without this, non-string ids in the dataset never match.
id_to_answer = {str(item["id"]): item["answer"][0] for item in ground_truth}
def _read_upload_text(uploaded_file):
    """Return the uploaded predictions file as text.

    Accepts either a file-like object (anything exposing ``read``) or a
    filesystem path. A leading UTF-8 byte-order mark is dropped because
    ``json.loads`` rejects input that starts with one.
    """
    if hasattr(uploaded_file, "read"):
        content = uploaded_file.read()
        if isinstance(content, bytes):
            return content.decode("utf-8-sig")
        return content.lstrip("\ufeff")
    # Not file-like: treat the value as a path (str or os.PathLike).
    with open(uploaded_file, "r", encoding="utf-8-sig") as f:
        return f.read()


def calculate_accuracy(uploaded_file, *, answers=None):
    """Calculate Task 1 accuracy from uploaded predictions.

    Args:
        uploaded_file: A file-like object or a path to a JSON file holding a
            list of objects with ``id`` and ``prediction`` keys. ``None``
            (nothing uploaded) yields an error message.
        answers: Optional mapping from stringified id to the expected answer.
            Defaults to the module-level ``id_to_answer``; callers that pass
            only ``uploaded_file`` are unaffected.

    Returns:
        A human-readable result string: either the accuracy summary or a
        message starting with ``"Error: "``. The function reports failures
        through the return value rather than raising.

    Scoring notes:
        * Only entries whose id is present in the answer key are counted.
        * A prediction is reduced to the first character of its stripped
          text before comparison.
        * A prediction that is not a string is counted as incorrect rather
          than aborting the whole evaluation.
    """
    if uploaded_file is None:
        return "Error: No file uploaded"
    if answers is None:
        answers = id_to_answer

    try:
        predictions = json.loads(_read_upload_text(uploaded_file))
    except Exception as e:
        # UI boundary: surface read/decode/parse failures as text.
        return f"Error: {e}"

    if not isinstance(predictions, list):
        return "Error: Predictions JSON must be a list of objects"

    correct = 0
    total = 0
    for item in predictions:
        # Skip malformed entries instead of failing the whole submission.
        if not isinstance(item, dict) or "id" not in item or "prediction" not in item:
            continue
        item_id = str(item["id"])
        if item_id not in answers:
            continue

        raw = item["prediction"]
        text = raw.strip() if isinstance(raw, str) else ""
        pred = text[:1]  # "" when the prediction is blank or not a string

        total += 1
        if pred == answers[item_id]:
            correct += 1

    if total == 0:
        return "Error: No valid predictions found"

    accuracy = correct / total
    return f"Accuracy: {correct}/{total} = {accuracy:.4f} ({accuracy*100:.2f}%)"
# Build the single-function Gradio UI: one JSON file upload in, one text
# result out, wired to calculate_accuracy.
demo = gr.Interface(
    fn=calculate_accuracy,
    inputs=gr.File(label="Upload Predictions JSON", file_types=[".json"]),
    outputs=gr.Textbox(label="Results"),
    title="CBT-QA Task 1 Metrics Calculator",
    # The format example uses double quotes so it is valid JSON; a file
    # written with single quotes is rejected by the JSON parser.
    description=(
        "Upload a JSON file with predictions "
        '(format: [{"id": "123", "prediction": "a"}, ...])'
    ),
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()