File size: 2,166 Bytes
8b6c23b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21889ba
 
 
 
 
 
 
 
 
8b6c23b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71


import gradio as gr
import json
import os
from huggingface_hub import hf_hub_download

# Load ground truth data from the Hugging Face Hub. The access token is read
# from the HF_TOKEN environment variable; token=None is passed when it is unset.
file_path = hf_hub_download(
    repo_id="Psychotherapy-LLM/CBT-QA",
    filename="qa_test.json",
    repo_type="dataset",
    token=os.environ.get("HF_TOKEN"),
)

# Decode explicitly as UTF-8 instead of relying on the platform default
# encoding, which is not UTF-8 everywhere.
with open(file_path, 'r', encoding='utf-8') as f:
    ground_truth = json.load(f)

# Create ID to answer mapping. Keys are normalized to str because
# calculate_accuracy looks ids up with str(item['id']); without this, ground
# truth ids stored as JSON numbers could never match a submission.
# item['answer'][0] keeps only the first element of each answer
# (NOTE(review): first list entry or first character, depending on the dataset
# schema - confirm against qa_test.json).
id_to_answer = {str(item['id']): item['answer'][0] for item in ground_truth}

def calculate_accuracy(uploaded_file) -> str:
    """Calculate Task 1 accuracy from uploaded predictions.

    The upload must contain a JSON list of objects, each with an ``id`` and a
    ``prediction`` key. Only the first non-whitespace character of each
    prediction is compared with the ground-truth answer stored in the
    module-level ``id_to_answer`` mapping.

    Entries that are not objects, lack either key, or whose id is not in the
    ground truth are ignored. A matching entry whose prediction is empty or
    not a string counts as incorrect. The denominator is therefore the number
    of submitted entries with a known id, not the size of the ground truth.

    Args:
        uploaded_file: Either a file-like object exposing ``read()`` or a
            path-like value accepted by ``open()``. ``None`` means nothing was
            uploaded.

    Returns:
        A human-readable result line (``"Accuracy: ..."``) on success, or a
        message starting with ``"Error: "`` on failure. This function never
        raises; it is used directly as a UI callback.
    """
    if uploaded_file is None:
        return "Error: No file uploaded"

    try:
        # Read uploaded file
        if hasattr(uploaded_file, 'read'):
            # File-like object
            content = uploaded_file.read()
            if isinstance(content, bytes):
                # utf-8-sig also accepts plain UTF-8 and drops a leading BOM,
                # which json.loads would otherwise reject.
                content = content.decode('utf-8-sig')
        else:
            # NamedString or file path
            with open(uploaded_file, 'r', encoding='utf-8-sig') as f:
                content = f.read()

        predictions = json.loads(content)

        if not isinstance(predictions, list):
            return "Error: Predictions must be a JSON list of objects"

        # Calculate accuracy
        correct = 0
        total = 0

        for item in predictions:
            # Skip malformed entries instead of aborting the whole run.
            if not isinstance(item, dict):
                continue
            if 'id' not in item or 'prediction' not in item:
                continue

            item_id = str(item['id'])
            if item_id not in id_to_answer:
                continue

            raw_prediction = item['prediction']
            # A null or non-string prediction is scored as wrong rather than
            # raising and discarding every other entry.
            text = raw_prediction.strip() if isinstance(raw_prediction, str) else ""
            pred = text[0] if text else ""

            if pred == id_to_answer[item_id]:
                correct += 1
            total += 1

        if total == 0:
            return "Error: No valid predictions found"

        accuracy = correct / total
        return f"Accuracy: {correct}/{total} = {accuracy:.4f} ({accuracy*100:.2f}%)"

    except Exception as e:
        # Top-level UI boundary: surface any failure as text in the result box.
        return f"Error: {str(e)}"

# Build the UI: a single JSON upload in, a single text result out.
upload_input = gr.File(label="Upload Predictions JSON", file_types=[".json"])
result_output = gr.Textbox(label="Results")

demo = gr.Interface(
    title="CBT-QA Task 1 Metrics Calculator",
    description="Upload a JSON file with predictions (format: [{'id': '123', 'prediction': 'a'}, ...])",
    fn=calculate_accuracy,
    inputs=upload_input,
    outputs=result_output,
)

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()