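"""Gradio Space for scoring CBT-QA Task 1 predictions.

Downloads the ground-truth test split from the Psychotherapy-LLM/CBT-QA dataset
and reports accuracy for an uploaded JSON file of predictions.
"""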
import json
import os

import gradio as gr
from huggingface_hub import hf_hub_download
# Load ground truth data from the Hub
file_path = hf_hub_download(
    repo_id="Psychotherapy-LLM/CBT-QA",
    filename="qa_test.json",
    repo_type="dataset",
    token=os.environ.get("HF_TOKEN"),
)

with open(file_path, "r") as f:
    ground_truth = json.load(f)
# Create ID-to-answer mapping (keys cast to str to match the lookup in calculate_accuracy)
id_to_answer = {str(item["id"]): item["answer"][0] for item in ground_truth}
def calculate_accuracy(uploaded_file):
    """Calculate Task 1 accuracy from uploaded predictions."""
    try:
        # Read the uploaded file, which may arrive as a file-like object or a path
        if hasattr(uploaded_file, "read"):
            # File-like object
            content = uploaded_file.read()
            if isinstance(content, bytes):
                content = content.decode("utf-8")
        else:
            # NamedString or file path
            with open(uploaded_file, "r") as f:
                content = f.read()

        predictions = json.loads(content)

        # Calculate accuracy over predictions whose IDs appear in the test set
        correct = 0
        total = 0
        for item in predictions:
            if "id" in item and "prediction" in item:
                item_id = str(item["id"])
                if item_id in id_to_answer:
                    # Only the first character of the prediction is compared,
                    # e.g. "a" from "a) some option text"
                    pred = item["prediction"].strip()[:1]
                    answer = id_to_answer[item_id]
                    if pred == answer:
                        correct += 1
                    total += 1

        if total == 0:
            return "Error: No valid predictions found"

        accuracy = correct / total
        return f"Accuracy: {correct}/{total} = {accuracy:.4f} ({accuracy*100:.2f}%)"
    except Exception as e:
        return f"Error: {e}"
# Create a simple interface: upload a JSON file, get an accuracy string back
demo = gr.Interface(
    fn=calculate_accuracy,
    inputs=gr.File(label="Upload Predictions JSON", file_types=[".json"]),
    outputs=gr.Textbox(label="Results"),
    title="CBT-QA Task 1 Metrics Calculator",
    description="Upload a JSON file with predictions (format: [{'id': '123', 'prediction': 'a'}, ...])",
)
if __name__ == "__main__":
    demo.launch()
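
# A minimal sketch of how a predictions file for this app could be produced,
# assuming a hypothetical `choose_letter(question)` function that returns a
# single letter such as "a" (not part of this repo):
#
#     with open("qa_test.json", "r") as f:
#         questions = json.load(f)
#     predictions = [{"id": str(q["id"]), "prediction": choose_letter(q)}
#                    for q in questions]
#     with open("predictions.json", "w") as f:
#         json.dump(predictions, f, indent=2)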