import gradio as gr
import json
import os
import tempfile
import traceback
from typing import Dict, Any, Union, Optional, List, Tuple
import base64
import pandas as pd
import numpy as np
from PIL import Image
import io
import warnings
warnings.filterwarnings('ignore')

# =============================================================================
# 🔑 CONFIGURATION - API KEYS
# =============================================================================
# Both keys are read from environment variables so that secrets never have to
# be written into (and accidentally shared with) this script. Set them in the
# environment before launching, for example:
#   export CLAUDE_API_KEY=...
#   export OPENAI_API_KEY=...
#
# Claude API key from: https://console.anthropic.com/
# Resolves to None when the variable is not set.
CLAUDE_API_KEY = os.environ.get("CLAUDE_API_KEY")

# OpenAI API key from: https://platform.openai.com/api-keys
# Resolves to an empty string (the previous hard-coded default) when the
# variable is not set, so existing truthiness checks keep working.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")

# ⚠️ SECURITY WARNING: Never hard-code or commit real API keys in this file.
# =============================================================================

# Import the necessary libraries (install these if not already installed)
try:
    import anthropic
    import openai
    from sentence_transformers import SentenceTransformer
    import networkx as nx
    from sklearn.metrics.pairwise import cosine_similarity
    import matplotlib.pyplot as plt
    import mimetypes
    from collections import defaultdict
    import itertools
    
    # Bundled example chart pairs, keyed by display name. Each row below is
    # (display name, ground-truth image path, predicted image path, description).
    # Replace the paths with your actual example images.
    EXAMPLE_CHART_PAIRS = {
        display_name: {
            "ground_truth": truth_path,
            "predicted": predicted_path,
            "description": summary,
        }
        for display_name, truth_path, predicted_path, summary in (
            (
                "Example 1: Maternal Mortality",
                "examples/ex_1/ground_truth.png",
                "examples/ex_1/output.png",
                "Line chart showing maternal mortality rate over 4 years",
            ),
            (
                "Example 2: Main Cooking Fuel",
                "examples/ex_2/ground_truth.png",
                "examples/ex_2/output.png",
                "Line chart showing main cooking fuel used by households",
            ),
            (
                "Example 3: Distribution of Website Users",
                "examples/ex_3/ground_truth.png",
                "examples/ex_3/output.png",
                "Pie chart showing distribution of website users by websites",
            ),
            (
                "Example 4: Relation between Latitude and Daylight",
                "examples/ex_4/ground_truth.png",
                "examples/ex_4/output.png",
                "Scatter chart showing relation between latitude and daylight duration",
            ),
            (
                "Example 5: Market Share",
                "examples/ex_5/ground_truth.png",
                "examples/ex_5/output.png",
                "Bar chart showing market share of top streaming platforms",
            ),
            (
                "Example 6: Roaming Wisps",
                "examples/ex_6/ground_truth.png",
                "examples/ex_6/output.png",
                "3D chart showing roaming wisps of celestial aurora",
            ),
            (
                "Example 7: Function Chart",
                "examples/ex_7/ground_truth.png",
                "examples/ex_7/output.png",
                "Function chart of a polynomial function",
            ),
            (
                "Example 8: Target vs Prediction",
                "examples/ex_8/ground_truth.png",
                "examples/ex_8/output.png",
                "Scatter plot showing target vs prediction",
            ),
            (
                "Example 9: Saudi Arabia's Re-export in 1991",
                "examples/ex_9/ground_truth.png",
                "examples/ex_9/output.png",
                "Line chart showing Saudi Arabia's re-export in 1991",
            ),
            (
                "Example 10: Glucose vs Fructose",
                "examples/ex_10/ground_truth.png",
                "examples/ex_10/output.png",
                "Bar chart showing glucose vs fructose in different fruits",
            ),
        )
    }
    
    class ChartEval:
        
        def __init__(self, llm_provider="Claude", api_key=None, model_config=None):
            """
            Initialize ChartEval with configurable LLM provider
            
            Args:
                llm_provider: LLM provider name ("GPT-3.5", "GPT-4", "Claude", etc.)
                api_key: API key for the LLM service
                model_config: Additional model configuration parameters
            """
            self.llm_provider = llm_provider
            self.api_key = api_key or os.getenv('LLM_API_KEY')
            self.model_config = model_config or {}
            
            # Initialize LLM client based on provider
            self._init_llm_client()
            
            # Initialize sentence transformer for GraphBERT scoring
            try:
                self.sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
            except Exception as e:
                print(f"Warning: Could not load sentence transformer: {e}")
                self.sentence_model = None
        
        def _init_llm_client(self):
            """Initialize the appropriate LLM client based on provider"""
            if self.llm_provider.startswith("GPT"):
                try:
                    self.llm_client = openai.OpenAI(api_key=self.api_key)
                except ImportError:
                    raise ImportError("OpenAI package required for GPT models. Install with: pip install openai")
            
            elif self.llm_provider == "Claude":
                try:
                    self.llm_client = anthropic.Anthropic(api_key=self.api_key)
                except ImportError:
                    raise ImportError("Anthropic package required for Claude. Install with: pip install anthropic")
            
            else:
                # Generic/custom LLM - user needs to provide their own client
                self.llm_client = None
        
        def compare(self, chart1, chart2):
            """
            Score a predicted chart against a ground-truth chart.

            Args:
                chart1: Ground-truth chart. A dict containing a 'chart_type' key
                    is used as a graph directly; anything else is converted via
                    chartToVega followed by vegaToGraph.
                chart2: Predicted chart, handled the same way as chart1.

            Returns:
                Tuple of scores: (graphBertScore, hallucinationScore, omissionScore, graphEditDist)
            """
            def as_graph(chart):
                # Graph dicts are recognised by their 'chart_type' key and reused as-is
                if isinstance(chart, dict) and 'chart_type' in chart:
                    return chart
                vega_spec = self.chartToVega(chart)
                return self.vegaToGraph(vega_spec)

            reference_graph = as_graph(chart1)
            candidate_graph = as_graph(chart2)

            # Metrics are evaluated in the same order as they are returned
            return (
                self.calculate_graphBert_score(reference_graph, candidate_graph),
                self.calculate_hallucination_score(reference_graph, candidate_graph),
                self.calculate_omission_score(reference_graph, candidate_graph),
                self.calculate_GED_score(reference_graph, candidate_graph),
            )
        
        def generate_detailed_explanation(self, graph1, graph2, metrics, chart1_image=None, chart2_image=None):
            """
            Generate a detailed human-readable explanation of chart comparison results
            
            Args:
                graph1: Ground truth graph structure
                graph2: Predicted graph structure
                metrics: Dictionary containing calculated metrics
                chart1_image: Base64 encoded ground truth chart image (optional)
                chart2_image: Base64 encoded predicted chart image (optional)
            
            Returns:
                String containing detailed explanation
            """
            try:
                # Create comprehensive prompt for detailed analysis
                prompt = self._create_detailed_analysis_prompt(graph1, graph2, metrics)
                
                # Prepare images if available
                image_inputs = []
                if chart1_image and chart2_image:
                    image_inputs = [
                        {"type": "image", "data": chart1_image, "label": "Ground Truth Chart"},
                        {"type": "image", "data": chart2_image, "label": "Predicted Chart"}
                    ]
                
                # Call LLM for detailed analysis
                if image_inputs:
                    explanation = self._call_llm_with_images_for_explanation(prompt, image_inputs)
                else:
                    explanation = self._call_llm_text_only(prompt)
                
                return explanation
                
            except Exception as e:
                return f"Error generating detailed explanation: {str(e)}"
        
        def _create_detailed_analysis_prompt(self, graph1, graph2, metrics):
            """
            Create a comprehensive prompt for detailed chart analysis.

            Args:
                graph1: Ground truth graph structure; summarised with
                    _extract_graph_summary under the label "Ground Truth".
                graph2: Predicted graph structure; summarised under the label
                    "Predicted".
                metrics: Dict of metric results. The keys read here are
                    'bert_score', 'hallucination_score', 'omission_score' and
                    'ged_score'; each is expected to be a dict and defaults to
                    an empty dict when missing.

            Returns:
                The prompt string: chart summaries, the four headline metric
                values formatted to three decimals (0 when absent), the listed
                hallucinations and omissions, and the required output format.
            """
            
            # Extract key information from graphs
            gt_info = self._extract_graph_summary(graph1, "Ground Truth")
            pred_info = self._extract_graph_summary(graph2, "Predicted")
            
            # Format metrics for inclusion (missing entries fall back to {} so
            # the .get(...) lookups in the template below still work)
            bert_score = metrics.get('bert_score', {})
            hall_score = metrics.get('hallucination_score', {})
            omis_score = metrics.get('omission_score', {})
            ged_score = metrics.get('ged_score', {})
            
            # The template is deliberately flush-left: its text is sent to the
            # LLM verbatim, so any indentation here would end up in the prompt.
            prompt = f"""
You are an expert data analyst tasked with providing a comprehensive, human-readable comparison between two charts. Your analysis should be accessible to non-technical stakeholders while being detailed and actionable.

## CHART INFORMATION:

### Ground Truth Chart (Reference):
{gt_info}

### Predicted Chart (Generated):
{pred_info}

## COMPUTED METRICS:
- GraphBERT F1 Score: {bert_score.get('f1', 0):.3f} (Semantic similarity - higher is better)
- Hallucination Rate: {hall_score.get('hallucination_rate', 0):.3f} (False information - lower is better) 
- Omission Rate: {omis_score.get('omission_rate', 0):.3f} (Missing information - lower is better)
- Normalized Graph Edit Distance: {ged_score.get('normalized_ged', 0):.3f} (Structural difference - lower is better)

## DETAILED ISSUES FOUND:

### Hallucinations (False Information):
{self._format_issues_list(hall_score.get('hallucinations', []))}

### Omissions (Missing Information):
{self._format_issues_list(omis_score.get('omissions', []))}

## TASK:
Provide a detailed analysis in the following structure. Use specific examples from the charts and reference actual data points, labels, and values wherever possible.

## REQUIRED OUTPUT FORMAT:

### 📊 EXECUTIVE SUMMARY
[2-3 sentence high-level assessment of how well the predicted chart matches the ground truth]

### 🎯 OVERALL PERFORMANCE ASSESSMENT
**Accuracy Score: [X/10]**
[Brief justification based on metrics]

**Key Strengths:**
- [Specific examples of what the predicted chart got right]
- [Reference actual data points, labels, axis titles, etc.]

**Critical Issues:**
- [Specific examples of major problems with concrete details]
- [Point to exact discrepancies in data values, missing elements, etc.]

### 🔍 DETAILED BREAKDOWN BY CHART ELEMENTS

**Title and Labels:**
- Ground Truth: [Specific title/labels from GT chart]
- Predicted: [Specific title/labels from predicted chart]
- Assessment: [What matches, what differs, impact on understanding]

**Data Accuracy:**
- [Compare specific data points with exact values]
- [Highlight any missing or incorrect data series]
- [Discuss trends and patterns - are they preserved?]

**Visual Design:**
- [Compare chart types, colors, layout]
- [Assess if visual encoding effectively represents the data]

### ⚠️ SPECIFIC ERRORS WITH EXAMPLES

**Data Errors:**
- [List each incorrect data point with: "Ground truth shows X, but predicted shows Y"]
- [Quantify the magnitude of errors where applicable]

**Missing Elements:**
- [List each missing element: "The predicted chart is missing [specific element] which shows [importance]"]

**Added Elements (Hallucinations):**
- [List each incorrectly added element: "The predicted chart incorrectly includes [specific element] which doesn't exist in the ground truth"]

### 💡 ACTIONABLE RECOMMENDATIONS

**Immediate Fixes:**
1. [Specific correction needed with exact details]
2. [Another specific fix with concrete steps]

**Improvement Suggestions:**
1. [Suggestion for better data accuracy]
2. [Suggestion for better visual representation]

**Quality Assurance:**
- [Recommend specific validation checks]
- [Suggest verification steps for similar charts]

### 📈 IMPACT ASSESSMENT
[Explain how the identified issues would affect:]
- Data interpretation by end users
- Decision-making based on this chart
- Overall credibility and trust

### 🏆 CONCLUSION
[Final verdict with specific confidence level and key takeaway message]

## INSTRUCTIONS:
1. Be specific - always reference actual data points, labels, and values from the charts
2. Use concrete examples rather than general statements
3. Explain the business/analytical impact of each issue
4. Provide actionable recommendations with clear steps
5. Use a tone that's professional but accessible to non-technical audiences
6. Focus on the most impactful differences first
7. If charts are very similar, still provide constructive analysis
8. Include specific numerical references wherever possible
"""
            
            return prompt
        
        def _extract_graph_summary(self, graph, label):
            """Extract key information from graph structure for prompt"""
            if not isinstance(graph, dict):
                return f"{label}: Unable to parse graph structure"
            
            summary = [f"{label}:"]
            summary.append(f"- Chart Type: {graph.get('chart_type', 'Unknown')}")
            summary.append(f"- Title: '{graph.get('title', 'No title')}'")
            
            # Extract axis information
            axes = graph.get('axes', {})
            x_axis = axes.get('x_axis', {})
            y_axis = axes.get('y_axis', {})
            
            if x_axis.get('title'):
                summary.append(f"- X-axis: {x_axis['title']}")
            if y_axis.get('title'):
                summary.append(f"- Y-axis: {y_axis['title']}")
            
            # Extract data points summary
            data_points = graph.get('data_points', [])
            summary.append(f"- Data Points: {len(data_points)} points")
            
            if graph.get('chart_type') == 'pie':
                # For pie charts, show segment breakdown
                segments = []
                for point in data_points[:5]:  # Show first 5 segments
                    if 'label' in point and 'value' in point:
                        segments.append(f"{point['label']}: {point['value']}%")
                if segments:
                    summary.append(f"- Segments: {', '.join(segments)}")
                    if len(data_points) > 5:
                        summary.append(f"  (... and {len(data_points) - 5} more)")
            else:
                # For other charts, show data range
                if data_points:
                    x_values = [p.get('data_x') for p in data_points if p.get('data_x') is not None]
                    y_values = [p.get('data_y') for p in data_points if p.get('data_y') is not None]
                    
                    if x_values and y_values:
                        summary.append(f"- X range: {min(x_values)} to {max(x_values)}")
                        summary.append(f"- Y range: {min(y_values)} to {max(y_values)}")
            
            # Add semantic content if available
            semantic = graph.get('semantic_content', {})
            if semantic.get('data_trend'):
                summary.append(f"- Data Trend: {semantic['data_trend']}")
            
            return '\n'.join(summary)
        
        def _format_issues_list(self, issues):
            """Format list of issues for prompt"""
            if not issues:
                return "None detected"
            
            formatted = []
            for i, issue in enumerate(issues[:10], 1):  # Show first 10 issues
                issue_type = issue.get('type', 'Unknown')
                content = issue.get('content', 'Unknown')
                reason = issue.get('reason', 'No reason provided')
                formatted.append(f"{i}. {issue_type}: {content} ({reason})")
            
            if len(issues) > 10:
                formatted.append(f"... and {len(issues) - 10} more issues")
            
            return '\n'.join(formatted) if formatted else "None detected"
        
        def _call_llm_with_images_for_explanation(self, prompt, image_inputs):
            """Call LLM with both text prompt and images for detailed explanation"""
            try:
                if self.llm_provider == "Claude":
                    # Prepare message content with images and text
                    content = []
                    
                    # Add images first
                    for img_input in image_inputs:
                        content.append({
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": "image/jpeg",  # Assume JPEG for simplicity
                                "data": img_input["data"]
                            }
                        })
                    
                    # Add text prompt
                    content.append({
                        "type": "text",
                        "text": prompt
                    })
                    
                    message = self.llm_client.messages.create(
                        model=self.model_config.get("model", "claude-3-5-sonnet-20241022"),
                        max_tokens=self.model_config.get("max_tokens", 4000),
                        temperature=self.model_config.get("temperature", 0.1),
                        messages=[{
                            "role": "user",
                            "content": content
                        }]
                    )
                    return message.content[0].text
                    
                elif self.llm_provider.startswith("GPT"):
                    # For GPT-4 with vision
                    content = [{"type": "text", "text": prompt}]
                    
                    # Add images
                    for img_input in image_inputs:
                        content.append({
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{img_input['data']}"
                            }
                        })
                    
                    response = self.llm_client.chat.completions.create(
                        model=self.model_config.get("model", "gpt-4-vision-preview"),
                        messages=[{
                            "role": "user",
                            "content": content
                        }],
                        max_tokens=self.model_config.get("max_tokens", 4000),
                        temperature=self.model_config.get("temperature", 0.1)
                    )
                    return response.choices[0].message.content
                else:
                    return "Detailed explanation with images not supported for this LLM provider"
                    
            except Exception as e:
                return f"Error generating explanation with images: {str(e)}"
        
        def _call_llm_text_only(self, prompt):
            """Call LLM with text-only prompt for explanation"""
            try:
                if self.llm_provider == "Claude":
                    message = self.llm_client.messages.create(
                        model=self.model_config.get("model", "claude-3-5-sonnet-20241022"),
                        max_tokens=self.model_config.get("max_tokens", 4000),
                        temperature=self.model_config.get("temperature", 0.1),
                        messages=[{
                            "role": "user",
                            "content": prompt
                        }]
                    )
                    return message.content[0].text
                    
                elif self.llm_provider.startswith("GPT"):
                    response = self.llm_client.chat.completions.create(
                        model=self.model_config.get("model", "gpt-4"),
                        messages=[{
                            "role": "user",
                            "content": prompt
                        }],
                        max_tokens=self.model_config.get("max_tokens", 4000),
                        temperature=self.model_config.get("temperature", 0.1)
                    )
                    return response.choices[0].message.content
                else:
                    return "Detailed explanation not supported for this LLM provider"
                    
            except Exception as e:
                return f"Error generating text-only explanation: {str(e)}"
        
        def calculate_graphBert_score(self, graph1, graph2):
            """Calculate GraphBERT similarity score between two chart graphs."""
            if self.sentence_model is None:
                return {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'error': 'Sentence model not available'}
            
            # Extract semantic elements from both graphs as sentences
            sentences1 = self._graph_to_sentences(graph1)
            sentences2 = self._graph_to_sentences(graph2)
            
            if not sentences1 or not sentences2:
                return {'precision': 0.0, 'recall': 0.0, 'f1': 0.0}
            
            # Get embeddings for all sentences
            embeddings1 = self.sentence_model.encode(sentences1)
            embeddings2 = self.sentence_model.encode(sentences2)
            
            # Calculate similarity matrix
            similarity_matrix = cosine_similarity(embeddings1, embeddings2)
            
            # Calculate BERT-style precision and recall
            recall_scores = []
            for i in range(len(sentences1)):
                max_sim = np.max(similarity_matrix[i])
                recall_scores.append(max_sim)
            
            precision_scores = []
            for j in range(len(sentences2)):
                max_sim = np.max(similarity_matrix[:, j])
                precision_scores.append(max_sim)
            
            # Calculate final metrics
            recall = np.mean(recall_scores)
            precision = np.mean(precision_scores)
            
            if precision + recall == 0:
                f1 = 0.0
            else:
                f1 = 2 * precision * recall / (precision + recall)
            
            return {
                'precision': float(precision),
                'recall': float(recall), 
                'f1': float(f1),
                'sentences1_count': len(sentences1),
                'sentences2_count': len(sentences2)
            }
        
        def _graph_to_sentences(self, graph):
            """Convert graph elements to natural language sentences for BERTScore comparison."""
            sentences = []
            
            if not isinstance(graph, dict):
                return sentences
                
            # Add title as sentence
            title = graph.get('title', '')
            if title:
                sentences.append(f"Chart title: {title}")
            
            # Add chart type
            chart_type = graph.get('chart_type', '')
            if chart_type:
                sentences.append(f"Chart type: {chart_type}")
            
            # Handle different chart types differently
            if chart_type == 'pie':
                # For pie charts, focus on segments and their values
                data_points = graph.get('data_points', [])
                for point in data_points:
                    if 'label' in point and 'value' in point:
                        sentences.append(f"{point['label']} accounts for {point['value']}% of the total")
                    elif 'description' in point and point['description']:
                        sentences.append(point['description'])
                
                # Add total validation sentence
                total_percentage = sum(point.get('value', 0) for point in data_points if 'value' in point)
                if abs(total_percentage - 100) < 1:  # Allow small rounding errors
                    sentences.append("All segments sum to 100 percent")
                
            else:
                # For line/bar/scatter charts, use axis information
                axes = graph.get('axes', {})
                x_axis = axes.get('x_axis', {})
                y_axis = axes.get('y_axis', {})
                
                if x_axis.get('title'):
                    sentences.append(f"X-axis represents: {x_axis['title']}")
                if y_axis.get('title'):
                    sentences.append(f"Y-axis represents: {y_axis['title']}")
                
                # Add data points as sentences
                data_points = graph.get('data_points', [])
                for point in data_points:
                    if 'description' in point and point['description']:
                        sentences.append(point['description'])
                    elif 'data_x' in point and 'data_y' in point:
                        sentences.append(f"Data point at x={point['data_x']}, y={point['data_y']}")
            
            # Add semantic content
            semantic = graph.get('semantic_content', {})
            if semantic.get('data_trend'):
                sentences.append(f"Data trend is {semantic['data_trend']}")
            
            if semantic.get('temporal_extent'):
                temp = semantic['temporal_extent']
                if 'start_year' in temp and 'end_year' in temp:
                    sentences.append(f"Time period from {temp['start_year']} to {temp['end_year']}")
            
            # Add visual properties
            visual = graph.get('visual_properties', {})
            if visual.get('stroke'):
                sentences.append(f"Line color: {visual['stroke']}")
            
            return sentences
        
        def calculate_hallucination_score(self, graph1, graph2):
            """Calculate hallucination score - elements present in predicted graph but absent in ground truth."""
            # Extract comparable elements from both graphs
            elements1 = self._extract_graph_elements(graph1)
            elements2 = self._extract_graph_elements(graph2)
            
            # Find elements in graph2 that are not in graph1 (hallucinations)
            hallucinations = []
            
            for element_type, element_data in elements2.items():
                ground_truth_data = elements1.get(element_type, set())
                
                if isinstance(element_data, set):
                    hallucinated_items = element_data - ground_truth_data
                    for item in hallucinated_items:
                        hallucinations.append({
                            'type': element_type,
                            'content': item,
                            'reason': f'{element_type} not present in ground truth'
                        })
                elif isinstance(element_data, (str, int, float)):
                    if element_data != elements1.get(element_type):
                        hallucinations.append({
                            'type': element_type,
                            'content': element_data,
                            'expected': elements1.get(element_type),
                            'reason': f'{element_type} differs from ground truth'
                        })
            
            # Calculate hallucination rate
            total_elements = sum(len(v) if isinstance(v, set) else 1 for v in elements2.values())
            hallucination_count = len(hallucinations)
            
            hallucination_rate = hallucination_count / max(total_elements, 1)
            
            return {
                'hallucination_rate': float(hallucination_rate),
                'hallucination_count': hallucination_count,
                'total_predicted_elements': total_elements,
                'hallucinations': hallucinations
            }
        
        def calculate_omission_score(self, graph1, graph2):
            """Calculate omission score - elements present in ground truth but missing in predicted graph."""
            # Extract comparable elements from both graphs
            elements1 = self._extract_graph_elements(graph1)
            elements2 = self._extract_graph_elements(graph2)
            
            # Find elements in graph1 that are not in graph2 (omissions)
            omissions = []
            
            for element_type, element_data in elements1.items():
                predicted_data = elements2.get(element_type, set())
                
                if isinstance(element_data, set):
                    omitted_items = element_data - predicted_data
                    for item in omitted_items:
                        omissions.append({
                            'type': element_type,
                            'content': item,
                            'reason': f'{element_type} missing from prediction'
                        })
                elif isinstance(element_data, (str, int, float)):
                    if element_data != elements2.get(element_type):
                        omissions.append({
                            'type': element_type,
                            'content': element_data,
                            'predicted': elements2.get(element_type),
                            'reason': f'{element_type} not correctly predicted'
                        })
            
            # Calculate omission rate
            total_elements = sum(len(v) if isinstance(v, set) else 1 for v in elements1.values())
            omission_count = len(omissions)
            
            omission_rate = omission_count / max(total_elements, 1)
            
            return {
                'omission_rate': float(omission_rate),
                'omission_count': omission_count,
                'total_ground_truth_elements': total_elements,
                'omissions': omissions
            }
        
        def _extract_graph_elements(self, graph):
            """Extract comparable elements from a chart graph for hallucination/omission analysis."""
            elements = {}
            
            if not isinstance(graph, dict):
                return elements
            
            # Extract title
            if graph.get('title'):
                elements['title'] = graph['title']
            
            # Extract chart type
            if graph.get('chart_type'):
                elements['chart_type'] = graph['chart_type']
            
            chart_type = graph.get('chart_type', '')
            
            if chart_type == 'pie':
                # For pie charts, extract segment data as label-value pairs
                pie_segments = set()
                data_points = graph.get('data_points', [])
                
                for point in data_points:
                    if 'label' in point and 'value' in point:
                        # Round percentage values to 1 decimal place to handle minor variations
                        label = point['label'].strip()
                        value = round(float(point['value']), 1)
                        pie_segments.add((label, value))
                    elif 'description' in point:
                        # Try to parse label and value from description
                        parsed_segment = self._parse_pie_segment_from_description(point['description'])
                        if parsed_segment:
                            pie_segments.add(parsed_segment)
                
                elements['pie_segments'] = pie_segments
                
            else:
                # For other chart types, use existing logic
                # Extract axis titles
                axes = graph.get('axes', {})
                if axes.get('x_axis', {}).get('title'):
                    elements['x_axis_title'] = axes['x_axis']['title']
                if axes.get('y_axis', {}).get('title'):
                    elements['y_axis_title'] = axes['y_axis']['title']
                
                # Extract data points (rounded to avoid floating point precision issues)
                data_points = set()
                for point in graph.get('data_points', []):
                    if 'data_x' in point and 'data_y' in point:
                        x_val = round(point['data_x'], 2) if isinstance(point['data_x'], (int, float)) else point['data_x']
                        y_val = round(point['data_y'], 2) if isinstance(point['data_y'], (int, float)) else point['data_y']
                        data_points.add((x_val, y_val))
                elements['data_points'] = data_points
                
                # Extract axis labels
                x_labels = set()
                y_labels = set()
                
                if 'x_axis' in axes and 'labels' in axes['x_axis']:
                    for label in axes['x_axis']['labels']:
                        if isinstance(label, dict) and 'text' in label:
                            x_labels.add(label['text'])
                
                if 'y_axis' in axes and 'labels' in axes['y_axis']:
                    for label in axes['y_axis']['labels']:
                        if isinstance(label, dict) and 'text' in label:
                            y_labels.add(label['text'])
                
                if x_labels:
                    elements['x_axis_labels'] = x_labels
                if y_labels:
                    elements['y_axis_labels'] = y_labels
            
            # Extract semantic information (common for all chart types)
            semantic = graph.get('semantic_content', {})
            if semantic.get('data_trend'):
                elements['data_trend'] = semantic['data_trend']
            
            return elements
        
        def _parse_pie_segment_from_description(self, description):
            """Parse pie chart segment information from description text"""
            import re
            
            # Look for patterns like "Label accounts for X% of the total" or "Label: X%"
            patterns = [
                r'(.+?)\s+accounts\s+for\s+([\d.]+)%',
                r'(.+?):\s*([\d.]+)%',
                r'(.+?)\s+([\d.]+)%',
                r'(.+?)\s*-\s*([\d.]+)%'
            ]
            
            for pattern in patterns:
                match = re.search(pattern, description, re.IGNORECASE)
                if match:
                    label = match.group(1).strip()
                    try:
                        value = round(float(match.group(2)), 1)
                        return (label, value)
                    except ValueError:
                        continue
            
            return None
        
        def calculate_GED_score(self, graph1, graph2):
            """Approximate the Graph Edit Distance (GED) between two chart graphs.

            The distance is the sum of all edit-operation counts reported by
            ``_calculate_edit_operations``; it is normalised by the larger node
            count plus the larger edge count of the two NetworkX graphs.

            Args:
                graph1: Ground-truth chart graph.
                graph2: Predicted chart graph.

            Returns:
                dict with ``ged_distance``, ``normalized_ged``,
                ``edit_operations`` and the node/edge counts of both graphs.
            """
            truth_nx = self._convert_to_networkx(graph1, "ground_truth")
            predicted_nx = self._convert_to_networkx(graph2, "predicted")

            edit_ops = self._calculate_edit_operations(graph1, graph2)

            # Every operation costs one unit in this simple approximation
            operation_names = (
                'node_insertions',
                'node_deletions',
                'node_substitutions',
                'edge_insertions',
                'edge_deletions',
                'edge_substitutions',
            )
            ged_distance = sum(edit_ops[name] for name in operation_names)

            truth_nodes = truth_nx.number_of_nodes()
            truth_edges = truth_nx.number_of_edges()
            predicted_nodes = predicted_nx.number_of_nodes()
            predicted_edges = predicted_nx.number_of_edges()

            # Guard against division by zero when both graphs are empty
            largest_graph = max(truth_nodes, predicted_nodes) + max(truth_edges, predicted_edges)
            normalized_ged = ged_distance / max(largest_graph, 1)

            return {
                'ged_distance': ged_distance,
                'normalized_ged': float(normalized_ged),
                'edit_operations': edit_ops,
                'graph1_nodes': truth_nodes,
                'graph1_edges': truth_edges,
                'graph2_nodes': predicted_nodes,
                'graph2_edges': predicted_edges
            }
        
        def _convert_to_networkx(self, graph, graph_name="graph"):
            """Build a NetworkX ``DiGraph`` mirroring the structure of a chart graph.

            Nodes are created for the title, chart type and then either the pie
            centre plus its segments, or the titled axes plus the data points.
            Node names are ``<kind>_<n>`` where ``n`` counts nodes created so far.

            Args:
                graph: Chart description dict; anything else yields an empty graph.
                graph_name: Accepted for callers; not used by this method.

            Returns:
                networkx.DiGraph: The constructed graph.
            """
            G = nx.DiGraph()

            if not isinstance(graph, dict):
                return G

            created = 0

            def new_node(kind, **attributes):
                # Register a node named after its kind and creation index
                nonlocal created
                name = f"{kind}_{created}"
                G.add_node(name, **attributes)
                created += 1
                return name

            if graph.get('title'):
                new_node("title", type="title", content=graph['title'])

            if graph.get('chart_type'):
                new_node("chart_type", type="chart_type", content=graph['chart_type'])

            chart_type = graph.get('chart_type', '')

            if chart_type == 'pie':
                # Hub-and-spoke layout: one centre node linked to every segment
                centre = new_node("pie_center", type="pie_center")

                segments = []
                for position, point in enumerate(graph.get('data_points', []), start=1):
                    segment = new_node(
                        "pie_segment",
                        type="pie_segment",
                        label=point.get('label', f'Segment {position}'),
                        value=point.get('value', 0),
                        percentage=point.get('value', 0),
                    )
                    segments.append(segment)
                    G.add_edge(centre, segment, type="contains_segment")

                # Ring of neighbours; the last segment wraps around to the first
                segment_count = len(segments)
                for index, segment in enumerate(segments):
                    neighbour = segments[(index + 1) % segment_count]
                    G.add_edge(segment, neighbour, type="adjacent_segment")

                return G

            # Line/bar/scatter charts: axes first, then the data points
            axes = graph.get('axes', {})

            x_axis_node = None
            if axes.get('x_axis', {}).get('title'):
                x_axis_node = new_node("x_axis", type="x_axis", title=axes['x_axis']['title'])

            y_axis_node = None
            if axes.get('y_axis', {}).get('title'):
                y_axis_node = new_node("y_axis", type="y_axis", title=axes['y_axis']['title'])

            point_nodes = []
            for point in graph.get('data_points', []):
                point_node = new_node(
                    "data_point",
                    type="data_point",
                    x=point.get('data_x'),
                    y=point.get('data_y'),
                    description=point.get('description', ''),
                )
                point_nodes.append(point_node)

                # Each point references whichever axes exist
                if x_axis_node:
                    G.add_edge(point_node, x_axis_node, type="uses_x_axis")
                if y_axis_node:
                    G.add_edge(point_node, y_axis_node, type="uses_y_axis")

            # Only line charts carry an ordering between consecutive points
            if graph.get('chart_type') == 'line' and len(point_nodes) > 1:
                for current, following in zip(point_nodes, point_nodes[1:]):
                    G.add_edge(current, following, type="sequence")

            return G
        
        def _calculate_edit_operations(self, graph1, graph2):
            """Calculate the edit operations needed to transform graph1 into graph2."""
            elements1 = self._extract_graph_elements(graph1)
            elements2 = self._extract_graph_elements(graph2)
            
            operations = {
                'node_insertions': 0,
                'node_deletions': 0, 
                'node_substitutions': 0,
                'edge_insertions': 0,
                'edge_deletions': 0,
                'edge_substitutions': 0
            }
            
            # Compare each element type
            all_keys = set(elements1.keys()) | set(elements2.keys())
            
            for key in all_keys:
                val1 = elements1.get(key)
                val2 = elements2.get(key)
                
                if val1 is None and val2 is not None:
                    # Insertion
                    if isinstance(val2, set):
                        operations['node_insertions'] += len(val2)
                    else:
                        operations['node_insertions'] += 1
                elif val1 is not None and val2 is None:
                    # Deletion
                    if isinstance(val1, set):
                        operations['node_deletions'] += len(val1)
                    else:
                        operations['node_deletions'] += 1
                elif val1 != val2:
                    # Substitution
                    if isinstance(val1, set) and isinstance(val2, set):
                        # Calculate set differences
                        inserted = val2 - val1
                        deleted = val1 - val2
                        operations['node_insertions'] += len(inserted)
                        operations['node_deletions'] += len(deleted)
                    else:
                        operations['node_substitutions'] += 1
            
            return operations
        
        def chartToVega(self, chart_input):
            """Turn a chart image into a chart structure by querying the LLM.

            Pipeline: prepare the image, build the analysis prompt, call the
            configured LLM, then parse its reply. Any exception along the way is
            printed and reported through ``_chart_analysis_error`` in an empty
            root frame instead of being raised.

            Args:
                chart_input: Image input accepted by ``_prepare_image``.

            Returns:
                dict: The parsed chart structure, or the empty fallback frame.
            """
            try:
                image_data, media_type = self._prepare_image(chart_input)

                llm_response = self._call_llm(
                    self._create_chart_analysis_prompt(),
                    image_data,
                    media_type,
                )

                # _call_llm signals failure with None; blank text is equally unusable
                if not (llm_response is not None and llm_response.strip()):
                    raise ValueError("LLM returned empty or None response")

                return self._parse_llm_response_to_vega(llm_response)

            except Exception as e:
                print(f"Error in chartToVega: {str(e)}")
                # Safe fallback: an empty root frame annotated with the failure
                fallback = {
                    "marktype": "group",
                    "name": "root",
                    "role": "frame",
                    "interactive": True,
                    "clip": False,
                    "items": [],
                    "zindex": 0,
                }
                fallback["_chart_analysis_error"] = str(e)
                return fallback
        
        def _detect_image_format(self, file_path):
            """Detect image format from file path or content"""
            # First try to get from file extension
            mime_type, _ = mimetypes.guess_type(file_path)
            if mime_type and mime_type.startswith('image/'):
                return mime_type
            
            # Fallback: try to detect from file content
            try:
                with Image.open(file_path) as img:
                    format_map = {
                        'JPEG': 'image/jpeg',
                        'PNG': 'image/png',
                        'GIF': 'image/gif',
                        'WebP': 'image/webp',
                        'BMP': 'image/bmp'
                    }
                    return format_map.get(img.format, 'image/jpeg')
            except Exception:
                # Default fallback
                return 'image/jpeg'
        
        def _prepare_image(self, chart_input):
            """Prepare image data for LLM input"""
            if isinstance(chart_input, str):
                if os.path.isfile(chart_input):
                    # File path
                    media_type = self._detect_image_format(chart_input)
                    with open(chart_input, "rb") as image_file:
                        image_bytes = image_file.read()
                    return base64.b64encode(image_bytes).decode('utf-8'), media_type
                elif chart_input.startswith('data:image'):
                    # Data URL - extract media type
                    header, data = chart_input.split(',', 1)
                    media_type = header.split(':')[1].split(';')[0]
                    return data, media_type
                elif len(chart_input) > 100:
                    # Assume it's base64 - default to JPEG
                    return chart_input, 'image/jpeg'
                else:
                    raise ValueError("Invalid image input: not a valid file path or base64 string")
            
            elif isinstance(chart_input, bytes):
                # Raw bytes - try to detect format
                try:
                    img = Image.open(io.BytesIO(chart_input))
                    format_map = {
                        'JPEG': 'image/jpeg',
                        'PNG': 'image/png',
                        'GIF': 'image/gif',
                        'WebP': 'image/webp',
                        'BMP': 'image/bmp'
                    }
                    media_type = format_map.get(img.format, 'image/jpeg')
                except Exception:
                    media_type = 'image/jpeg'
                
                return base64.b64encode(chart_input).decode('utf-8'), media_type
            
            else:
                raise ValueError("Chart input must be file path, base64 string, or bytes")
        
        def _create_chart_analysis_prompt(self):
            """Return the static instruction prompt sent to the LLM with the chart image.

            The prompt asks for one JSON object with the keys ``title``,
            ``description``, ``chart_type``, ``data``, ``x_axis``, ``y_axis``,
            ``chart_dimensions`` and ``styling``. The embedded template is
            illustrative rather than valid JSON: it contains ``//`` comments,
            ``...`` placeholders and ``a|b`` alternatives.

            Returns:
                str: The prompt text, including this literal's source indentation.
            """
            return """
            Analyze this chart image and extract ALL data points, axis information, and visual elements with PRECISE values. 
            The chart could be a line chart, bar chart, pie chart, scatter plot, or other visualization type.
            
            Please provide a detailed analysis in the following JSON format:

            {
                "title": "Exact chart title text",
                "description": "Brief description of what the chart shows",
                "chart_type": "line|bar|scatter|pie|area|donut|etc.",
                "data": [
                    // For line/bar/scatter charts:
                    {"x": exact_value, "y": exact_value, "label": "optional_label", "description": "point description"},
                    
                    // For pie/donut charts:
                    {"label": "segment_name", "value": percentage_value, "description": "segment description"},
                    
                    // Include ALL data points/segments visible in the chart
                    ...
                ],
                "x_axis": {
                    "title": "Exact X-axis title (for non-pie charts)",
                    "type": "quantitative|temporal|ordinal|nominal",
                    "domain": [min_value, max_value],
                    "ticks": [list_of_tick_values],
                    "tick_labels": ["label1", "label2", ...]
                },
                "y_axis": {
                    "title": "Exact Y-axis title (for non-pie charts)", 
                    "type": "quantitative|temporal|ordinal|nominal",
                    "domain": [min_value, max_value],
                    "ticks": [list_of_tick_values],
                    "tick_labels": ["label1", "label2", ...]
                },
                "chart_dimensions": {
                    "width": estimated_width,
                    "height": estimated_height
                },
                "styling": {
                    "primary_color": "#color",
                    "line_width": width_in_pixels,
                    "grid_lines": true/false,
                    "background_color": "#color"
                }
            }

            CRITICAL REQUIREMENTS:
            - Extract EVERY visible data point with exact values
            - For PIE CHARTS: Extract each segment's label and percentage value (ensure they sum to ~100%)
            - For LINE/BAR CHARTS: Extract exact X and Y coordinates for every data point
            - Read ALL axis labels and tick values precisely
            - Include the complete chart title exactly as shown
            - For temporal data (years/dates), extract exact years/dates
            - For numerical axes, read exact tick values and ranges
            - Include descriptive text for each data point/segment
            - Identify chart type correctly (pie, line, bar, scatter, etc.)
            """
        
        def _call_llm(self, prompt, image_data, media_type):
            """Call the configured LLM with the prompt and image"""
            try:
                if self.llm_provider.startswith("GPT"):
                    return self._call_openai_llm(prompt, image_data)
                elif self.llm_provider == "Claude":
                    return self._call_claude_llm(prompt, image_data, media_type)
                else:
                    raise NotImplementedError(f"LLM provider {self.llm_provider} not implemented")
            except Exception as e:
                print(f"LLM call failed: {str(e)}")
                return None
        
        def _call_openai_llm(self, prompt, image_data, media_type=None):
            """Call OpenAI GPT with vision capabilities.

            Args:
                prompt: Instruction text sent alongside the image.
                image_data: Base64-encoded image content (without a ``data:`` prefix).
                media_type: Optional MIME type of the image. When omitted it is
                    inferred from the base64-encoded file signature, falling
                    back to ``image/jpeg``.

            Returns:
                str: Text content of the first completion choice.

            Raises:
                Exception: Wrapping any failure of the request or an
                    empty/invalid reply; the original error is chained as the cause.
            """
            if not media_type:
                # Label the data URL with the real image type instead of always
                # claiming JPEG; these are the base64 forms of the file signatures.
                base64_signatures = (
                    ('/9j/', 'image/jpeg'),
                    ('iVBORw0KGgo', 'image/png'),
                    ('R0lGOD', 'image/gif'),
                    ('UklGR', 'image/webp'),
                )
                media_type = 'image/jpeg'
                if isinstance(image_data, str):
                    for prefix, mime in base64_signatures:
                        if image_data.startswith(prefix):
                            media_type = mime
                            break

            try:
                response = self.llm_client.chat.completions.create(
                    model=self.model_config.get("model", "gpt-4-vision-preview"),
                    messages=[
                        {
                            "role": "user",
                            "content": [
                                {"type": "text", "text": prompt},
                                {
                                    "type": "image_url",
                                    "image_url": {
                                        "url": f"data:{media_type};base64,{image_data}"
                                    }
                                }
                            ]
                        }
                    ],
                    max_tokens=self.model_config.get("max_tokens", 2000),
                    temperature=self.model_config.get("temperature", 0.1)
                )

                # Validate response structure
                if not response or not response.choices or not response.choices[0].message:
                    raise Exception("Invalid response structure from OpenAI API")

                content = response.choices[0].message.content
                if not content:
                    raise Exception("Empty content in OpenAI API response")

                return content

            except Exception as e:
                # Keep the original traceback reachable through __cause__
                raise Exception(f"OpenAI API call failed: {str(e)}") from e
        
        def _call_claude_llm(self, prompt, image_data, media_type):
            """Send the image and prompt to Anthropic Claude and return its text reply.

            Args:
                prompt: Instruction text placed after the image in the message.
                image_data: Base64-encoded image content.
                media_type: MIME type declared for the image.

            Returns:
                str: Text of the first content block of the reply.

            Raises:
                Exception: Wrapping any request failure or an empty/invalid reply.
            """
            try:
                create_message = self.llm_client.messages.create
                config = self.model_config

                image_block = {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": media_type,
                        "data": image_data
                    }
                }
                text_block = {
                    "type": "text",
                    "text": prompt
                }

                message = create_message(
                    model=config.get("model", "claude-3-5-sonnet-20241022"),
                    max_tokens=config.get("max_tokens", 2000),
                    temperature=config.get("temperature", 0.1),
                    messages=[{"role": "user", "content": [image_block, text_block]}]
                )

                # The reply must carry at least one non-empty content block
                if not (message and message.content and message.content[0]):
                    raise Exception("Invalid response structure from Claude API")

                content = message.content[0].text
                if content:
                    return content

                raise Exception("Empty content in Claude API response")

            except Exception as e:
                raise Exception(f"Claude API call failed: {str(e)}")
        
        def _parse_llm_response_to_vega(self, llm_response):
            """Parse LLM response and convert to full Vega specification (not Vega-Lite)"""
            try:
                # Validate input
                if not llm_response or not llm_response.strip():
                    raise ValueError("Empty or None LLM response")
                
                # Try to extract JSON from the response
                json_start = llm_response.find('{')
                json_end = llm_response.rfind('}') + 1
                
                if json_start != -1 and json_end != -1:
                    json_str = llm_response[json_start:json_end]
                    chart_data = json.loads(json_str)
                else:
                    raise ValueError("No valid JSON found in LLM response")
                
                # Convert to full Vega format
                vega_spec = self._build_vega_specification(chart_data)
                
                return vega_spec
                
            except (json.JSONDecodeError, KeyError, ValueError) as e:
                print(f"Error parsing LLM response: {str(e)}")
                # Fallback: create basic structure if parsing fails
                return {
                    "marktype": "group",
                    "name": "root",
                    "role": "frame",
                    "interactive": True,
                    "clip": False,
                    "items": [],
                    "zindex": 0,
                    "_parse_error": f"Error parsing LLM response: {str(e)}"
                }
        
        def _build_vega_specification(self, chart_data):
            """Build complete Vega specification from chart data"""
            
            # Validate input
            if not isinstance(chart_data, dict):
                chart_data = {}
            
            # Extract basic info with safe defaults
            title = chart_data.get("title", "Chart")
            data_points = chart_data.get("data", [])
            if not isinstance(data_points, list):
                data_points = []
                
            x_axis = chart_data.get("x_axis", {})
            if not isinstance(x_axis, dict):
                x_axis = {}
                
            y_axis = chart_data.get("y_axis", {})
            if not isinstance(y_axis, dict):
                y_axis = {}
                
            chart_type = self._normalize_chart_type(chart_data.get("chart_type", "line"))
            
            dimensions = chart_data.get("chart_dimensions", {"width": 200, "height": 200})
            if not isinstance(dimensions, dict):
                dimensions = {"width": 200, "height": 200}
                
            styling = chart_data.get("styling", {})
            if not isinstance(styling, dict):
                styling = {}
            
            width = dimensions.get("width", 200)
            height = dimensions.get("height", 200)
            
            # Build main frame
            vega_spec = {
                "marktype": "group",
                "name": "root", 
                "role": "frame",
                "interactive": True,
                "clip": False,
                "items": [],
                "zindex": 0
            }
            
            # Main chart area
            main_group = {
                "items": [],
                "x": 0,
                "y": 0,
                "width": width,
                "height": height,
                "fill": "transparent",
                "stroke": "#ddd"
            }
            
            try:
                if chart_type == "pie":
                    # Add pie chart marks
                    main_group["items"].append(self._create_pie_marks(data_points, width, height, styling))
                else:
                    # Add grid lines and axes for non-pie charts
                    axes_items = self._create_axes(x_axis, y_axis, width, height)
                    if axes_items:
                        main_group["items"].extend(axes_items)
                    
                    # Add data marks based on chart type
                    if chart_type == "line":
                        main_group["items"].append(self._create_line_marks(data_points, x_axis, y_axis, width, height, styling))
                    elif chart_type in ["scatter", "point"]:
                        main_group["items"].append(self._create_point_marks(data_points, x_axis, y_axis, width, height, styling))
                    elif chart_type == "bar":
                        main_group["items"].append(self._create_bar_marks(data_points, x_axis, y_axis, width, height, styling))
                    # Add other chart type handling here as needed
                
                # Add title
                if title:
                    main_group["items"].append(self._create_title(title, width))
                    
            except Exception as e:
                print(f"Error creating chart marks: {str(e)}")
                # Add error information to the structure
                main_group["items"].append({
                    "marktype": "text",
                    "role": "error",
                    "text": f"Error creating chart: {str(e)}",
                    "x": width / 2,
                    "y": height / 2
                })
            
            vega_spec["items"] = [main_group]
            
            return vega_spec
        
        def _normalize_chart_type(self, chart_type):
            """Normalize chart type names returned by the LLM"""
            if not chart_type:
                return "line"
                
            ct = str(chart_type).strip().lower()
            aliases = {
                "donut": "pie",
                "doughnut": "pie",
                "bubble": "scatter",
                "scatterplot": "scatter",
                "points": "point",
                "line chart": "line",
                "bar chart": "bar",
            }
            return aliases.get(ct, ct if ct in ["pie", "line", "bar", "scatter", "point", "area"] else "line")
        
        def _create_point_marks(self, data_points, x_axis, y_axis, width, height, styling):
            """Create point marks (scatter plot) from data points with robust type handling"""
            if not data_points or not isinstance(data_points, list):
                return {"marktype": "point", "items": []}
            
            numeric_points = []
            for point in data_points:
                if not isinstance(point, dict):
                    continue
                    
                try:
                    x_val = float(point.get("x", point.get("data_x", 0)))
                except (ValueError, TypeError):
                    x_val = point.get("x", point.get("data_x"))
                try:
                    y_val = float(point.get("y", point.get("data_y", 0)))
                except (ValueError, TypeError):
                    y_val = point.get("y", point.get("data_y"))
                    
                if isinstance(x_val, (int, float)) and isinstance(y_val, (int, float)):
                    numeric_points.append({
                        "x": float(x_val),
                        "y": float(y_val),
                        "description": point.get("description", f"X: {x_val}, Y: {y_val}")
                    })
                    
            if not numeric_points:
                return {"marktype": "point", "items": []}
            
            x_values = [p["x"] for p in numeric_points]
            y_values = [p["y"] for p in numeric_points]
            x_domain = x_axis.get("domain", [min(x_values), max(x_values)])
            y_domain = y_axis.get("domain", [min(y_values), max(y_values)])
            
            try:
                x_domain = [float(x_domain[0]), float(x_domain[1])]
            except (Exception, IndexError):
                x_domain = [min(x_values), max(x_values)]
                
            try:
                y_domain = [float(y_domain[0]), float(y_domain[1])]
            except (Exception, IndexError):
                y_domain = [min(y_values), max(y_values)]
            
            point_color = styling.get("point_color", "#1f77b4")
            point_size = styling.get("point_size", 40)
            
            items = []
            for p in numeric_points:
                items.append({
                    "x": self._scale_value(p["x"], x_domain, [0, width]),
                    "y": self._scale_value(p["y"], y_domain, [height, 0]),
                    "fill": point_color,
                    "size": point_size,
                    "description": p["description"],
                })
                
            return {
                "marktype": "point",
                "name": "marks",
                "role": "mark",
                "interactive": True,
                "clip": False,
                "items": items,
                "zindex": 0
            }
        
        def _create_bar_marks(self, data_points, x_axis, y_axis, width, height, styling):
            """Create bar marks from data points, supporting categorical X"""
            if not data_points or not isinstance(data_points, list):
                return {"marktype": "bar", "items": []}
            
            # Extract categories and values
            categories = []
            values = []
            
            for point in data_points:
                if not isinstance(point, dict):
                    continue
                    
                categories.append(str(point.get("label", point.get("x", ""))))
                try:
                    values.append(float(point.get("y", point.get("value", 0))))
                except (ValueError, TypeError):
                    continue
                    
            if not categories or not values:
                return {"marktype": "bar", "items": []}
            
            # Map categories to index positions
            unique_cats = list(dict.fromkeys(categories))
            x_positions = {cat: idx for idx, cat in enumerate(unique_cats)}
            
            y_domain = y_axis.get("domain", [0, max(values)])
            try:
                y_domain = [float(y_domain[0]), float(y_domain[1])]
            except Exception:
                y_domain = [0, max(values)]
            
            bar_width = max(5, width / max(1, len(unique_cats)) * 0.6)
            items = []
            
            for cat, val in zip(categories, values):
                x_center = self._scale_value(x_positions[cat], [0, max(1, len(unique_cats) - 1)], [0, width])
                y_top = self._scale_value(val, y_domain, [height, 0])
                items.append({
                    "x": x_center - bar_width / 2,
                    "y": y_top,
                    "width": bar_width,
                    "height": height - y_top,
                    "fill": styling.get("bar_color", "#4CAF50"),
                    "description": f"{cat}: {val}",
                })
                
            return {
                "marktype": "bar",
                "name": "marks",
                "role": "mark",
                "interactive": True,
                "clip": False,
                "items": items,
                "zindex": 0
            }
        
        def _create_pie_marks(self, data_points, width, height, styling):
            """Create pie chart marks from data points"""
            if not data_points or not isinstance(data_points, list):
                return {"marktype": "arc", "items": []}
            
            # Extract values and labels
            segments = []
            for point in data_points:
                if not isinstance(point, dict):
                    continue
                    
                if 'label' in point and 'value' in point:
                    try:
                        value = float(point['value'])
                        segments.append({
                            'label': str(point['label']),
                            'value': value,
                            'description': point.get('description', f"{point['label']}: {point['value']}%")
                        })
                    except (ValueError, TypeError):
                        continue
            
            if not segments:
                return {"marktype": "arc", "items": []}
            
            # Calculate angles for pie segments
            total_value = sum(seg['value'] for seg in segments)
            if total_value == 0:
                return {"marktype": "arc", "items": []}
            
            center_x = width / 2
            center_y = height / 2
            radius = min(width, height) / 3
            
            pie_items = []
            current_angle = -90  # Start from top
            
            for segment in segments:
                angle_size = (segment['value'] / total_value) * 360
                
                pie_items.append({
                    "x": center_x,
                    "y": center_y,
                    "startAngle": current_angle,
                    "endAngle": current_angle + angle_size,
                    "innerRadius": 0,
                    "outerRadius": radius,
                    "fill": styling.get("primary_color", "#4CAF50"),
                    "stroke": "#ffffff",
                    "strokeWidth": 2,
                    "label": segment['label'],
                    "value": segment['value'],
                    "description": segment['description']
                })
                
                current_angle += angle_size
            
            return {
                "marktype": "arc",
                "name": "pie_marks",
                "role": "mark", 
                "interactive": True,
                "clip": False,
                "items": pie_items,
                "zindex": 0
            }
        
        def _create_axes(self, x_axis, y_axis, width, height):
            """Create axis groups with grids, ticks, and labels"""
            axes = []
            
            try:
                # X-axis grid
                x_grid = self._create_x_grid(x_axis, width, height)
                if x_grid:
                    axes.append(x_grid)
                
                # Y-axis grid
                y_grid = self._create_y_grid(y_axis, width, height)
                if y_grid:
                    axes.append(y_grid)
                
                # X-axis
                x_axis_group = self._create_x_axis(x_axis, width, height)
                if x_axis_group:
                    axes.append(x_axis_group)
                
                # Y-axis
                y_axis_group = self._create_y_axis(y_axis, width, height)
                if y_axis_group:
                    axes.append(y_axis_group)
                    
            except Exception as e:
                print(f"Error creating axes: {str(e)}")
            
            return axes
        
        def _create_x_grid(self, x_axis, width, height):
            """Create X-axis grid lines with robust type handling"""
            if not isinstance(x_axis, dict):
                return None
                
            ticks = x_axis.get("ticks", [])
            if not ticks or not isinstance(ticks, list):
                return None
            
            # Convert ticks to numeric values, filter out non-numeric ones
            numeric_ticks = []
            for tick in ticks:
                try:
                    numeric_ticks.append(float(tick))
                except (ValueError, TypeError):
                    # Skip non-numeric ticks for grid creation
                    continue
            
            if not numeric_ticks:
                return None
                
            domain = x_axis.get("domain", [min(numeric_ticks), max(numeric_ticks)])
            
            # Ensure domain values are numeric
            try:
                domain = [float(domain[0]), float(domain[1])]
            except (ValueError, TypeError, IndexError):
                domain = [min(numeric_ticks), max(numeric_ticks)]
            
            grid_items = []
            
            for tick in numeric_ticks:
                x_pos = self._scale_value(tick, domain, [0, width])
                grid_items.append({
                    "x": x_pos,
                    "y": -height,
                    "opacity": 1,
                    "stroke": "#ddd",
                    "strokeWidth": 0.2,
                    "y2": 0
                })
            
            return {
                "marktype": "group",
                "role": "axis",
                "interactive": False,
                "clip": False,
                "items": [{
                    "items": [{
                        "marktype": "rule",
                        "role": "axis-grid",
                        "interactive": False,
                        "clip": False,
                        "items": grid_items,
                        "zindex": 0
                    }],
                    "x": 0.5,
                    "y": height + 0.5,
                    "orient": "bottom"
                }],
                "zindex": 0,
                "aria": False
            }
        
        def _create_y_grid(self, y_axis, width, height):
            """Create Y-axis grid lines with robust type handling"""
            if not isinstance(y_axis, dict):
                return None
                
            ticks = y_axis.get("ticks", [])
            if not ticks or not isinstance(ticks, list):
                return None
            
            # Convert ticks to numeric values, filter out non-numeric ones
            numeric_ticks = []
            for tick in ticks:
                try:
                    numeric_ticks.append(float(tick))
                except (ValueError, TypeError):
                    # Skip non-numeric ticks for grid creation
                    continue
            
            if not numeric_ticks:
                return None
                
            domain = y_axis.get("domain", [min(numeric_ticks), max(numeric_ticks)])
            
            # Ensure domain values are numeric
            try:
                domain = [float(domain[0]), float(domain[1])]
            except (ValueError, TypeError, IndexError):
                domain = [min(numeric_ticks), max(numeric_ticks)]
            
            grid_items = []
            
            for tick in numeric_ticks:
                y_pos = self._scale_value(tick, domain, [height, 0])
                grid_items.append({
                    "x": 0,
                    "y": y_pos,
                    "opacity": 1,
                    "stroke": "#ddd",
                    "strokeWidth": 0.2,
                    "x2": width
                })
            
            return {
                "marktype": "group",
                "role": "axis",
                "interactive": False,
                "clip": False,
                "items": [{
                    "items": [{
                        "marktype": "rule",
                        "role": "axis-grid",
                        "interactive": False,
                        "clip": False,
                        "items": grid_items,
                        "zindex": 0
                    }],
                    "x": 0.5,
                    "y": 0.5,
                    "orient": "left"
                }],
                "zindex": 0,
                "aria": False
            }
        
        def _create_x_axis(self, x_axis, width, height):
            """Create X-axis with ticks and labels with robust type handling"""
            if not isinstance(x_axis, dict):
                return None
                
            ticks = x_axis.get("ticks", [])
            tick_labels = x_axis.get("tick_labels", [str(t) for t in ticks] if ticks else [])
            title = x_axis.get("title", "")
            
            if not ticks or not isinstance(ticks, list):
                return None
            
            # Convert ticks to numeric values where possible
            numeric_ticks = []
            valid_labels = []
            
            for i, tick in enumerate(ticks):
                try:
                    numeric_tick = float(tick)
                    numeric_ticks.append(numeric_tick)
                    # Use corresponding label if available, otherwise convert tick to string
                    if i < len(tick_labels):
                        valid_labels.append(str(tick_labels[i]))
                    else:
                        valid_labels.append(str(tick))
                except (ValueError, TypeError):
                    # For non-numeric ticks, use position-based approximation
                    numeric_ticks.append(i)
                    valid_labels.append(str(tick) if i < len(tick_labels) else str(tick))
            
            if not numeric_ticks:
                return None
                
            domain = x_axis.get("domain", [min(numeric_ticks), max(numeric_ticks)])
            
            # Ensure domain values are numeric
            try:
                domain = [float(domain[0]), float(domain[1])]
            except (ValueError, TypeError, IndexError):
                domain = [min(numeric_ticks), max(numeric_ticks)]
            
            # Create simplified axis representation
            return {
                "marktype": "group",
                "role": "axis",
                "items": [],
                "domain": domain,
                "ticks": numeric_ticks,
                "labels": valid_labels,
                "title": title
            }
        
        def _create_y_axis(self, y_axis, width, height):
            """Create Y-axis with ticks and labels with robust type handling"""
            if not isinstance(y_axis, dict):
                return None
                
            ticks = y_axis.get("ticks", [])
            tick_labels = y_axis.get("tick_labels", [str(t) for t in ticks] if ticks else [])
            title = y_axis.get("title", "")
            
            if not ticks or not isinstance(ticks, list):
                return None
            
            # Convert ticks to numeric values where possible
            numeric_ticks = []
            valid_labels = []
            
            for i, tick in enumerate(ticks):
                try:
                    numeric_tick = float(tick)
                    numeric_ticks.append(numeric_tick)
                    # Use corresponding label if available, otherwise convert tick to string
                    if i < len(tick_labels):
                        valid_labels.append(str(tick_labels[i]))
                    else:
                        valid_labels.append(str(tick))
                except (ValueError, TypeError):
                    # For non-numeric ticks, use position-based approximation
                    numeric_ticks.append(i)
                    valid_labels.append(str(tick) if i < len(tick_labels) else str(tick))
            
            if not numeric_ticks:
                return None
                
            domain = y_axis.get("domain", [min(numeric_ticks), max(numeric_ticks)])
            
            # Ensure domain values are numeric
            try:
                domain = [float(domain[0]), float(domain[1])]
            except (ValueError, TypeError, IndexError):
                domain = [min(numeric_ticks), max(numeric_ticks)]
            
            # Create simplified axis representation
            return {
                "marktype": "group",
                "role": "axis",
                "items": [],
                "domain": domain,
                "ticks": numeric_ticks,
                "labels": valid_labels,
                "title": title
            }
        
        def _create_line_marks(self, data_points, x_axis, y_axis, width, height, styling):
            """Create line marks from data points with robust type handling"""
            if not data_points or not isinstance(data_points, list):
                return {"marktype": "line", "items": []}
            
            # Extract and convert data points to numeric values where possible
            numeric_points = []
            for point in data_points:
                if not isinstance(point, dict):
                    continue
                    
                try:
                    x_val = float(point.get("x", 0))
                    y_val = float(point.get("y", 0))
                    numeric_points.append({
                        "x": x_val,
                        "y": y_val,
                        "description": point.get("description", f"X: {x_val}, Y: {y_val}")
                    })
                except (ValueError, TypeError):
                    # Skip points that can't be converted to numeric
                    continue
            
            if not numeric_points:
                return {"marktype": "line", "items": []}
            
            # Determine domains from numeric points
            x_values = [p["x"] for p in numeric_points]
            y_values = [p["y"] for p in numeric_points]
            
            x_domain = x_axis.get("domain", [min(x_values), max(x_values)])
            y_domain = y_axis.get("domain", [min(y_values), max(y_values)])
            
            # Ensure domains are numeric
            try:
                x_domain = [float(x_domain[0]), float(x_domain[1])]
            except (ValueError, TypeError, IndexError):
                x_domain = [min(x_values), max(x_values)]
                
            try:
                y_domain = [float(y_domain[0]), float(y_domain[1])]
            except (ValueError, TypeError, IndexError):
                y_domain = [min(y_values), max(y_values)]
            
            line_color = styling.get("line_color", "#c4c4c4")
            line_width = styling.get("line_width", 2)
            
            line_items = []
            
            for point in numeric_points:
                x_pos = self._scale_value(point["x"], x_domain, [0, width])
                y_pos = self._scale_value(point["y"], y_domain, [height, 0])
                
                line_items.append({
                    "x": x_pos,
                    "y": y_pos,
                    "stroke": line_color,
                    "strokeWidth": line_width,
                    "defined": True,
                    "description": point["description"]
                })
            
            return {
                "marktype": "line",
                "name": "marks",
                "role": "mark",
                "interactive": True,
                "clip": False,
                "items": line_items,
                "zindex": 0
            }
        
        def _create_title(self, title_text, width):
            """Create title group"""
            if not title_text:
                return {"marktype": "group", "role": "title", "content": ""}
                
            # Handle multi-line titles
            if isinstance(title_text, str) and len(title_text) > 60:
                # Try to split long titles into multiple lines
                words = title_text.split()
                lines = []
                current_line = []
                
                for word in words:
                    if len(' '.join(current_line + [word])) > 40:
                        if current_line:
                            lines.append(' '.join(current_line))
                            current_line = [word]
                        else:
                            lines.append(word)
                    else:
                        current_line.append(word)
                
                if current_line:
                    lines.append(' '.join(current_line))
                
                title_content = lines
            else:
                title_content = str(title_text)
            
            return {
                "marktype": "group",
                "role": "title",
                "content": title_content
            }
        
        def _scale_value(self, value, domain, range_vals):
            """Scale a value from domain to range with robust type conversion"""
            try:
                # Convert all values to float for mathematical operations
                value = float(value) if value is not None else 0.0
                domain_0 = float(domain[0]) if domain[0] is not None else 0.0
                domain_1 = float(domain[1]) if domain[1] is not None else 1.0
                
                # Avoid division by zero
                if domain_1 == domain_0:
                    return range_vals[0]
                
                ratio = (value - domain_0) / (domain_1 - domain_0)
                return range_vals[0] + ratio * (range_vals[1] - range_vals[0])
                
            except (ValueError, TypeError, ZeroDivisionError) as e:
                # Fallback: return middle of range if conversion fails
                return (range_vals[0] + range_vals[1]) / 2
        
        def vegaToGraph(self, vega_dict):
            """Convert Vega specification to graph representation for comparison.

            Only the first entry of the top-level 'items' list is read as the
            chart group. Each of its child items is dispatched by role/marktype
            to the title, axis, pie-arc or data-mark extractor and recorded
            under 'structural_elements'.

            Args:
                vega_dict: Scenegraph-style dict with a top-level 'items' list.

            Returns:
                A dict that always has the keys 'chart_type', 'data_points',
                'axes', 'title', 'visual_properties', 'structural_elements'
                and 'semantic_content'. An 'error' key is added when the input
                is invalid, lacks 'items', or parsing raised.
            """
            # Single skeleton shared by every return path so callers can rely
            # on the same keys whether or not parsing succeeded.
            graph = {
                'chart_type': 'unknown',
                'data_points': [],
                'axes': {
                    'x_axis': {},
                    'y_axis': {}
                },
                'title': '',
                'visual_properties': {},
                'structural_elements': [],
                'semantic_content': {}
            }

            # Validate input
            if not vega_dict or not isinstance(vega_dict, dict):
                graph['error'] = 'Invalid or empty Vega specification'
                return graph

            if 'items' not in vega_dict:
                graph['error'] = 'Missing items in Vega specification'
                return graph

            try:
                # Get main chart group
                main_items = vega_dict.get('items', [])
                if not main_items or not isinstance(main_items, list):
                    return graph

                chart_group = main_items[0]
                if not isinstance(chart_group, dict) or 'items' not in chart_group:
                    return graph

                chart_items = chart_group.get('items', [])
                if not isinstance(chart_items, list):
                    chart_items = []

                # Extract chart dimensions
                graph['visual_properties']['width'] = chart_group.get('width', 0)
                graph['visual_properties']['height'] = chart_group.get('height', 0)

                # Parse different components
                for item in chart_items:
                    if not isinstance(item, dict):
                        continue

                    role = item.get('role', '')
                    marktype = item.get('marktype', '')

                    if role == 'title':
                        graph['title'] = self._extract_title(item)
                    elif role == 'axis':
                        self._extract_axis_info(item, graph)
                    elif marktype == 'arc':
                        # Arcs are checked before the generic mark branch so a
                        # pie is classified explicitly.
                        self._extract_pie_marks(item, graph)
                        graph['chart_type'] = 'pie'
                    elif role == 'mark' or marktype in ['line', 'bar', 'point', 'area']:
                        self._extract_data_marks(item, graph)

                    # Track structural elements
                    graph['structural_elements'].append({
                        'type': marktype,
                        'role': role,
                        'interactive': item.get('interactive', False)
                    })

                # Determine chart type from marks if not already set
                if graph['chart_type'] == 'unknown':
                    graph['chart_type'] = self._determine_chart_type(graph)

                # Extract semantic content
                graph['semantic_content'] = self._extract_semantic_content(graph)

            except Exception as e:
                # Best effort: keep whatever was parsed and report the failure
                graph['error'] = f"Error parsing Vega specification: {str(e)}"

            return graph
        
        def _extract_pie_marks(self, pie_item, graph):
            """Extract pie chart segments from pie marks"""
            try:
                pie_marks = pie_item.get('items', [])
                if not isinstance(pie_marks, list):
                    pie_marks = []
                
                for mark in pie_marks:
                    if not isinstance(mark, dict):
                        continue
                        
                    data_point = {
                        'label': mark.get('label', ''),
                        'value': mark.get('value', 0),
                        'startAngle': mark.get('startAngle', 0),
                        'endAngle': mark.get('endAngle', 0),
                        'description': mark.get('description', ''),
                        'mark_type': 'arc'
                    }
                    
                    graph['data_points'].append(data_point)
                
                # Store visual properties from first mark
                if pie_marks:
                    first_mark = pie_marks[0]
                    if isinstance(first_mark, dict):
                        graph['visual_properties'].update({
                            'fill': first_mark.get('fill', ''),
                            'stroke': first_mark.get('stroke', ''),
                            'strokeWidth': first_mark.get('strokeWidth', 0),
                            'innerRadius': first_mark.get('innerRadius', 0),
                            'outerRadius': first_mark.get('outerRadius', 0)
                        })
            except Exception as e:
                print(f"Error extracting pie marks: {str(e)}")
        
        def _extract_title(self, title_item):
            """Extract title text from title item"""
            try:
                if 'content' in title_item:
                    content = title_item['content']
                    if isinstance(content, list):
                        return ' '.join(str(item) for item in content)
                    return str(content)
                
                title_groups = title_item.get('items', [])
                if not isinstance(title_groups, list):
                    return ''
                    
                for group in title_groups:
                    if not isinstance(group, dict):
                        continue
                        
                    text_items = group.get('items', [])
                    if not isinstance(text_items, list):
                        continue
                        
                    for text_item in text_items:
                        if not isinstance(text_item, dict):
                            continue
                            
                        if text_item.get('role') == 'title-text':
                            text_content = text_item.get('items', [])
                            if text_content and isinstance(text_content, list):
                                title_text = text_content[0].get('text', '') if text_content[0] else ''
                                # Handle multi-line titles
                                if isinstance(title_text, list):
                                    return ' '.join(str(item) for item in title_text)
                                return str(title_text)
            except Exception as e:
                print(f"Error extracting title: {str(e)}")
            return ''

        def _extract_axis_info(self, axis_item, graph):
            """Extract axis information from axis item"""
            try:
                # Handle simplified axis representation
                if 'domain' in axis_item and 'ticks' in axis_item:
                    # Determine if this is x or y axis based on position or other indicators
                    # For now, we'll need to make an assumption or add more logic
                    axis_key = 'x_axis'  # Default assumption
                    
                    axis_info = graph['axes'][axis_key]
                    axis_info['domain'] = axis_item.get('domain', [])
                    axis_info['ticks'] = axis_item.get('ticks', [])
                    axis_info['labels'] = [{'text': str(label)} for label in axis_item.get('labels', [])]
                    axis_info['title'] = axis_item.get('title', '')
                    return
                
                axis_groups = axis_item.get('items', [])
                if not isinstance(axis_groups, list):
                    return
                    
                for group in axis_groups:
                    if not isinstance(group, dict):
                        continue
                        
                    orient = group.get('orient', '')
                    axis_components = group.get('items', [])
                    if not isinstance(axis_components, list):
                        continue
                    
                    axis_key = 'x_axis' if orient == 'bottom' else 'y_axis'
                    axis_info = graph['axes'][axis_key]
                    
                    for component in axis_components:
                        if not isinstance(component, dict):
                            continue
                            
                        role = component.get('role', '')
                        
                        if role == 'axis-label':
                            axis_info['labels'] = self._extract_axis_labels(component)
                        elif role == 'axis-title':
                            axis_info['title'] = self._extract_axis_title(component)
                        elif role == 'axis-tick':
                            axis_info['ticks'] = self._extract_axis_ticks(component)
                        elif role == 'axis-domain':
                            axis_info['domain'] = self._extract_axis_domain(component)
                        elif role == 'axis-grid':
                            axis_info['grid'] = self._extract_axis_grid(component)
            except Exception as e:
                print(f"Error extracting axis info: {str(e)}")

        def _extract_axis_labels(self, label_component):
            """Extract axis labels"""
            labels = []
            try:
                label_items = label_component.get('items', [])
                if not isinstance(label_items, list):
                    return labels
                    
                for item in label_items:
                    if not isinstance(item, dict):
                        continue
                        
                    text = item.get('text', '')
                    x = item.get('x', 0)
                    y = item.get('y', 0)
                    labels.append({
                        'text': str(text),
                        'position': {'x': x, 'y': y}
                    })
            except Exception as e:
                print(f"Error extracting axis labels: {str(e)}")
            return labels

        def _extract_axis_title(self, title_component):
            """Extract axis title"""
            try:
                title_items = title_component.get('items', [])
                if title_items and isinstance(title_items, list) and title_items[0]:
                    return str(title_items[0].get('text', ''))
            except Exception as e:
                print(f"Error extracting axis title: {str(e)}")
            return ''

        def _extract_axis_ticks(self, tick_component):
            """Extract axis tick positions"""
            ticks = []
            try:
                tick_items = tick_component.get('items', [])
                if not isinstance(tick_items, list):
                    return ticks
                    
                for item in tick_items:
                    if not isinstance(item, dict):
                        continue
                        
                    x = item.get('x', 0)
                    y = item.get('y', 0)
                    ticks.append({'x': x, 'y': y})
            except Exception as e:
                print(f"Error extracting axis ticks: {str(e)}")
            return ticks

        def _extract_axis_domain(self, domain_component):
            """Extract axis domain line"""
            try:
                domain_items = domain_component.get('items', [])
                if domain_items and isinstance(domain_items, list) and domain_items[0]:
                    item = domain_items[0]
                    if isinstance(item, dict):
                        return {
                            'x1': item.get('x', 0),
                            'y1': item.get('y', 0),
                            'x2': item.get('x2', 0),
                            'y2': item.get('y2', 0),
                            'stroke': item.get('stroke', ''),
                            'strokeWidth': item.get('strokeWidth', 0)
                        }
            except Exception as e:
                print(f"Error extracting axis domain: {str(e)}")
            return {}

        def _extract_axis_grid(self, grid_component):
            """Extract axis grid lines"""
            grid_lines = []
            try:
                grid_items = grid_component.get('items', [])
                if not isinstance(grid_items, list):
                    return grid_lines
                    
                for item in grid_items:
                    if not isinstance(item, dict):
                        continue
                        
                    grid_lines.append({
                        'x1': item.get('x', 0),
                        'y1': item.get('y', 0),
                        'x2': item.get('x2', 0),
                        'y2': item.get('y2', 0),
                        'stroke': item.get('stroke', ''),
                        'strokeWidth': item.get('strokeWidth', 0)
                    })
            except Exception as e:
                print(f"Error extracting axis grid: {str(e)}")
            return grid_lines

        def _extract_data_marks(self, mark_item, graph):
            """Extract data points from mark items"""
            try:
                mark_type = mark_item.get('marktype', '')
                mark_items = mark_item.get('items', [])
                if not isinstance(mark_items, list):
                    return
                
                # Store visual properties
                if mark_items:
                    first_item = mark_items[0]
                    if isinstance(first_item, dict):
                        graph['visual_properties'].update({
                            'stroke': first_item.get('stroke', ''),
                            'strokeWidth': first_item.get('strokeWidth', 0),
                            'fill': first_item.get('fill', ''),
                            'opacity': first_item.get('opacity', 1)
                        })
                
                # Extract data points
                for item in mark_items:
                    if not isinstance(item, dict):
                        continue
                        
                    data_point = {
                        'x': item.get('x', 0),
                        'y': item.get('y', 0),
                        'description': item.get('description', ''),
                        'mark_type': mark_type
                    }
                    
                    # Extract value information from description if available
                    desc = data_point['description']
                    if desc:
                        # Try to parse values from description
                        parsed_values = self._parse_description_values(desc)
                        data_point.update(parsed_values)
                    
                    graph['data_points'].append(data_point)
            except Exception as e:
                print(f"Error extracting data marks: {str(e)}")

        def _parse_description_values(self, description):
            """Parse actual data values from description text"""
            values = {}
            try:
                if not description or not isinstance(description, str):
                    return values
                    
                # Look for patterns like "X: value, Y: value" or "Year: value, Price: value"
                import re
                
                # Pattern for year
                year_match = re.search(r'(\d{4})', description)
                if year_match:
                    values['data_x'] = int(year_match.group(1))
                
                # Pattern for dollar amounts
                price_match = re.search(r'\$(\d+\.?\d*)', description)
                if price_match:
                    values['data_y'] = float(price_match.group(1))
                
                # Pattern for generic X: value, Y: value
                x_match = re.search(r'X:\s*([^\s,]+)', description)
                y_match = re.search(r'Y:\s*([^\s,]+)', description)
                
                if x_match and 'data_x' not in values:
                    try:
                        values['data_x'] = float(x_match.group(1))
                    except:
                        values['data_x'] = x_match.group(1)
                
                if y_match and 'data_y' not in values:
                    try:
                        values['data_y'] = float(y_match.group(1))
                    except:
                        values['data_y'] = y_match.group(1)
                        
            except Exception as e:
                print(f"Error parsing description values: {str(e)}")
            
            return values

        def _determine_chart_type(self, graph):
            """Determine chart type from structural elements"""
            try:
                structural_elements = graph.get('structural_elements', [])
                mark_types = [elem.get('type', '') for elem in structural_elements 
                            if elem.get('type', '') in ['line', 'bar', 'point', 'area', 'pie', 'arc']]
                
                if 'arc' in mark_types:
                    return 'pie'
                elif 'line' in mark_types:
                    return 'line'
                elif 'bar' in mark_types:
                    return 'bar'
                elif 'point' in mark_types:
                    return 'scatter'
                elif 'area' in mark_types:
                    return 'area'
                else:
                    return 'unknown'
            except Exception as e:
                print(f"Error determining chart type: {str(e)}")
                return 'unknown'

        def _extract_semantic_content(self, graph):
            """Extract high-level semantic content for comparison"""
            semantic = {
                'data_trend': 'unknown',
                'data_range': {},
                'temporal_extent': {},
                'value_distribution': {},
                'key_statistics': {}
            }
            
            try:
                data_points = graph.get('data_points', [])
                if not data_points or not isinstance(data_points, list):
                    return semantic
                
                chart_type = graph.get('chart_type', '')
                
                if chart_type == 'pie':
                    # For pie charts, extract segment statistics
                    segment_values = []
                    for p in data_points:
                        if isinstance(p, dict) and 'value' in p:
                            try:
                                segment_values.append(float(p['value']))
                            except (ValueError, TypeError):
                                continue
                                
                    if segment_values:
                        semantic['value_distribution'] = {
                            'total_segments': len(segment_values),
                            'largest_segment': max(segment_values),
                            'smallest_segment': min(segment_values),
                            'total_percentage': sum(segment_values)
                        }
                        
                        # Check if percentages sum to approximately 100
                        if abs(sum(segment_values) - 100) < 2:
                            semantic['data_integrity'] = 'valid_percentages'
                        else:
                            semantic['data_integrity'] = 'invalid_percentages'
                
                else:
                    # For other chart types, extract x and y values
                    x_values = []
                    y_values = []
                    
                    for p in data_points:
                        if isinstance(p, dict):
                            if p.get('data_x') is not None:
                                try:
                                    x_values.append(float(p['data_x']))
                                except (ValueError, TypeError):
                                    pass
                            if p.get('data_y') is not None:
                                try:
                                    y_values.append(float(p['data_y']))
                                except (ValueError, TypeError):
                                    pass
                    
                    if x_values and y_values:
                        # Data range
                        semantic['data_range'] = {
                            'x_min': min(x_values),
                            'x_max': max(x_values),
                            'y_min': min(y_values),
                            'y_max': max(y_values)
                        }
                        
                        # Temporal extent (if x values look like years)
                        if all(isinstance(x, (int, float)) and 1900 <= x <= 2100 for x in x_values):
                            semantic['temporal_extent'] = {
                                'start_year': min(x_values),
                                'end_year': max(x_values),
                                'duration': max(x_values) - min(x_values)
                            }
                        
                        # Data trend (simple linear trend)
                        if len(y_values) >= 2:
                            first_y, last_y = y_values[0], y_values[-1]
                            if last_y > first_y * 1.1:
                                semantic['data_trend'] = 'increasing'
                            elif last_y < first_y * 0.9:
                                semantic['data_trend'] = 'decreasing'
                            else:
                                semantic['data_trend'] = 'stable'
                        
                        # Key statistics
                        semantic['key_statistics'] = {
                            'num_points': len(y_values),
                            'y_mean': sum(y_values) / len(y_values),
                            'y_std': (sum((y - sum(y_values)/len(y_values))**2 for y in y_values) / len(y_values))**0.5
                        }
            
            except Exception as e:
                print(f"Error extracting semantic content: {str(e)}")
            
            return semantic

    DEPENDENCIES_AVAILABLE = True
    
except ImportError as e:
    print(f"Missing dependencies: {e}")
    print("Please install required packages:")
    print("pip install anthropic openai sentence-transformers networkx scikit-learn matplotlib Pillow")
    DEPENDENCIES_AVAILABLE = False


def get_api_key_status(api_key):
    """Return a short status label describing how an API key is configured.

    Surrounding whitespace is ignored, matching how the key is stripped
    before it is handed to the evaluator.

    Args:
        api_key: The key as entered by the user; may be None or empty.

    Returns:
        "❌ Not Set" when the key is missing, not a string, or shorter than
        10 characters; "✅ Configured" when it starts with "sk-" (which also
        covers "sk-ant-" keys); "⚠️ Invalid Format" otherwise.
    """
    if not isinstance(api_key, str):
        return "❌ Not Set"

    key = api_key.strip()
    if len(key) < 10:
        return "❌ Not Set"
    if key.startswith("sk-"):
        return "✅ Configured"
    return "⚠️ Invalid Format"


def _notify_progress(progress, fraction, desc):
    """Send a best-effort progress update to the Gradio tracker."""
    # Test identity rather than truthiness: gr.Progress appears to define
    # __len__, so `if progress:` may raise or evaluate False and silently
    # skip every update (TODO confirm against the installed Gradio version).
    if progress is None:
        return
    try:
        progress(fraction, desc=desc)
    except Exception:
        # Progress reporting is cosmetic; it must never abort an evaluation.
        pass


def _analysis_error_graph(label, error):
    """Build the placeholder graph used when analysing a chart fails."""
    return {
        'chart_type': 'unknown',
        'data_points': [],
        'axes': {'x_axis': {}, 'y_axis': {}},
        'title': f'{label} (Analysis Error)',
        'visual_properties': {},
        'semantic_content': {},
        'parse_error': str(error)
    }


def _analyze_chart_safely(evaluator, chart_path, label):
    """Convert a chart image to a graph, falling back to a placeholder on error."""
    try:
        return evaluator.vegaToGraph(evaluator.chartToVega(chart_path))
    except Exception as e:
        print(f"{label} analysis error: {e}")
        return _analysis_error_graph(label, e)


def _remove_temp_file(path):
    """Delete a temporary file if it was created; ignore filesystem errors."""
    if not path:
        return
    try:
        os.unlink(path)
    except OSError:
        pass


def safe_evaluate_charts(chart1_image, chart2_image, llm_provider, claude_api_key, openai_api_key, progress=gr.Progress()):
    """
    Enhanced wrapper for chart evaluation that includes detailed human-readable explanations

    The function never raises: every failure is reported through the error
    message slot of the returned tuple, and temporary image files are removed
    on every exit path once they have been created.

    Args:
        chart1_image: PIL Image object for ground truth chart
        chart2_image: PIL Image object for predicted chart
        llm_provider: Selected LLM provider ("Claude" or "GPT-4")
        claude_api_key: Claude API key (falls back to CLAUDE_API_KEY when empty)
        openai_api_key: OpenAI API key (falls back to OPENAI_API_KEY when empty)
        progress: Gradio progress tracker

    Returns:
        ALWAYS returns exactly 4 values: (success_message, error_message, results_dataframe, detailed_explanation)
    """

    # Initialize default return values
    default_success = ""
    default_error = ""
    default_df = pd.DataFrame([
        ["Status", "Not Evaluated", "Please check inputs and try again"]
    ], columns=["Metric", "Score", "Description"])
    default_explanation = "No detailed explanation available. Please check inputs and try again."

    try:
        # Check dependencies first
        if not DEPENDENCIES_AVAILABLE:
            error_msg = """
            ❌ **Missing Dependencies**
            
            Please install required packages:
            ```
            pip install anthropic openai sentence-transformers networkx scikit-learn matplotlib Pillow pandas numpy
            ```
            """
            return default_success, error_msg, default_df, default_explanation

        # Determine which API key to use
        if llm_provider == "Claude":
            api_key = claude_api_key or CLAUDE_API_KEY
            if not api_key or len(api_key.strip()) < 10:
                return default_success, "❌ **Error**: Please set your Claude API key.", default_df, default_explanation
        elif llm_provider.startswith("GPT"):
            api_key = openai_api_key or OPENAI_API_KEY
            if not api_key or len(api_key.strip()) < 10:
                return default_success, "❌ **Error**: Please set your OpenAI API key.", default_df, default_explanation
        else:
            return default_success, f"❌ **Error**: Unsupported LLM provider: {llm_provider}", default_df, default_explanation

        _notify_progress(progress, 0.1, "Validating inputs...")

        # Validate inputs
        if chart1_image is None or chart2_image is None:
            return default_success, "❌ **Error**: Please upload both chart images.", default_df, default_explanation

        # Initialize evaluator
        _notify_progress(progress, 0.2, f"Initializing {llm_provider} evaluator...")

        # Set model configuration based on provider
        if llm_provider == "Claude":
            model_config = {
                "model": "claude-3-5-sonnet-20241022",
                "max_tokens": 4000,
                "temperature": 0.1
            }
        elif llm_provider == "GPT-4":
            model_config = {
                "model": "gpt-4-vision-preview",
                "max_tokens": 4000,
                "temperature": 0.1
            }
        else:
            model_config = {}

        evaluator = ChartEval(
            llm_provider=llm_provider,
            api_key=api_key.strip(),
            model_config=model_config
        )

        # Convert PIL images to temporary files and base64
        _notify_progress(progress, 0.3, "Converting images to temporary files...")

        chart1_path = None
        chart2_path = None

        # Everything that can create a temp file lives inside this try so the
        # finally clause cleans up on every exit path, including early returns.
        try:
            try:
                with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp1:
                    # Record the path before saving so a failed save is still cleaned up.
                    chart1_path = tmp1.name
                    chart1_image.save(tmp1.name, 'PNG')

                with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp2:
                    chart2_path = tmp2.name
                    chart2_image.save(tmp2.name, 'PNG')

                # Convert to base64 for detailed explanation
                encoded_images = []
                for image in (chart1_image, chart2_image):
                    buffer = io.BytesIO()
                    image.save(buffer, format='PNG')
                    encoded_images.append(base64.b64encode(buffer.getvalue()).decode('utf-8'))
                chart1_b64, chart2_b64 = encoded_images

            except Exception as e:
                return default_success, f"❌ **Error**: Failed to process uploaded images: {str(e)}", default_df, default_explanation

            # Analyze both charts; a failed analysis degrades to a placeholder graph
            _notify_progress(progress, 0.4, f"Analyzing Chart 1 with {llm_provider}...")
            graph1 = _analyze_chart_safely(evaluator, chart1_path, "Chart 1")

            _notify_progress(progress, 0.6, f"Analyzing Chart 2 with {llm_provider}...")
            graph2 = _analyze_chart_safely(evaluator, chart2_path, "Chart 2")

            # Run evaluation metrics
            _notify_progress(progress, 0.8, "Running evaluation metrics...")
            bert_score, hall_score, omis_score, ged_score = evaluator.compare(graph1, graph2)

            # Generate detailed explanation
            _notify_progress(progress, 0.9, "Generating detailed analysis...")

            metrics_for_explanation = {
                'bert_score': bert_score,
                'hallucination_score': hall_score,
                'omission_score': omis_score,
                'ged_score': ged_score
            }

            detailed_explanation = evaluator.generate_detailed_explanation(
                graph1, graph2, metrics_for_explanation, chart1_b64, chart2_b64
            )

            # Format results
            _notify_progress(progress, 0.95, "Formatting results...")

            success_message = f"""
            ## ✅ **Evaluation Completed Successfully!**
            
            ### 🤖 **LLM Provider**: {llm_provider}
            
            ### 📊 **Chart Analysis Summary**
            - **Chart 1**: {graph1.get('chart_type', 'unknown')} chart with {len(graph1.get('data_points', []))} data points
            - **Chart 2**: {graph2.get('chart_type', 'unknown')} chart with {len(graph2.get('data_points', []))} data points
            
            ### 🏆 **Overall Scores**
            - **Semantic Similarity (F1)**: {bert_score.get('f1', 0):.3f}
            - **Hallucination Rate**: {hall_score.get('hallucination_rate', 0):.3f} (lower is better)
            - **Omission Rate**: {omis_score.get('omission_rate', 0):.3f} (lower is better)  
            - **Structural Difference**: {ged_score.get('normalized_ged', 0):.3f} (lower is better)
            """

            # Create detailed results DataFrame
            results_data = [
                ["LLM Provider", llm_provider, f"Chart analysis performed using {llm_provider}"],
                ["GraphBERT Correctness", f"{bert_score.get('precision', 0):.3f}", "Semantic similarity precision"],
                ["GraphBERT Completeness", f"{bert_score.get('recall', 0):.3f}", "Semantic similarity recall"],
                ["GraphBERT F1", f"{bert_score.get('f1', 0):.3f}", "Overall semantic similarity"],
                ["Hallucination Rate", f"{hall_score.get('hallucination_rate', 0):.3f}", "False information rate"],
                ["Hallucination Count", str(hall_score.get('hallucination_count', 0)), "Number of hallucinated elements"],
                ["Omission Rate", f"{omis_score.get('omission_rate', 0):.3f}", "Missing information rate"],
                ["Omission Count", str(omis_score.get('omission_count', 0)), "Number of omitted elements"],
                ["Graph Edit Distance", f"{ged_score.get('ged_distance', 0)}", "Raw structural differences"],
                ["Normalized GED", f"{ged_score.get('normalized_ged', 0):.3f}", "Normalized structural similarity"]
            ]

            try:
                results_df = pd.DataFrame(results_data, columns=["Metric", "Score", "Description"])
            except Exception as e:
                print(f"DataFrame creation error: {e}")
                results_df = default_df

            _notify_progress(progress, 1.0, "Complete!")

            return success_message, default_error, results_df, detailed_explanation

        finally:
            # Clean up temporary files independently so one failure cannot
            # leave the other file behind.
            _remove_temp_file(chart1_path)
            _remove_temp_file(chart2_path)

    except Exception as e:
        error_msg = f"""
        ❌ **Evaluation Failed**
        
        **Error**: {str(e)}
        
        **Common issues:**
        - API key not configured or invalid
        - Network connection problems  
        - Image processing errors
        - API rate limits exceeded
        
        **Troubleshooting:**
        - Set your API key for the selected LLM provider
        - Verify your API key is correct and active
        - Ensure images are valid chart images (PNG, JPG, etc.)
        - Check your internet connection
        - Wait a moment and try again
        """

        print(f"Full error traceback: {traceback.format_exc()}")
        return default_success, error_msg, default_df, default_explanation


def load_example_charts(example_name):
    """
    Resolve a named example into its pair of chart images.

    A placeholder image is substituted for any file that does not exist on
    disk, and for both images if loading fails.

    Args:
        example_name: Key into EXAMPLE_CHART_PAIRS, or the dropdown's
            "Select an example..." sentinel

    Returns:
        Tuple of (ground_truth_image, predicted_image, info_message); both
        images are None for the sentinel or an unknown example name
    """
    if example_name == "Select an example...":
        return None, None, ""

    try:
        example_data = EXAMPLE_CHART_PAIRS[example_name]
    except KeyError:
        return None, None, "❌ Example not found"

    gt_path = example_data["ground_truth"]
    pred_path = example_data["predicted"]
    description = example_data["description"]

    def _open_or_placeholder(path, caption):
        # Missing files are not an error: show a labelled placeholder instead.
        if os.path.exists(path):
            return Image.open(path)
        return create_placeholder_image(f"{caption}\n{example_name}", (400, 300))

    try:
        gt_image = _open_or_placeholder(gt_path, "Ground Truth")
        pred_image = _open_or_placeholder(pred_path, "Predicted")

        info_message = f"""
        ### 📋 **Example Loaded: {example_name}**
        
        **Description**: {description}
        
        **Files**:
        - Ground Truth: `{gt_path}`
        - Predicted: `{pred_path}`
        
        ℹ️ *If you see placeholder images, replace the file paths in the code with your actual example images.*
        """
    except Exception as e:
        error_msg = f"❌ Error loading example images: {str(e)}"
        # Fall back to placeholders for both sides when anything went wrong
        return (
            create_placeholder_image("Error loading\nGround Truth", (400, 300)),
            create_placeholder_image("Error loading\nPredicted", (400, 300)),
            error_msg,
        )

    return gt_image, pred_image, info_message


def create_placeholder_image(text, size=(400, 300)):
    """
    Create a white placeholder image with centered black text and a gray border

    Args:
        text: Text to display on the image (may contain newlines)
        size: Tuple of (width, height)

    Returns:
        PIL Image object
    """
    from PIL import Image, ImageDraw, ImageFont

    # Create a new image with white background
    img = Image.new('RGB', size, color='white')
    draw = ImageDraw.Draw(img)

    # Prefer Arial, fall back to Pillow's built-in font, then to no explicit
    # font at all. Catch Exception rather than using a bare except so that
    # KeyboardInterrupt / SystemExit are not swallowed.
    try:
        font = ImageFont.truetype("arial.ttf", 16)
    except Exception:
        try:
            font = ImageFont.load_default()
        except Exception:
            font = None

    # Calculate text position (center)
    if font:
        bbox = draw.textbbox((0, 0), text, font=font)
        text_width = bbox[2] - bbox[0]
        text_height = bbox[3] - bbox[1]
    else:
        text_width = len(text) * 8  # Rough estimate
        text_height = 16

    x = (size[0] - text_width) // 2
    y = (size[1] - text_height) // 2

    # Draw the text
    draw.text((x, y), text, fill='black', font=font)

    # Draw a border
    draw.rectangle([0, 0, size[0]-1, size[1]-1], outline='gray', width=2)

    return img


def create_demo():
    """Build the Gradio Blocks interface for the chart evaluation app.

    The page is laid out as: a header, a two-column row (configuration on the
    left, example selection / image upload on the right), a results section,
    a detailed-explanation section, and a collapsed help accordion. Three
    event handlers are wired: provider change -> API key field visibility,
    example selection -> image loading, and the evaluate button.

    Returns:
        The constructed ``gr.Blocks`` object (not yet launched).
    """

    # Page-level CSS, kept out of the constructor call for readability
    page_css = """
        .gradio-container {
            max-width: 1400px;
            margin: auto;
        }
        .metric-card {
            border: 1px solid #e0e0e0;
            border-radius: 8px;
            padding: 16px;
            margin: 8px 0;
        }
        .explanation-box {
            background: #f8f9fa;
            border: 1px solid #dee2e6;
            border-radius: 8px;
            padding: 20px;
            margin: 10px 0;
            max-height: 600px;
            overflow-y: auto;
        }
        """

    # Static content of the "Examples & Help" accordion
    help_html = """
            <div style="padding: 20px;">
                <h4>🔑 API Key Configuration</h4>
                <p>To use this application, you need API keys for your chosen provider:</p>
                
                <h5>Claude API Key:</h5>
                <ol>
                    <li>Get your Claude API key from <a href="https://console.anthropic.com/" target="_blank">console.anthropic.com</a></li>
                    <li>Either enter it in the Claude API Key field above, or</li>
                    <li>Set it permanently in the script by editing the <code>CLAUDE_API_KEY</code> variable</li>
                </ol>
                
                <h5>OpenAI API Key (for GPT-4):</h5>
                <ol>
                    <li>Get your OpenAI API key from <a href="https://platform.openai.com/api-keys" target="_blank">platform.openai.com</a></li>
                    <li>Either enter it in the OpenAI API Key field above, or</li>
                    <li>Set it permanently in the script by editing the <code>OPENAI_API_KEY</code> variable</li>
                </ol>
                
                <h4>🤖 LLM Provider Comparison</h4>
                <ul>
                    <li><strong>Claude</strong>: Excellent at detailed chart analysis, precise data extraction, comprehensive explanations</li>
                    <li><strong>GPT-4</strong>: Good vision capabilities, different analytical perspective, thorough insights</li>
                </ul>
                
                <h4>🎯 Quick Start with Examples</h4>
                <p>Use the dropdown above to try pre-loaded chart examples. Each example includes:</p>
                <ul>
                    <li><strong>Ground Truth Chart</strong>: The reference/correct chart</li>
                    <li><strong>Predicted Chart</strong>: The generated/predicted version to evaluate</li>
                    <li><strong>Description</strong>: Context about what the chart represents</li>
                </ul>
                
                <h4>🏆 What makes a good chart comparison?</h4>
                <ul>
                    <li><strong>High GraphBERT F1 (>0.8)</strong>: Charts convey similar semantic information</li>
                    <li><strong>Low Hallucination Rate (<0.2)</strong>: Predicted chart doesn't add false information</li>
                    <li><strong>Low Omission Rate (<0.2)</strong>: Predicted chart doesn't miss important information</li>
                    <li><strong>Low Normalized GED (<0.3)</strong>: Charts have similar structure</li>
                    <li><strong>Clear Detailed Explanation</strong>: Specific examples of strengths and areas for improvement</li>
                </ul>
                
                <h4>🔍 Understanding the Detailed Analysis</h4>
                <p>The enhanced system now provides:</p>
                <ul>
                    <li><strong>Executive Summary</strong>: High-level assessment with accuracy score</li>
                    <li><strong>Specific Examples</strong>: References to actual data points, labels, and chart elements</li>
                    <li><strong>Element Breakdown</strong>: Detailed comparison of titles, axes, data, and visual design</li>
                    <li><strong>Error Analysis</strong>: Specific data errors, missing elements, and hallucinations</li>
                    <li><strong>Actionable Recommendations</strong>: Concrete steps for improvement</li>
                    <li><strong>Impact Assessment</strong>: How issues affect interpretation and decision-making</li>
                </ul>
                
                <h4>📁 Adding Your Own Examples</h4>
                <p>To add your own example chart pairs:</p>
                <ol>
                    <li>Create an <code>examples/</code> folder in your project directory</li>
                    <li>Add your chart image pairs (ground truth + predicted)</li>
                    <li>Update the <code>EXAMPLE_CHART_PAIRS</code> dictionary in the code</li>
                    <li>Replace the placeholder paths with your actual file paths</li>
                </ol>
                
                <h4>🔧 Troubleshooting</h4>
                <ul>
                    <li><strong>API Key Issues</strong>: Make sure your API key is set and valid for the selected provider</li>
                    <li><strong>Provider Switching</strong>: You can switch between Claude and GPT-4 at any time</li>
                    <li><strong>Image Quality</strong>: Use clear, high-resolution chart images</li>
                    <li><strong>Chart Types</strong>: Works best with line charts, bar charts, pie charts, and scatter plots</li>
                    <li><strong>Processing Time</strong>: Analysis may take 60-90 seconds per chart due to detailed explanation</li>
                    <li><strong>Long Explanations</strong>: Detailed analysis may be lengthy but provides comprehensive insights</li>
                </ul>
                
                <h4>📞 Support</h4>
                <p>For issues or questions, check the console logs for detailed error messages.</p>
            </div>
            """

    with gr.Blocks(
        title="Enhanced Chart Evaluation System",
        theme=gr.themes.Soft(),
        css=page_css,
    ) as demo:

        # ---- Page header ----
        gr.HTML("""
        <div style="text-align: center; padding: 20px;">
            <h1>📊 Enhanced Chart Evaluation System</h1>
            <p style="font-size: 18px; color: #666;">
                Compare two chart images using advanced evaluation metrics with detailed human-readable explanations.
                Get GraphBERT Score, Hallucination Detection, Omission Analysis, Graph Edit Distance, 
                and comprehensive data analyst insights.
            </p>
            <p style="font-size: 16px; color: #888;">
                🎯 Ready to use with Claude or GPT-4! Now includes detailed explanations pointing to specific chart elements
            </p>
        </div>
        """)

        with gr.Row():
            # ---- Left column: provider, keys, usage notes, run button ----
            with gr.Column(scale=1):
                gr.HTML("<h3>🔧 Configuration</h3>")

                llm_provider = gr.Dropdown(
                    choices=["Claude", "GPT-4"],
                    value="Claude",
                    label="🤖 LLM Provider",
                    info="Select the AI model to analyze your charts",
                )

                # Key fields: the Claude one starts visible, the OpenAI one hidden
                claude_api_key_input = gr.Textbox(
                    label="🔑 Claude API Key",
                    type="password",
                    placeholder="Enter your Claude API key (or leave blank to use configured key)",
                    value="",
                    visible=True,
                )
                openai_api_key_input = gr.Textbox(
                    label="🔑 OpenAI API Key",
                    type="password",
                    placeholder="Enter your OpenAI API key (or leave blank to use configured key)",
                    value="",
                    visible=False,
                )

                # Status of the keys configured at module level, rendered once
                # when the interface is built
                claude_status = get_api_key_status(CLAUDE_API_KEY)
                openai_status = get_api_key_status(OPENAI_API_KEY)
                api_status_display = gr.HTML(f"""
                    <div style="background: #f8f9fa; padding: 10px; border-radius: 5px; margin: 10px 0; border: 1px solid #dee2e6;">
                        <strong>🔑 API Key Status:</strong><br>
                        <span style="display: block; margin: 5px 0;">Claude: {claude_status}</span>
                        <span style="display: block; margin: 5px 0;">OpenAI: {openai_status}</span>
                        <small style="color: #6c757d;">Configure API keys in the script or enter them above</small>
                    </div>
                    """)

                def toggle_api_key_fields(provider):
                    """Return visibility updates for (Claude field, OpenAI field)."""
                    # The OpenAI field is shown only when GPT-4 is selected.
                    # NOTE(review): the Claude field stays visible for every
                    # provider, including GPT-4 - confirm this is intended.
                    show_openai = provider == "GPT-4"
                    return gr.update(visible=True), gr.update(visible=show_openai)

                llm_provider.change(
                    fn=toggle_api_key_fields,
                    inputs=[llm_provider],
                    outputs=[claude_api_key_input, openai_api_key_input],
                )

                gr.HTML("""
                    <div style="background: #f0f8ff; padding: 10px; border-radius: 5px; margin: 10px 0;">
                        <strong>📝 How to use:</strong><br>
                        1. Select your preferred LLM provider (Claude or GPT-4)<br>
                        2. Enter API key if not configured in script<br>
                        3. Either:<br>
                        &nbsp;&nbsp;&nbsp;• Select a pre-loaded example from the dropdown, OR<br>
                        &nbsp;&nbsp;&nbsp;• Upload your own ground truth chart (Chart 1)<br>
                        &nbsp;&nbsp;&nbsp;• Upload your own predicted/generated chart (Chart 2)<br>
                        4. Click "Evaluate Charts" to run the analysis
                    </div>
                    """)

                evaluate_btn = gr.Button(
                    "🚀 Evaluate Charts",
                    variant="primary",
                    size="lg",
                )

                gr.HTML("""
                    <div style="background: #fff8e1; padding: 10px; border-radius: 5px; margin: 10px 0;">
                        <strong>📊 Metrics Explained:</strong><br>
                        • <strong>GraphBERT F1</strong>: Semantic similarity (higher = better)<br>
                        • <strong>Hallucination Rate</strong>: False information (lower = better)<br>
                        • <strong>Omission Rate</strong>: Missing information (lower = better)<br>
                        • <strong>Normalized GED</strong>: Structural differences (lower = better)<br>
                        • <strong>Detailed Explanation</strong>: Human-readable analysis with specific examples
                    </div>
                    """)

            # ---- Right column: example picker and image uploads ----
            with gr.Column(scale=1):
                gr.HTML("<h3>📈 Chart Images</h3>")

                gr.HTML("<h4>🎯 Quick Examples</h4>")
                # Sentinel entry first, then one entry per configured example pair
                example_choices = ["Select an example...", *EXAMPLE_CHART_PAIRS]
                example_dropdown = gr.Dropdown(
                    choices=example_choices,
                    value="Select an example...",
                    label="Choose from pre-loaded examples",
                    info="Select an example to automatically load both ground truth and predicted charts",
                )

                example_info = gr.Markdown(
                    value="Select an example above to see details and load chart images automatically.",
                    visible=True,
                )

                gr.HTML("<h4>📤 Or Upload Your Own</h4>")

                chart1_input = gr.Image(
                    label="Chart 1 (Ground Truth)",
                    type="pil",
                    height=300,
                )
                chart2_input = gr.Image(
                    label="Chart 2 (Predicted/Generated)",
                    type="pil",
                    height=300,
                )

        gr.HTML("<hr style='margin: 30px 0;'>")

        # ---- Results: status messages and metrics table ----
        with gr.Row(), gr.Column():
            gr.HTML("<h3>📋 Results</h3>")

            success_output = gr.Markdown(
                label="Success Message",
                visible=True,
            )
            error_output = gr.Markdown(
                label="Error Message",
                visible=True,
            )
            results_output = gr.Dataframe(
                label="Detailed Metrics",
            )

        gr.HTML("<hr style='margin: 30px 0;'>")

        # ---- Detailed explanation ----
        with gr.Row(), gr.Column():
            gr.HTML("<h3>🔍 Detailed Analysis & Insights</h3>")
            gr.HTML("""
                <div style="background: #e8f4fd; padding: 10px; border-radius: 5px; margin: 10px 0;">
                    <strong>📋 What you'll get:</strong><br>
                    • Executive summary with accuracy score<br>
                    • Specific examples of what went right and wrong<br>
                    • Element-by-element comparison (titles, data, axes, etc.)<br>
                    • Actionable recommendations for improvement<br>
                    • Impact assessment for decision-making
                </div>
                """)

            detailed_explanation_output = gr.Markdown(
                value="Detailed explanation will appear here after evaluation.",
                label="Human-Readable Analysis",
                elem_classes=["explanation-box"],
            )

        gr.HTML("<hr style='margin: 30px 0;'>")

        # ---- Help (collapsed by default) ----
        with gr.Accordion("📚 Examples & Help", open=False):
            gr.HTML(help_html)

        # ---- Event wiring ----
        # Picking an example fills both image slots and the info text
        example_dropdown.change(
            fn=load_example_charts,
            inputs=[example_dropdown],
            outputs=[chart1_input, chart2_input, example_info],
        )

        # The evaluate button feeds both images, the provider and both key
        # fields to the evaluation function and fills the four result widgets
        evaluate_btn.click(
            fn=safe_evaluate_charts,
            inputs=[
                chart1_input,
                chart2_input,
                llm_provider,
                claude_api_key_input,
                openai_api_key_input,
            ],
            outputs=[
                success_output,
                error_output,
                results_output,
                detailed_explanation_output,
            ],
            show_progress=True,
        )

    return demo

if __name__ == "__main__":
    # Script entry point: build the interface and serve it on port 7860.
    # NOTE: share=True asks Gradio for a public share link and "0.0.0.0"
    # binds all network interfaces, so the app is reachable from outside
    # this machine.
    launch_options = {
        "share": True,
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    demo = create_demo()
    demo.launch(**launch_options)