# GRADIO ML CLASSIFICATION APP - SIMPLIFIED VERSION
# =================================================
import gradio as gr
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt
import warnings
import tempfile
import os
from typing import Tuple, List, Optional

warnings.filterwarnings('ignore')
# ============================================================================
# MODEL LOADING
# ============================================================================
def load_models():
    """Load all available ML models"""
    models = {}
    try:
        # Load pipeline
        try:
            models['pipeline'] = joblib.load('models/sentiment_analysis_pipeline.pkl')
            models['pipeline_available'] = True
        except Exception:
            models['pipeline_available'] = False
        # Load vectorizer
        try:
            models['vectorizer'] = joblib.load('models/tfidf_vectorizer.pkl')
            models['vectorizer_available'] = True
        except Exception:
            models['vectorizer_available'] = False
        # Load LR model
        try:
            models['logistic_regression'] = joblib.load('models/logistic_regression_model.pkl')
            models['lr_available'] = True
        except Exception:
            models['lr_available'] = False
        # Load NB model
        try:
            models['naive_bayes'] = joblib.load('models/multinomial_nb_model.pkl')
            models['nb_available'] = True
        except Exception:
            models['nb_available'] = False
        # Check if we have working models
        pipeline_ready = models['pipeline_available']
        individual_ready = models['vectorizer_available'] and (models['lr_available'] or models['nb_available'])
        return models if (pipeline_ready or individual_ready) else None
    except Exception as e:
        print(f"Error loading models: {e}")
        return None

# Load models globally
MODELS = load_models()
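
# ----------------------------------------------------------------------------
# Illustrative sketch only (never called by the app): one way the artifacts
# expected in 'models/' could be produced, assuming scikit-learn TF-IDF
# features and binary labels (0 = negative, 1 = positive). The helper name,
# hyperparameters, and training data are assumptions, not the original
# training code for this Space.
def _train_and_export_models_sketch(texts, labels):
    """Hypothetical helper: fit TF-IDF + classifiers and persist them with joblib."""
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.linear_model import LogisticRegression
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.pipeline import Pipeline

    # Shared TF-IDF features for the two standalone classifiers
    vectorizer = TfidfVectorizer()
    X = vectorizer.fit_transform(texts)
    lr = LogisticRegression(max_iter=1000).fit(X, labels)
    nb = MultinomialNB().fit(X, labels)

    # End-to-end variant: raw text in, prediction out
    pipeline = Pipeline([
        ("tfidf", TfidfVectorizer()),
        ("clf", LogisticRegression(max_iter=1000)),
    ]).fit(texts, labels)

    # Save under the filenames load_models() looks for
    os.makedirs("models", exist_ok=True)
    joblib.dump(pipeline, "models/sentiment_analysis_pipeline.pkl")
    joblib.dump(vectorizer, "models/tfidf_vectorizer.pkl")
    joblib.dump(lr, "models/logistic_regression_model.pkl")
    joblib.dump(nb, "models/multinomial_nb_model.pkl")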
# ============================================================================
# CORE FUNCTIONS
# ============================================================================
def get_available_models():
    """Get available model names"""
    if MODELS is None:
        return ["No models available"]
    available = []
    if MODELS.get('pipeline_available') or (MODELS.get('vectorizer_available') and MODELS.get('lr_available')):
        available.append("Logistic Regression")
    if MODELS.get('vectorizer_available') and MODELS.get('nb_available'):
        available.append("Multinomial Naive Bayes")
    return available if available else ["No models available"]

def make_prediction(text, model_choice):
    """Make prediction using selected model"""
    if MODELS is None or not text.strip():
        return None, None, "Please enter text and ensure models are loaded"
    try:
        if model_choice == "Logistic Regression":
            if MODELS.get('pipeline_available'):
                prediction = MODELS['pipeline'].predict([text])[0]
                probabilities = MODELS['pipeline'].predict_proba([text])[0]
            elif MODELS.get('vectorizer_available') and MODELS.get('lr_available'):
                X = MODELS['vectorizer'].transform([text])
                prediction = MODELS['logistic_regression'].predict(X)[0]
                probabilities = MODELS['logistic_regression'].predict_proba(X)[0]
            else:
                return None, None, "Logistic Regression model not available"
        elif model_choice == "Multinomial Naive Bayes":
            if MODELS.get('vectorizer_available') and MODELS.get('nb_available'):
                X = MODELS['vectorizer'].transform([text])
                prediction = MODELS['naive_bayes'].predict(X)[0]
                probabilities = MODELS['naive_bayes'].predict_proba(X)[0]
            else:
                return None, None, "Naive Bayes model not available"
        else:
            return None, None, f"Unknown model choice: {model_choice}"
        # Convert numeric class labels (including numpy integers) to readable names
        class_names = ['Negative', 'Positive']
        prediction_label = class_names[int(prediction)] if isinstance(prediction, (int, np.integer)) else str(prediction)
        return prediction_label, probabilities, "Success"
    except Exception as e:
        return None, None, f"Error: {str(e)}"
def create_plot(probabilities):
    """Create probability plot"""
    fig, ax = plt.subplots(figsize=(8, 5))
    classes = ['Negative', 'Positive']
    colors = ['#ff6b6b', '#51cf66']
    bars = ax.bar(classes, probabilities, color=colors, alpha=0.8)
    # Add labels
    for bar, prob in zip(bars, probabilities):
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2., height + 0.01,
                f'{prob:.1%}', ha='center', va='bottom', fontweight='bold')
    ax.set_ylim(0, 1.1)
    ax.set_ylabel('Probability')
    ax.set_title('Sentiment Prediction Probabilities')
    ax.grid(axis='y', alpha=0.3)
    plt.tight_layout()
    return fig
# ============================================================================
# INTERFACE FUNCTIONS
# ============================================================================
def predict_text(text, model_choice):
    """Single text prediction interface"""
    prediction, probabilities, status = make_prediction(text, model_choice)
    if prediction and probabilities is not None:
        confidence = max(probabilities)
        # Format results
        result = f"**Prediction:** {prediction} Sentiment\n"
        result += f"**Confidence:** {confidence:.1%}\n\n"
        result += "**Detailed Probabilities:**\n"
        result += f"- Negative: {probabilities[0]:.1%}\n"
        result += f"- Positive: {probabilities[1]:.1%}\n\n"
        # Interpretation
        if confidence >= 0.8:
            result += "**High Confidence:** The model is very confident about this prediction."
        elif confidence >= 0.6:
            result += "**Medium Confidence:** The model is reasonably confident."
        else:
            result += "**Low Confidence:** The model is uncertain about this prediction."
        # Create plot
        plot = create_plot(probabilities)
        return result, plot
    else:
        return f"Error: {status}", None
def process_file(file, model_choice, max_texts):
    """Process uploaded file"""
    if file is None:
        return "Please upload a file!", None
    if MODELS is None:
        return "No models loaded!", None
    try:
        # Read file
        if file.name.endswith('.txt'):
            with open(file.name, 'r', encoding='utf-8') as f:
                content = f.read()
            texts = [line.strip() for line in content.split('\n') if line.strip()]
        elif file.name.endswith('.csv'):
            df = pd.read_csv(file.name)
            texts = df.iloc[:, 0].astype(str).tolist()
        else:
            return "Unsupported file format! Use .txt or .csv", None
        if not texts:
            return "No text found in file!", None
        # Limit texts (slider values may arrive as floats)
        max_texts = int(max_texts)
        if len(texts) > max_texts:
            texts = texts[:max_texts]
        # Process texts
        results = []
        for i, text in enumerate(texts):
            if text.strip():
                prediction, probabilities, _ = make_prediction(text, model_choice)
                if prediction and probabilities is not None:
                    results.append({
                        'Index': i + 1,
                        'Text': text[:100] + "..." if len(text) > 100 else text,
                        'Prediction': prediction,
                        'Confidence': f"{max(probabilities):.1%}",
                        'Negative_Prob': f"{probabilities[0]:.1%}",
                        'Positive_Prob': f"{probabilities[1]:.1%}"
                    })
        if results:
            # Create summary
            positive_count = sum(1 for r in results if r['Prediction'] == 'Positive')
            negative_count = len(results) - positive_count
            avg_confidence = np.mean([float(r['Confidence'].strip('%')) for r in results])
            summary = "**Processing Complete!**\n\n"
            summary += "**Summary Statistics:**\n"
            summary += f"- Total Processed: {len(results)}\n"
            summary += f"- Positive: {positive_count} ({positive_count/len(results):.1%})\n"
            summary += f"- Negative: {negative_count} ({negative_count/len(results):.1%})\n"
            summary += f"- Average Confidence: {avg_confidence:.1f}%\n"
            # Create CSV for download
            results_df = pd.DataFrame(results)
            # Save to temporary file
            with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f:
                results_df.to_csv(f, index=False)
                temp_file = f.name
            return summary, temp_file
        else:
            return "No valid texts could be processed!", None
    except Exception as e:
        return f"Error processing file: {str(e)}", None
def compare_models_func(text):
    """Compare predictions from different models"""
    if MODELS is None:
        return "No models loaded!", None
    if not text.strip():
        return "Please enter text to compare!", None
    available_models = get_available_models()
    if len(available_models) < 2:
        return "Need at least 2 models for comparison.", None
    results = []
    all_probs = []
    for model_name in available_models:
        prediction, probabilities, _ = make_prediction(text, model_name)
        if prediction and probabilities is not None:
            results.append({
                'Model': model_name,
                'Prediction': prediction,
                'Confidence': f"{max(probabilities):.1%}",
                'Negative': f"{probabilities[0]:.1%}",
                'Positive': f"{probabilities[1]:.1%}"
            })
            all_probs.append(probabilities)
    if results:
        # Create comparison text
        comparison_text = "**Model Comparison Results:**\n\n"
        for result in results:
            comparison_text += f"**{result['Model']}:**\n"
            comparison_text += f"- Prediction: {result['Prediction']}\n"
            comparison_text += f"- Confidence: {result['Confidence']}\n"
            comparison_text += f"- Negative: {result['Negative']}, Positive: {result['Positive']}\n\n"
        # Agreement analysis
        predictions = [r['Prediction'] for r in results]
        if len(set(predictions)) == 1:
            comparison_text += f"**Agreement:** All models agree on {predictions[0]} sentiment!"
        else:
            comparison_text += "**Disagreement:** Models have different predictions."
        # Create comparison plot
        fig, axes = plt.subplots(1, len(results), figsize=(6 * len(results), 5))
        if len(results) == 1:
            axes = [axes]
        for i, (result, probs) in enumerate(zip(results, all_probs)):
            ax = axes[i]
            classes = ['Negative', 'Positive']
            colors = ['#ff6b6b', '#51cf66']
            bars = ax.bar(classes, probs, color=colors, alpha=0.8)
            # Add labels
            for bar, prob in zip(bars, probs):
                height = bar.get_height()
                ax.text(bar.get_x() + bar.get_width() / 2., height + 0.02,
                        f'{prob:.0%}', ha='center', va='bottom', fontweight='bold')
            ax.set_ylim(0, 1.1)
            ax.set_title(f"{result['Model']}\n{result['Prediction']}")
            ax.grid(axis='y', alpha=0.3)
        plt.tight_layout()
        return comparison_text, fig
    else:
        return "Failed to get predictions!", None
def get_model_info():
    """Get model information"""
    if MODELS is None:
        return """
**No models loaded!**

Please ensure you have model files in the 'models/' directory:
- sentiment_analysis_pipeline.pkl (complete pipeline), OR
- tfidf_vectorizer.pkl + logistic_regression_model.pkl, OR
- tfidf_vectorizer.pkl + multinomial_nb_model.pkl
"""
    info = "**Models loaded successfully!**\n\n"
    info += "**Available Models:**\n\n"
    if MODELS.get('pipeline_available') or (MODELS.get('vectorizer_available') and MODELS.get('lr_available')):
        info += "**Logistic Regression**\n"
        info += "- Type: Linear Classification\n"
        info += "- Features: TF-IDF vectors\n"
        info += "- Strengths: Fast, interpretable\n\n"
    if MODELS.get('vectorizer_available') and MODELS.get('nb_available'):
        info += "**Multinomial Naive Bayes**\n"
        info += "- Type: Probabilistic Classification\n"
        info += "- Features: TF-IDF vectors\n"
        info += "- Strengths: Works well with small data\n\n"
    info += "**File Status:**\n"
    files = [
        ("sentiment_analysis_pipeline.pkl", MODELS.get('pipeline_available', False)),
        ("tfidf_vectorizer.pkl", MODELS.get('vectorizer_available', False)),
        ("logistic_regression_model.pkl", MODELS.get('lr_available', False)),
        ("multinomial_nb_model.pkl", MODELS.get('nb_available', False))
    ]
    for filename, status in files:
        status_icon = "✅" if status else "❌"
        info += f"- {filename}: {status_icon}\n"
    return info
# ============================================================================
# GRADIO INTERFACE
# ============================================================================
def create_app():
    """Create Gradio interface"""
    with gr.Blocks(title="ML Text Classification") as app:
        # Header
        gr.HTML("""
        <div style="text-align: center; margin-bottom: 2rem;">
            <h1 style="color: #1f77b4; font-size: 2.5rem;">🤖 ML Text Classification App</h1>
            <p style="font-size: 1.2rem; color: #666;">Advanced Sentiment Analysis with Multiple ML Models</p>
        </div>
        """)
        # Main interface with tabs
        with gr.Tabs():
            # Single Prediction Tab
            with gr.Tab("🔮 Single Prediction"):
                gr.Markdown("### Enter text and select a model for sentiment analysis")
                with gr.Row():
                    with gr.Column(scale=1):
                        model_dropdown = gr.Dropdown(
                            choices=get_available_models(),
                            value=get_available_models()[0] if get_available_models() else None,
                            label="Choose Model"
                        )
                        text_input = gr.Textbox(
                            lines=5,
                            placeholder="Enter your text here...",
                            label="Text Input"
                        )
                        with gr.Row():
                            example1_btn = gr.Button("Good Example", size="sm")
                            example2_btn = gr.Button("Bad Example", size="sm")
                            example3_btn = gr.Button("Neutral Example", size="sm")
                        predict_btn = gr.Button("🚀 Analyze Sentiment", variant="primary")
                    with gr.Column(scale=1):
                        prediction_output = gr.Markdown(label="Results")
                        prediction_plot = gr.Plot(label="Probability Chart")
                # Example handlers
                example1_btn.click(
                    lambda: "This product is absolutely amazing! Best purchase ever!",
                    outputs=text_input
                )
                example2_btn.click(
                    lambda: "Terrible quality, broke immediately. Waste of money!",
                    outputs=text_input
                )
                example3_btn.click(
                    lambda: "It's okay, nothing special but does the job.",
                    outputs=text_input
                )
                # Prediction handler
                predict_btn.click(
                    predict_text,
                    inputs=[text_input, model_dropdown],
                    outputs=[prediction_output, prediction_plot]
                )
            # Batch Processing Tab
            with gr.Tab("📁 Batch Processing"):
                gr.Markdown("### Upload a file to process multiple texts")
                with gr.Row():
                    with gr.Column():
                        file_upload = gr.File(
                            label="Upload File (.txt or .csv)",
                            file_types=[".txt", ".csv"]
                        )
                        batch_model = gr.Dropdown(
                            choices=get_available_models(),
                            value=get_available_models()[0] if get_available_models() else None,
                            label="Model for Batch Processing"
                        )
                        max_texts = gr.Slider(
                            minimum=10,
                            maximum=500,
                            value=100,
                            step=10,
                            label="Max Texts to Process"
                        )
                        process_btn = gr.Button("📊 Process File", variant="primary")
                    with gr.Column():
                        batch_output = gr.Markdown(label="Processing Results")
                        download_file = gr.File(label="Download Results")
                # Process handler
                process_btn.click(
                    process_file,
                    inputs=[file_upload, batch_model, max_texts],
                    outputs=[batch_output, download_file]
                )
            # Model Comparison Tab
            with gr.Tab("⚖️ Model Comparison"):
                gr.Markdown("### Compare predictions from different models")
                with gr.Row():
                    with gr.Column():
                        comparison_input = gr.Textbox(
                            lines=4,
                            placeholder="Enter text to compare models...",
                            label="Text for Comparison"
                        )
                        compare_btn = gr.Button("🔍 Compare Models", variant="primary")
                        with gr.Row():
                            comp_ex1 = gr.Button("Mixed Example 1", size="sm")
                            comp_ex2 = gr.Button("Mixed Example 2", size="sm")
                    with gr.Column():
                        comparison_output = gr.Markdown(label="Comparison Results")
                        comparison_plot = gr.Plot(label="Model Comparison")
                # Example handlers
                comp_ex1.click(
                    lambda: "This movie was okay but not great.",
                    outputs=comparison_input
                )
                comp_ex2.click(
                    lambda: "The product is fine, I guess.",
                    outputs=comparison_input
                )
                # Compare handler
                compare_btn.click(
                    compare_models_func,
                    inputs=comparison_input,
                    outputs=[comparison_output, comparison_plot]
                )
            # Model Info Tab
            with gr.Tab("📊 Model Info"):
                model_info = gr.Markdown(
                    value=get_model_info(),
                    label="Model Information"
                )
                refresh_btn = gr.Button("🔄 Refresh", size="sm")
                refresh_btn.click(get_model_info, outputs=model_info)
        # Footer
        gr.HTML("""
        <div style="text-align: center; margin-top: 2rem; padding: 1rem; border-top: 1px solid #eee; color: #666;">
            <p><strong>🤖 ML Text Classification App</strong></p>
            <p>Built with Gradio | By Maaz Amjad</p>
            <p><small>Part of Introduction to Large Language Models course</small></p>
        </div>
        """)
    return app
# ============================================================================
# MAIN
# ============================================================================
if __name__ == "__main__":
    # Check models
    if MODELS is None:
        print("⚠️ Warning: No models loaded!")
    else:
        available = get_available_models()
        print(f"✅ Successfully loaded {len(available)} model(s): {', '.join(available)}")
    # Launch app
    app = create_app()
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        debug=True
    )
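
# Usage note (assumed local setup, not part of the original Space config):
#   pip install gradio pandas numpy scikit-learn joblib matplotlib
# then run this script with Python and open http://localhost:7860 in a browser.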