# GRADIO ML CLASSIFICATION APP - SIMPLIFIED VERSION
# =================================================
import gradio as gr
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt
import warnings
import tempfile
import os
from typing import Tuple, List, Optional

warnings.filterwarnings('ignore')
# ============================================================================
# MODEL LOADING
# ============================================================================
def load_models():
    """Load all available ML models"""
    models = {}
    try:
        # Load pipeline
        try:
            models['pipeline'] = joblib.load('models/sentiment_analysis_pipeline.pkl')
            models['pipeline_available'] = True
        except Exception:
            models['pipeline_available'] = False
        # Load vectorizer
        try:
            models['vectorizer'] = joblib.load('models/tfidf_vectorizer.pkl')
            models['vectorizer_available'] = True
        except Exception:
            models['vectorizer_available'] = False
        # Load LR model
        try:
            models['logistic_regression'] = joblib.load('models/logistic_regression_model.pkl')
            models['lr_available'] = True
        except Exception:
            models['lr_available'] = False
        # Load NB model
        try:
            models['naive_bayes'] = joblib.load('models/multinomial_nb_model.pkl')
            models['nb_available'] = True
        except Exception:
            models['nb_available'] = False
        # Check if we have working models
        pipeline_ready = models['pipeline_available']
        individual_ready = models['vectorizer_available'] and (models['lr_available'] or models['nb_available'])
        return models if (pipeline_ready or individual_ready) else None
    except Exception as e:
        print(f"Error loading models: {e}")
        return None

# Load models globally
MODELS = load_models()
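
# ----------------------------------------------------------------------------
# Illustrative sketch only (never called by the app): one way the artifacts
# expected in 'models/' could be produced, assuming scikit-learn TF-IDF
# features and binary labels (0 = negative, 1 = positive). The helper name,
# hyperparameters, and training data are assumptions, not the original
# training code for this Space.
def _train_and_export_models_sketch(texts, labels):
    """Hypothetical helper: fit TF-IDF + classifiers and persist them with joblib."""
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.linear_model import LogisticRegression
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.pipeline import Pipeline

    # Shared TF-IDF features for the two standalone classifiers
    vectorizer = TfidfVectorizer()
    X = vectorizer.fit_transform(texts)
    lr = LogisticRegression(max_iter=1000).fit(X, labels)
    nb = MultinomialNB().fit(X, labels)

    # End-to-end variant: raw text in, prediction out
    pipeline = Pipeline([
        ("tfidf", TfidfVectorizer()),
        ("clf", LogisticRegression(max_iter=1000)),
    ]).fit(texts, labels)

    # Save under the filenames load_models() looks for
    os.makedirs("models", exist_ok=True)
    joblib.dump(pipeline, "models/sentiment_analysis_pipeline.pkl")
    joblib.dump(vectorizer, "models/tfidf_vectorizer.pkl")
    joblib.dump(lr, "models/logistic_regression_model.pkl")
    joblib.dump(nb, "models/multinomial_nb_model.pkl")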
# ============================================================================
# CORE FUNCTIONS
# ============================================================================
def get_available_models():
    """Get available model names"""
    if MODELS is None:
        return ["No models available"]
    available = []
    if MODELS.get('pipeline_available') or (MODELS.get('vectorizer_available') and MODELS.get('lr_available')):
        available.append("Logistic Regression")
    if MODELS.get('vectorizer_available') and MODELS.get('nb_available'):
        available.append("Multinomial Naive Bayes")
    return available if available else ["No models available"]

def make_prediction(text, model_choice):
    """Make prediction using selected model"""
    if MODELS is None or not text.strip():
        return None, None, "Please enter text and ensure models are loaded"
    try:
        if model_choice == "Logistic Regression":
            if MODELS.get('pipeline_available'):
                prediction = MODELS['pipeline'].predict([text])[0]
                probabilities = MODELS['pipeline'].predict_proba([text])[0]
            elif MODELS.get('vectorizer_available') and MODELS.get('lr_available'):
                X = MODELS['vectorizer'].transform([text])
                prediction = MODELS['logistic_regression'].predict(X)[0]
                probabilities = MODELS['logistic_regression'].predict_proba(X)[0]
            else:
                return None, None, "Logistic Regression model not available"
        elif model_choice == "Multinomial Naive Bayes":
            if MODELS.get('vectorizer_available') and MODELS.get('nb_available'):
                X = MODELS['vectorizer'].transform([text])
                prediction = MODELS['naive_bayes'].predict(X)[0]
                probabilities = MODELS['naive_bayes'].predict_proba(X)[0]
            else:
                return None, None, "Naive Bayes model not available"
        else:
            return None, None, f"Unknown model choice: {model_choice}"
        # Convert numeric class labels (including numpy integers) to readable names
        class_names = ['Negative', 'Positive']
        prediction_label = class_names[int(prediction)] if isinstance(prediction, (int, np.integer)) else str(prediction)
        return prediction_label, probabilities, "Success"
    except Exception as e:
        return None, None, f"Error: {str(e)}"
def create_plot(probabilities):
    """Create probability plot"""
    fig, ax = plt.subplots(figsize=(8, 5))
    classes = ['Negative', 'Positive']
    colors = ['#ff6b6b', '#51cf66']
    bars = ax.bar(classes, probabilities, color=colors, alpha=0.8)
    # Add labels
    for bar, prob in zip(bars, probabilities):
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2., height + 0.01,
                f'{prob:.1%}', ha='center', va='bottom', fontweight='bold')
    ax.set_ylim(0, 1.1)
    ax.set_ylabel('Probability')
    ax.set_title('Sentiment Prediction Probabilities')
    ax.grid(axis='y', alpha=0.3)
    plt.tight_layout()
    return fig
# ============================================================================
# INTERFACE FUNCTIONS
# ============================================================================
def predict_text(text, model_choice):
    """Single text prediction interface"""
    prediction, probabilities, status = make_prediction(text, model_choice)
    if prediction and probabilities is not None:
        confidence = max(probabilities)
        # Format results
        result = f"**Prediction:** {prediction} Sentiment\n"
        result += f"**Confidence:** {confidence:.1%}\n\n"
        result += "**Detailed Probabilities:**\n"
        result += f"- Negative: {probabilities[0]:.1%}\n"
        result += f"- Positive: {probabilities[1]:.1%}\n\n"
        # Interpretation
        if confidence >= 0.8:
            result += "**High Confidence:** The model is very confident about this prediction."
        elif confidence >= 0.6:
            result += "**Medium Confidence:** The model is reasonably confident."
        else:
            result += "**Low Confidence:** The model is uncertain about this prediction."
        # Create plot
        plot = create_plot(probabilities)
        return result, plot
    else:
        return f"Error: {status}", None
def process_file(file, model_choice, max_texts):
    """Process uploaded file"""
    if file is None:
        return "Please upload a file!", None
    if MODELS is None:
        return "No models loaded!", None
    try:
        # Read file
        if file.name.endswith('.txt'):
            with open(file.name, 'r', encoding='utf-8') as f:
                content = f.read()
            texts = [line.strip() for line in content.split('\n') if line.strip()]
        elif file.name.endswith('.csv'):
            df = pd.read_csv(file.name)
            texts = df.iloc[:, 0].astype(str).tolist()
        else:
            return "Unsupported file format! Use .txt or .csv", None
        if not texts:
            return "No text found in file!", None
        # Limit texts (slider values may arrive as floats)
        max_texts = int(max_texts)
        if len(texts) > max_texts:
            texts = texts[:max_texts]
        # Process texts
        results = []
        for i, text in enumerate(texts):
            if text.strip():
                prediction, probabilities, _ = make_prediction(text, model_choice)
                if prediction and probabilities is not None:
                    results.append({
                        'Index': i + 1,
                        'Text': text[:100] + "..." if len(text) > 100 else text,
                        'Prediction': prediction,
                        'Confidence': f"{max(probabilities):.1%}",
                        'Negative_Prob': f"{probabilities[0]:.1%}",
                        'Positive_Prob': f"{probabilities[1]:.1%}"
                    })
        if results:
            # Create summary
            positive_count = sum(1 for r in results if r['Prediction'] == 'Positive')
            negative_count = len(results) - positive_count
            avg_confidence = np.mean([float(r['Confidence'].strip('%')) for r in results])
            summary = "**Processing Complete!**\n\n"
            summary += "**Summary Statistics:**\n"
            summary += f"- Total Processed: {len(results)}\n"
            summary += f"- Positive: {positive_count} ({positive_count/len(results):.1%})\n"
            summary += f"- Negative: {negative_count} ({negative_count/len(results):.1%})\n"
            summary += f"- Average Confidence: {avg_confidence:.1f}%\n"
            # Create CSV for download
            results_df = pd.DataFrame(results)
            # Save to temporary file
            with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as f:
                results_df.to_csv(f, index=False)
                temp_file = f.name
            return summary, temp_file
        else:
            return "No valid texts could be processed!", None
    except Exception as e:
        return f"Error processing file: {str(e)}", None
def compare_models_func(text):
    """Compare predictions from different models"""
    if MODELS is None:
        return "No models loaded!", None
    if not text.strip():
        return "Please enter text to compare!", None
    available_models = get_available_models()
    if len(available_models) < 2:
        return "Need at least 2 models for comparison.", None
    results = []
    all_probs = []
    for model_name in available_models:
        prediction, probabilities, _ = make_prediction(text, model_name)
        if prediction and probabilities is not None:
            results.append({
                'Model': model_name,
                'Prediction': prediction,
                'Confidence': f"{max(probabilities):.1%}",
                'Negative': f"{probabilities[0]:.1%}",
                'Positive': f"{probabilities[1]:.1%}"
            })
            all_probs.append(probabilities)
    if results:
        # Create comparison text
        comparison_text = "**Model Comparison Results:**\n\n"
        for result in results:
            comparison_text += f"**{result['Model']}:**\n"
            comparison_text += f"- Prediction: {result['Prediction']}\n"
            comparison_text += f"- Confidence: {result['Confidence']}\n"
            comparison_text += f"- Negative: {result['Negative']}, Positive: {result['Positive']}\n\n"
        # Agreement analysis
        predictions = [r['Prediction'] for r in results]
        if len(set(predictions)) == 1:
            comparison_text += f"**Agreement:** All models agree on {predictions[0]} sentiment!"
        else:
            comparison_text += "**Disagreement:** Models have different predictions."
        # Create comparison plot
        fig, axes = plt.subplots(1, len(results), figsize=(6 * len(results), 5))
        if len(results) == 1:
            axes = [axes]
        for i, (result, probs) in enumerate(zip(results, all_probs)):
            ax = axes[i]
            classes = ['Negative', 'Positive']
            colors = ['#ff6b6b', '#51cf66']
            bars = ax.bar(classes, probs, color=colors, alpha=0.8)
            # Add labels
            for bar, prob in zip(bars, probs):
                height = bar.get_height()
                ax.text(bar.get_x() + bar.get_width() / 2., height + 0.02,
                        f'{prob:.0%}', ha='center', va='bottom', fontweight='bold')
            ax.set_ylim(0, 1.1)
            ax.set_title(f"{result['Model']}\n{result['Prediction']}")
            ax.grid(axis='y', alpha=0.3)
        plt.tight_layout()
        return comparison_text, fig
    else:
        return "Failed to get predictions!", None
def get_model_info():
    """Get model information"""
    if MODELS is None:
        return """
**No models loaded!**

Please ensure you have model files in the 'models/' directory:
- sentiment_analysis_pipeline.pkl (complete pipeline), OR
- tfidf_vectorizer.pkl + logistic_regression_model.pkl, OR
- tfidf_vectorizer.pkl + multinomial_nb_model.pkl
"""
    info = "**Models loaded successfully!**\n\n"
    info += "**Available Models:**\n\n"
    if MODELS.get('pipeline_available') or (MODELS.get('vectorizer_available') and MODELS.get('lr_available')):
        info += "**Logistic Regression**\n"
        info += "- Type: Linear Classification\n"
        info += "- Features: TF-IDF vectors\n"
        info += "- Strengths: Fast, interpretable\n\n"
    if MODELS.get('vectorizer_available') and MODELS.get('nb_available'):
        info += "**Multinomial Naive Bayes**\n"
        info += "- Type: Probabilistic Classification\n"
        info += "- Features: TF-IDF vectors\n"
        info += "- Strengths: Works well with small data\n\n"
    info += "**File Status:**\n"
    files = [
        ("sentiment_analysis_pipeline.pkl", MODELS.get('pipeline_available', False)),
        ("tfidf_vectorizer.pkl", MODELS.get('vectorizer_available', False)),
        ("logistic_regression_model.pkl", MODELS.get('lr_available', False)),
        ("multinomial_nb_model.pkl", MODELS.get('nb_available', False))
    ]
    for filename, status in files:
        status_icon = "✅" if status else "❌"
        info += f"- {filename}: {status_icon}\n"
    return info
# ============================================================================
# GRADIO INTERFACE
# ============================================================================
def create_app():
    """Create Gradio interface"""
    with gr.Blocks(title="ML Text Classification") as app:
        # Header
        gr.HTML("""
        <div style="text-align: center; margin-bottom: 2rem;">
            <h1 style="color: #1f77b4; font-size: 2.5rem;">🤖 ML Text Classification App</h1>
            <p style="font-size: 1.2rem; color: #666;">Advanced Sentiment Analysis with Multiple ML Models</p>
        </div>
        """)
        # Main interface with tabs
        with gr.Tabs():
            # Single Prediction Tab
            with gr.Tab("🔮 Single Prediction"):
                gr.Markdown("### Enter text and select a model for sentiment analysis")
                with gr.Row():
                    with gr.Column(scale=1):
                        model_dropdown = gr.Dropdown(
                            choices=get_available_models(),
                            value=get_available_models()[0] if get_available_models() else None,
                            label="Choose Model"
                        )
                        text_input = gr.Textbox(
                            lines=5,
                            placeholder="Enter your text here...",
                            label="Text Input"
                        )
                        with gr.Row():
                            example1_btn = gr.Button("Good Example", size="sm")
                            example2_btn = gr.Button("Bad Example", size="sm")
                            example3_btn = gr.Button("Neutral Example", size="sm")
                        predict_btn = gr.Button("🚀 Analyze Sentiment", variant="primary")
                    with gr.Column(scale=1):
                        prediction_output = gr.Markdown(label="Results")
                        prediction_plot = gr.Plot(label="Probability Chart")
                # Example handlers
                example1_btn.click(
                    lambda: "This product is absolutely amazing! Best purchase ever!",
                    outputs=text_input
                )
                example2_btn.click(
                    lambda: "Terrible quality, broke immediately. Waste of money!",
                    outputs=text_input
                )
                example3_btn.click(
                    lambda: "It's okay, nothing special but does the job.",
                    outputs=text_input
                )
                # Prediction handler
                predict_btn.click(
                    predict_text,
                    inputs=[text_input, model_dropdown],
                    outputs=[prediction_output, prediction_plot]
                )
            # Batch Processing Tab
            with gr.Tab("📁 Batch Processing"):
                gr.Markdown("### Upload a file to process multiple texts")
                with gr.Row():
                    with gr.Column():
                        file_upload = gr.File(
                            label="Upload File (.txt or .csv)",
                            file_types=[".txt", ".csv"]
                        )
                        batch_model = gr.Dropdown(
                            choices=get_available_models(),
                            value=get_available_models()[0] if get_available_models() else None,
                            label="Model for Batch Processing"
                        )
                        max_texts = gr.Slider(
                            minimum=10,
                            maximum=500,
                            value=100,
                            step=10,
                            label="Max Texts to Process"
                        )
                        process_btn = gr.Button("📊 Process File", variant="primary")
                    with gr.Column():
                        batch_output = gr.Markdown(label="Processing Results")
                        download_file = gr.File(label="Download Results")
                # Process handler
                process_btn.click(
                    process_file,
                    inputs=[file_upload, batch_model, max_texts],
                    outputs=[batch_output, download_file]
                )
            # Model Comparison Tab
            with gr.Tab("⚖️ Model Comparison"):
                gr.Markdown("### Compare predictions from different models")
                with gr.Row():
                    with gr.Column():
                        comparison_input = gr.Textbox(
                            lines=4,
                            placeholder="Enter text to compare models...",
                            label="Text for Comparison"
                        )
                        compare_btn = gr.Button("🔍 Compare Models", variant="primary")
                        with gr.Row():
                            comp_ex1 = gr.Button("Mixed Example 1", size="sm")
                            comp_ex2 = gr.Button("Mixed Example 2", size="sm")
                    with gr.Column():
                        comparison_output = gr.Markdown(label="Comparison Results")
                        comparison_plot = gr.Plot(label="Model Comparison")
                # Example handlers
                comp_ex1.click(
                    lambda: "This movie was okay but not great.",
                    outputs=comparison_input
                )
                comp_ex2.click(
                    lambda: "The product is fine, I guess.",
                    outputs=comparison_input
                )
                # Compare handler
                compare_btn.click(
                    compare_models_func,
                    inputs=comparison_input,
                    outputs=[comparison_output, comparison_plot]
                )
            # Model Info Tab
            with gr.Tab("📊 Model Info"):
                model_info = gr.Markdown(
                    value=get_model_info(),
                    label="Model Information"
                )
                refresh_btn = gr.Button("🔄 Refresh", size="sm")
                refresh_btn.click(get_model_info, outputs=model_info)
        # Footer
        gr.HTML("""
        <div style="text-align: center; margin-top: 2rem; padding: 1rem; border-top: 1px solid #eee; color: #666;">
            <p><strong>🤖 ML Text Classification App</strong></p>
            <p>Built with Gradio | By Maaz Amjad</p>
            <p><small>Part of Introduction to Large Language Models course</small></p>
        </div>
        """)
    return app
# ============================================================================
# MAIN
# ============================================================================
if __name__ == "__main__":
    # Check models
    if MODELS is None:
        print("⚠️ Warning: No models loaded!")
    else:
        available = get_available_models()
        print(f"✅ Successfully loaded {len(available)} model(s): {', '.join(available)}")
    # Launch app
    app = create_app()
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        debug=True
    )
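
# Usage note (assumed local setup, not part of the original Space config):
#   pip install gradio pandas numpy scikit-learn joblib matplotlib
# then run this script with Python and open http://localhost:7860 in a browser.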