# aradhyapavan's picture
# nlp ultimate tutor
# ca2c89c verified
import html
from collections import Counter

import matplotlib
matplotlib.use('Agg')  # Use non-interactive backend (selected before pyplot is imported)
import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
from nltk.sentiment.vader import SentimentIntensityAnalyzer

from utils.model_loader import load_sentiment_analyzer, load_emotion_classifier
from utils.helpers import fig_to_html, df_to_html_table
# Character budget for transformer inputs: a 512-token model limit multiplied
# by a rough 4-characters-per-token estimate. Defined once at module level so
# the sentiment and emotion sections truncate identically and neither section
# depends on a variable assigned inside the other.
_MAX_MODEL_TOKENS = 512
_CHARS_PER_TOKEN_ESTIMATE = 4
_MAX_MODEL_CHARS = _MAX_MODEL_TOKENS * _CHARS_PER_TOKEN_ESTIMATE

# Presentation attributes for each VADER sentiment label.
#   color      - accent colour of the headline card / gauge marker
#   plot_color - marker colour in the sentence-flow chart
#   row_class  - Bootstrap table row class in the sentence table
#   text_class - Bootstrap text class in the sentence table
_SENTIMENT_STYLES = {
    "Positive": {
        "color": "#4CAF50",  # Green
        "plot_color": "#4CAF50",
        "emoji": "😊",
        "row_class": "table-success",
        "text_class": "text-success",
    },
    "Negative": {
        "color": "#F44336",  # Red
        "plot_color": "#F44336",
        "emoji": "😞",
        "row_class": "table-danger",
        "text_class": "text-danger",
    },
    "Neutral": {
        "color": "#FFC107",  # Amber
        "plot_color": "#9E9E9E",
        "emoji": "😐",
        "row_class": "table-warning",
        "text_class": "text-warning",
    },
}

_DEFAULT_EMOTION_COLOR = '#9E9E9E'
_DEFAULT_EMOTION_EMOJI = '\U0001F610'  # neutral face

_EMOTION_COLORS = {
    'joy': '#FFD54F',
    'anger': '#EF5350',
    'sadness': '#42A5F5',
    'fear': '#9C27B0',
    'surprise': '#26C6DA',
    'love': '#EC407A',
    'disgust': '#66BB6A',
    'optimism': '#FF9800',
    'pessimism': '#795548',
    'trust': '#4CAF50',
    'anticipation': '#FF7043',
    'neutral': '#9E9E9E',
}

# Written as escapes rather than literal glyphs so the values survive encoding
# round-trips; several of these had previously been corrupted into mojibake
# or unrelated symbols.
_EMOTION_EMOJIS = {
    'joy': '\U0001F603',           # smiling face with open mouth
    'anger': '\U0001F620',         # angry face
    'sadness': '\U0001F622',       # crying face
    'fear': '\U0001F628',          # fearful face
    'surprise': '\U0001F632',      # astonished face
    'love': '\u2764\uFE0F',        # red heart (emoji presentation)
    'disgust': '\U0001F922',       # nauseated face
    'optimism': '\U0001F929',      # star-struck face
    'pessimism': '\U0001F612',     # unamused face
    'trust': '\U0001F91D',         # handshake
    'anticipation': '\U0001F914',  # thinking face
    'neutral': '\U0001F610',       # neutral face
}


def _classify_compound(compound_score):
    """Map a VADER compound score to 'Positive', 'Negative' or 'Neutral'."""
    if compound_score >= 0.05:
        return "Positive"
    if compound_score <= -0.05:
        return "Negative"
    return "Neutral"


def _interpret_compound(compound_score):
    """Return a one-line strength description for a VADER compound score."""
    if compound_score >= 0.75:
        return "Extremely positive sentiment"
    if compound_score >= 0.5:
        return "Moderately positive sentiment"
    if compound_score >= 0.05:
        return "Slightly positive sentiment"
    if compound_score > -0.05:
        return "Neutral sentiment"
    if compound_score > -0.5:
        return "Slightly negative sentiment"
    if compound_score > -0.75:
        return "Moderately negative sentiment"
    return "Extremely negative sentiment"


def _append_vader_section(output_html, text_input):
    """Append the whole-text VADER report to *output_html*.

    Returns the SentimentIntensityAnalyzer instance so the sentence-level
    section can reuse it instead of constructing a second one.
    """
    output_html.append('<h3 class="task-subheader">VADER Sentiment Analysis</h3>')
    output_html.append('<p>VADER (Valence Aware Dictionary and sEntiment Reasoner) is a lexicon and rule-based sentiment analysis tool specifically attuned to sentiments expressed in social media.</p>')

    vader_analyzer = SentimentIntensityAnalyzer()
    vader_scores = vader_analyzer.polarity_scores(text_input)

    compound_score = vader_scores['compound']
    pos_score = vader_scores['pos']
    neg_score = vader_scores['neg']
    neu_score = vader_scores['neu']

    sentiment_category = _classify_compound(compound_score)
    style = _SENTIMENT_STYLES[sentiment_category]
    sentiment_color = style["color"]
    sentiment_emoji = style["emoji"]

    # Compound score lies in [-1, 1]; rescale to a 0-100% horizontal position.
    marker_left = (compound_score + 1) / 2 * 100

    # Sentiment gauge display
    output_html.append(f"""
    <div class="card">
        <div class="card-body">
            <div class="text-center mb-3">
                <span style="font-size: 3rem; margin-right: 15px;">{sentiment_emoji}</span>
                <div>
                    <h3 class="mb-0" style="color: {sentiment_color};">{sentiment_category}</h3>
                    <p class="mb-0 fs-5">Compound Score: {compound_score:.2f}</p>
                </div>
            </div>
            <div style="height: 30px; background-color: #e0e0e0; border-radius: 15px; position: relative; overflow: hidden; margin: 10px 0;">
                <div style="position: absolute; top: 0; bottom: 0; left: 50%; width: 2px; background-color: #000; z-index: 2;"></div>
                <div style="position: absolute; top: 0; bottom: 0; left: {marker_left}%; width: 10px; background-color: {sentiment_color}; border-radius: 5px; transform: translateX(-50%); z-index: 3;"></div>
                <div style="position: absolute; top: 0; bottom: 0; left: 0; width: 50%; background: linear-gradient(90deg, #F44336 0%, #FFC107 100%);"></div>
                <div style="position: absolute; top: 0; bottom: 0; right: 0; width: 50%; background: linear-gradient(90deg, #FFC107 0%, #4CAF50 100%);"></div>
            </div>
            <div class="d-flex justify-content-between mt-2">
                <span>Negative (-1.0)</span>
                <span>Neutral (0.0)</span>
                <span>Positive (1.0)</span>
            </div>
        </div>
    </div>
    """)

    # VADER score breakdown
    output_html.append('<h4>VADER Score Breakdown</h4>')

    labels = ['Positive', 'Neutral', 'Negative']
    sizes = [pos_score, neu_score, neg_score]
    colors = ['#4CAF50', '#FFC107', '#F44336']

    # Pull out the strictly dominant slice; ties fall through to 'Negative'.
    if pos_score > neg_score and pos_score > neu_score:
        explode = (0.1, 0, 0)
    elif neu_score > pos_score and neu_score > neg_score:
        explode = (0, 0.1, 0)
    else:
        explode = (0, 0, 0.1)

    if sum(sizes) > 0:
        fig, ax = plt.subplots(figsize=(8, 8))
        try:
            ax.pie(sizes, explode=explode, labels=labels, colors=colors,
                   autopct='%1.1f%%', shadow=True, startangle=90)
            ax.axis('equal')
            ax.set_title('VADER Sentiment Distribution')
            chart_html = fig_to_html(fig)
        finally:
            # Release the figure so repeated requests do not accumulate them.
            plt.close(fig)
    else:
        # All three proportions are zero (e.g. empty text); a pie chart
        # cannot be normalised from that, so show a note instead.
        chart_html = '<p class="text-muted">No scored words were found, so there is no sentiment distribution to chart.</p>'

    detail_df = pd.DataFrame({
        'Metric': ['Positive Score', 'Neutral Score', 'Negative Score', 'Compound Score'],
        'Value': [pos_score, neu_score, neg_score, compound_score],
    })

    interpretation = _interpret_compound(compound_score)

    # Two-column layout: chart on the left, numbers and interpretation on the right.
    output_html.append('<div class="row">')
    output_html.append('<div class="col-md-6">')
    output_html.append(chart_html)
    output_html.append('</div>')
    output_html.append('<div class="col-md-6">')
    output_html.append(df_to_html_table(detail_df))
    output_html.append(f"""
    <div class="alert alert-success mt-3">
        <h4>Interpretation</h4>
        <p class="mb-0">{interpretation}</p>
    </div>
    """)
    output_html.append('</div>')  # Close column 2
    output_html.append('</div>')  # Close row

    return vader_analyzer


def _append_transformer_section(output_html, text_input):
    """Append the transformer sentiment result, or an error alert if it fails."""
    output_html.append('<h3 class="task-subheader">Transformer-based Sentiment Analysis</h3>')
    output_html.append('<p>This analysis uses a DistilBERT model fine-tuned on the Stanford Sentiment Treebank dataset.</p>')

    # Best-effort section: any failure is reported inline and the rest of the
    # report still renders.
    try:
        sentiment_model = load_sentiment_analyzer()

        truncated_text = text_input[:_MAX_MODEL_CHARS]  # Rough character estimate
        transformer_result = sentiment_model(truncated_text)

        if len(text_input) > _MAX_MODEL_CHARS:
            output_html.append("""
            <div class="alert alert-warning">
                <p class="mb-0"><b>⚠️ Note:</b> Text was truncated for analysis as it exceeds the model's length limit.</p>
            </div>
            """)

        # Assumes the model returns a list whose first item has 'label' and
        # 'score' keys, as the indexing below requires.
        transformer_label = transformer_result[0]['label']
        transformer_score = transformer_result[0]['score']

        is_positive = transformer_label == "POSITIVE"
        sentiment_color = "#4CAF50" if is_positive else "#F44336"
        sentiment_emoji = "😊" if is_positive else "😞"

        output_html.append(f"""
        <div class="card" style="border-color: {sentiment_color};">
            <div class="card-body" style="background-color: {sentiment_color}22;">
                <div class="d-flex align-items-center">
                    <span style="font-size: 3rem; margin-right: 15px;">{sentiment_emoji}</span>
                    <div>
                        <h3 class="mb-0" style="color: {sentiment_color};">{transformer_label.capitalize()}</h3>
                        <p class="mb-0 fs-5">Confidence: {transformer_score:.2%}</p>
                    </div>
                </div>
            </div>
        </div>
        """)

        # Confidence bar
        output_html.append(f"""
        <div style="height: 30px; background-color: #e0e0e0; border-radius: 15px; position: relative; overflow: hidden; margin: 10px 0;">
            <div style="position: absolute; top: 0; bottom: 0; left: 0; width: {transformer_score * 100}%; background-color: {sentiment_color}; border-radius: 5px;"></div>
            <div style="position: absolute; top: 0; bottom: 0; width: 100%; text-align: center; line-height: 30px; color: #000; font-weight: bold;">
                {transformer_score:.1%} Confidence
            </div>
        </div>
        """)
    except Exception as e:
        output_html.append(f"""
        <div class="alert alert-danger">
            <h4>Transformer Model Error</h4>
            <p>Failed to load or run transformer sentiment model: {html.escape(str(e))}</p>
            <p>Falling back to VADER results only.</p>
        </div>
        """)


def _append_emotion_section(output_html, text_input):
    """Append the emotion-classifier chart and summary, or an error alert."""
    output_html.append('<h3 class="task-subheader">Emotion Analysis</h3>')
    output_html.append('<p>Identifying specific emotions in text using a RoBERTa model fine-tuned on the emotion dataset.</p>')

    # Best-effort section: any failure is reported inline and the rest of the
    # report still renders.
    try:
        emotion_classifier = load_emotion_classifier()

        truncated_text = text_input[:_MAX_MODEL_CHARS]  # Rough character estimate
        emotion_result = emotion_classifier(truncated_text)

        # Assumes the classifier returns a list whose first item is itself a
        # list of {'label', 'score'} dicts, as the iteration below requires.
        emotion_scores = {item['label']: item['score'] for item in emotion_result[0]}

        emotion_df = pd.DataFrame({
            'Emotion': list(emotion_scores.keys()),
            'Score': list(emotion_scores.values()),
        }).sort_values('Score', ascending=False)

        # Highest-scoring emotion after the descending sort.
        primary_emotion = emotion_df.iloc[0]['Emotion']
        primary_score = emotion_df.iloc[0]['Score']

        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            bars = ax.barh(
                emotion_df['Emotion'],
                emotion_df['Score'],
                color=[_EMOTION_COLORS.get(emotion, _DEFAULT_EMOTION_COLOR)
                       for emotion in emotion_df['Emotion']],
            )
            ax.set_xlabel('Score')
            ax.set_title('Emotion Scores')

            # Value label just past the end of each bar.
            for bar in bars:
                ax.text(bar.get_width() + 0.01, bar.get_y() + bar.get_height() / 2,
                        f"{bar.get_width():.2f}", va='center')

            ax.set_xlim(0, 1)
            fig.tight_layout()
            chart_html = fig_to_html(fig)
        finally:
            # Release the figure so repeated requests do not accumulate them.
            plt.close(fig)

        # Chart section
        output_html.append('<section class="emotion-chart-section">')
        output_html.append('<div class="chart-container">')
        output_html.append(chart_html)
        output_html.append('</div>')
        output_html.append('</section>')

        # Primary emotion section
        primary_color = _EMOTION_COLORS.get(primary_emotion, _DEFAULT_EMOTION_COLOR)
        primary_emoji = _EMOTION_EMOJIS.get(primary_emotion, _DEFAULT_EMOTION_EMOJI)

        output_html.append('<section class="emotion-result-container">')
        output_html.append(f"""
        <div class="card" style="border-color: {primary_color};">
            <div class="card-body" style="background-color: {primary_color}22;">
                <div class="d-flex align-items-center">
                    <span style="font-size: 3rem; margin-right: 15px;">{primary_emoji}</span>
                    <div>
                        <h3 class="mb-0" style="color: {primary_color};">{primary_emotion.capitalize()}</h3>
                        <p class="mb-0 fs-5">Score: {primary_score:.2f}</p>
                    </div>
                </div>
            </div>
        </div>
        """)

        # Show top emotions table
        output_html.append('<h4>Top Emotions</h4>')
        output_html.append(df_to_html_table(emotion_df.head(5)))
        output_html.append('</section>')  # Close emotion result container
    except Exception as e:
        output_html.append(f"""
        <div class="alert alert-danger">
            <h4>Emotion Analysis Error</h4>
            <p>Failed to load or run emotion classifier: {html.escape(str(e))}</p>
        </div>
        """)


def _append_sentence_section(output_html, text_input, vader_analyzer):
    """Append the per-sentence VADER breakdown (flow chart, counts, table)."""
    output_html.append('<h3 class="task-subheader">Sentence-level Analysis</h3>')
    output_html.append('<p>Breaking down sentiment by individual sentences to identify sentiment variations throughout the text.</p>')

    sentences = nltk.sent_tokenize(text_input)

    # Minimum 2 sentences to do the analysis
    if len(sentences) < 2:
        output_html.append("""
        <div class="alert alert-warning">
            <p class="mb-0">Sentence-level analysis requires at least two sentences. The provided text doesn't have enough sentences for this analysis.</p>
        </div>
        """)
        return

    # One (number, sentence, compound, label) record per sentence.
    records = []
    for number, sentence in enumerate(sentences, start=1):
        compound = vader_analyzer.polarity_scores(sentence)['compound']
        records.append((number, sentence, compound, _classify_compound(compound)))

    numbers = [record[0] for record in records]
    compounds = [record[2] for record in records]
    sentiment_labels = [record[3] for record in records]
    point_colors = [_SENTIMENT_STYLES[label]["plot_color"] for label in sentiment_labels]

    # Line graph of sentiment flow, with each point coloured by its label.
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.plot(numbers, compounds, 'o-', color='#1976D2', linewidth=2, markersize=8)
        ax.axhline(y=0, color='#9E9E9E', linestyle='-', alpha=0.3)
        ax.axhline(y=0.05, color='#4CAF50', linestyle='--', alpha=0.3)
        ax.axhline(y=-0.05, color='#F44336', linestyle='--', alpha=0.3)
        ax.scatter(numbers, compounds, c=point_colors, s=100, zorder=5)

        ax.grid(alpha=0.3)
        ax.set_xlabel('Sentence Number')
        ax.set_ylabel('Compound Sentiment Score')
        ax.set_title('Sentiment Flow Through Text')
        ax.set_ylim(-1.05, 1.05)
        fig.tight_layout()
        chart_html = fig_to_html(fig)
    finally:
        # Release the figure so repeated requests do not accumulate them.
        plt.close(fig)

    positive_count = sentiment_labels.count('Positive')
    negative_count = sentiment_labels.count('Negative')
    neutral_count = len(sentiment_labels) - positive_count - negative_count

    # Chart section
    output_html.append('<section class="sentence-chart-section">')
    output_html.append('<div class="chart-container">')
    output_html.append(chart_html)
    output_html.append('</div>')
    output_html.append('</section>')

    # Sentence analysis section
    output_html.append('<section class="sentence-analysis-container">')
    output_html.append(f"""
    <div class="row mb-3">
        <div class="col-4">
            <div class="card text-center">
                <div class="card-body p-2">
                    <h5 class="text-success">{positive_count}</h5>
                    <small>Positive</small>
                </div>
            </div>
        </div>
        <div class="col-4">
            <div class="card text-center">
                <div class="card-body p-2">
                    <h5 class="text-warning">{neutral_count}</h5>
                    <small>Neutral</small>
                </div>
            </div>
        </div>
        <div class="col-4">
            <div class="card text-center">
                <div class="card-body p-2">
                    <h5 class="text-danger">{negative_count}</h5>
                    <small>Negative</small>
                </div>
            </div>
        </div>
    </div>
    """)

    # Number of adjacent sentence pairs whose labels differ.
    sentiment_changes = sum(
        1 for previous, current in zip(sentiment_labels, sentiment_labels[1:])
        if previous != current
    )
    if sentiment_changes > 0:
        output_html.append(f"""
        <div class="alert alert-success">
            <p class="mb-0"><b>Sentiment Shifts:</b> {sentiment_changes}</p>
            <p class="mb-0">The text shows {sentiment_changes} shifts in sentiment between sentences.</p>
        </div>
        """)

    # Sentence breakdown table (hand-built HTML for per-row styling)
    output_html.append('<h4>Sentence-by-Sentence Analysis</h4>')
    output_html.append('<div class="table-responsive" style="max-height: 400px;">')
    output_html.append('<table class="table table-striped">')
    output_html.append('<thead><tr><th>#</th><th>Sentence</th><th>Sentiment</th></tr></thead>')
    output_html.append('<tbody>')

    for number, sentence, compound, label in records:
        style = _SENTIMENT_STYLES[label]
        row_class = style["row_class"]
        text_class = style["text_class"]
        emoji = style["emoji"]
        sentiment_html = f"""
        <div class="d-flex align-items-center">
            <span class="me-2">{emoji}</span>
            <span class="{text_class} fw-bold">{label}</span>
            <span class="ms-2 text-muted">({compound:.2f})</span>
        </div>
        """
        output_html.append(f'<tr class="{row_class}">')
        output_html.append(f'<td>{number}</td>')
        # The sentence is user-supplied text: escape it before embedding in HTML.
        output_html.append(f'<td>{html.escape(sentence)}</td>')
        output_html.append(f'<td>{sentiment_html}</td>')
        output_html.append('</tr>')

    output_html.append('</tbody></table>')
    output_html.append('</div>')
    output_html.append('</section>')  # Close sentence analysis container


def sentiment_handler(text_input):
    """Show sentiment analysis capabilities.

    Builds an HTML report for *text_input* made of four sections: whole-text
    VADER scores, a transformer sentiment prediction, an emotion-classifier
    breakdown, and a per-sentence VADER analysis, followed by a static
    "About Sentiment Analysis" card.

    The transformer and emotion sections report their own failures inline and
    do not stop the report. Any other failure is caught here and rendered as
    an error alert, so this function returns HTML rather than raising.

    Args:
        text_input: The text to analyse.

    Returns:
        The report as a single HTML string wrapped in a ``result-area`` div.
    """
    output_html = []

    # Add result area container
    output_html.append('<div class="result-area">')
    output_html.append('<h2 class="task-header">Sentiment Analysis</h2>')

    output_html.append("""
    <div class="alert alert-info">
        <i class="fas fa-info-circle"></i>
        Sentiment analysis determines the emotional tone behind text to identify if it expresses positive, negative, or neutral sentiment.
    </div>
    """)

    # Model info
    output_html.append("""
    <div class="alert alert-info">
        <h4><i class="fas fa-tools"></i> Models Used:</h4>
        <ul>
            <li><b>NLTK VADER</b> - Rule-based sentiment analyzer specifically tuned for social media text</li>
            <li><b>DistilBERT</b> - Transformer model fine-tuned on SST-2 dataset, achieving ~91% accuracy</li>
            <li><b>RoBERTa Emotion</b> - Transformer model for multi-label emotion detection</li>
        </ul>
    </div>
    """)

    # Top-level boundary: whatever a section appended before failing stays in
    # the output, followed by the error alert.
    try:
        vader_analyzer = _append_vader_section(output_html, text_input)
        _append_transformer_section(output_html, text_input)
        _append_emotion_section(output_html, text_input)
        _append_sentence_section(output_html, text_input, vader_analyzer)
    except Exception as e:
        output_html.append(f"""
        <div class="alert alert-danger">
            <h3>Error</h3>
            <p>Failed to analyze sentiment: {html.escape(str(e))}</p>
        </div>
        """)

    # About Sentiment Analysis section
    output_html.append("""
    <div class="card mt-4">
        <div class="card-header">
            <h4 class="mb-0">
                <i class="fas fa-info-circle"></i>
                About Sentiment Analysis
            </h4>
        </div>
        <div class="card-body">
            <h5>What is Sentiment Analysis?</h5>
            <p>Sentiment Analysis (also known as opinion mining) is a natural language processing technique that identifies
            and extracts subjective information from text. It determines whether a piece of text expresses positive, negative,
            or neutral sentiment.</p>
            <h5>Common Approaches:</h5>
            <ol>
                <li><b>Lexicon-based</b> (like VADER) - Uses dictionaries of words with pre-assigned sentiment scores</li>
                <li><b>Machine learning</b> - Supervised techniques that learn from labeled data</li>
                <li><b>Deep learning</b> (like our Transformer models) - Neural networks that can capture complex patterns and contexts</li>
            </ol>
            <h5>Applications:</h5>
            <ul>
                <li><b>Brand monitoring</b> - Track public perception of a brand</li>
                <li><b>Customer feedback analysis</b> - Understand customer satisfaction</li>
                <li><b>Market research</b> - Analyze product reviews and consumer opinions</li>
                <li><b>Social media monitoring</b> - Track public sentiment on topics or events</li>
                <li><b>Stock market prediction</b> - Analyze news sentiment to predict stock movements</li>
            </ul>
        </div>
    </div>
    """)

    output_html.append('</div>')  # Close result-area div
    return '\n'.join(output_html)