Spaces:
Running
Running
| import matplotlib | |
| matplotlib.use('Agg') # Use non-interactive backend | |
| import matplotlib.pyplot as plt | |
| import pandas as pd | |
| import numpy as np | |
| from nltk.sentiment.vader import SentimentIntensityAnalyzer | |
| import nltk | |
| from collections import Counter | |
| from utils.model_loader import load_sentiment_analyzer, load_emotion_classifier | |
| from utils.helpers import fig_to_html, df_to_html_table | |
# Character budget for text handed to the transformer models: 512 tokens at
# roughly four characters per token.  This is a heuristic only; it does not
# guarantee that the tokenised text fits the model's limit.
_MAX_MODEL_TOKENS = 512
_MAX_MODEL_CHARS = _MAX_MODEL_TOKENS * 4

# Presentation attributes for each sentiment category.
# NOTE(review): the emoji literals in the reviewed copy of this file were
# mis-decoded; the glyphs below are reconstructed (smiling / disappointed /
# neutral face) and should be confirmed against the original source.
_SENTIMENT_STYLES = {
    "Positive": {
        "color": "#4CAF50",  # Green
        "emoji": "\U0001F60A",
        "row_class": "table-success",
        "text_class": "text-success",
    },
    "Negative": {
        "color": "#F44336",  # Red
        "emoji": "\U0001F61E",
        "row_class": "table-danger",
        "text_class": "text-danger",
    },
    "Neutral": {
        "color": "#FFC107",  # Amber
        "emoji": "\U0001F610",
        "row_class": "table-warning",
        "text_class": "text-warning",
    },
}

# Marker colours used on the sentence-flow chart (neutral is grey there).
_FLOW_MARKER_COLORS = {
    "Positive": "#4CAF50",
    "Negative": "#F44336",
    "Neutral": "#9E9E9E",
}

_DEFAULT_EMOTION_COLOR = "#9E9E9E"
_DEFAULT_EMOTION_EMOJI = "\U0001F610"  # NOTE(review): best guess, confirm
_WARNING_EMOJI = "\u26A0\uFE0F"

_EMOTION_COLORS = {
    "joy": "#FFD54F",
    "anger": "#EF5350",
    "sadness": "#42A5F5",
    "fear": "#9C27B0",
    "surprise": "#26C6DA",
    "love": "#EC407A",
    "disgust": "#66BB6A",
    "optimism": "#FF9800",
    "pessimism": "#795548",
    "trust": "#4CAF50",
    "anticipation": "#FF7043",
    "neutral": "#9E9E9E",
}

# NOTE(review): joy, pessimism and neutral lost all distinguishing bytes in
# the reviewed copy; those three glyphs are best guesses.
_EMOTION_EMOJIS = {
    "joy": "\U0001F60A",
    "anger": "\U0001F620",
    "sadness": "\U0001F622",
    "fear": "\U0001F628",
    "surprise": "\U0001F632",
    "love": "\u2764\uFE0F",
    "disgust": "\U0001F922",
    "optimism": "\U0001F929",
    "pessimism": "\U0001F61E",
    "trust": "\U0001F91D",
    "anticipation": "\U0001F914",
    "neutral": "\U0001F610",
}

_INTRO_HTML = """
    <div class="alert alert-info">
        <i class="fas fa-info-circle"></i>
        Sentiment analysis determines the emotional tone behind text to identify if it expresses positive, negative, or neutral sentiment.
    </div>
    """

_MODELS_HTML = """
    <div class="alert alert-info">
        <h4><i class="fas fa-tools"></i> Models Used:</h4>
        <ul>
            <li><b>NLTK VADER</b> - Rule-based sentiment analyzer specifically tuned for social media text</li>
            <li><b>DistilBERT</b> - Transformer model fine-tuned on SST-2 dataset, achieving ~91% accuracy</li>
            <li><b>RoBERTa Emotion</b> - Transformer model for multi-label emotion detection</li>
        </ul>
    </div>
    """

_ABOUT_HTML = """
    <div class="card mt-4">
        <div class="card-header">
            <h4 class="mb-0">
                <i class="fas fa-info-circle"></i>
                About Sentiment Analysis
            </h4>
        </div>
        <div class="card-body">
            <h5>What is Sentiment Analysis?</h5>
            <p>Sentiment Analysis (also known as opinion mining) is a natural language processing technique that identifies
            and extracts subjective information from text. It determines whether a piece of text expresses positive, negative,
            or neutral sentiment.</p>
            <h5>Common Approaches:</h5>
            <ol>
                <li><b>Lexicon-based</b> (like VADER) - Uses dictionaries of words with pre-assigned sentiment scores</li>
                <li><b>Machine learning</b> - Supervised techniques that learn from labeled data</li>
                <li><b>Deep learning</b> (like our Transformer models) - Neural networks that can capture complex patterns and contexts</li>
            </ol>
            <h5>Applications:</h5>
            <ul>
                <li><b>Brand monitoring</b> - Track public perception of a brand</li>
                <li><b>Customer feedback analysis</b> - Understand customer satisfaction</li>
                <li><b>Market research</b> - Analyze product reviews and consumer opinions</li>
                <li><b>Social media monitoring</b> - Track public sentiment on topics or events</li>
                <li><b>Stock market prediction</b> - Analyze news sentiment to predict stock movements</li>
            </ul>
        </div>
    </div>
    """


def _esc(value):
    """Return ``str(value)`` escaped for safe use in HTML text or attributes."""
    from html import escape  # function-scope import keeps this block self-contained

    return escape(str(value), quote=True)


def _classify_compound(compound):
    """Map a VADER compound score to 'Positive', 'Negative' or 'Neutral'."""
    if compound >= 0.05:
        return "Positive"
    if compound <= -0.05:
        return "Negative"
    return "Neutral"


def _interpret_compound(compound):
    """Return the human-readable intensity label for a VADER compound score."""
    if compound >= 0.75:
        return "Extremely positive sentiment"
    if compound >= 0.5:
        return "Moderately positive sentiment"
    if compound >= 0.05:
        return "Slightly positive sentiment"
    if compound > -0.05:
        return "Neutral sentiment"
    if compound > -0.5:
        return "Slightly negative sentiment"
    if compound > -0.75:
        return "Moderately negative sentiment"
    return "Extremely negative sentiment"


def _vader_section(text_input, analyzer):
    """Build the whole-text VADER section (gauge, pie chart, score table)."""
    scores = analyzer.polarity_scores(text_input)
    compound = scores['compound']
    pos_score = scores['pos']
    neg_score = scores['neg']
    neu_score = scores['neu']

    category = _classify_compound(compound)
    color = _SENTIMENT_STYLES[category]["color"]
    emoji = _SENTIMENT_STYLES[category]["emoji"]
    # Compound is in [-1, 1]; map it onto 0-100% of the gauge width.
    marker_left = (compound + 1) / 2 * 100

    parts = [
        '<h3 class="task-subheader">VADER Sentiment Analysis</h3>',
        '<p>VADER (Valence Aware Dictionary and sEntiment Reasoner) is a lexicon and rule-based sentiment analysis tool specifically attuned to sentiments expressed in social media.</p>',
    ]
    parts.append(f"""
    <div class="card">
        <div class="card-body">
            <div class="text-center mb-3">
                <span style="font-size: 3rem; margin-right: 15px;">{emoji}</span>
                <div>
                    <h3 class="mb-0" style="color: {color};">{category}</h3>
                    <p class="mb-0 fs-5">Compound Score: {compound:.2f}</p>
                </div>
            </div>
            <div style="height: 30px; background-color: #e0e0e0; border-radius: 15px; position: relative; overflow: hidden; margin: 10px 0;">
                <div style="position: absolute; top: 0; bottom: 0; left: 50%; width: 2px; background-color: #000; z-index: 2;"></div>
                <div style="position: absolute; top: 0; bottom: 0; left: {marker_left}%; width: 10px; background-color: {color}; border-radius: 5px; transform: translateX(-50%); z-index: 3;"></div>
                <div style="position: absolute; top: 0; bottom: 0; left: 0; width: 50%; background: linear-gradient(90deg, #F44336 0%, #FFC107 100%);"></div>
                <div style="position: absolute; top: 0; bottom: 0; right: 0; width: 50%; background: linear-gradient(90deg, #FFC107 0%, #4CAF50 100%);"></div>
            </div>
            <div class="d-flex justify-content-between mt-2">
                <span>Negative (-1.0)</span>
                <span>Neutral (0.0)</span>
                <span>Positive (1.0)</span>
            </div>
        </div>
    </div>
    """)
    parts.append('<h4>VADER Score Breakdown</h4>')

    sizes = [pos_score, neu_score, neg_score]
    if sum(sizes) > 0:
        # Pull out the strictly dominant slice; ties fall through to the
        # negative slice, as in the original implementation.
        if pos_score > neg_score and pos_score > neu_score:
            explode = (0.1, 0, 0)
        elif neu_score > pos_score and neu_score > neg_score:
            explode = (0, 0.1, 0)
        else:
            explode = (0, 0, 0.1)
        fig, ax = plt.subplots(figsize=(8, 8))
        try:
            ax.pie(
                sizes,
                explode=explode,
                labels=['Positive', 'Neutral', 'Negative'],
                colors=['#4CAF50', '#FFC107', '#F44336'],
                autopct='%1.1f%%',
                shadow=True,
                startangle=90,
            )
            ax.axis('equal')
            ax.set_title('VADER Sentiment Distribution')
            chart_html = fig_to_html(fig)
        finally:
            # Figures stay registered with pyplot until closed; closing an
            # already-closed figure is harmless.
            plt.close(fig)
    else:
        # An all-zero distribution cannot be drawn as a pie chart.
        chart_html = (
            '<div class="alert alert-warning">'
            '<p class="mb-0">No scored content available to chart.</p>'
            '</div>'
        )

    detail_df = pd.DataFrame({
        'Metric': ['Positive Score', 'Neutral Score', 'Negative Score', 'Compound Score'],
        'Value': [pos_score, neu_score, neg_score, compound],
    })

    parts.append('<div class="row">')
    parts.append('<div class="col-md-6">')  # Column 1: chart
    parts.append(chart_html)
    parts.append('</div>')
    parts.append('<div class="col-md-6">')  # Column 2: data
    parts.append(df_to_html_table(detail_df))
    parts.append(f"""
    <div class="alert alert-success mt-3">
        <h4>Interpretation</h4>
        <p class="mb-0">{_interpret_compound(compound)}</p>
    </div>
    """)
    parts.append('</div>')  # Close column 2
    parts.append('</div>')  # Close row
    return parts


def _transformer_section(text_input):
    """Build the transformer sentiment card; raises if the model fails."""
    sentiment_model = load_sentiment_analyzer()
    # Presumably a Hugging Face text-classification pipeline returning
    # [{'label': ..., 'score': ...}] - TODO confirm against utils.model_loader.
    result = sentiment_model(text_input[:_MAX_MODEL_CHARS])
    label = result[0]['label']
    score = result[0]['score']

    parts = []
    if len(text_input) > _MAX_MODEL_CHARS:
        parts.append(f"""
    <div class="alert alert-warning">
        <p class="mb-0"><b>{_WARNING_EMOJI} Note:</b> Text was truncated for analysis as it exceeds the model's length limit.</p>
    </div>
    """)

    # Any label other than "POSITIVE" is rendered with the negative styling.
    style = _SENTIMENT_STYLES["Positive" if label == "POSITIVE" else "Negative"]
    color = style["color"]
    emoji = style["emoji"]
    parts.append(f"""
    <div class="card" style="border-color: {color};">
        <div class="card-body" style="background-color: {color}22;">
            <div class="d-flex align-items-center">
                <span style="font-size: 3rem; margin-right: 15px;">{emoji}</span>
                <div>
                    <h3 class="mb-0" style="color: {color};">{_esc(str(label).capitalize())}</h3>
                    <p class="mb-0 fs-5">Confidence: {score:.2%}</p>
                </div>
            </div>
        </div>
    </div>
    """)
    parts.append(f"""
    <div style="height: 30px; background-color: #e0e0e0; border-radius: 15px; position: relative; overflow: hidden; margin: 10px 0;">
        <div style="position: absolute; top: 0; bottom: 0; left: 0; width: {score * 100}%; background-color: {color}; border-radius: 5px;"></div>
        <div style="position: absolute; top: 0; bottom: 0; width: 100%; text-align: center; line-height: 30px; color: #000; font-weight: bold;">
            {score:.1%} Confidence
        </div>
    </div>
    """)
    return parts


def _emotion_section(text_input):
    """Build the emotion chart, primary-emotion card and top-5 table."""
    emotion_classifier = load_emotion_classifier()
    raw = emotion_classifier(text_input[:_MAX_MODEL_CHARS])
    # Accept both a nested [[{label, score}, ...]] result and a flat
    # [{label, score}, ...] result - which one the loader yields is not
    # visible from here.
    items = raw[0] if raw and isinstance(raw[0], (list, tuple)) else raw
    emotion_scores = {item['label']: item['score'] for item in items}
    if not emotion_scores:
        raise ValueError("emotion classifier returned no scores")

    emotion_df = pd.DataFrame({
        'Emotion': list(emotion_scores.keys()),
        'Score': list(emotion_scores.values()),
    }).sort_values('Score', ascending=False)
    primary_emotion = emotion_df.iloc[0]['Emotion']
    primary_score = emotion_df.iloc[0]['Score']

    bar_colors = [
        _EMOTION_COLORS.get(str(name).lower(), _DEFAULT_EMOTION_COLOR)
        for name in emotion_df['Emotion']
    ]
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        bars = ax.barh(emotion_df['Emotion'], emotion_df['Score'], color=bar_colors)
        ax.set_xlabel('Score')
        ax.set_title('Emotion Scores')
        for bar in bars:
            # Print each bar's value just past its right edge.
            ax.text(
                bar.get_width() + 0.01,
                bar.get_y() + bar.get_height() / 2,
                f"{bar.get_width():.2f}",
                va='center',
            )
        ax.set_xlim(0, 1)
        fig.tight_layout()
        chart_html = fig_to_html(fig)
    finally:
        plt.close(fig)

    primary_key = str(primary_emotion).lower()
    primary_color = _EMOTION_COLORS.get(primary_key, _DEFAULT_EMOTION_COLOR)
    primary_emoji = _EMOTION_EMOJIS.get(primary_key, _DEFAULT_EMOTION_EMOJI)

    parts = [
        '<section class="emotion-chart-section">',
        '<div class="chart-container">',
        chart_html,
        '</div>',
        '</section>',
        '<section class="emotion-result-container">',
    ]
    parts.append(f"""
    <div class="card" style="border-color: {primary_color};">
        <div class="card-body" style="background-color: {primary_color}22;">
            <div class="d-flex align-items-center">
                <span style="font-size: 3rem; margin-right: 15px;">{primary_emoji}</span>
                <div>
                    <h3 class="mb-0" style="color: {primary_color};">{_esc(str(primary_emotion).capitalize())}</h3>
                    <p class="mb-0 fs-5">Score: {primary_score:.2f}</p>
                </div>
            </div>
        </div>
    </div>
    """)
    parts.append('<h4>Top Emotions</h4>')
    parts.append(df_to_html_table(emotion_df.head(5)))
    parts.append('</section>')  # Close emotion result container
    return parts


def _sentence_section(text_input, analyzer):
    """Build the per-sentence sentiment flow chart, counters and table."""
    parts = [
        '<h3 class="task-subheader">Sentence-level Analysis</h3>',
        '<p>Breaking down sentiment by individual sentences to identify sentiment variations throughout the text.</p>',
    ]
    sentences = nltk.sent_tokenize(text_input)
    if len(sentences) < 2:
        parts.append("""
    <div class="alert alert-warning">
        <p class="mb-0">Sentence-level analysis requires at least two sentences. The provided text doesn't have enough sentences for this analysis.</p>
    </div>
    """)
        return parts

    compounds = [analyzer.polarity_scores(sentence)['compound'] for sentence in sentences]
    labels = [_classify_compound(value) for value in compounds]
    indices = list(range(1, len(sentences) + 1))

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.plot(indices, compounds, 'o-', color='#1976D2', linewidth=2, markersize=8)
        ax.axhline(y=0, color='#9E9E9E', linestyle='-', alpha=0.3)
        ax.axhline(y=0.05, color='#4CAF50', linestyle='--', alpha=0.3)
        ax.axhline(y=-0.05, color='#F44336', linestyle='--', alpha=0.3)
        # Overlay one colour-coded marker per sentence.
        ax.scatter(
            indices,
            compounds,
            c=[_FLOW_MARKER_COLORS[label] for label in labels],
            s=100,
            zorder=5,
        )
        ax.grid(alpha=0.3)
        ax.set_xlabel('Sentence Number')
        ax.set_ylabel('Compound Sentiment Score')
        ax.set_title('Sentiment Flow Through Text')
        ax.set_ylim(-1.05, 1.05)
        fig.tight_layout()
        chart_html = fig_to_html(fig)
    finally:
        plt.close(fig)

    positive_count = labels.count('Positive')
    negative_count = labels.count('Negative')
    neutral_count = len(labels) - positive_count - negative_count

    parts.extend([
        '<section class="sentence-chart-section">',
        '<div class="chart-container">',
        chart_html,
        '</div>',
        '</section>',
        '<section class="sentence-analysis-container">',
    ])
    parts.append(f"""
    <div class="row mb-3">
        <div class="col-4">
            <div class="card text-center">
                <div class="card-body p-2">
                    <h5 class="text-success">{positive_count}</h5>
                    <small>Positive</small>
                </div>
            </div>
        </div>
        <div class="col-4">
            <div class="card text-center">
                <div class="card-body p-2">
                    <h5 class="text-warning">{neutral_count}</h5>
                    <small>Neutral</small>
                </div>
            </div>
        </div>
        <div class="col-4">
            <div class="card text-center">
                <div class="card-body p-2">
                    <h5 class="text-danger">{negative_count}</h5>
                    <small>Negative</small>
                </div>
            </div>
        </div>
    </div>
    """)

    # A "shift" is any pair of adjacent sentences with different labels.
    sentiment_changes = sum(
        1 for previous, current in zip(labels, labels[1:]) if previous != current
    )
    if sentiment_changes > 0:
        parts.append(f"""
    <div class="alert alert-success">
        <p class="mb-0"><b>Sentiment Shifts:</b> {sentiment_changes}</p>
        <p class="mb-0">The text shows {sentiment_changes} shifts in sentiment between sentences.</p>
    </div>
    """)

    parts.append('<h4>Sentence-by-Sentence Analysis</h4>')
    parts.append('<div class="table-responsive" style="max-height: 400px;">')
    parts.append('<table class="table table-striped">')
    parts.append('<thead><tr><th>#</th><th>Sentence</th><th>Sentiment</th></tr></thead>')
    parts.append('<tbody>')
    for index, sentence, compound, label in zip(indices, sentences, compounds, labels):
        style = _SENTIMENT_STYLES[label]
        row_class = style["row_class"]
        text_class = style["text_class"]
        emoji = style["emoji"]
        sentiment_html = f"""
        <div class="d-flex align-items-center">
            <span class="me-2">{emoji}</span>
            <span class="{text_class} fw-bold">{label}</span>
            <span class="ms-2 text-muted">({compound:.2f})</span>
        </div>
        """
        parts.append(f'<tr class="{row_class}">')
        parts.append(f'<td>{index}</td>')
        # The sentence is raw user input: escape it before embedding in HTML.
        parts.append(f'<td>{_esc(sentence)}</td>')
        parts.append(f'<td>{sentiment_html}</td>')
        parts.append('</tr>')
    parts.append('</tbody></table>')
    parts.append('</div>')
    parts.append('</section>')  # Close sentence analysis container
    return parts


def sentiment_handler(text_input):
    """Render an HTML report of sentiment analysis for ``text_input``.

    The report contains, in order: a VADER whole-text analysis, a
    transformer-based sentiment prediction, an emotion classification and a
    sentence-by-sentence VADER breakdown, followed by a static "About"
    card.  Everything is wrapped in a single ``<div class="result-area">``.

    Failures in the transformer or emotion sections are reported inline and
    do not stop the remaining sections.  Any other failure replaces the rest
    of the analysis with a single error alert.  Each section is built
    completely before being added, so a failing section never leaves
    half-open tags in the output.  User text and exception messages are
    HTML-escaped before being embedded.

    Args:
        text_input: The text to analyse.  ``None`` or blank text produces a
            warning instead of an analysis.

    Returns:
        The report as one HTML string (fragments joined with newlines).
    """
    output_html = [
        '<div class="result-area">',
        '<h2 class="task-header">Sentiment Analysis</h2>',
        _INTRO_HTML,
        _MODELS_HTML,
    ]

    if text_input is None or not str(text_input).strip():
        # Blank text yields all-zero VADER scores, which cannot be charted.
        output_html.append("""
    <div class="alert alert-warning">
        <p class="mb-0">Please provide some text to analyze.</p>
    </div>
    """)
    else:
        try:
            vader_analyzer = SentimentIntensityAnalyzer()
            output_html.extend(_vader_section(text_input, vader_analyzer))

            # Transformer-based Sentiment Analysis
            output_html.append('<h3 class="task-subheader">Transformer-based Sentiment Analysis</h3>')
            output_html.append('<p>This analysis uses a DistilBERT model fine-tuned on the Stanford Sentiment Treebank dataset.</p>')
            try:
                output_html.extend(_transformer_section(text_input))
            except Exception as exc:  # section boundary: report and carry on
                output_html.append(f"""
    <div class="alert alert-danger">
        <h4>Transformer Model Error</h4>
        <p>Failed to load or run transformer sentiment model: {_esc(exc)}</p>
        <p>Falling back to VADER results only.</p>
    </div>
    """)

            # Emotion Analysis
            output_html.append('<h3 class="task-subheader">Emotion Analysis</h3>')
            output_html.append('<p>Identifying specific emotions in text using a RoBERTa model fine-tuned on the emotion dataset.</p>')
            try:
                output_html.extend(_emotion_section(text_input))
            except Exception as exc:  # section boundary: report and carry on
                output_html.append(f"""
    <div class="alert alert-danger">
        <h4>Emotion Analysis Error</h4>
        <p>Failed to load or run emotion classifier: {_esc(exc)}</p>
    </div>
    """)

            # Sentence-level Analysis
            output_html.extend(_sentence_section(text_input, vader_analyzer))
        except Exception as exc:  # top-level boundary for the UI handler
            output_html.append(f"""
    <div class="alert alert-danger">
        <h3>Error</h3>
        <p>Failed to analyze sentiment: {_esc(exc)}</p>
    </div>
    """)

    output_html.append(_ABOUT_HTML)
    output_html.append('</div>')  # Close result-area div
    return '\n'.join(output_html)