aradhyapavan committed on
Commit
ca2c89c
·
verified ·
1 Parent(s): 9442fad

nlp ultimate tutor

Browse files
Dockerfile ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use the Python 3.11 slim image to keep the container small
FROM python:3.11-slim

# Set environment variables
# - unbuffered stdout/stderr so logs show up immediately
# - no .pyc files inside the image
# - model caches live under /tmp
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1
ENV HF_HOME=/tmp/huggingface
ENV TRANSFORMERS_CACHE=/tmp/huggingface/transformers
ENV TORCH_HOME=/tmp/torch

# Create a non-root user (required for HF Spaces)
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user
ENV PATH=/home/user/.local/bin:$PATH

# Set working directory
WORKDIR $HOME/app

# Install system dependencies (as root).
# software-properties-common was removed from this list: nothing in this
# build calls add-apt-repository, and the package is reportedly missing from
# newer Debian slim bases, which makes `apt-get install` fail outright.
USER root
RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    git \
    && rm -rf /var/lib/apt/lists/*

# Switch back to user
USER user

# Copy requirements first so the dependency layers are cached independently
# of application code changes
COPY --chown=user requirements.txt .
RUN pip install --no-cache-dir --upgrade pip
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Download spaCy model
RUN python -m spacy download en_core_web_sm

# Copy application code
COPY --chown=user . .

# Create cache directories (single layer)
RUN mkdir -p /tmp/huggingface/transformers /tmp/torch

# Expose port
EXPOSE 7860

# Health check (curl is installed above for this purpose)
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:7860/ || exit 1

# Run the Flask application
CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,360 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, render_template, request, jsonify, session
2
+ import os
3
+ import json
4
+ from datetime import datetime
5
+
6
+ # Import components
7
+ from components.preprocessing import preprocessing_handler
8
+ from components.tokenization import tokenization_handler
9
+ from components.pos_tagging import pos_tagging_handler
10
+ from components.named_entity import named_entity_handler
11
+ from components.sentiment import sentiment_handler
12
+ from components.summarization import summarization_handler
13
+ from components.topic_analysis import topic_analysis_handler
14
+ from components.question_answering import question_answering_handler
15
+ from components.text_generation import text_generation_handler
16
+ from components.translation import translation_handler
17
+ from components.classification import classification_handler
18
+ from components.vector_embeddings import vector_embeddings_handler
19
+
20
+ # Import utilities
21
+ from utils.model_loader_hf import download_nltk_resources, load_spacy, initialize_essential_models
22
+ from utils.helpers import text_statistics
23
+
24
app = Flask(__name__)
# Never ship a hard-coded session key: anyone who knows it can forge session
# cookies. Read it from the environment; if it is not configured, fall back to
# a random per-process key (sessions then do not survive a restart).
app.secret_key = os.environ.get('SECRET_KEY') or os.urandom(32)

# Sample texts offered in the UI, keyed by the label shown to the user
SAMPLE_TEXTS = {
    "News Article": "The European Commission has fined Google €1.49 billion for abusive practices in online advertising. Google abused its market dominance by imposing restrictive clauses in contracts with third-party websites, preventing competitors from placing their search adverts on these websites.",
    "Product Review": "I absolutely love this smartphone! The camera quality is outstanding and the battery life is impressive. The user interface is intuitive and the performance is smooth even when running multiple apps. However, I find the price a bit high compared to similar models on the market.",
    "Scientific Text": "Climate change is the long-term alteration of temperature and typical weather patterns in a place. The cause of current climate change is largely human activity, like burning fossil fuels, which adds heat-trapping gases to Earth's atmosphere. The consequences of changing climate are already being felt worldwide.",
    "Literary Text": "The old man was thin and gaunt with deep wrinkles in the back of his neck. The brown blotches of the benevolent skin cancer the sun brings from its reflection on the tropical sea were on his cheeks. The blotches ran well down the sides of his face and his hands had the deep-creased scars from handling heavy fish on the cords.",
}

# Initialize essential models for HF Spaces (runs once, at import time)
initialize_essential_models()
37
+
38
@app.route('/')
def index():
    """Render the landing page, passing the sample texts to the template."""
    template_context = {'sample_texts': SAMPLE_TEXTS}
    return render_template('index.html', **template_context)
42
+
43
@app.route('/api/text-stats', methods=['POST'])
def get_text_stats():
    """Return statistics for the posted text as JSON.

    Expects a JSON object with a ``text`` field. Responds with the output of
    ``text_statistics``, 400 when no text is supplied, or 500 with the error
    message when the computation fails.
    """
    # silent=True: a missing, malformed or non-JSON body yields None instead of
    # an unhandled error; anything that is not a JSON object is treated as empty
    # so the request falls through to the 400 below.
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}
    text = data.get('text', '')

    if not text:
        return jsonify({'error': 'No text provided'}), 400

    # Same JSON error contract as the other /api endpoints in this module.
    try:
        stats = text_statistics(text)
    except Exception as e:
        return jsonify({'error': str(e)}), 500
    return jsonify(stats)
54
+
55
@app.route('/api/sample-text', methods=['POST'])
def get_sample_text():
    """Return the sample text for the requested ``sample_type``.

    Expects a JSON object with a ``sample_type`` field (defaults to
    ``"Custom"``). ``"Custom"`` and unknown types both yield an empty string.
    """
    # Tolerate a missing, malformed or non-object JSON body instead of
    # crashing on ``None.get``.
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}
    sample_type = data.get('sample_type', 'Custom')

    if sample_type == "Custom":
        return jsonify({'text': ''})
    return jsonify({'text': SAMPLE_TEXTS.get(sample_type, '')})
65
+
66
+ # Text Processing Routes
67
@app.route('/preprocessing')
def preprocessing():
    """Render the text preprocessing page."""
    template_name = 'preprocessing.html'
    return render_template(template_name)
71
+
72
@app.route('/api/preprocessing', methods=['POST'])
def api_preprocessing():
    """Run text preprocessing on the posted text.

    Expects a JSON object with a ``text`` field. Returns
    ``{'success': True, 'result': ...}``, 400 when no text is supplied, or
    500 with the error message when the handler raises.
    """
    # silent=True + dict check: a missing, malformed or non-object JSON body
    # becomes an empty payload and is answered with 400 rather than crashing.
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}
    text = data.get('text', '')

    if not text:
        return jsonify({'error': 'No text provided'}), 400

    try:
        result = preprocessing_handler(text)
        return jsonify({'success': True, 'result': result})
    except Exception as e:
        return jsonify({'error': str(e)}), 500
86
+
87
@app.route('/tokenization')
def tokenization():
    """Render the tokenization page."""
    template_name = 'tokenization.html'
    return render_template(template_name)
91
+
92
@app.route('/api/tokenization', methods=['POST'])
def api_tokenization():
    """Run tokenization on the posted text.

    Expects a JSON object with a ``text`` field. Returns
    ``{'success': True, 'result': ...}``, 400 when no text is supplied, or
    500 with the error message when the handler raises.
    """
    # silent=True + dict check: a missing, malformed or non-object JSON body
    # becomes an empty payload and is answered with 400 rather than crashing.
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}
    text = data.get('text', '')

    if not text:
        return jsonify({'error': 'No text provided'}), 400

    try:
        result = tokenization_handler(text)
        return jsonify({'success': True, 'result': result})
    except Exception as e:
        return jsonify({'error': str(e)}), 500
106
+
107
@app.route('/pos-tagging')
def pos_tagging():
    """Render the part-of-speech tagging page."""
    template_name = 'pos_tagging.html'
    return render_template(template_name)
111
+
112
@app.route('/api/pos-tagging', methods=['POST'])
def api_pos_tagging():
    """Run part-of-speech tagging on the posted text.

    Expects a JSON object with a ``text`` field. Returns
    ``{'success': True, 'result': ...}``, 400 when no text is supplied, or
    500 with the error message when the handler raises.
    """
    # silent=True + dict check: a missing, malformed or non-object JSON body
    # becomes an empty payload and is answered with 400 rather than crashing.
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}
    text = data.get('text', '')

    if not text:
        return jsonify({'error': 'No text provided'}), 400

    try:
        result = pos_tagging_handler(text)
        return jsonify({'success': True, 'result': result})
    except Exception as e:
        return jsonify({'error': str(e)}), 500
126
+
127
@app.route('/named-entity')
def named_entity():
    """Render the named entity recognition page."""
    template_name = 'named_entity.html'
    return render_template(template_name)
131
+
132
@app.route('/api/named-entity', methods=['POST'])
def api_named_entity():
    """Run named entity recognition on the posted text.

    Expects a JSON object with a ``text`` field. Returns
    ``{'success': True, 'result': ...}``, 400 when no text is supplied, or
    500 with the error message when the handler raises.
    """
    # silent=True + dict check: a missing, malformed or non-object JSON body
    # becomes an empty payload and is answered with 400 rather than crashing.
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}
    text = data.get('text', '')

    if not text:
        return jsonify({'error': 'No text provided'}), 400

    try:
        result = named_entity_handler(text)
        return jsonify({'success': True, 'result': result})
    except Exception as e:
        return jsonify({'error': str(e)}), 500
146
+
147
+ # Analysis Routes
148
@app.route('/sentiment')
def sentiment():
    """Render the sentiment analysis page."""
    template_name = 'sentiment.html'
    return render_template(template_name)
152
+
153
@app.route('/api/sentiment', methods=['POST'])
def api_sentiment():
    """Run sentiment analysis on the posted text.

    Expects a JSON object with a ``text`` field. Returns
    ``{'success': True, 'result': ...}``, 400 when no text is supplied, or
    500 with the error message when the handler raises.
    """
    # silent=True + dict check: a missing, malformed or non-object JSON body
    # becomes an empty payload and is answered with 400 rather than crashing.
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}
    text = data.get('text', '')

    if not text:
        return jsonify({'error': 'No text provided'}), 400

    try:
        result = sentiment_handler(text)
        return jsonify({'success': True, 'result': result})
    except Exception as e:
        return jsonify({'error': str(e)}), 500
167
+
168
@app.route('/summarization')
def summarization():
    """Render the text summarization page."""
    template_name = 'summarization.html'
    return render_template(template_name)
172
+
173
@app.route('/api/summarization', methods=['POST'])
def api_summarization():
    """Summarize the posted text.

    Expects a JSON object with a ``text`` field. Returns
    ``{'success': True, 'result': ...}``, 400 when no text is supplied, or
    500 with the error message when the handler raises.
    """
    # silent=True + dict check: a missing, malformed or non-object JSON body
    # becomes an empty payload and is answered with 400 rather than crashing.
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}
    text = data.get('text', '')

    if not text:
        return jsonify({'error': 'No text provided'}), 400

    try:
        result = summarization_handler(text)
        return jsonify({'success': True, 'result': result})
    except Exception as e:
        return jsonify({'error': str(e)}), 500
187
+
188
@app.route('/topic-analysis')
def topic_analysis():
    """Render the topic analysis page."""
    template_name = 'topic_analysis.html'
    return render_template(template_name)
192
+
193
@app.route('/api/topic-analysis', methods=['POST'])
def api_topic_analysis():
    """Run topic analysis on the posted text.

    Expects a JSON object with a ``text`` field. Returns
    ``{'success': True, 'result': ...}``, 400 when no text is supplied, or
    500 with the error message when the handler raises.
    """
    # silent=True + dict check: a missing, malformed or non-object JSON body
    # becomes an empty payload and is answered with 400 rather than crashing.
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}
    text = data.get('text', '')

    if not text:
        return jsonify({'error': 'No text provided'}), 400

    try:
        result = topic_analysis_handler(text)
        return jsonify({'success': True, 'result': result})
    except Exception as e:
        return jsonify({'error': str(e)}), 500
207
+
208
+ # Advanced NLP Routes
209
@app.route('/question-answering')
def question_answering():
    """Render the question answering page."""
    template_name = 'question_answering.html'
    return render_template(template_name)
213
+
214
@app.route('/api/question-answering', methods=['POST'])
def api_question_answering():
    """Answer a question about the supplied context.

    Parameters are accepted from the JSON body, form data, or query string,
    in that order of precedence:

    * ``context`` (or ``text``) - the passage to search; required.
    * ``question`` - the question to answer.
    * ``confidence_threshold`` - float, defaults to 0.5 when absent or not
      parseable.

    Returns ``{'success': True, 'result': ...}``, 400 when no context is
    supplied, or 500 with the error message when the handler raises.
    """
    default_threshold = 0.5

    # A JSON body that is not an object (e.g. a list) has no .get; treat it
    # as empty so the form/query-string fallbacks still apply.
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}
    sources = (data, request.form, request.args)

    def first_value(*keys):
        """Return the first truthy value for ``keys``, source by source."""
        for source in sources:
            for key in keys:
                value = source.get(key)
                if value:
                    return value
        return ''

    text = first_value('context', 'text')
    question = first_value('question')

    # Look the threshold up with an explicit None/'' test rather than
    # truthiness, so that a caller-supplied 0 is honoured instead of being
    # silently replaced by the default.
    confidence_threshold = default_threshold
    for source in sources:
        raw_threshold = source.get('confidence_threshold')
        if raw_threshold is None or raw_threshold == '':
            continue
        try:
            confidence_threshold = float(raw_threshold)
        except (TypeError, ValueError):
            confidence_threshold = default_threshold
        break

    if not text:
        return jsonify({'error': 'No text provided'}), 400

    try:
        result = question_answering_handler(text, question, confidence_threshold=confidence_threshold)
        return jsonify({'success': True, 'result': result})
    except Exception as e:
        return jsonify({'error': str(e)}), 500
253
+
254
@app.route('/text-generation')
def text_generation():
    """Render the text generation page."""
    template_name = 'text_generation.html'
    return render_template(template_name)
258
+
259
@app.route('/api/text-generation', methods=['POST'])
def api_text_generation():
    """Generate text from the posted prompt.

    Expects a JSON object with a ``text`` field. Returns
    ``{'success': True, 'result': ...}``, 400 when no text is supplied, or
    500 with the error message when the handler raises.
    """
    # silent=True + dict check: a missing, malformed or non-object JSON body
    # becomes an empty payload and is answered with 400 rather than crashing.
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}
    text = data.get('text', '')

    if not text:
        return jsonify({'error': 'No text provided'}), 400

    try:
        result = text_generation_handler(text)
        return jsonify({'success': True, 'result': result})
    except Exception as e:
        return jsonify({'error': str(e)}), 500
273
+
274
@app.route('/translation')
def translation():
    """Render the translation page."""
    template_name = 'translation.html'
    return render_template(template_name)
278
+
279
@app.route('/api/translation', methods=['POST'])
def api_translation():
    """Translate the posted text.

    Expects a JSON object with ``text`` and an optional ``target_language``
    (defaults to ``'en'``). Returns ``{'success': True, 'result': ...}``,
    400 when no text is supplied, or 500 with the error message when the
    handler raises.
    """
    # silent=True + dict check: a missing, malformed or non-object JSON body
    # becomes an empty payload and is answered with 400 rather than crashing.
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}
    text = data.get('text', '')
    target_language = data.get('target_language', 'en')

    if not text:
        return jsonify({'error': 'No text provided'}), 400

    try:
        result = translation_handler(text, target_language)
        return jsonify({'success': True, 'result': result})
    except Exception as e:
        return jsonify({'error': str(e)}), 500
294
+
295
@app.route('/classification')
def classification():
    """Render the classification page."""
    template_name = 'classification.html'
    return render_template(template_name)
299
+
300
@app.route('/api/classification', methods=['POST'])
def api_classification():
    """Classify the posted text.

    Expects a JSON object with ``text`` plus optional ``scenario``
    (default ``'Sentiment'``), ``multi_label`` (default ``False``) and
    ``custom_labels`` (default ``''``), all forwarded to
    ``classification_handler``. Returns ``{'success': True, 'result': ...}``,
    400 when no text is supplied, or 500 with the error message when the
    handler raises.
    """
    # silent=True + dict check: a missing, malformed or non-object JSON body
    # becomes an empty payload and is answered with 400 rather than crashing.
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}
    text = data.get('text', '')
    scenario = data.get('scenario', 'Sentiment')
    multi_label = data.get('multi_label', False)
    custom_labels = data.get('custom_labels', '')

    if not text:
        return jsonify({'error': 'No text provided'}), 400

    try:
        result = classification_handler(text, scenario, multi_label, custom_labels)
        return jsonify({'success': True, 'result': result})
    except Exception as e:
        return jsonify({'error': str(e)}), 500
317
+
318
@app.route('/vector-embeddings')
def vector_embeddings():
    """Render the vector embeddings page."""
    template_name = 'vector_embeddings.html'
    return render_template(template_name)
322
+
323
@app.route('/api/vector-embeddings', methods=['POST'])
def api_vector_embeddings():
    """Run the vector-embeddings handler on the posted text.

    Expects a JSON object with ``text`` and an optional ``query`` (default
    ``''``). Returns ``{'success': True, 'result': ...}``, 400 when no text
    is supplied, or 500 with the error message when the handler raises.
    """
    # silent=True + dict check: a missing, malformed or non-object JSON body
    # becomes an empty payload and is answered with 400 rather than crashing.
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}
    text = data.get('text', '')
    query = data.get('query', '')

    if not text:
        return jsonify({'error': 'No text provided'}), 400

    try:
        result = vector_embeddings_handler(text, query)
        return jsonify({'success': True, 'result': result})
    except Exception as e:
        return jsonify({'error': str(e)}), 500
338
+
339
@app.route('/api/semantic-search', methods=['POST'])
def api_semantic_search():
    """Run a semantic search of ``query`` against ``context``.

    Expects a JSON object with non-empty ``context`` and ``query`` fields.
    Returns the search result serialized as JSON (no ``success`` wrapper),
    400 when either field is missing, or 500 with the error message when the
    search raises.
    """
    # Imported inside the view, as in the original code, rather than at
    # module import time.
    from components.vector_embeddings import perform_semantic_search

    # silent=True + dict check: a missing, malformed or non-object JSON body
    # becomes an empty payload and is answered with 400 rather than crashing.
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}
    context = data.get('context', '')
    query = data.get('query', '')

    if not context or not query:
        return jsonify({'error': 'Both context and query are required'}), 400

    try:
        result = perform_semantic_search(context, query)
        return jsonify(result)
    except Exception as e:
        return jsonify({'error': str(e)}), 500
356
+
357
if __name__ == '__main__':
    # Listen on all interfaces; the port comes from $PORT and defaults to
    # 7860, the port used for HF Spaces.
    listen_port = int(os.environ.get('PORT', 7860))
    app.run(host='0.0.0.0', port=listen_port, debug=False)
components/classification.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib.pyplot as plt
2
+ import pandas as pd
3
+ from utils.model_loader import load_zero_shot
4
+ from utils.helpers import fig_to_html, df_to_html_table
5
+
6
def classification_handler(text_input, scenario="Sentiment", multi_label=False, custom_labels=""):
    """Build an HTML report for zero-shot classification of ``text_input``.

    Args:
        text_input: The text to classify.
        scenario: Name of a predefined label set ("Sentiment", "Emotion",
            "Writing Style", "Intent", "Content Type", "Audience Level") or
            "Custom" to use ``custom_labels``.
        multi_label: Whether labels are scored independently. Forced to True
            for the "Emotion", "Intent" and "Content Type" scenarios.
        custom_labels: Newline-separated category names; only read when
            ``scenario`` is "Custom".

    Returns:
        An HTML string. Classification failures (including an unknown
        scenario) are reported inside the HTML as an error alert rather than
        raised.
    """
    # Local import keeps this block self-contained; labels and error text can
    # come from the user, so they are escaped before being placed in HTML.
    from html import escape

    output_html = []

    # Add result area container
    output_html.append('<div class="result-area">')
    output_html.append('<h2 class="task-header">Zero-shot Classification</h2>')

    output_html.append("""
    <div class="alert alert-info">
    <i class="fas fa-tags"></i>
    Zero-shot classification can categorize text into arbitrary classes without having been specifically trained on those categories.
    </div>
    """)

    # Model info
    output_html.append("""
    <div class="alert alert-info">
    <h4><i class="fas fa-tools"></i> Model Used:</h4>
    <ul>
    <li><b>facebook/bart-large-mnli</b> - BART model fine-tuned on MultiNLI dataset</li>
    <li><b>Capabilities</b> - Can classify text into any user-defined categories</li>
    <li><b>Performance</b> - Best performance on distinct, well-defined categories</li>
    </ul>
    </div>
    """)

    # Classification scenarios
    scenarios = {
        "Sentiment": ["positive", "negative", "neutral"],
        "Emotion": ["joy", "sadness", "anger", "fear", "surprise"],
        "Writing Style": ["formal", "informal", "technical", "creative", "persuasive"],
        "Intent": ["inform", "persuade", "entertain", "instruct"],
        "Content Type": ["news", "opinion", "review", "instruction", "narrative"],
        "Audience Level": ["beginner", "intermediate", "advanced", "expert"],
        "Custom": [],
    }

    try:
        # Get labels based on scenario
        if scenario == "Custom":
            labels = [label.strip() for label in custom_labels.split("\n") if label.strip()]
            if not labels:
                output_html.append("""
                <div class="alert alert-warning">
                <h3>No Custom Categories</h3>
                <p>Please enter at least one custom category.</p>
                </div>
                """)
                output_html.append('</div>')  # Close result-area div
                return '\n'.join(output_html)
        elif scenario in scenarios:
            labels = scenarios[scenario]
        else:
            # Report an unknown scenario with a readable message instead of a
            # bare KeyError repr.
            raise ValueError(f"Unknown classification scenario: {scenario!r}")

        # Update multi-label default for certain categories
        if scenario in ("Emotion", "Intent", "Content Type") and not multi_label:
            multi_label = True

        # Load model
        classifier = load_zero_shot()

        # Classification process
        result = classifier(text_input, labels, multi_label=multi_label)

        # Display results
        output_html.append('<h3 class="task-subheader">Classification Results</h3>')

        # Create DataFrame
        class_df = pd.DataFrame({
            'Category': result['labels'],
            'Confidence': result['scores'],
        })

        # Visualization
        fig = plt.figure(figsize=(10, 6))
        try:
            bars = plt.barh(class_df['Category'], class_df['Confidence'], color='#1976D2')

            # Add percentage labels
            for bar in bars:
                plt.text(bar.get_width() + 0.01, bar.get_y() + bar.get_height() / 2,
                         f"{bar.get_width():.1%}", va='center')

            plt.xlim(0, 1.1)
            plt.xlabel('Confidence Score')
            plt.title(f'{scenario} Classification')
            plt.tight_layout()

            chart_html = fig_to_html(fig)
        finally:
            # pyplot keeps every figure alive until it is closed; without this
            # each request leaks a figure in the long-running server process.
            # Closing is harmless if fig_to_html already closed it.
            plt.close(fig)

        # Layout with vertical stacking - Chart first
        output_html.append('<div class="row mb-4">')
        output_html.append('<div class="col-12">')
        output_html.append('<h4>Classification Confidence Chart</h4>')
        output_html.append(chart_html)
        output_html.append('</div>')
        output_html.append('</div>')  # Close chart row

        # Data table and result in next row
        output_html.append('<div class="row">')
        output_html.append('<div class="col-md-6">')
        output_html.append('<h4>Detailed Results</h4>')
        output_html.append(df_to_html_table(class_df))
        output_html.append('</div>')

        # Top result (the first row is used as the primary classification)
        output_html.append('<div class="col-md-6">')
        top_class = class_df.iloc[0]['Category']
        top_score = class_df.iloc[0]['Confidence']

        output_html.append(f"""
        <div class="alert alert-primary">
        <h3>Primary Classification</h3>
        <p class="h4">{escape(str(top_class))}</p>
        <p>Confidence: {top_score:.1%}</p>
        </div>
        """)

        output_html.append('</div>')  # Close result column
        output_html.append('</div>')  # Close row

        # Multiple categories (if multi-label)
        if multi_label:
            # Get all categories with significant confidence
            significant_classes = class_df[class_df['Confidence'] > 0.5]

            if len(significant_classes) > 1:
                output_html.append("""
                <div class="alert alert-info">
                <h3>Multiple Categories Detected</h3>
                <p>This text appears to belong to multiple categories:</p>
                </div>
                """)

                category_list = [
                    f"<li><b>{escape(str(row['Category']))}</b> ({row['Confidence']:.1%})</li>"
                    for _, row in significant_classes.iterrows()
                ]

                output_html.append(f"<ul>{''.join(category_list)}</ul>")

    except Exception as e:
        # Best-effort page: show the failure inline and still render the
        # explanatory card below.
        output_html.append(f"""
        <div class="alert alert-danger">
        <h3>Error</h3>
        <p>Failed to classify text: {escape(str(e))}</p>
        </div>
        """)

    # About zero-shot classification
    output_html.append("""
    <div class="card mt-4">
    <div class="card-header">
    <h4 class="mb-0">
    <i class="fas fa-info-circle"></i>
    About Zero-shot Classification
    </h4>
    </div>
    <div class="card-body">
    <h5>What is Zero-shot Classification?</h5>

    <p>Unlike traditional classifiers that need to be trained on examples from each category,
    zero-shot classification can categorize text into arbitrary classes it has never seen
    during training.</p>

    <h5>How it works:</h5>

    <ol>
    <li>The model converts your text and each potential category into embeddings</li>
    <li>It calculates how likely the text entails or belongs to each category</li>
    <li>The model ranks categories by confidence scores</li>
    </ol>

    <h5>Benefits:</h5>

    <ul>
    <li>Flexibility to classify into any categories without retraining</li>
    <li>Can work with domain-specific or custom categories</li>
    <li>Useful for exploratory analysis or when training data is limited</li>
    </ul>
    </div>
    </div>
    """)

    output_html.append('</div>')  # Close result-area div

    return '\n'.join(output_html)
components/named_entity.py ADDED
@@ -0,0 +1,342 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib
2
+ matplotlib.use('Agg') # Use non-interactive backend
3
+ import matplotlib.pyplot as plt
4
+ import pandas as pd
5
+ import numpy as np
6
+ import spacy
7
+ from collections import Counter
8
+ import networkx as nx
9
+ from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
10
+
11
+ from utils.model_loader import load_spacy
12
+ from utils.helpers import fig_to_html, df_to_html_table
13
+
14
+ def named_entity_handler(text_input):
15
+ """Show named entity recognition capabilities."""
16
+ output_html = []
17
+
18
+ # Add result area container
19
+ output_html.append('<div class="result-area">')
20
+ output_html.append('<h2 class="task-header">Named Entity Recognition</h2>')
21
+
22
+ output_html.append("""
23
+ <div class="alert alert-info">
24
+ <i class="fas fa-info-circle"></i>
25
+ Named Entity Recognition identifies and classifies key information in text into pre-defined categories such as person names, organizations, locations, etc.
26
+ </div>
27
+ """)
28
+
29
+ # Model info
30
+ output_html.append("""
31
+ <div class="alert alert-info">
32
+ <h4><i class="fas fa-tools"></i> Models Used:</h4>
33
+ <ul>
34
+ <li><b>dslim/bert-base-NER</b> - BERT-based Named Entity Recognition model</li>
35
+ <li><b>spaCy en_core_web_sm</b> - Statistical NLP model for additional analysis</li>
36
+ <li><b>Entity Types</b> - Identifies people, organizations, locations, and miscellaneous entities</li>
37
+ </ul>
38
+ </div>
39
+ """)
40
+
41
+ try:
42
+ # Load BERT NER model
43
+ try:
44
+ tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
45
+ model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
46
+ ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")
47
+ except Exception as model_err:
48
+ output_html.append(f"""
49
+ <div class="alert alert-warning">
50
+ <h4>Model Loading Issue</h4>
51
+ <p>Could not load BERT NER model: {str(model_err)}</p>
52
+ <p>Falling back to spaCy model...</p>
53
+ </div>
54
+ """)
55
+ # Fallback to spaCy
56
+ nlp = load_spacy()
57
+ doc = nlp(text_input)
58
+ bert_entities = []
59
+ else:
60
+ # Process with BERT NER
61
+ bert_entities = ner_pipeline(text_input)
62
+
63
+ # Also load spaCy for additional analysis
64
+ nlp = load_spacy()
65
+ doc = nlp(text_input)
66
+
67
+ # Combine entities from both models
68
+ all_entities = []
69
+
70
+ # Add BERT entities
71
+ for entity in bert_entities:
72
+ all_entities.append({
73
+ 'text': entity['word'].replace('##', ''),
74
+ 'label': entity['entity_group'],
75
+ 'confidence': entity['score'],
76
+ 'start': entity['start'],
77
+ 'end': entity['end'],
78
+ 'source': 'BERT'
79
+ })
80
+
81
+ # Add spaCy entities
82
+ for ent in doc.ents:
83
+ all_entities.append({
84
+ 'text': ent.text,
85
+ 'label': ent.label_,
86
+ 'confidence': 1.0, # spaCy doesn't provide confidence scores
87
+ 'start': ent.start_char,
88
+ 'end': ent.end_char,
89
+ 'source': 'spaCy'
90
+ })
91
+
92
+ # If no entities were found
93
+ if len(all_entities) == 0:
94
+ output_html.append("""
95
+ <div class="alert alert-warning">
96
+ <h3>No Named Entities Found</h3>
97
+ <p>The model couldn't identify any named entities in the provided text. Try a different text that contains names, places, organizations, dates, etc.</p>
98
+ </div>
99
+ """)
100
+ else:
101
+ # Display identified entities in text
102
+ output_html.append('<h3 class="task-subheader">Identified Entities</h3>')
103
+
104
+ # Color scheme for different entity types (BERT + spaCy)
105
+ colors = {
106
+ # BERT NER labels
107
+ 'PER': '#e6194B', # Person - Red
108
+ 'ORG': '#3cb44b', # Organization - Green
109
+ 'LOC': '#4363d8', # Location - Blue
110
+ 'MISC': '#f58231', # Miscellaneous - Orange
111
+ # spaCy labels
112
+ 'PERSON': '#e6194B', # Red
113
+ 'ORG': '#3cb44b', # Green
114
+ 'GPE': '#4363d8', # Blue (locations/geopolitical)
115
+ 'LOC': '#42d4f4', # Cyan (non-GPE locations)
116
+ 'FACILITY': '#f58231', # Orange
117
+ 'PRODUCT': '#911eb4', # Purple
118
+ 'EVENT': '#f032e6', # Magenta
119
+ 'WORK_OF_ART': '#fabebe', # Pink
120
+ 'LAW': '#008080', # Teal
121
+ 'DATE': '#9A6324', # Brown
122
+ 'TIME': '#800000', # Maroon
123
+ 'PERCENT': '#808000', # Olive
124
+ 'MONEY': '#000075', # Navy
125
+ 'QUANTITY': '#000000', # Black
126
+ 'CARDINAL': '#a9a9a9', # Dark Gray
127
+ 'ORDINAL': '#808080', # Gray
128
+ 'NORP': '#469990' # Nationality/Religious/Political
129
+ }
130
+
131
+ # Remove duplicates and sort entities by position
132
+ unique_entities = []
133
+ seen_spans = set()
134
+
135
+ for entity in all_entities:
136
+ span = (entity['start'], entity['end'])
137
+ if span not in seen_spans:
138
+ unique_entities.append(entity)
139
+ seen_spans.add(span)
140
+
141
+ # Sort by start position
142
+ sorted_ents = sorted(unique_entities, key=lambda x: x['start'])
143
+
144
+ # Create HTML with highlighted entities
145
+ html_text = text_input
146
+ offset = 0
147
+
148
+ for entity in sorted_ents:
149
+ # Get the appropriate color (default to gray if not found)
150
+ color = colors.get(entity['label'], '#a9a9a9')
151
+
152
+ # Create the HTML span with tooltip including confidence and source
153
+ start = entity['start'] + offset
154
+ end = entity['end'] + offset
155
+ confidence_text = f" (Confidence: {entity['confidence']:.2f})" if entity['confidence'] < 1.0 else ""
156
+ tooltip = f"{entity['label']} - {entity['source']}{confidence_text}"
157
+
158
+ entity_html = f'<span class="entity-badge" style="background-color: {color}; color: white; border: 2px solid #fff; box-shadow: 0 2px 4px rgba(0,0,0,0.3);" title="{tooltip}"><strong>{entity["text"]}</strong> <span style="font-size: 0.8em;">({entity["label"]}) ({entity["source"]})</span></span>'
159
+
160
+ # Replace the entity text with the highlighted version
161
+ html_text = html_text[:start] + entity_html + html_text[end:]
162
+
163
+ # Update offset for subsequent entities
164
+ offset += len(entity_html) - len(entity['text'])
165
+
166
+ # Display the highlighted text
167
+ output_html.append(f'<div class="card"><div class="card-body"><div class="entity-text-container">{html_text}</div></div></div>')
168
+
169
+ # Entity count and distribution
170
+ output_html.append('<h3 class="task-subheader">Entity Distribution</h3>')
171
+
172
+ # Create a DataFrame for the entities
173
+ entities_data = []
174
+ for entity in unique_entities:
175
+ entities_data.append({
176
+ 'Entity': entity['text'],
177
+ 'Type': entity['label'],
178
+ 'Source': entity['source'],
179
+ 'Confidence': f"{entity['confidence']:.2f}" if entity['confidence'] < 1.0 else "1.00"
180
+ })
181
+
182
+ entity_df = pd.DataFrame(entities_data)
183
+
184
+ # Calculate entity type distribution
185
+ entity_counts = Counter([entity['label'] for entity in unique_entities])
186
+
187
+ # Create bar chart for entity type distribution
188
+ fig = plt.figure(figsize=(12, 8))
189
+ bars = plt.bar(entity_counts.keys(), entity_counts.values(),
190
+ color=[colors.get(k, '#a9a9a9') for k in entity_counts.keys()])
191
+ plt.xlabel('Entity Type')
192
+ plt.ylabel('Count')
193
+ plt.title('Entity Type Distribution (BERT + spaCy)')
194
+ plt.xticks(rotation=45, ha='right')
195
+
196
+ # Add count labels on top of bars
197
+ for bar in bars:
198
+ height = bar.get_height()
199
+ plt.text(bar.get_x() + bar.get_width()/2., height + 0.1,
200
+ f'{int(height)}',
201
+ ha='center', va='bottom')
202
+
203
+ plt.tight_layout()
204
+
205
+ # Chart section
206
+ output_html.append('<section class="entity-chart-section">')
207
+ output_html.append('<div class="chart-container">')
208
+ output_html.append(fig_to_html(fig))
209
+ output_html.append('</div>')
210
+ output_html.append('</section>')
211
+
212
+ # Table section
213
+ output_html.append('<section class="entity-table-container">')
214
+ output_html.append('<h4>Entities Found</h4>')
215
+ output_html.append(df_to_html_table(entity_df))
216
+ output_html.append('</section>')
217
+
218
+ # Entity relationship visualization (for texts with multiple entities)
219
+ if len(doc.ents) > 1:
220
+ output_html.append('<h3 class="task-subheader">Entity Relationships</h3>')
221
+
222
+ # Create a network graph of entities that appear in the same sentence
223
+ G = nx.Graph()
224
+
225
+ # Add nodes for each unique entity
226
+ for ent in doc.ents:
227
+ G.add_node(ent.text, type=ent.label_)
228
+
229
+ # Add edges between entities that appear in the same sentence
230
+ for sent in doc.sents:
231
+ sent_ents = [ent for ent in doc.ents if sent.start <= ent.start < sent.end]
232
+ for i, ent1 in enumerate(sent_ents):
233
+ for ent2 in sent_ents[i+1:]:
234
+ if G.has_edge(ent1.text, ent2.text):
235
+ G[ent1.text][ent2.text]['weight'] += 1
236
+ else:
237
+ G.add_edge(ent1.text, ent2.text, weight=1)
238
+
239
+ # Only show relationship visualization if there are edges
240
+ if G.number_of_edges() > 0:
241
+ # Create a network visualization
242
+ plt.figure(figsize=(10, 8))
243
+
244
+ # Node colors based on entity type
245
+ node_colors = [colors.get(G.nodes[node]['type'], '#a9a9a9') for node in G.nodes()]
246
+
247
+ # Position nodes using spring layout
248
+ pos = nx.spring_layout(G)
249
+
250
+ # Draw the network
251
+ nx.draw_networkx_nodes(G, pos, node_size=300, node_color=node_colors, alpha=0.8)
252
+ nx.draw_networkx_edges(G, pos, width=1.5, alpha=0.7, edge_color='#888888')
253
+ nx.draw_networkx_labels(G, pos, font_size=10, font_weight='bold')
254
+
255
+ plt.title('Entity Co-occurrence Network')
256
+ plt.axis('off')
257
+ plt.tight_layout()
258
+
259
+ output_html.append('<div class="alert alert-light"><p class="mb-0">This visualization shows entities that appear in the same sentences:</p></div>')
260
+ output_html.append(fig_to_html(plt.gcf()))
261
+ plt.close()
262
+ else:
263
+ output_html.append('<p>No entity relationships detected in the text.</p>')
264
+
265
+ # Legend for entity types
266
+ output_html.append('<h3 class="task-subheader">Entity Type Legend</h3>')
267
+
268
+ entity_descriptions = {
269
+ 'PERSON': 'People, including fictional',
270
+ 'ORG': 'Organizations, companies, institutions',
271
+ 'GPE': 'Geopolitical entities (countries, cities, states)',
272
+ 'LOC': 'Non-GPE locations (mountain ranges, water bodies)',
273
+ 'FACILITY': 'Buildings, airports, highways, bridges',
274
+ 'PRODUCT': 'Products, objects, vehicles, foods',
275
+ 'EVENT': 'Hurricanes, battles, wars, sports events',
276
+ 'WORK_OF_ART': 'Titles of books, songs, etc.',
277
+ 'LAW': 'Named documents made into laws',
278
+ 'DATE': 'Absolute or relative dates',
279
+ 'TIME': 'Times smaller than a day',
280
+ 'PERCENT': 'Percentage',
281
+ 'MONEY': 'Monetary values',
282
+ 'QUANTITY': 'Measurements',
283
+ 'CARDINAL': 'Numerals not falling under another type',
284
+ 'ORDINAL': 'Ordinal numbers',
285
+ 'NORP': 'Nationalities, religious or political groups'
286
+ }
287
+
288
+ output_html.append('<div class="row">')
289
+ for entity, color in colors.items():
290
+ if entity in entity_counts:
291
+ output_html.append(f"""
292
+ <div class="col-md-6 mb-2">
293
+ <div class="card">
294
+ <div class="card-body p-2">
295
+ <span class="badge me-2" style="background-color: {color}; color: white;">{entity}</span>
296
+ <small>{entity_descriptions.get(entity, '')}</small>
297
+ </div>
298
+ </div>
299
+ </div>
300
+ """)
301
+ output_html.append('</div>') # Close row
302
+
303
+ except Exception as e:
304
+ output_html.append(f"""
305
+ <div class="alert alert-danger">
306
+ <h3>Error</h3>
307
+ <p>Failed to process named entities: {str(e)}</p>
308
+ </div>
309
+ """)
310
+
311
+ # About NER section
312
+ output_html.append("""
313
+ <div class="card mt-4">
314
+ <div class="card-header">
315
+ <h4 class="mb-0">
316
+ <i class="fas fa-info-circle"></i>
317
+ About Named Entity Recognition
318
+ </h4>
319
+ </div>
320
+ <div class="card-body">
321
+ <h5>What is Named Entity Recognition?</h5>
322
+
323
+ <p>Named Entity Recognition (NER) is an NLP technique that automatically identifies and classifies named entities
324
+ in text into predefined categories. These entities are typically proper nouns such as people, organizations,
325
+ locations, expressions of times, quantities, monetary values, and percentages.</p>
326
+
327
+ <h5>Applications of NER:</h5>
328
+
329
+ <ul>
330
+ <li><b>Information Extraction</b> - Identifying key information from large volumes of text</li>
331
+ <li><b>Question Answering</b> - Helping systems understand what entities questions are referring to</li>
332
+ <li><b>Document Classification</b> - Using entity types and frequencies to categorize documents</li>
333
+ <li><b>Customer Service</b> - Identifying product names, issue types, and user information in support tickets</li>
334
+ <li><b>Content Recommendation</b> - Using entities to find related content</li>
335
+ </ul>
336
+ </div>
337
+ </div>
338
+ """)
339
+
340
+ output_html.append('</div>') # Close result-area div
341
+
342
+ return '\n'.join(output_html)
components/pos_tagging.py ADDED
@@ -0,0 +1,540 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib.pyplot as plt
2
+ import pandas as pd
3
+ import nltk
4
+ from collections import Counter
5
+ import random
6
+ import numpy as np
7
+ import io
8
+ import base64
9
+ from PIL import Image
10
+
11
+ from utils.model_loader import load_spacy
12
+ from utils.helpers import fig_to_html, df_to_html_table, format_pos_token
13
+
14
def pos_tagging_handler(text_input):
    """Build the HTML report for the part-of-speech tagging demo.

    The report contains, in order: an intro and model notice, the text
    tagged by NLTK (Penn Treebank tags) and by spaCy (Universal tags), one
    dependency-arc diagram per sentence, a POS distribution chart and table,
    the most common words for the major POS classes, a per-token grammatical
    role table, a colour legend for both tagsets, and an "About" card.

    Args:
        text_input: Raw text to analyse. It is tokenized with
            ``nltk.word_tokenize`` and parsed with the pipeline returned by
            ``load_spacy()``.

    Returns:
        A single string of HTML fragments joined with newlines. Processing
        errors are not raised; they are rendered as a ``alert-danger`` box
        inside the returned HTML.
    """
    # Function-scope import so the block stays self-contained. Everything that
    # originates from the user's text is escaped before being placed in markup.
    from html import escape

    output_html = []

    # Add result area container
    output_html.append('<div class="result-area">')
    output_html.append('<h2 class="task-header">Part-of-Speech Tagging</h2>')

    output_html.append("""
    <div class="alert alert-info">
        <i class="fas fa-info-circle"></i>
        Part-of-Speech (POS) tagging is the process of marking up words in text according to their grammatical categories
        such as noun, verb, adjective, etc.
    </div>
    """)

    # Model info
    output_html.append("""
    <div class="alert alert-info">
        <h4><i class="fas fa-tools"></i> Models Used:</h4>
        <ul>
            <li><b>NLTK</b> - Using the Perceptron tagger trained on the Penn Treebank corpus</li>
            <li><b>spaCy</b> - Using the en_core_web_sm model's POS tagging capabilities</li>
        </ul>
    </div>
    """)

    try:
        # Process with NLTK
        nltk_tokens = nltk.word_tokenize(text_input)
        nltk_pos = nltk.pos_tag(nltk_tokens)

        # Process with spaCy
        nlp = load_spacy()
        doc = nlp(text_input)
        spacy_pos = [(token.text, token.pos_) for token in doc]

        # Display tagged text
        output_html.append('<h3 class="task-subheader">Tagged Text</h3>')

        # Colour scheme shared by both tagsets: related tags use the same hue.
        colors = {
            # NLTK Penn Treebank Tags
            'NN': '#e6194B',    # Noun - Red
            'NNS': '#e6194B',   # Plural noun - Red
            'NNP': '#3cb44b',   # Proper noun - Green
            'NNPS': '#3cb44b',  # Plural proper noun - Green
            'VB': '#4363d8',    # Verb - Blue
            'VBD': '#4363d8',   # Verb, past tense - Blue
            'VBG': '#4363d8',   # Verb, gerund - Blue
            'VBN': '#4363d8',   # Verb, past participle - Blue
            'VBP': '#4363d8',   # Verb, non-3rd singular present - Blue
            'VBZ': '#4363d8',   # Verb, 3rd singular present - Blue
            'JJ': '#f58231',    # Adjective - Orange
            'JJR': '#f58231',   # Comparative adjective - Orange
            'JJS': '#f58231',   # Superlative adjective - Orange
            'RB': '#911eb4',    # Adverb - Purple
            'RBR': '#911eb4',   # Comparative adverb - Purple
            'RBS': '#911eb4',   # Superlative adverb - Purple
            'IN': '#f032e6',    # Preposition - Magenta
            'DT': '#fabebe',    # Determiner - Pink
            'PRP': '#008080',   # Personal pronoun - Teal
            'PRP$': '#008080',  # Possessive pronoun - Teal
            'CC': '#9A6324',    # Coordinating conjunction - Brown
            'CD': '#800000',    # Cardinal number - Maroon
            'EX': '#808000',    # Existential there - Olive
            'FW': '#000075',    # Foreign word - Navy
            'MD': '#a9a9a9',    # Modal - Dark Gray
            'PDT': '#469990',   # Predeterminer - Greenish
            'POS': '#000000',   # Possessive ending - Black
            'RP': '#aaffc3',    # Particle - Mint
            'SYM': '#ffd8b1',   # Symbol - Light Orange (same tag name in both tagsets)
            'TO': '#fffac8',    # to - Light Yellow
            'UH': '#dcbeff',    # Interjection - Lavender
            'WDT': '#808080',   # Wh-determiner - Gray
            'WP': '#808080',    # Wh-pronoun - Gray
            'WP$': '#808080',   # Possessive wh-pronoun - Gray
            'WRB': '#808080',   # Wh-adverb - Gray

            # spaCy Universal POS Tags
            'NOUN': '#e6194B',   # Noun - Red
            'PROPN': '#3cb44b',  # Proper noun - Green
            'VERB': '#4363d8',   # Verb - Blue
            'ADJ': '#f58231',    # Adjective - Orange
            'ADV': '#911eb4',    # Adverb - Purple
            'ADP': '#f032e6',    # Adposition (preposition) - Magenta
            'DET': '#fabebe',    # Determiner - Pink
            'PRON': '#008080',   # Pronoun - Teal
            'CCONJ': '#9A6324',  # Coordinating conjunction - Brown
            'NUM': '#800000',    # Numeral - Maroon
            'PART': '#aaffc3',   # Particle - Mint
            'INTJ': '#dcbeff',   # Interjection - Lavender
            'PUNCT': '#000000',  # Punctuation - Black
            'X': '#808080',      # Other - Gray
            'SPACE': '#ffffff',  # Space - White
        }
        default_color = '#a9a9a9'  # used for any tag missing from the table

        def generate_tagged_html(pos_tags):
            """Render (word, tag) pairs as coloured spans with the tag as tooltip."""
            parts = ['<div style="line-height: 2.5; padding: 15px; background-color: #f5f5f5; border-radius: 5px; margin-bottom: 20px; overflow-wrap: break-word; word-wrap: break-word;">']

            for word, tag in pos_tags:
                # Pure whitespace tokens are rendered as a plain space
                if word.strip() == '':
                    parts.append(' ')
                    continue

                color = colors.get(tag, default_color)

                # Both the token and the tag come from user text / the tagger
                # (Penn tags include quote characters), so escape them.
                parts.append(f'<span style="background-color: {color}; color: white; padding: 2px 4px; margin: 2px; border-radius: 4px; display: inline-block;" title="{escape(tag)}">{escape(word)}</span>')

            parts.append('</div>')
            return ''.join(parts)

        # Display NLTK and spaCy in a row, one after another
        output_html.append('<div class="row">')

        # NLTK Section
        output_html.append('<div class="col-md-6">')
        output_html.append('<div class="card">')
        output_html.append('<div class="card-header">')
        output_html.append('<h4 class="mb-0 text-primary">NLTK (Penn Treebank)</h4>')
        output_html.append('</div>')
        output_html.append('<div class="card-body">')
        output_html.append(generate_tagged_html(nltk_pos))
        output_html.append('</div>')
        output_html.append('</div>')
        output_html.append('</div>')

        # spaCy Section
        output_html.append('<div class="col-md-6">')
        output_html.append('<div class="card">')
        output_html.append('<div class="card-header">')
        output_html.append('<h4 class="mb-0 text-primary">spaCy (Universal)</h4>')
        output_html.append('</div>')
        output_html.append('<div class="card-body">')
        output_html.append(generate_tagged_html(spacy_pos))
        output_html.append('</div>')
        output_html.append('</div>')
        output_html.append('</div>')

        output_html.append('</div>')  # Close the row

        # Syntactic Tree Visualization (Dependency Parse)
        output_html.append('<h3 class="task-subheader">Sentence Structure Visualization</h3>')

        # Split visualizations for each sentence to avoid overcrowding
        sentences = list(doc.sents)

        if not sentences:
            output_html.append('<p>No complete sentences found for visualization.</p>')
        else:
            # Add description for dependency parsing
            output_html.append("""
            <div class="alert alert-light">
                <p class="mb-0">
                    These diagrams show the grammatical structure of each sentence.
                    Words are connected with arrows that represent the syntactic relationships between them.
                </p>
            </div>
            """)

            # For each sentence, create a dependency visualization.
            # The sentence index has its own name so the inner word loop
            # cannot overwrite it (the error message below relies on it).
            for sent_idx, sent in enumerate(sentences):
                if len(sent) > 50:  # Skip very long sentences that might break the visualization
                    output_html.append(f'<div class="alert alert-warning"><strong>Note:</strong> Sentence {sent_idx+1} is too long ({len(sent)} tokens) for visualization.</div>')
                    continue

                fig = None
                try:
                    # tight_layout is applied below; constrained layout is not
                    # requested because the two layout engines conflict.
                    fig, ax = plt.subplots(figsize=(10, 3))

                    sent_words = [token.text for token in sent]

                    # Draw words along the baseline, one unit apart
                    for word_idx, word in enumerate(sent_words):
                        ax.text(word_idx, 0, word, ha='center')

                    # Draw arcs for dependencies
                    max_height = 1
                    for token in sent:
                        if token.dep_ and token.head.i != token.i:  # Skip root dependency
                            # Positions relative to the start of the sentence
                            start = token.i - sent.start
                            end = token.head.i - sent.start

                            # Make sure start is before end
                            if start > end:
                                start, end = end, start

                            # Longer-distance dependencies get taller arcs
                            height = 0.2 + (end - start) * 0.1
                            max_height = max(max_height, height + 0.3)

                            # Half sine wave between the two word positions
                            arc_xs = np.linspace(start, end, 50)
                            arc_ys = height * np.sin((arc_xs - start) / (end - start) * np.pi)
                            ax.plot(arc_xs, arc_ys, color=colors.get(token.pos_, 'gray'), lw=1.5)

                            # Add dependency label just below the peak of the arc
                            mid_point = (start + end) / 2
                            label_height = height * 0.95
                            ax.text(mid_point, label_height, token.dep_, ha='center', fontsize=8,
                                    bbox=dict(facecolor='white', alpha=0.8, edgecolor='none', pad=0.2))

                    # Set axis limits
                    ax.set_xlim([-0.5, len(sent_words) - 0.5])
                    ax.set_ylim([0, max_height + 0.2])

                    # Remove axes
                    ax.axis('off')
                    fig.tight_layout()

                    # Render the plot to HTML
                    output_html.append(fig_to_html(fig))

                except Exception as viz_err:
                    # Best effort: one broken diagram must not abort the report
                    output_html.append(f'<div class="alert alert-danger"><strong>Error:</strong> Failed to visualize sentence {sent_idx+1}: {escape(str(viz_err))}</div>')
                finally:
                    # Always release the figure, including on failure
                    if fig is not None:
                        plt.close(fig)

        # POS Distribution Analysis
        output_html.append('<h3 class="task-subheader">POS Distribution Analysis</h3>')

        # Calculate POS distribution using spaCy tags (more consistent)
        pos_counts = Counter(token.pos_ for token in doc)
        pos_labels = list(pos_counts.keys())
        pos_values = list(pos_counts.values())

        # Create bar chart for POS distribution
        fig = plt.figure(figsize=(10, 6))
        try:
            bars = plt.bar(pos_labels, pos_values, color=[colors.get(k, default_color) for k in pos_labels])
            plt.xlabel('Part of Speech')
            plt.ylabel('Count')
            plt.title('Part-of-Speech Distribution')
            plt.xticks(rotation=45, ha='right')

            # Add count labels on top of bars
            for bar in bars:
                height = bar.get_height()
                plt.text(bar.get_x() + bar.get_width()/2., height + 0.1,
                         f'{int(height)}',
                         ha='center', va='bottom')

            plt.tight_layout()
            chart_html = fig_to_html(fig)
        finally:
            # NOTE(review): fig_to_html is defined elsewhere; closing here is a
            # no-op if it already closes the figure, and prevents a leak if not.
            plt.close(fig)

        # Chart section
        output_html.append('<section class="pos-chart-section">')
        output_html.append('<div class="chart-container">')
        output_html.append(chart_html)
        output_html.append('</div>')
        output_html.append('</section>')

        # Table section
        output_html.append('<section class="pos-table-container">')
        output_html.append('<div class="row">')
        output_html.append('<div class="col-md-6">')

        # Create a DataFrame for the POS counts
        total_tokens = sum(pos_values)
        pos_df = pd.DataFrame({
            'POS Tag': pos_labels,
            'Count': pos_values,
            'Percentage': [count / total_tokens * 100 for count in pos_values]
        })
        pos_df = pos_df.sort_values('Count', ascending=False).reset_index(drop=True)

        # Format the percentage column for display, e.g. "12.5%"
        pos_df['Percentage'] = pos_df['Percentage'].map('{:.1f}%'.format)

        output_html.append(df_to_html_table(pos_df))
        output_html.append('</div>')

        # Most common words section
        output_html.append('<div class="col-md-6">')
        output_html.append('<h4 class="mt-0">Most Common Words by POS</h4>')

        # Get common words for major POS categories
        major_pos = ['NOUN', 'VERB', 'ADJ', 'ADV']
        common_words = {}

        for pos in major_pos:
            pos_words = [token.text.lower() for token in doc if token.pos_ == pos]
            if pos_words:
                common_words[pos] = Counter(pos_words).most_common(5)

        # Create HTML for common words
        for pos, word_counts in common_words.items():
            output_html.append(f'<h5>{pos}</h5>')
            output_html.append('<div class="d-flex flex-wrap gap-1 mb-2">')

            color = colors.get(pos, default_color)
            for word, count in word_counts:
                output_html.append(f'<span class="badge" style="background-color: {color}; color: white;">{escape(word)} ({count})</span>')

            output_html.append('</div>')

        output_html.append('</div>')  # Close column 2
        output_html.append('</div>')  # Close row
        output_html.append('</section>')  # Close table section

        # Add Sentence Grammatical Analysis
        output_html.append('<h3 class="task-subheader">Grammatical Analysis</h3>')
        output_html.append('<p>Detailed analysis of the grammatical components in each sentence.</p>')

        # Create Grammatical Role Table
        grammatical_roles = []
        for token in doc:
            if token.dep_ not in ["punct", "space"]:  # Skip punctuation and spaces
                grammatical_roles.append({
                    "Word": token.text,
                    "POS": token.pos_,
                    "Dependency": token.dep_,
                    "Head": token.head.text,
                    "Description": get_dependency_description(token.dep_)
                })

        # Convert to DataFrame
        if grammatical_roles:
            roles_df = pd.DataFrame(grammatical_roles)
            output_html.append('<div class="table-responsive" style="max-height: 400px;">')
            output_html.append(df_to_html_table(roles_df))
            output_html.append('</div>')
        else:
            output_html.append('<p>No grammatical roles found to analyze.</p>')

        # POS Tag Legend
        output_html.append('<h3 class="task-subheader">POS Tag Legend</h3>')

        # Create button toggle for different tagsets
        output_html.append('<div class="card">')
        output_html.append('<div class="card-header text-center">')
        output_html.append('<div class="btn-group pos-legend-buttons" role="group" aria-label="POS Tag Types">')
        output_html.append('<button type="button" class="btn btn-primary btn-lg active" id="universal-btn" onclick="showPOSTags(\'universal\')">Universal Tags</button>')
        output_html.append('<button type="button" class="btn btn-outline-primary btn-lg" id="penn-btn" onclick="showPOSTags(\'penn\')">Penn Treebank Tags</button>')
        output_html.append('</div>')
        output_html.append('</div>')
        output_html.append('<div class="card-body">')
        output_html.append('<div id="pos-content">')

        # Universal Tags
        output_html.append('<div class="pos-tags-section" id="universal-tags" style="display: block;">')

        universal_tags = {
            'NOUN': 'Nouns - people, places, things',
            'PROPN': 'Proper nouns - specific named entities',
            'VERB': 'Verbs - actions, occurrences',
            'ADJ': 'Adjectives - describe nouns',
            'ADV': 'Adverbs - modify verbs, adjectives, or other adverbs',
            'ADP': 'Adpositions - prepositions, postpositions',
            'DET': 'Determiners - articles and other noun modifiers',
            'PRON': 'Pronouns - words that substitute for nouns',
            'CCONJ': 'Coordinating conjunctions - connect words, phrases, clauses',
            'SCONJ': 'Subordinating conjunctions - connect clauses',
            'NUM': 'Numerals - numbers',
            'PART': 'Particles - function words associated with another word',
            'INTJ': 'Interjections - exclamatory words',
            'PUNCT': 'Punctuation',
            'SYM': 'Symbols',
            'X': 'Other - foreign words, typos, abbreviations',
            'SPACE': 'Space - white spaces'
        }

        output_html.append('<div class="row">')

        # Only tags that have an assigned colour are listed in the legend
        for tag, description in universal_tags.items():
            if tag in colors:
                output_html.append(f"""
                <div class="col-md-6 mb-2">
                    <div class="d-flex align-items-center p-2 border rounded">
                        <span class="badge me-2" style="background-color: {colors[tag]}; color: white; min-width: 60px;">{tag}</span>
                        <span class="small">{description}</span>
                    </div>
                </div>
                """)

        output_html.append('</div>')  # Close row
        output_html.append('</div>')  # Close universal tags tab

        # Penn Treebank Tags
        output_html.append('<div class="pos-tags-section" id="penn-tags" style="display: none;">')

        penn_tags = {
            'CC': 'Coordinating conjunction',
            'CD': 'Cardinal number',
            'DT': 'Determiner',
            'EX': 'Existential there',
            'FW': 'Foreign word',
            'IN': 'Preposition or subordinating conjunction',
            'JJ': 'Adjective',
            'JJR': 'Adjective, comparative',
            'JJS': 'Adjective, superlative',
            'LS': 'List item marker',
            'MD': 'Modal',
            'NN': 'Noun, singular or mass',
            'NNS': 'Noun, plural',
            'NNP': 'Proper noun, singular',
            'NNPS': 'Proper noun, plural',
            'PDT': 'Predeterminer',
            'POS': 'Possessive ending',
            'PRP': 'Personal pronoun',
            'PRP$': 'Possessive pronoun',
            'RB': 'Adverb',
            'RBR': 'Adverb, comparative',
            'RBS': 'Adverb, superlative',
            'RP': 'Particle',
            'SYM': 'Symbol',
            'TO': 'to',
            'UH': 'Interjection',
            'VB': 'Verb, base form',
            'VBD': 'Verb, past tense',
            'VBG': 'Verb, gerund or present participle',
            'VBN': 'Verb, past participle',
            'VBP': 'Verb, non-3rd person singular present',
            'VBZ': 'Verb, 3rd person singular present',
            'WDT': 'Wh-determiner',
            'WP': 'Wh-pronoun',
            'WP$': 'Possessive wh-pronoun',
            'WRB': 'Wh-adverb'
        }

        output_html.append('<div class="row">')

        for tag, description in penn_tags.items():
            if tag in colors:
                output_html.append(f"""
                <div class="col-md-6 mb-2">
                    <div class="d-flex align-items-center p-2 border rounded">
                        <span class="badge me-2" style="background-color: {colors[tag]}; color: white; min-width: 60px;">{tag}</span>
                        <span class="small">{description}</span>
                    </div>
                </div>
                """)

        output_html.append('</div>')  # Close row
        output_html.append('</div>')  # Close penn tags section
        output_html.append('</div>')  # Close pos content
        output_html.append('</div>')  # Close card body
        output_html.append('</div>')  # Close card

    except Exception as e:
        # Top-level boundary for the request: show the failure in the page
        # instead of raising. The message is escaped because it may echo input.
        output_html.append(f"""
        <div class="alert alert-danger">
            <h3>Error</h3>
            <p>Failed to process part-of-speech tagging: {escape(str(e))}</p>
        </div>
        """)

    # About POS Tagging section
    output_html.append("""
    <div class="card mt-4">
        <div class="card-header">
            <h4 class="mb-0">
                <i class="fas fa-info-circle"></i>
                About Part-of-Speech Tagging
            </h4>
        </div>
        <div class="card-body">
            <h5>What is Part-of-Speech Tagging?</h5>

            <p>Part-of-Speech (POS) tagging is the process of assigning grammatical categories (such as noun, verb, adjective, etc.)
            to each word in a text. It's one of the fundamental steps in natural language processing.</p>

            <h5>Why is POS Tagging Important?</h5>

            <ol>
                <li><b>Disambiguation</b> - Words can have multiple meanings depending on their usage. POS tags help disambiguate words.</li>
                <li><b>Syntactic Parsing</b> - POS tags form the basis for higher-level syntactic analysis.</li>
                <li><b>Named Entity Recognition</b> - POS tags help in identifying entities.</li>
                <li><b>Information Extraction</b> - They help in extracting specific information from text.</li>
                <li><b>Text-to-Speech Systems</b> - For correct pronunciation based on word function.</li>
            </ol>

            <h5>Tagsets:</h5>

            <ul>
                <li><b>Universal Tagset</b> - A simpler, cross-linguistic set with about 17 tags.</li>
                <li><b>Penn Treebank</b> - A more detailed English-specific tagset with about 36 tags.</li>
            </ul>
        </div>
    </div>
    """)

    output_html.append('</div>')  # Close result-area div

    return '\n'.join(output_html)
508
+
509
# Glosses for the dependency labels this module knows how to explain.
# Defined once at module level so the table is not rebuilt on every lookup.
_DEPENDENCY_DESCRIPTIONS = {
    "ROOT": "Root of the sentence",
    "nsubj": "Nominal subject",
    "obj": "Direct object",
    "dobj": "Direct object",
    "iobj": "Indirect object",
    "det": "Determiner",
    "amod": "Adjectival modifier",
    "advmod": "Adverbial modifier",
    "pobj": "Object of preposition",
    "prep": "Preposition",
    "aux": "Auxiliary verb",
    "cc": "Coordinating conjunction",
    "conj": "Conjunct",
    "mark": "Marker",
    "nmod": "Nominal modifier",
    "compound": "Compound word",
    "attr": "Attribute",
    "case": "Case marker",
    "neg": "Negation modifier",
    "punct": "Punctuation",
    "nsubjpass": "Passive nominal subject",
    "auxpass": "Passive auxiliary",
    "ccomp": "Clausal complement",
    "xcomp": "Open clausal complement",
    "acl": "Adjectival clause",
    "advcl": "Adverbial clause modifier",
    "relcl": "Relative clause modifier",
}

# Returned for any label that has no entry in the table above.
_UNKNOWN_DEPENDENCY_DESCRIPTION = "Dependency relation"


def get_dependency_description(dep_tag):
    """Return a short human-readable description of a dependency label.

    Args:
        dep_tag: Dependency label to describe (for example ``"nsubj"``).
            The lookup is case-sensitive.

    Returns:
        The description for ``dep_tag``, or ``"Dependency relation"`` when
        the label is not one of the known tags.
    """
    try:
        return _DEPENDENCY_DESCRIPTIONS[dep_tag]
    except KeyError:
        return _UNKNOWN_DEPENDENCY_DESCRIPTION
components/preprocessing.py ADDED
@@ -0,0 +1,888 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib
2
+ matplotlib.use('Agg') # Use non-interactive backend
3
+ import matplotlib.pyplot as plt
4
+ import pandas as pd
5
+ import nltk
6
+ import re
7
+ import string
8
+ import base64
9
+ import io
10
+ from collections import Counter
11
+ from nltk.corpus import stopwords
12
+ from nltk.stem import WordNetLemmatizer, PorterStemmer
13
+ from wordcloud import WordCloud
14
+ from utils.model_loader import download_nltk_resources
15
+ from utils.helpers import fig_to_html, df_to_html_table
16
+ from nltk.util import ngrams
17
+
18
+ def preprocessing_handler(text_input):
19
+ """Generate HTML for text preprocessing display"""
20
+ output_html = []
21
+
22
+ # Add result area container
23
+ output_html.append('<div class="result-area">')
24
+ output_html.append('<h2 class="task-header">Text Preprocessing</h2>')
25
+
26
+ output_html.append("""
27
+ <div class="alert alert-info">
28
+ <i class="fas fa-info-circle"></i>
29
+ Text preprocessing is the process of cleaning and transforming raw text into a format that can be easily analyzed by NLP models.
30
+ </div>
31
+ """)
32
+
33
+ # Model info
34
+ output_html.append("""
35
+ <div class="alert alert-info">
36
+ <h4><i class="fas fa-tools"></i> Tools & Libraries Used:</h4>
37
+ <ul>
38
+ <li><b>NLTK</b> - For stopwords, tokenization, stemming and lemmatization</li>
39
+ <li><b>Regular Expressions</b> - For pattern matching and text cleaning</li>
40
+ <li><b>WordCloud</b> - For visualizing word frequency</li>
41
+ </ul>
42
+ </div>
43
+ """)
44
+
45
+ # Ensure NLTK resources are downloaded
46
+ download_nltk_resources()
47
+
48
+ try:
49
+ # Original Text
50
+ output_html.append('<h3 class="task-subheader">Original Text</h3>')
51
+ output_html.append(f'<div class="card"><div class="card-body"><div class="text-content" style="word-wrap: break-word; word-break: break-word; overflow-wrap: break-word; max-height: 500px; overflow-y: auto; padding: 15px; background-color: #f8f9fa; border-radius: 5px; border: 1px solid #e9ecef; line-height: 1.6;">{text_input}</div></div></div>')
52
+
53
+ # Text statistics
54
+ word_count = len(text_input.split())
55
+ char_count = len(text_input)
56
+ sentence_count = len(nltk.sent_tokenize(text_input))
57
+
58
+ stats_html = f"""
59
+ <div class="stats-container">
60
+ <div class="row">
61
+ <div class="col-md-4">
62
+ <div class="card text-center stats-card">
63
+ <div class="card-body">
64
+ <h3 class="metric-blue">{word_count}</h3>
65
+ <p>Words</p>
66
+ </div>
67
+ </div>
68
+ </div>
69
+ <div class="col-md-4">
70
+ <div class="card text-center stats-card">
71
+ <div class="card-body">
72
+ <h3 class="metric-green">{char_count}</h3>
73
+ <p>Characters</p>
74
+ </div>
75
+ </div>
76
+ </div>
77
+ <div class="col-md-4">
78
+ <div class="card text-center stats-card">
79
+ <div class="card-body">
80
+ <h3 class="metric-orange">{sentence_count}</h3>
81
+ <p>Sentences</p>
82
+ </div>
83
+ </div>
84
+ </div>
85
+ </div>
86
+ </div>
87
+ """
88
+ output_html.append(stats_html)
89
+
90
+ # NEW SECTION: Text Cleaning with Regular Expressions
91
+ output_html.append('<div class="section-divider"></div>')
92
+ output_html.append('<h3 class="task-subheader">Text Cleaning with Regular Expressions</h3>')
93
+
94
+ output_html.append("""
95
+ <div class="alert alert-light">
96
+ <p>Regular expressions (regex) provide powerful pattern matching capabilities for cleaning and processing text data.
97
+ Common text cleaning tasks include removing URLs, HTML tags, special characters, and normalizing text formats.</p>
98
+ </div>
99
+ """)
100
+
101
+ # Several regex cleaning examples
102
+ url_pattern = r'https?://\S+|www\.\S+'
103
+ html_pattern = r'<.*?>'
104
+ whitespace_pattern = r'\s+'
105
+ email_pattern = r'\S+@\S+'
106
+
107
+ # Original text for comparison
108
+ text_cleaned = text_input
109
+
110
+ # 1. Remove URLs
111
+ urls_cleaned = re.sub(url_pattern, '[URL]', text_cleaned)
112
+
113
+ # 2. Remove HTML tags
114
+ html_cleaned = re.sub(html_pattern, '', urls_cleaned)
115
+
116
+ # 3. Remove extra whitespace
117
+ whitespace_cleaned = re.sub(whitespace_pattern, ' ', html_cleaned).strip()
118
+
119
+ # 4. Remove email addresses
120
+ email_cleaned = re.sub(email_pattern, '[EMAIL]', whitespace_cleaned)
121
+
122
+ # 5. Fix common contractions
123
+ contractions = {
124
+ r"won't": "will not",
125
+ r"can't": "cannot",
126
+ r"n't": " not",
127
+ r"'re": " are",
128
+ r"'s": " is",
129
+ r"'d": " would",
130
+ r"'ll": " will",
131
+ r"'t": " not",
132
+ r"'ve": " have",
133
+ r"'m": " am"
134
+ }
135
+
136
+ contraction_cleaned = email_cleaned
137
+ for pattern, replacement in contractions.items():
138
+ contraction_cleaned = re.sub(pattern, replacement, contraction_cleaned)
139
+
140
+ # Display the regex cleaning examples in a table
141
+ output_html.append("""
142
+ <h4>Regex Text Cleaning Operations</h4>
143
+ <div class="table-responsive">
144
+ <table class="table table-striped">
145
+ <thead class="table-primary">
146
+ <tr>
147
+ <th>Operation</th>
148
+ <th>Regex Pattern</th>
149
+ <th>Description</th>
150
+ </tr>
151
+ </thead>
152
+ <tbody>
153
+ <tr>
154
+ <td>URL Removal</td>
155
+ <td><code>https?://\\S+|www\\.\\S+</code></td>
156
+ <td>Removes or replaces web URLs in text</td>
157
+ </tr>
158
+ <tr>
159
+ <td>HTML Tag Removal</td>
160
+ <td><code>&lt;.*?&gt;</code></td>
161
+ <td>Strips HTML/XML markup tags</td>
162
+ </tr>
163
+ <tr>
164
+ <td>Whitespace Normalization</td>
165
+ <td><code>\\s+</code></td>
166
+ <td>Replaces multiple spaces, tabs, and newlines with a single space</td>
167
+ </tr>
168
+ <tr>
169
+ <td>Email Anonymization</td>
170
+ <td><code>\\S+@\\S+</code></td>
171
+ <td>Redacts email addresses for privacy</td>
172
+ </tr>
173
+ <tr>
174
+ <td>Contraction Expansion</td>
175
+ <td><code>Multiple patterns</code></td>
176
+ <td>Expands contractions like "don't" to "do not"</td>
177
+ </tr>
178
+ </tbody>
179
+ </table>
180
+ </div>
181
+ """)
182
+
183
+ # Example of cleaned text
184
+ output_html.append("""
185
+ <h4>Example of Text After Regex Cleaning</h4>
186
+ <div class="row">
187
+ <div class="col-md-6">
188
+ <div class="card">
189
+ <div class="card-header">
190
+ <h5 class="mb-0">Before Cleaning</h5>
191
+ </div>
192
+ <div class="card-body">
193
+ <div class="text-content" style="word-wrap: break-word; word-break: break-word; overflow-wrap: break-word; max-height: 400px; overflow-y: auto; padding: 15px; background-color: #f8f9fa; border-radius: 5px; border: 1px solid #e9ecef; line-height: 1.6;">""")
194
+ output_html.append(f"{text_input}")
195
+ output_html.append("""</div>
196
+ </div>
197
+ </div>
198
+ </div>
199
+ <div class="col-md-6">
200
+ <div class="card">
201
+ <div class="card-header">
202
+ <h5 class="mb-0">After Regex Cleaning</h5>
203
+ </div>
204
+ <div class="card-body">
205
+ <div class="text-content" style="word-wrap: break-word; word-break: break-word; overflow-wrap: break-word; max-height: 400px; overflow-y: auto; padding: 15px; background-color: #f8f9fa; border-radius: 5px; border: 1px solid #e9ecef; line-height: 1.6;">""")
206
+ output_html.append(f"{contraction_cleaned}")
207
+ output_html.append("""</div>
208
+ </div>
209
+ </div>
210
+ </div>
211
+ </div>
212
+ """)
213
+
214
+ output_html.append("""
215
+ <div class="alert alert-success">
216
+ <h4><i class="fas fa-lightbulb"></i> Why Use Regex for Text Cleaning?</h4>
217
+ <ul>
218
+ <li><b>Precision:</b> Regular expressions allow for precise pattern matching</li>
219
+ <li><b>Flexibility:</b> Can be customized for domain-specific cleaning needs</li>
220
+ <li><b>Efficiency:</b> Processes text in a single pass for better performance</li>
221
+ <li><b>Standardization:</b> Creates consistent formatting across documents</li>
222
+ </ul>
223
+ </div>
224
+ """)
225
+
226
+ # Word length distribution
227
+ word_lengths = [len(word) for word in text_input.split()]
228
+ fig = plt.figure(figsize=(10, 4))
229
+ plt.hist(word_lengths, bins=range(1, max(word_lengths) + 2), alpha=0.7, color='#1976D2')
230
+ plt.xlabel('Word Length')
231
+ plt.ylabel('Frequency')
232
+ plt.title('Word Length Distribution')
233
+ plt.grid(alpha=0.3)
234
+ plt.tight_layout()
235
+
236
+ output_html.append('<div class="section-divider"></div>')
237
+ output_html.append('<h3 class="task-subheader">Word Length Distribution</h3>')
238
+ output_html.append(fig_to_html(fig))
239
+
240
+ # Case Normalization
241
+ output_html.append('<div class="section-divider"></div>')
242
+ output_html.append('<h3 class="task-subheader">Case Normalization</h3>')
243
+
244
+ lowercase_text = text_input.lower()
245
+ uppercase_text = text_input.upper()
246
+
247
+ case_html = f"""
248
+ <div class="row">
249
+ <div class="col-md-4">
250
+ <div class="card">
251
+ <div class="card-header">
252
+ <h5 class="mb-0">Original Text</h5>
253
+ </div>
254
+ <div class="card-body">
255
+ <div class="text-content" style="word-wrap: break-word; word-break: break-word; overflow-wrap: break-word; max-height: 400px; overflow-y: auto; padding: 15px; background-color: #f8f9fa; border-radius: 5px; border: 1px solid #e9ecef; line-height: 1.6;">{text_input}</div>
256
+ </div>
257
+ </div>
258
+ </div>
259
+ <div class="col-md-4">
260
+ <div class="card">
261
+ <div class="card-header">
262
+ <h5 class="mb-0">Lowercase Text</h5>
263
+ </div>
264
+ <div class="card-body">
265
+ <div class="text-content" style="word-wrap: break-word; word-break: break-word; overflow-wrap: break-word; max-height: 400px; overflow-y: auto; padding: 15px; background-color: #f8f9fa; border-radius: 5px; border: 1px solid #e9ecef; line-height: 1.6;">{lowercase_text}</div>
266
+ </div>
267
+ </div>
268
+ </div>
269
+ <div class="col-md-4">
270
+ <div class="card">
271
+ <div class="card-header">
272
+ <h5 class="mb-0">Uppercase Text</h5>
273
+ </div>
274
+ <div class="card-body">
275
+ <div class="text-content" style="word-wrap: break-word; word-break: break-word; overflow-wrap: break-word; max-height: 400px; overflow-y: auto; padding: 15px; background-color: #f8f9fa; border-radius: 5px; border: 1px solid #e9ecef; line-height: 1.6;">{uppercase_text}</div>
276
+ </div>
277
+ </div>
278
+ </div>
279
+ </div>
280
+ """
281
+ output_html.append(case_html)
282
+
283
+ # Remove Punctuation & Special Characters
284
+ output_html.append('<div class="section-divider"></div>')
285
+ output_html.append('<h3 class="task-subheader">Punctuation & Special Characters Removal</h3>')
286
+
287
+ # Count original punctuation
288
+ punc_count = sum([1 for char in text_input if char in string.punctuation])
289
+
290
+ # Remove punctuation
291
+ no_punct_text = re.sub(r'[^\w\s]', '', text_input)
292
+
293
+ punct_html = f"""
294
+ <div class="row">
295
+ <div class="col-md-6">
296
+ <div class="card">
297
+ <div class="card-header">
298
+ <h5 class="mb-0">Original Text</h5>
299
+ </div>
300
+ <div class="card-body">
301
+ <div class="text-content" style="word-wrap: break-word; word-break: break-word; overflow-wrap: break-word; max-height: 400px; overflow-y: auto; padding: 15px; background-color: #f8f9fa; border-radius: 5px; border: 1px solid #e9ecef; line-height: 1.6;">{text_input}</div>
302
+ <small class="text-muted">Contains {punc_count} punctuation marks</small>
303
+ </div>
304
+ </div>
305
+ </div>
306
+ <div class="col-md-6">
307
+ <div class="card">
308
+ <div class="card-header">
309
+ <h5 class="mb-0">Without Punctuation</h5>
310
+ </div>
311
+ <div class="card-body">
312
+ <div class="text-content" style="word-wrap: break-word; word-break: break-word; overflow-wrap: break-word; max-height: 400px; overflow-y: auto; padding: 15px; background-color: #f8f9fa; border-radius: 5px; border: 1px solid #e9ecef; line-height: 1.6;">{no_punct_text}</div>
313
+ <small class="text-muted">Removed {punc_count} punctuation marks</small>
314
+ </div>
315
+ </div>
316
+ </div>
317
+ </div>
318
+ """
319
+ output_html.append(punct_html)
320
+
321
+ # Show removed punctuation
322
+ punct_chars = [char for char in text_input if char in string.punctuation]
323
+ punct_freq = Counter(punct_chars)
324
+
325
+ if punct_freq:
326
+ output_html.append('<h4>Punctuation Distribution</h4>')
327
+
328
+ fig = plt.figure(figsize=(10, 4))
329
+ plt.bar(punct_freq.keys(), punct_freq.values(), color='#1976D2')
330
+ plt.xlabel('Punctuation')
331
+ plt.ylabel('Frequency')
332
+ plt.title('Punctuation Distribution')
333
+ plt.tight_layout()
334
+
335
+ output_html.append(fig_to_html(fig))
336
+
337
+ # Tokenization
338
+ output_html.append('<div class="section-divider"></div>')
339
+ output_html.append('<h3 class="task-subheader">Tokenization</h3>')
340
+
341
+ # Word tokenization
342
+ words = nltk.word_tokenize(text_input)
343
+
344
+ # Create a multi-column layout for word tokens
345
+ output_html.append('<h4>Word Tokens</h4>')
346
+ output_html.append(f'<p>Total tokens: {len(words)} (showing first 50)</p>')
347
+
348
+ # Create a multi-column table layout
349
+ tokens_html = """
350
+ <div class="table-responsive">
351
+ <table class="table table-striped table-hover" style="table-layout: fixed;">
352
+ <thead class="table-primary">
353
+ <tr>
354
+ <th style="width: 8%;">#</th>
355
+ <th style="width: 25%;">Token</th>
356
+ <th style="width: 12%;">Length</th>
357
+ <th style="width: 8%;">#</th>
358
+ <th style="width: 25%;">Token</th>
359
+ <th style="width: 12%;">Length</th>
360
+ <th style="width: 8%;">#</th>
361
+ <th style="width: 25%;">Token</th>
362
+ <th style="width: 12%;">Length</th>
363
+ </tr>
364
+ </thead>
365
+ <tbody>
366
+ """
367
+
368
+ # Create rows with 3 tokens per row
369
+ for i in range(0, min(50, len(words)), 3):
370
+ tokens_html += "<tr>"
371
+ for j in range(3):
372
+ if i + j < min(50, len(words)):
373
+ token = words[i + j]
374
+ tokens_html += f'<td>{i + j + 1}</td><td><code>{token}</code></td><td><span class="badge bg-secondary">{len(token)}</span></td>'
375
+ else:
376
+ tokens_html += '<td></td><td></td><td></td>'
377
+ tokens_html += "</tr>"
378
+
379
+ tokens_html += """
380
+ </tbody>
381
+ </table>
382
+ </div>
383
+ """
384
+
385
+ output_html.append(tokens_html)
386
+
387
+ # Sentence tokenization
388
+ sentences = nltk.sent_tokenize(text_input)
389
+
390
+ output_html.append('<h4>Sentence Tokens</h4>')
391
+ output_html.append(f'<p>Total sentences: {len(sentences)}</p>')
392
+
393
+ for i, sentence in enumerate(sentences[:5]):
394
+ output_html.append(f'<div class="card mb-2"><div class="card-body"><strong>{i+1}.</strong> {sentence}</div></div>')
395
+
396
+ if len(sentences) > 5:
397
+ output_html.append(f'<p class="text-muted">... and {len(sentences) - 5} more sentences.</p>')
398
+
399
+ # Stopwords Removal
400
+ output_html.append('<div class="section-divider"></div>')
401
+ output_html.append('<h3 class="task-subheader">Stopwords Removal</h3>')
402
+
403
+ stop_words = set(stopwords.words('english'))
404
+ filtered_words = [word for word in words if word.lower() not in stop_words]
405
+
406
+ # Count stopwords
407
+ stopword_count = len(words) - len(filtered_words)
408
+ stopword_percentage = (stopword_count / len(words)) * 100 if words else 0
409
+
410
+ output_html.append(f"""
411
+ <div class="row mb-3">
412
+ <div class="col-md-4">
413
+ <div class="card text-center">
414
+ <div class="card-body">
415
+ <h5>Original Words</h5>
416
+ <h3 class="text-primary">{len(words)}</h3>
417
+ </div>
418
+ </div>
419
+ </div>
420
+ <div class="col-md-4">
421
+ <div class="card text-center">
422
+ <div class="card-body">
423
+ <h5>After Stopword Removal</h5>
424
+ <h3 class="text-success">{len(filtered_words)}</h3>
425
+ </div>
426
+ </div>
427
+ </div>
428
+ <div class="col-md-4">
429
+ <div class="card text-center">
430
+ <div class="card-body">
431
+ <h5>Stopwords Removed</h5>
432
+ <h3 class="text-warning">{stopword_count} ({stopword_percentage:.1f}%)</h3>
433
+ </div>
434
+ </div>
435
+ </div>
436
+ </div>
437
+ """)
438
+
439
+ # Display common stopwords in the text
440
+ text_stopwords = [word for word in words if word.lower() in stop_words]
441
+ stop_freq = Counter(text_stopwords).most_common(10)
442
+
443
+ if stop_freq:
444
+ output_html.append('<h4>Most Common Stopwords in Text</h4>')
445
+
446
+ # Create a multi-column layout for stopwords
447
+ stopwords_html = """
448
+ <div class="table-responsive">
449
+ <table class="table table-striped table-hover" style="table-layout: fixed;">
450
+ <thead class="table-primary">
451
+ <tr>
452
+ <th style="width: 10%;">#</th>
453
+ <th style="width: 35%;">Stopword</th>
454
+ <th style="width: 15%;">Frequency</th>
455
+ <th style="width: 10%;">#</th>
456
+ <th style="width: 35%;">Stopword</th>
457
+ <th style="width: 15%;">Frequency</th>
458
+ </tr>
459
+ </thead>
460
+ <tbody>
461
+ """
462
+
463
+ # Create rows with 2 stopwords per row
464
+ for i in range(0, len(stop_freq), 2):
465
+ stopwords_html += "<tr>"
466
+ for j in range(2):
467
+ if i + j < len(stop_freq):
468
+ stopword, freq = stop_freq[i + j]
469
+ stopwords_html += f'<td>{i + j + 1}</td><td><code>{stopword}</code></td><td><span class="badge bg-warning">{freq}</span></td>'
470
+ else:
471
+ stopwords_html += '<td></td><td></td><td></td>'
472
+ stopwords_html += "</tr>"
473
+
474
+ stopwords_html += """
475
+ </tbody>
476
+ </table>
477
+ </div>
478
+ """
479
+
480
+ output_html.append(stopwords_html)
481
+
482
+ # Visualization of before and after
483
+ fig, ax = plt.subplots(1, 2, figsize=(12, 5))
484
+
485
+ # Before
486
+ ax[0].hist([len(word) for word in words], bins=range(1, 15), alpha=0.7, color='#1976D2')
487
+ ax[0].set_title('Word Length Before Stopword Removal')
488
+ ax[0].set_xlabel('Word Length')
489
+ ax[0].set_ylabel('Frequency')
490
+
491
+ # After
492
+ ax[1].hist([len(word) for word in filtered_words], bins=range(1, 15), alpha=0.7, color='#4CAF50')
493
+ ax[1].set_title('Word Length After Stopword Removal')
494
+ ax[1].set_xlabel('Word Length')
495
+ ax[1].set_ylabel('Frequency')
496
+
497
+ plt.tight_layout()
498
+ output_html.append(fig_to_html(fig))
499
+
500
+ # Stemming and Lemmatization
501
+ output_html.append('<div class="section-divider"></div>')
502
+ output_html.append('<h3 class="task-subheader">Stemming & Lemmatization</h3>')
503
+
504
+ # Apply stemming (Porter Stemmer)
505
+ stemmer = PorterStemmer()
506
+ stemmed_words = [stemmer.stem(word) for word in filtered_words[:100]] # Limit to first 100 words for performance
507
+
508
+ # Apply lemmatization
509
+ lemmatizer = WordNetLemmatizer()
510
+ lemmatized_words = [lemmatizer.lemmatize(word) for word in filtered_words[:100]] # Limit to first 100 words
511
+
512
+ # Create comparison DataFrame
513
+ comparison_data = []
514
+ for i in range(min(20, len(filtered_words))): # Show first 20 examples
515
+ if i < len(filtered_words) and filtered_words[i].isalpha(): # Only include alphabetic words
516
+ comparison_data.append({
517
+ 'Original': filtered_words[i],
518
+ 'Stemmed': stemmer.stem(filtered_words[i]),
519
+ 'Lemmatized': lemmatizer.lemmatize(filtered_words[i])
520
+ })
521
+
522
+ comparison_df = pd.DataFrame(comparison_data)
523
+
524
+ output_html.append('<h4>Stemming vs. Lemmatization Comparison</h4>')
525
+
526
+ # Create a custom table for stemming vs lemmatization comparison
527
+ comparison_html = """
528
+ <div class="table-responsive">
529
+ <table class="table table-striped table-hover" style="table-layout: fixed;">
530
+ <thead class="table-primary">
531
+ <tr>
532
+ <th style="width: 30%;">Original</th>
533
+ <th style="width: 35%;">Stemmed</th>
534
+ <th style="width: 35%;">Lemmatized</th>
535
+ </tr>
536
+ </thead>
537
+ <tbody>
538
+ """
539
+
540
+ # Add comparison data rows
541
+ for _, row in comparison_df.iterrows():
542
+ comparison_html += f"""
543
+ <tr>
544
+ <td><code>{row['Original']}</code></td>
545
+ <td><code>{row['Stemmed']}</code></td>
546
+ <td><code>{row['Lemmatized']}</code></td>
547
+ </tr>
548
+ """
549
+
550
+ comparison_html += """
551
+ </tbody>
552
+ </table>
553
+ </div>
554
+ """
555
+
556
+ output_html.append(comparison_html)
557
+
558
+ output_html.append("""
559
+ <div class="alert alert-success">
560
+ <h4><i class="fas fa-lightbulb"></i> Stemming vs. Lemmatization</h4>
561
+ <ul>
562
+ <li><b>Stemming</b> - Cuts off word endings based on common patterns, faster but less accurate</li>
563
+ <li><b>Lemmatization</b> - Uses vocabulary and morphological analysis, slower but produces actual words</li>
564
+ </ul>
565
+ </div>
566
+ """)
567
+
568
+ # NEW SECTION: N-gram Analysis
569
+ output_html.append('<div class="section-divider"></div>')
570
+ output_html.append('<h3 class="task-subheader">N-gram Analysis</h3>')
571
+
572
+ output_html.append("""
573
+ <div class="alert alert-light">
574
+ <p>N-grams are contiguous sequences of n items from text. In NLP, they are used to capture word patterns and relationships,
575
+ and are helpful for language modeling, prediction, and feature extraction.</p>
576
+ </div>
577
+ """)
578
+
579
+ # Process text for n-grams (use filtered_words to avoid stopwords)
580
+ # Convert to lowercase for consistency
581
+ clean_words = [word.lower() for word in filtered_words if word.isalnum()]
582
+
583
+ # Generate n-grams
584
+ bigrams_list = list(ngrams(clean_words, 2))
585
+ trigrams_list = list(ngrams(clean_words, 3))
586
+
587
+ # Count frequencies
588
+ bigram_freq = Counter(bigrams_list)
589
+ trigram_freq = Counter(trigrams_list)
590
+
591
+ # Get most common
592
+ common_bigrams = bigram_freq.most_common(15)
593
+ common_trigrams = trigram_freq.most_common(15)
594
+
595
+ # Format for display
596
+ bigram_labels = [' '.join(bg) for bg, _ in common_bigrams]
597
+ bigram_values = [count for _, count in common_bigrams]
598
+
599
+ trigram_labels = [' '.join(tg) for tg, _ in common_trigrams]
600
+ trigram_values = [count for _, count in common_trigrams]
601
+
602
+ # Create DataFrames for display
603
+ bigram_df = pd.DataFrame({
604
+ 'Bigram': [' '.join(bg) for bg, _ in common_bigrams],
605
+ 'Frequency': [count for _, count in common_bigrams]
606
+ })
607
+
608
+ trigram_df = pd.DataFrame({
609
+ 'Trigram': [' '.join(tg) for tg, _ in common_trigrams],
610
+ 'Frequency': [count for _, count in common_trigrams]
611
+ })
612
+
613
+ # Explanation of n-grams
614
+ output_html.append("""
615
+ <div class="alert alert-info">
616
+ <h4>What are N-grams?</h4>
617
+ <ul>
618
+ <li><b>Unigrams</b> - Single words (e.g., "climate")</li>
619
+ <li><b>Bigrams</b> - Two consecutive words (e.g., "climate change")</li>
620
+ <li><b>Trigrams</b> - Three consecutive words (e.g., "global climate change")</li>
621
+ </ul>
622
+ <p>N-grams capture contextual relationships between words and are valuable for many NLP tasks including language modeling,
623
+ machine translation, speech recognition, and text classification.</p>
624
+ </div>
625
+ """)
626
+
627
+ # Create visualizations for bigrams and trigrams
628
+ if bigram_labels and len(bigram_values) > 0:
629
+ # Bigram visualization
630
+ output_html.append('<h4>Most Common Bigrams</h4>')
631
+
632
+ fig = plt.figure(figsize=(10, 6))
633
+ plt.barh(range(len(bigram_labels)), bigram_values, align='center', color='#1976D2')
634
+ plt.yticks(range(len(bigram_labels)), bigram_labels)
635
+ plt.xlabel('Frequency')
636
+ plt.title('Most Common Bigrams')
637
+ plt.tight_layout()
638
+
639
+ output_html.append(fig_to_html(fig))
640
+
641
+ # Create a multi-column layout for bigrams
642
+ bigram_html = """
643
+ <div class="table-responsive">
644
+ <table class="table table-striped table-hover" style="table-layout: fixed;">
645
+ <thead class="table-primary">
646
+ <tr>
647
+ <th style="width: 10%;">#</th>
648
+ <th style="width: 35%;">Bigram</th>
649
+ <th style="width: 15%;">Freq</th>
650
+ <th style="width: 10%;">#</th>
651
+ <th style="width: 35%;">Bigram</th>
652
+ <th style="width: 15%;">Freq</th>
653
+ </tr>
654
+ </thead>
655
+ <tbody>
656
+ """
657
+
658
+ # Create rows with 2 bigrams per row
659
+ for i in range(0, len(common_bigrams), 2):
660
+ bigram_html += "<tr>"
661
+ for j in range(2):
662
+ if i + j < len(common_bigrams):
663
+ bigram, freq = common_bigrams[i + j]
664
+ bigram_text = ' '.join(bigram)
665
+ bigram_html += f'<td>{i + j + 1}</td><td><code>{bigram_text}</code></td><td><span class="badge bg-info">{freq}</span></td>'
666
+ else:
667
+ bigram_html += '<td></td><td></td><td></td>'
668
+ bigram_html += "</tr>"
669
+
670
+ bigram_html += """
671
+ </tbody>
672
+ </table>
673
+ </div>
674
+ """
675
+
676
+ output_html.append(bigram_html)
677
+ else:
678
+ output_html.append('<p class="text-muted">Not enough text to generate meaningful bigrams.</p>')
679
+
680
+ if trigram_labels and len(trigram_values) > 0:
681
+ # Trigram visualization
682
+ output_html.append('<h4>Most Common Trigrams</h4>')
683
+
684
+ fig = plt.figure(figsize=(10, 6))
685
+ plt.barh(range(len(trigram_labels)), trigram_values, align='center', color='#4CAF50')
686
+ plt.yticks(range(len(trigram_labels)), trigram_labels)
687
+ plt.xlabel('Frequency')
688
+ plt.title('Most Common Trigrams')
689
+ plt.tight_layout()
690
+
691
+ output_html.append(fig_to_html(fig))
692
+
693
+ # Create a multi-column layout for trigrams
694
+ trigram_html = """
695
+ <div class="table-responsive">
696
+ <table class="table table-striped table-hover" style="table-layout: fixed;">
697
+ <thead class="table-primary">
698
+ <tr>
699
+ <th style="width: 10%;">#</th>
700
+ <th style="width: 35%;">Trigram</th>
701
+ <th style="width: 15%;">Freq</th>
702
+ <th style="width: 10%;">#</th>
703
+ <th style="width: 35%;">Trigram</th>
704
+ <th style="width: 15%;">Freq</th>
705
+ </tr>
706
+ </thead>
707
+ <tbody>
708
+ """
709
+
710
+ # Create rows with 2 trigrams per row
711
+ for i in range(0, len(common_trigrams), 2):
712
+ trigram_html += "<tr>"
713
+ for j in range(2):
714
+ if i + j < len(common_trigrams):
715
+ trigram, freq = common_trigrams[i + j]
716
+ trigram_text = ' '.join(trigram)
717
+ trigram_html += f'<td>{i + j + 1}</td><td><code>{trigram_text}</code></td><td><span class="badge bg-success">{freq}</span></td>'
718
+ else:
719
+ trigram_html += '<td></td><td></td><td></td>'
720
+ trigram_html += "</tr>"
721
+
722
+ trigram_html += """
723
+ </tbody>
724
+ </table>
725
+ </div>
726
+ """
727
+
728
+ output_html.append(trigram_html)
729
+ else:
730
+ output_html.append('<p class="text-muted">Not enough text to generate meaningful trigrams.</p>')
731
+
732
+ # Applications of N-grams
733
+ output_html.append("""
734
+ <div class="alert alert-info">
735
+ <h4><i class="fas fa-lightbulb"></i> Applications of N-gram Analysis</h4>
736
+ <ul>
737
+ <li><b>Language Modeling</b> - Predicting the next word in a sequence</li>
738
+ <li><b>Machine Translation</b> - Improving translation quality</li>
739
+ <li><b>Text Classification</b> - Using n-grams as features</li>
740
+ <li><b>Spelling Correction</b> - Suggesting correct spellings</li>
741
+ <li><b>Information Retrieval</b> - Enhancing search results</li>
742
+ <li><b>Sentiment Analysis</b> - Capturing phrase-level sentiments</li>
743
+ </ul>
744
+ </div>
745
+ """)
746
+
747
+ # Word Cloud
748
+ output_html.append('<div class="section-divider"></div>')
749
+ output_html.append('<h3 class="task-subheader">Word Cloud</h3>')
750
+
751
+ try:
752
+ # Create word cloud from filtered words
753
+ wordcloud_text = ' '.join(filtered_words)
754
+ wordcloud = WordCloud(
755
+ width=800,
756
+ height=400,
757
+ background_color='white',
758
+ colormap='viridis',
759
+ max_words=100,
760
+ contour_width=1,
761
+ contour_color='#1976D2'
762
+ ).generate(wordcloud_text)
763
+
764
+ # Display word cloud
765
+ fig = plt.figure(figsize=(12, 8))
766
+ plt.imshow(wordcloud, interpolation='bilinear')
767
+ plt.axis('off')
768
+ plt.tight_layout()
769
+
770
+ output_html.append(fig_to_html(fig))
771
+
772
+ except Exception as e:
773
+ output_html.append(f"<div class='alert alert-warning'>Failed to generate word cloud: {str(e)}</div>")
774
+
775
+ # Word Frequency
776
+ output_html.append('<div class="section-divider"></div>')
777
+ output_html.append('<h3 class="task-subheader">Word Frequency Analysis</h3>')
778
+
779
+ # Calculate word frequencies
780
+ word_freq = Counter(filtered_words)
781
+ most_common = word_freq.most_common(20)
782
+
783
+ # Create DataFrame
784
+ freq_df = pd.DataFrame(most_common, columns=['Word', 'Frequency'])
785
+
786
+ # Create horizontal bar chart
787
+ fig = plt.figure(figsize=(12, 16))
788
+ plt.barh(range(len(most_common)), [val[1] for val in most_common], align='center', color='#1976D2')
789
+ plt.yticks(range(len(most_common)), [val[0] for val in most_common])
790
+ plt.xlabel('Frequency')
791
+ plt.title('Top 20 Words')
792
+ plt.subplots_adjust(left=0.15, right=0.95, top=0.95, bottom=0.1)
793
+ plt.tight_layout(pad=3.0)
794
+
795
+ # Render chart
796
+ output_html.append('<section class="wf-chart-section">')
797
+ output_html.append('<div class="chart-container">')
798
+ output_html.append(fig_to_html(fig))
799
+ output_html.append('</div>')
800
+ output_html.append('</section>')
801
+
802
+ # Create a multi-column layout for word frequency
803
+ freq_html = """
804
+ <section class="wf-table-container">
805
+ <div class="table-responsive">
806
+ <table class="table table-striped table-hover" style="table-layout: fixed;">
807
+ <thead class="table-primary">
808
+ <tr>
809
+ <th style="width: 10%;">#</th>
810
+ <th style="width: 35%;">Word</th>
811
+ <th style="width: 15%;">Freq</th>
812
+ <th style="width: 10%;">#</th>
813
+ <th style="width: 35%;">Word</th>
814
+ <th style="width: 15%;">Freq</th>
815
+ </tr>
816
+ </thead>
817
+ <tbody>
818
+ """
819
+
820
+ # Create rows with 2 words per row
821
+ for i in range(0, len(most_common), 2):
822
+ freq_html += "<tr>"
823
+ for j in range(2):
824
+ if i + j < len(most_common):
825
+ word, freq = most_common[i + j]
826
+ freq_html += f'<td>{i + j + 1}</td><td><code>{word}</code></td><td><span class="badge bg-primary">{freq}</span></td>'
827
+ else:
828
+ freq_html += '<td></td><td></td><td></td>'
829
+ freq_html += "</tr>"
830
+
831
+ freq_html += """
832
+ </tbody>
833
+ </table>
834
+ </div>
835
+ </section>
836
+ """
837
+
838
+ output_html.append(freq_html)
839
+
840
+ except Exception as e:
841
+ output_html.append(f"""
842
+ <div class="alert alert-danger">
843
+ <h3>Error</h3>
844
+ <p>Failed to process text: {str(e)}</p>
845
+ </div>
846
+ """)
847
+
848
+ # About text preprocessing
849
+ output_html.append("""
850
+ <div class="card mt-4">
851
+ <div class="card-header">
852
+ <h4 class="mb-0">
853
+ <i class="fas fa-info-circle"></i>
854
+ About Text Preprocessing
855
+ </h4>
856
+ </div>
857
+ <div class="card-body">
858
+ <h5>What is Text Preprocessing?</h5>
859
+
860
+ <p>Text preprocessing is the first step in NLP pipelines that transforms raw text into a clean, structured format
861
+ suitable for analysis. It includes various techniques to standardize text and reduce noise.</p>
862
+
863
+ <h5>Common Preprocessing Steps:</h5>
864
+
865
+ <ul>
866
+ <li><b>Tokenization</b> - Splitting text into individual words or sentences</li>
867
+ <li><b>Normalization</b> - Converting text to lowercase, removing accents, etc.</li>
868
+ <li><b>Noise Removal</b> - Removing punctuation, special characters, HTML tags, etc.</li>
869
+ <li><b>Stopword Removal</b> - Filtering out common words that add little meaning</li>
870
+ <li><b>Stemming/Lemmatization</b> - Reducing words to their root forms</li>
871
+ <li><b>Spelling Correction</b> - Fixing typos and errors</li>
872
+ </ul>
873
+
874
+ <h5>Why Preprocess Text?</h5>
875
+
876
+ <ul>
877
+ <li>Reduces dimensionality and noise in the data</li>
878
+ <li>Standardizes text for consistent analysis</li>
879
+ <li>Improves performance of downstream NLP tasks</li>
880
+ <li>Makes text more suitable for machine learning models</li>
881
+ </ul>
882
+ </div>
883
+ </div>
884
+ """)
885
+
886
+ output_html.append('</div>') # Close result-area div
887
+
888
+ return '\n'.join(output_html)
components/question_answering.py ADDED
@@ -0,0 +1,498 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib.pyplot as plt
2
+ import pandas as pd
3
+ import numpy as np
4
+ from transformers import pipeline
5
+ import nltk
6
+ from collections import Counter
7
+ import re
8
+ from sklearn.feature_extraction.text import TfidfVectorizer
9
+ from sklearn.metrics.pairwise import cosine_similarity
10
+
11
+ from utils.model_loader import load_qa_pipeline
12
+ from utils.helpers import fig_to_html, df_to_html_table
13
+
14
+ def question_answering_handler(context_text, question, answer_type="extractive", confidence_threshold=0.5):
15
+ """Show question answering capabilities with comprehensive analysis."""
16
+ output_html = []
17
+
18
+ # Add result area container
19
+ output_html.append('<div class="result-area">')
20
+ output_html.append('<h2 class="task-header">Question Answering System</h2>')
21
+
22
+ output_html.append("""
23
+ <div class="alert alert-info">
24
+ <i class="fas fa-info-circle"></i>
25
+ Question Answering (QA) systems extract or generate answers to questions based on a given context or knowledge base.
26
+ This system can handle both extractive (finding answers in text) and abstractive (generating new answers) approaches.
27
+ </div>
28
+ """)
29
+
30
+ # Model info
31
+ output_html.append("""
32
+ <div class="alert alert-info">
33
+ <h4><i class="fas fa-tools"></i> Models & Techniques Used:</h4>
34
+ <ul>
35
+ <li><b>RoBERTa-SQuAD2</b> - Fine-tuned transformer model for extractive QA (F1: ~83.7 on SQuAD 2.0)</li>
36
+ <li><b>BERT-based QA</b> - Bidirectional encoder representations for understanding context</li>
37
+ <li><b>TF-IDF Similarity</b> - Traditional approach for finding relevant text spans</li>
38
+ <li><b>Confidence Scoring</b> - Model uncertainty estimation for answer reliability</li>
39
+ </ul>
40
+ </div>
41
+ """)
42
+
43
+ try:
44
+ # Validate inputs
45
+ if not context_text or not context_text.strip():
46
+ output_html.append('<div class="alert alert-warning">⚠️ Please provide a context text for question answering.</div>')
47
+ output_html.append('</div>')
48
+ return "\n".join(output_html)
49
+
50
+ if not question or not question.strip():
51
+ output_html.append('<div class="alert alert-warning">⚠️ Please provide a question to answer.</div>')
52
+ output_html.append('</div>')
53
+ return "\n".join(output_html)
54
+
55
+ # Display input information
56
+ output_html.append('<h3 class="task-subheader">Input Analysis</h3>')
57
+
58
+ context_stats = {
59
+ "Context Length": len(context_text),
60
+ "Word Count": len(context_text.split()),
61
+ "Sentence Count": len(nltk.sent_tokenize(context_text)),
62
+ "Question Length": len(question),
63
+ "Question Words": len(question.split())
64
+ }
65
+
66
+ stats_df = pd.DataFrame(list(context_stats.items()), columns=['Metric', 'Value'])
67
+ output_html.append('<h4>Input Statistics</h4>')
68
+ output_html.append(df_to_html_table(stats_df))
69
+
70
+ # Question Analysis
71
+ output_html.append('<h3 class="task-subheader">Question Analysis</h3>')
72
+
73
+ # Classify question type
74
+ question_lower = question.lower().strip()
75
+ question_type = classify_question_type(question_lower)
76
+
77
+ output_html.append(f"""
78
+ <div class="card">
79
+ <div class="card-header">
80
+ <h4 class="mb-0">Question Classification</h4>
81
+ </div>
82
+ <div class="card-body">
83
+ <p><strong>Question:</strong> {question}</p>
84
+ <p><strong>Type:</strong> {question_type['type']}</p>
85
+ <p><strong>Expected Answer:</strong> {question_type['expected']}</p>
86
+ <p><strong>Keywords:</strong> {', '.join(question_type['keywords'])}</p>
87
+ </div>
88
+ </div>
89
+ """)
90
+
91
+ # Extractive Question Answering using Transformer
92
+ output_html.append('<h3 class="task-subheader">Transformer-based Answer Extraction</h3>')
93
+
94
+ try:
95
+ qa_pipeline = load_qa_pipeline()
96
+
97
+ # Get answer from the model
98
+ result = qa_pipeline(question=question, context=context_text)
99
+
100
+ answer = result['answer']
101
+ confidence = result['score']
102
+ start_pos = result['start']
103
+ end_pos = result['end']
104
+
105
+ # Create confidence visualization
106
+ fig, ax = plt.subplots(1, 1, figsize=(8, 4))
107
+
108
+ # Confidence bar
109
+ colors = ['red' if confidence < 0.3 else 'orange' if confidence < 0.7 else 'green']
110
+ bars = ax.barh(['Confidence'], [confidence], color=colors[0])
111
+ ax.set_xlim(0, 1)
112
+ ax.set_xlabel('Confidence Score')
113
+ ax.set_title('Answer Confidence')
114
+
115
+ # Add confidence threshold line
116
+ ax.axvline(x=confidence_threshold, color='red', linestyle='--', label=f'Threshold ({confidence_threshold})')
117
+ ax.legend()
118
+
119
+ # Add value labels
120
+ for bar in bars:
121
+ width = bar.get_width()
122
+ ax.text(width/2, bar.get_y() + bar.get_height()/2,
123
+ f'{width:.3f}', ha='center', va='center', fontweight='bold')
124
+
125
+ plt.tight_layout()
126
+ output_html.append(fig_to_html(fig))
127
+ plt.close()
128
+
129
+ # Display answer with context highlighting
130
+ confidence_status = "High" if confidence >= 0.7 else "Medium" if confidence >= 0.3 else "Low"
131
+ confidence_color = "#4CAF50" if confidence >= 0.7 else "#FF9800" if confidence >= 0.3 else "#F44336"
132
+
133
+ output_html.append(f"""
134
+ <div class="card" style="border-color: {confidence_color};">
135
+ <div class="card-header" style="background-color: {confidence_color}22;">
136
+ <h4 class="mb-0">📝 Extracted Answer</h4>
137
+ </div>
138
+ <div class="card-body">
139
+ <div class="alert alert-light">
140
+ <strong>Answer:</strong> <span class="badge bg-warning text-dark fs-6">{answer}</span>
141
+ </div>
142
+ <p><strong>Confidence:</strong> {confidence:.3f} ({confidence_status})</p>
143
+ <p><strong>Position in Text:</strong> Characters {start_pos}-{end_pos}</p>
144
+ </div>
145
+ </div>
146
+ """)
147
+
148
+ # Show context with answer highlighted
149
+ highlighted_context = highlight_answer_in_context(context_text, start_pos, end_pos)
150
+ output_html.append(f"""
151
+ <div class="card">
152
+ <div class="card-header">
153
+ <h4 class="mb-0">📄 Context with Highlighted Answer</h4>
154
+ </div>
155
+ <div class="card-body">
156
+ <div style="line-height: 1.6; border: 1px solid #ddd; padding: 1rem; border-radius: 5px;">
157
+ {highlighted_context}
158
+ </div>
159
+ </div>
160
+ </div>
161
+ """)
162
+
163
+ except Exception as e:
164
+ output_html.append(f'<div class="alert alert-danger">❌ Error in transformer QA: {str(e)}</div>')
165
+
166
+ # Alternative: TF-IDF based answer extraction
167
+ output_html.append('<h3 class="task-subheader">TF-IDF Based Answer Extraction</h3>')
168
+
169
+ try:
170
+ tfidf_answer = extract_answer_tfidf(context_text, question)
171
+
172
+ output_html.append(f"""
173
+ <div class="alert alert-success">
174
+ <h4>🔍 TF-IDF Based Answer</h4>
175
+ <div class="alert alert-light">
176
+ <strong>Most Relevant Sentence:</strong> {tfidf_answer['sentence']}
177
+ </div>
178
+ <p><strong>Similarity Score:</strong> {tfidf_answer['score']:.3f}</p>
179
+ <p><strong>Method:</strong> Cosine similarity between question and context sentences using TF-IDF vectors</p>
180
+ </div>
181
+ """)
182
+
183
+ except Exception as e:
184
+ output_html.append(f'<div class="alert alert-danger">❌ Error in TF-IDF QA: {str(e)}</div>')
185
+
186
+ # Answer Quality Assessment
187
+ output_html.append('<h3 class="task-subheader">Answer Quality Assessment</h3>')
188
+
189
+ if 'confidence' in locals():
190
+ quality_metrics = assess_answer_quality(question, answer, confidence, context_text)
191
+
192
+ # Create quality assessment visualization
193
+ fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
194
+
195
+ # Quality metrics radar chart
196
+ categories = list(quality_metrics.keys())
197
+ values = list(quality_metrics.values())
198
+
199
+ ax1.bar(categories, values, color=['#4CAF50', '#2196F3', '#FF9800', '#9C27B0'])
200
+ ax1.set_ylim(0, 1)
201
+ ax1.set_title('Answer Quality Metrics')
202
+ ax1.set_ylabel('Score')
203
+ plt.setp(ax1.get_xticklabels(), rotation=45, ha='right')
204
+
205
+ # Overall quality score
206
+ overall_score = sum(values) / len(values)
207
+ quality_label = "Excellent" if overall_score >= 0.8 else "Good" if overall_score >= 0.6 else "Fair" if overall_score >= 0.4 else "Poor"
208
+
209
+ ax2.pie([overall_score, 1-overall_score], labels=[f'{quality_label}\n({overall_score:.2f})', 'Room for Improvement'],
210
+ colors=['#4CAF50', '#E0E0E0'], startangle=90)
211
+ ax2.set_title('Overall Answer Quality')
212
+
213
+ plt.tight_layout()
214
+ output_html.append(fig_to_html(fig))
215
+ plt.close()
216
+
217
+ # Quality metrics table
218
+ quality_df = pd.DataFrame([
219
+ {'Metric': 'Confidence', 'Score': f"{quality_metrics['Confidence']:.3f}", 'Description': 'Model confidence in the answer'},
220
+ {'Metric': 'Relevance', 'Score': f"{quality_metrics['Relevance']:.3f}", 'Description': 'Semantic similarity to question'},
221
+ {'Metric': 'Completeness', 'Score': f"{quality_metrics['Completeness']:.3f}", 'Description': 'Answer length appropriateness'},
222
+ {'Metric': 'Context Match', 'Score': f"{quality_metrics['Context_Match']:.3f}", 'Description': 'How well answer fits context'}
223
+ ])
224
+
225
+ output_html.append('<h4>Quality Assessment Details</h4>')
226
+ output_html.append(df_to_html_table(quality_df))
227
+
228
+ # Question-Answer Pairs Suggestions
229
+ output_html.append('<h3 class="task-subheader">Suggested Follow-up Questions</h3>')
230
+
231
+ try:
232
+ suggested_questions = generate_followup_questions(context_text, question, answer if 'answer' in locals() else "")
233
+
234
+ output_html.append('<div class="alert alert-warning">')
235
+ output_html.append('<h4>💡 Follow-up Questions:</h4>')
236
+ output_html.append('<ul>')
237
+ for i, q in enumerate(suggested_questions, 1):
238
+ output_html.append(f'<li><strong>Q{i}:</strong> {q}</li>')
239
+ output_html.append('</ul>')
240
+ output_html.append('</div>')
241
+
242
+ except Exception as e:
243
+ output_html.append(f'<div class="alert alert-danger">❌ Error generating suggestions: {str(e)}</div>')
244
+
245
+ except Exception as e:
246
+ output_html.append(f'<div class="alert alert-danger">❌ Unexpected error: {str(e)}</div>')
247
+
248
+ output_html.append('</div>')
249
+ return "\n".join(output_html)
250
+
251
def classify_question_type(question):
    """Classify a question by its leading interrogative word(s).

    Args:
        question: The question text. It is lower-cased and stripped here,
            so callers may pass it in any case.

    Returns:
        A dict with three keys:
        ``'type'`` - a short label for the question category,
        ``'expected'`` - a description of the expected answer form,
        ``'keywords'`` - up to five words from the question that are longer
        than two characters and not in a small stop-word list.
        Questions that match no known prefix are labelled ``'General'``.
    """
    import re  # local import: the module's import block is outside this block

    question = question.lower().strip()

    # Question word patterns
    patterns = {
        'what': {'type': 'Definition/Fact', 'expected': 'Entity, concept, or description'},
        'who': {'type': 'Person', 'expected': 'Person name or group'},
        'when': {'type': 'Time', 'expected': 'Date, time, or temporal expression'},
        'where': {'type': 'Location', 'expected': 'Place, location, or spatial reference'},
        'why': {'type': 'Reason/Cause', 'expected': 'Explanation or causal relationship'},
        'how': {'type': 'Method/Process', 'expected': 'Process, method, or manner'},
        'which': {'type': 'Selection', 'expected': 'Specific choice from options'},
        'how much': {'type': 'Quantity', 'expected': 'Numerical amount or quantity'},
        'how many': {'type': 'Count', 'expected': 'Numerical count'},
        'is': {'type': 'Yes/No', 'expected': 'Boolean answer'},
        'are': {'type': 'Yes/No', 'expected': 'Boolean answer'},
        'can': {'type': 'Ability/Possibility', 'expected': 'Yes/No with explanation'},
        'will': {'type': 'Future/Prediction', 'expected': 'Future state or prediction'},
        'did': {'type': 'Past Action', 'expected': 'Yes/No about past events'},
    }

    # Extract keywords from question
    words = question.split()
    stop_words = {'the', 'and', 'but', 'for'}
    keywords = [word for word in words if len(word) > 2 and word not in stop_words]
    top_keywords = keywords[:5]

    # Collapse runs of whitespace so multi-word prefixes such as "how  many"
    # still match.
    normalized = " ".join(words)

    # Try the longest prefixes first so that 'how many' / 'how much' are not
    # shadowed by plain 'how', and require a word boundary after the prefix so
    # that e.g. "island ..." is not treated as an 'is' question.
    for pattern in sorted(patterns, key=len, reverse=True):
        if re.match(rf"{re.escape(pattern)}\b", normalized):
            info = patterns[pattern]
            return {
                'type': info['type'],
                'expected': info['expected'],
                'keywords': top_keywords,
            }

    # Default classification
    return {
        'type': 'General',
        'expected': 'Text span or explanation',
        'keywords': top_keywords,
    }
292
+
293
def extract_answer_tfidf(context, question):
    """Pick the context sentence most similar to the question using TF-IDF.

    The question and every sentence of the context are vectorised together
    with a TF-IDF vectoriser (English stop words removed), and the sentence
    with the highest cosine similarity to the question is returned.

    Args:
        context: Text to search for an answer.
        question: The question text.

    Returns:
        A dict with ``'sentence'`` (the best-matching sentence, or a
        placeholder message when the context yields no sentences) and
        ``'score'`` (its cosine similarity to the question as a float).
    """
    # Split context into sentences
    sentences = nltk.sent_tokenize(context)

    if not sentences:
        return {'sentence': 'No sentences found', 'score': 0.0}

    # Create TF-IDF vectors for the question (row 0) and each sentence
    vectorizer = TfidfVectorizer(stop_words='english', lowercase=True)

    try:
        tfidf_matrix = vectorizer.fit_transform([question] + sentences)
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when nothing is
        # left after tokenisation / stop-word removal. Report that as "no
        # similarity" (same result shape as an all-zero similarity row)
        # instead of propagating an error.
        return {'sentence': sentences[0], 'score': 0.0}

    # Cosine similarity between the question and each sentence
    similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:]).flatten()

    # Find the most similar sentence
    best_idx = int(np.argmax(similarities))

    return {
        'sentence': sentences[best_idx],
        # Plain float rather than a NumPy scalar so the result serialises cleanly.
        'score': float(similarities[best_idx]),
    }
323
+
324
def highlight_answer_in_context(context, start_pos, end_pos):
    """Return the context as HTML with the answer span wrapped in ``<mark>``.

    Args:
        context: The raw context text.
        start_pos: Index of the first character of the answer in ``context``.
        end_pos: Index one past the last character of the answer.

    Returns:
        An HTML string: the text before the span, the highlighted span, and
        the text after it. All three pieces are HTML-escaped because the
        context is free-form text and the result is embedded directly in a
        page; only the ``<mark>`` wrapper is emitted as markup.
    """
    from html import escape  # local import: module import block is not visible here

    before = escape(context[:start_pos])
    answer = escape(context[start_pos:end_pos])
    after = escape(context[end_pos:])

    highlighted = f'{before}<mark style="background-color: #FFEB3B; padding: 2px 4px; border-radius: 3px; font-weight: bold;">{answer}</mark>{after}'

    return highlighted
333
+
334
def assess_answer_quality(question, answer, confidence, context):
    """Score an extracted answer on four simple heuristics.

    Args:
        question: The question that was asked.
        answer: The extracted answer text (may be empty).
        confidence: The model's confidence score, stored unchanged.
        context: The context the answer was extracted from.

    Returns:
        A dict, in this key order:
        ``'Confidence'`` - the ``confidence`` argument as given;
        ``'Relevance'`` - fraction of distinct question words that also occur
        in the answer (case- and punctuation-insensitive);
        ``'Completeness'`` - 0.0 for an empty answer, 0.6 for fewer than three
        words, 1.0 for 3-20 words, 0.8 for longer answers;
        ``'Context_Match'`` - 1.0 if the answer text occurs in the context
        (case-insensitive), 0.5 if it does not, 0.0 if the answer is empty.
    """
    import re  # local import: module import block is not visible here

    def _word_set(text):
        # Compare on bare words so trailing punctuation ("apple?") does not
        # prevent a match against the same word in the other string.
        return set(re.findall(r"\w+", text.lower()))

    metrics = {}

    # Confidence score (from model)
    metrics['Confidence'] = confidence

    # Relevance (simple keyword overlap)
    question_words = _word_set(question)
    answer_words = _word_set(answer)
    overlap = len(question_words & answer_words)
    metrics['Relevance'] = min(overlap / max(len(question_words), 1), 1.0)

    # Completeness (answer length appropriateness)
    answer_length = len(answer.split())
    if answer_length == 0:
        metrics['Completeness'] = 0.0
    elif answer_length < 3:
        metrics['Completeness'] = 0.6
    elif answer_length <= 20:
        metrics['Completeness'] = 1.0
    else:
        metrics['Completeness'] = 0.8  # Very long answers might be too verbose

    # Context match (does the answer text appear in the context?)
    answer_text = answer.strip()
    if not answer_text:
        # An empty string is trivially "in" any context; that must not count
        # as a perfect match.
        metrics['Context_Match'] = 0.0
    elif answer_text.lower() in context.lower():
        metrics['Context_Match'] = 1.0
    else:
        metrics['Context_Match'] = 0.5

    return metrics
363
+
364
def generate_followup_questions(context, original_question, answer):
    """Suggest up to five follow-up questions for a question/answer pair.

    Suggestions are built from fixed templates, in priority order:
    1. an answer-specific question (only when ``answer`` is non-empty),
    2. a when/where/why/how question for each of those words that the
       original question did not already use,
    3. generic follow-ups, used to fill any remaining slots.

    Args:
        context: The context text. Accepted for interface compatibility; the
            templates do not currently read it.
        original_question: The question that was asked.
        answer: The extracted answer, or an empty string if none is available.

    Returns:
        A list of at most five question strings.
    """
    import re  # local import: module import block is not visible here

    suggestions = []

    # Answer-specific template; skipped for an empty answer so we never emit
    # "What else can you tell me about ?".
    answer_text = answer.strip() if answer else ""
    if answer_text:
        suggestions.append(f"What else can you tell me about {answer_text}?")

    # Context-specific questions. Compare whole words so that e.g. "show"
    # does not count as the question having already asked "how".
    asked_words = set(re.findall(r"[a-z]+", original_question.lower()))
    wh_followups = (
        ("when", "When did this happen?"),
        ("where", "Where did this take place?"),
        ("why", "Why is this significant?"),
        ("how", "How does this work?"),
    )
    suggestions.extend(
        followup for word, followup in wh_followups if word not in asked_words
    )

    # Generic templates go last: placed first they would fill all five slots
    # and the tailored suggestions above would never be returned.
    suggestions.extend([
        "Can you provide more details about this topic?",
        "What are the implications of this information?",
        "How does this relate to other concepts mentioned?",
        "What evidence supports this answer?",
    ])

    return suggestions[:5]  # Return top 5 suggestions
396
+
397
def qa_api_handler(context, question):
    """Answer a question over a context and return structured data.

    Args:
        context: The text to search for an answer.
        question: The question to answer.

    Returns:
        A dict with keys ``'answer'``, ``'confidence'``, ``'start_position'``,
        ``'end_position'``, ``'success'`` and ``'error'``. On success the
        first four are taken from the QA pipeline result and ``'error'`` is
        ``None``. On failure (missing input, or any exception raised while
        loading or running the pipeline) ``'success'`` is ``False``,
        ``'error'`` holds a message and the other fields are empty/zero.
    """
    def _failure(message):
        # Single place that defines the shape of an unsuccessful response.
        return {
            "answer": "",
            "confidence": 0.0,
            "start_position": 0,
            "end_position": 0,
            "success": False,
            "error": message,
        }

    # Validate before touching the model so that empty input gets a clear
    # message and does not trigger a pipeline load.
    if not context or not context.strip():
        return _failure("No context text provided")

    if not question or not question.strip():
        return _failure("No question provided")

    try:
        qa_pipeline = load_qa_pipeline()
        result = qa_pipeline(question=question, context=context)

        return {
            "answer": result['answer'],
            "confidence": result['score'],
            "start_position": result['start'],
            "end_position": result['end'],
            "success": True,
            "error": None,
        }
    except Exception as e:
        # API boundary: report any model/pipeline failure to the caller as
        # data rather than raising.
        return _failure(str(e))
420
+
421
def process_question_with_context(context_text, question):
    """Answer a question over a context and return the result with an HTML card.

    Args:
        context_text: The text to search for an answer.
        question: The question to answer.

    Returns:
        A dict. On success: ``'success'`` (True), ``'answer'`` (raw answer
        text), ``'confidence'`` (pipeline score) and ``'html'`` (a rendered
        result card). On failure (missing input, or any exception raised
        while loading or running the pipeline): ``'success'`` (False),
        ``'error'`` (message) and ``'html'`` (an alert box).
    """
    from html import escape  # local import: module import block is not visible here

    if not context_text or not context_text.strip():
        return {
            "success": False,
            "error": "No context text provided",
            "html": '<div class="alert alert-warning">⚠️ No context text provided.</div>'
        }

    if not question or not question.strip():
        return {
            "success": False,
            "error": "No question provided",
            "html": '<div class="alert alert-warning">⚠️ Please enter a question.</div>'
        }

    try:
        qa_pipeline = load_qa_pipeline()
        result = qa_pipeline(question=question, context=context_text)

        answer = result['answer']
        confidence = result['score']
        start_pos = result['start']
        end_pos = result['end']

        # Determine confidence level
        confidence_status = "High" if confidence >= 0.7 else "Medium" if confidence >= 0.3 else "Low"

        # Highlight answer in context
        highlighted_context = highlight_answer_in_context(context_text, start_pos, end_pos)

        # The question and answer are free-form text; escape them before
        # interpolating into markup so they cannot inject HTML.
        safe_question = escape(question)
        safe_answer = escape(answer)
        answer_word_count = len(answer.split())

        # Create formatted HTML result
        html_result = f"""
        <div class="card">
            <div class="card-header">
                <h5 class="mb-0">📝 Answer Found!</h5>
            </div>
            <div class="card-body">
                <div class="alert alert-light">
                    <p><strong>Question:</strong> {safe_question}</p>
                    <p><strong>Answer:</strong> <span class="badge bg-warning text-dark fs-6">{safe_answer}</span></p>
                    <p><strong>Confidence:</strong> {confidence:.3f} ({confidence_status})</p>
                </div>

                <div class="alert alert-light">
                    <h6>📄 Context with Highlighted Answer:</h6>
                    <div style="line-height: 1.6; font-size: 0.9rem; max-height: 200px; overflow-y: auto;">
                        {highlighted_context}
                    </div>
                </div>

                <div class="alert alert-info">
                    <strong>Quality Assessment:</strong>
                    <ul class="mb-0">
                        <li>Confidence: {confidence_status} ({confidence:.1%})</li>
                        <li>Answer found at position: {start_pos}-{end_pos}</li>
                        <li>Answer length: {answer_word_count} words</li>
                    </ul>
                </div>
            </div>
        </div>
        """

        return {
            "success": True,
            "answer": answer,
            "confidence": confidence,
            "html": html_result
        }

    except Exception as e:
        # UI boundary: surface any model/pipeline failure as an alert box.
        # The message is escaped for the HTML; the raw text stays in 'error'.
        error_html = f'<div class="alert alert-danger">❌ Error processing question: {escape(str(e))}</div>'
        return {
            "success": False,
            "error": str(e),
            "html": error_html
        }
components/sentiment.py ADDED
@@ -0,0 +1,549 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib
2
+ matplotlib.use('Agg') # Use non-interactive backend
3
+ import matplotlib.pyplot as plt
4
+ import pandas as pd
5
+ import numpy as np
6
+ from nltk.sentiment.vader import SentimentIntensityAnalyzer
7
+ import nltk
8
+ from collections import Counter
9
+
10
+ from utils.model_loader import load_sentiment_analyzer, load_emotion_classifier
11
+ from utils.helpers import fig_to_html, df_to_html_table
12
+
13
+ def sentiment_handler(text_input):
14
+ """Show sentiment analysis capabilities."""
15
+ output_html = []
16
+
17
+ # Add result area container
18
+ output_html.append('<div class="result-area">')
19
+ output_html.append('<h2 class="task-header">Sentiment Analysis</h2>')
20
+
21
+ output_html.append("""
22
+ <div class="alert alert-info">
23
+ <i class="fas fa-info-circle"></i>
24
+ Sentiment analysis determines the emotional tone behind text to identify if it expresses positive, negative, or neutral sentiment.
25
+ </div>
26
+ """)
27
+
28
+ # Model info
29
+ output_html.append("""
30
+ <div class="alert alert-info">
31
+ <h4><i class="fas fa-tools"></i> Models Used:</h4>
32
+ <ul>
33
+ <li><b>NLTK VADER</b> - Rule-based sentiment analyzer specifically tuned for social media text</li>
34
+ <li><b>DistilBERT</b> - Transformer model fine-tuned on SST-2 dataset, achieving ~91% accuracy</li>
35
+ <li><b>RoBERTa Emotion</b> - Transformer model for multi-label emotion detection</li>
36
+ </ul>
37
+ </div>
38
+ """)
39
+
40
+ try:
41
+ # VADER Analysis
42
+ output_html.append('<h3 class="task-subheader">VADER Sentiment Analysis</h3>')
43
+ output_html.append('<p>VADER (Valence Aware Dictionary and sEntiment Reasoner) is a lexicon and rule-based sentiment analysis tool specifically attuned to sentiments expressed in social media.</p>')
44
+
45
+ # Get VADER analyzer
46
+ vader_analyzer = SentimentIntensityAnalyzer()
47
+ vader_scores = vader_analyzer.polarity_scores(text_input)
48
+
49
+ # Extract scores
50
+ compound_score = vader_scores['compound']
51
+ pos_score = vader_scores['pos']
52
+ neg_score = vader_scores['neg']
53
+ neu_score = vader_scores['neu']
54
+
55
+ # Determine sentiment category
56
+ if compound_score >= 0.05:
57
+ sentiment_category = "Positive"
58
+ sentiment_color = "#4CAF50" # Green
59
+ sentiment_emoji = "😊"
60
+ elif compound_score <= -0.05:
61
+ sentiment_category = "Negative"
62
+ sentiment_color = "#F44336" # Red
63
+ sentiment_emoji = "😞"
64
+ else:
65
+ sentiment_category = "Neutral"
66
+ sentiment_color = "#FFC107" # Amber
67
+ sentiment_emoji = "😐"
68
+
69
+ # Create sentiment gauge display
70
+ output_html.append(f"""
71
+ <div class="card">
72
+ <div class="card-body">
73
+ <div class="text-center mb-3">
74
+ <span style="font-size: 3rem; margin-right: 15px;">{sentiment_emoji}</span>
75
+ <div>
76
+ <h3 class="mb-0" style="color: {sentiment_color};">{sentiment_category}</h3>
77
+ <p class="mb-0 fs-5">Compound Score: {compound_score:.2f}</p>
78
+ </div>
79
+ </div>
80
+
81
+ <div style="height: 30px; background-color: #e0e0e0; border-radius: 15px; position: relative; overflow: hidden; margin: 10px 0;">
82
+ <div style="position: absolute; top: 0; bottom: 0; left: 50%; width: 2px; background-color: #000; z-index: 2;"></div>
83
+ <div style="position: absolute; top: 0; bottom: 0; left: {(compound_score + 1) / 2 * 100}%; width: 10px; background-color: {sentiment_color}; border-radius: 5px; transform: translateX(-50%); z-index: 3;"></div>
84
+ <div style="position: absolute; top: 0; bottom: 0; left: 0; width: 50%; background: linear-gradient(90deg, #F44336 0%, #FFC107 100%);"></div>
85
+ <div style="position: absolute; top: 0; bottom: 0; right: 0; width: 50%; background: linear-gradient(90deg, #FFC107 0%, #4CAF50 100%);"></div>
86
+ </div>
87
+ <div class="d-flex justify-content-between mt-2">
88
+ <span>Negative (-1.0)</span>
89
+ <span>Neutral (0.0)</span>
90
+ <span>Positive (1.0)</span>
91
+ </div>
92
+ </div>
93
+ </div>
94
+ """)
95
+
96
+ # VADER score breakdown
97
+ output_html.append('<h4>VADER Score Breakdown</h4>')
98
+
99
+ # Create pie chart
100
+ fig = plt.figure(figsize=(8, 8))
101
+ labels = ['Positive', 'Neutral', 'Negative']
102
+ sizes = [pos_score, neu_score, neg_score]
103
+ colors = ['#4CAF50', '#FFC107', '#F44336']
104
+ explode = (0.1, 0, 0) if pos_score > neg_score and pos_score > neu_score else \
105
+ (0, 0.1, 0) if neu_score > pos_score and neu_score > neg_score else \
106
+ (0, 0, 0.1)
107
+
108
+ plt.pie(sizes, explode=explode, labels=labels, colors=colors, autopct='%1.1f%%',
109
+ shadow=True, startangle=90)
110
+ plt.axis('equal')
111
+ plt.title('VADER Sentiment Distribution')
112
+
113
+ # Create detail table
114
+ detail_df = pd.DataFrame({
115
+ 'Metric': ['Positive Score', 'Neutral Score', 'Negative Score', 'Compound Score'],
116
+ 'Value': [pos_score, neu_score, neg_score, compound_score]
117
+ })
118
+
119
+ # Layout with columns for VADER results
120
+ output_html.append('<div class="row">')
121
+
122
+ # Column 1: Chart
123
+ output_html.append('<div class="col-md-6">')
124
+ output_html.append(fig_to_html(fig))
125
+ output_html.append('</div>')
126
+
127
+ # Column 2: Data
128
+ output_html.append('<div class="col-md-6">')
129
+ output_html.append(df_to_html_table(detail_df))
130
+
131
+ # Add interpretation
132
+ if compound_score >= 0.75:
133
+ interpretation = "Extremely positive sentiment"
134
+ elif compound_score >= 0.5:
135
+ interpretation = "Moderately positive sentiment"
136
+ elif compound_score >= 0.05:
137
+ interpretation = "Slightly positive sentiment"
138
+ elif compound_score > -0.05:
139
+ interpretation = "Neutral sentiment"
140
+ elif compound_score > -0.5:
141
+ interpretation = "Slightly negative sentiment"
142
+ elif compound_score > -0.75:
143
+ interpretation = "Moderately negative sentiment"
144
+ else:
145
+ interpretation = "Extremely negative sentiment"
146
+
147
+ output_html.append(f"""
148
+ <div class="alert alert-success mt-3">
149
+ <h4>Interpretation</h4>
150
+ <p class="mb-0">{interpretation}</p>
151
+ </div>
152
+ """)
153
+
154
+ output_html.append('</div>') # Close column 2
155
+ output_html.append('</div>') # Close row
156
+
157
+ # Transformer-based Sentiment Analysis
158
+ output_html.append('<h3 class="task-subheader">Transformer-based Sentiment Analysis</h3>')
159
+ output_html.append('<p>This analysis uses a DistilBERT model fine-tuned on the Stanford Sentiment Treebank dataset.</p>')
160
+
161
+ try:
162
+ # Load transformer model
163
+ sentiment_model = load_sentiment_analyzer()
164
+
165
+ # Maximum text length for transformer model (BERT has a 512 token limit)
166
+ max_length = 512
167
+
168
+ # Get prediction
169
+ truncated_text = text_input[:max_length * 4] # Rough character estimate
170
+ transformer_result = sentiment_model(truncated_text)
171
+
172
+ if len(text_input) > max_length * 4:
173
+ output_html.append(f"""
174
+ <div class="alert alert-warning">
175
+ <p class="mb-0"><b>⚠️ Note:</b> Text was truncated for analysis as it exceeds the model's length limit.</p>
176
+ </div>
177
+ """)
178
+
179
+ # Extract prediction
180
+ transformer_label = transformer_result[0]['label']
181
+ transformer_score = transformer_result[0]['score']
182
+
183
+ # Display transformer result
184
+ sentiment_color = "#4CAF50" if transformer_label == "POSITIVE" else "#F44336"
185
+ sentiment_emoji = "😊" if transformer_label == "POSITIVE" else "😞"
186
+
187
+ output_html.append(f"""
188
+ <div class="card" style="border-color: {sentiment_color};">
189
+ <div class="card-body" style="background-color: {sentiment_color}22;">
190
+ <div class="d-flex align-items-center">
191
+ <span style="font-size: 3rem; margin-right: 15px;">{sentiment_emoji}</span>
192
+ <div>
193
+ <h3 class="mb-0" style="color: {sentiment_color};">{transformer_label.capitalize()}</h3>
194
+ <p class="mb-0 fs-5">Confidence: {transformer_score:.2%}</p>
195
+ </div>
196
+ </div>
197
+ </div>
198
+ </div>
199
+ """)
200
+
201
+ # Confidence bar
202
+ output_html.append(f"""
203
+ <div style="height: 30px; background-color: #e0e0e0; border-radius: 15px; position: relative; overflow: hidden; margin: 10px 0;">
204
+ <div style="position: absolute; top: 0; bottom: 0; left: 0; width: {transformer_score * 100}%; background-color: {sentiment_color}; border-radius: 5px;"></div>
205
+ <div style="position: absolute; top: 0; bottom: 0; width: 100%; text-align: center; line-height: 30px; color: #000; font-weight: bold;">
206
+ {transformer_score:.1%} Confidence
207
+ </div>
208
+ </div>
209
+ """)
210
+
211
+ except Exception as e:
212
+ output_html.append(f"""
213
+ <div class="alert alert-danger">
214
+ <h4>Transformer Model Error</h4>
215
+ <p>Failed to load or run transformer sentiment model: {str(e)}</p>
216
+ <p>Falling back to VADER results only.</p>
217
+ </div>
218
+ """)
219
+
220
+ # Emotion Analysis
221
+ output_html.append('<h3 class="task-subheader">Emotion Analysis</h3>')
222
+ output_html.append('<p>Identifying specific emotions in text using a RoBERTa model fine-tuned on the emotion dataset.</p>')
223
+
224
+ try:
225
+ # Load emotion classifier
226
+ emotion_classifier = load_emotion_classifier()
227
+
228
+ # Get predictions
229
+ truncated_text = text_input[:max_length * 4] # Rough character estimate
230
+ emotion_result = emotion_classifier(truncated_text)
231
+
232
+ # Extract emotion scores
233
+ emotion_scores = {}
234
+ for item in emotion_result[0]:
235
+ emotion_scores[item['label']] = item['score']
236
+
237
+ # Create emotion dataframe
238
+ emotion_df = pd.DataFrame({
239
+ 'Emotion': list(emotion_scores.keys()),
240
+ 'Score': list(emotion_scores.values())
241
+ }).sort_values('Score', ascending=False)
242
+
243
+ # Get primary emotion
244
+ primary_emotion = emotion_df.iloc[0]['Emotion']
245
+ primary_score = emotion_df.iloc[0]['Score']
246
+
247
+ # Emotion color map
248
+ emotion_colors = {
249
+ 'joy': '#FFD54F',
250
+ 'anger': '#EF5350',
251
+ 'sadness': '#42A5F5',
252
+ 'fear': '#9C27B0',
253
+ 'surprise': '#26C6DA',
254
+ 'love': '#EC407A',
255
+ 'disgust': '#66BB6A',
256
+ 'optimism': '#FF9800',
257
+ 'pessimism': '#795548',
258
+ 'trust': '#4CAF50',
259
+ 'anticipation': '#FF7043',
260
+ 'neutral': '#9E9E9E'
261
+ }
262
+
263
+ # Emotion emoji map
264
+ emotion_emojis = {
265
+ 'joy': '😃',
266
+ 'anger': '😠',
267
+ 'sadness': '😢',
268
+ 'fear': '😨',
269
+ 'surprise': '😲',
270
+ 'love': '❤️',
271
+ 'disgust': '🤢',
272
+ 'optimism': '🤩',
273
+ 'pessimism': '😒',
274
+ 'trust': '🤝',
275
+ 'anticipation': '🤔',
276
+ 'neutral': '😐'
277
+ }
278
+
279
+ # Create bar chart
280
+ fig = plt.figure(figsize=(10, 6))
281
+ bars = plt.barh(
282
+ emotion_df['Emotion'],
283
+ emotion_df['Score'],
284
+ color=[emotion_colors.get(emotion, '#9E9E9E') for emotion in emotion_df['Emotion']]
285
+ )
286
+ plt.xlabel('Score')
287
+ plt.title('Emotion Scores')
288
+
289
+ # Add value labels
290
+ for i, bar in enumerate(bars):
291
+ plt.text(bar.get_width() + 0.01, bar.get_y() + bar.get_height()/2,
292
+ f"{bar.get_width():.2f}", va='center')
293
+
294
+ plt.xlim(0, 1)
295
+ plt.tight_layout()
296
+
297
+ # Chart section
298
+ output_html.append('<section class="emotion-chart-section">')
299
+ output_html.append('<div class="chart-container">')
300
+ output_html.append(fig_to_html(fig))
301
+ output_html.append('</div>')
302
+ output_html.append('</section>')
303
+
304
+ # Primary emotion section
305
+ primary_color = emotion_colors.get(primary_emotion, '#9E9E9E')
306
+ primary_emoji = emotion_emojis.get(primary_emotion, '😐')
307
+
308
+ output_html.append('<section class="emotion-result-container">')
309
+ output_html.append(f"""
310
+ <div class="card" style="border-color: {primary_color};">
311
+ <div class="card-body" style="background-color: {primary_color}22;">
312
+ <div class="d-flex align-items-center">
313
+ <span style="font-size: 3rem; margin-right: 15px;">{primary_emoji}</span>
314
+ <div>
315
+ <h3 class="mb-0" style="color: {primary_color};">{primary_emotion.capitalize()}</h3>
316
+ <p class="mb-0 fs-5">Score: {primary_score:.2f}</p>
317
+ </div>
318
+ </div>
319
+ </div>
320
+ </div>
321
+ """)
322
+
323
+ # Show top emotions table
324
+ output_html.append('<h4>Top Emotions</h4>')
325
+ output_html.append(df_to_html_table(emotion_df.head(5)))
326
+ output_html.append('</section>') # Close emotion result container
327
+
328
+ except Exception as e:
329
+ output_html.append(f"""
330
+ <div class="alert alert-danger">
331
+ <h4>Emotion Analysis Error</h4>
332
+ <p>Failed to load or run emotion classifier: {str(e)}</p>
333
+ </div>
334
+ """)
335
+
336
+ # Sentence-level Analysis
337
+ output_html.append('<h3 class="task-subheader">Sentence-level Analysis</h3>')
338
+ output_html.append('<p>Breaking down sentiment by individual sentences to identify sentiment variations throughout the text.</p>')
339
+
340
+ # Split text into sentences
341
+ sentences = nltk.sent_tokenize(text_input)
342
+
343
+ # Minimum 2 sentences to do the analysis
344
+ if len(sentences) >= 2:
345
+ # Calculate sentiment for each sentence
346
+ sentence_sentiments = []
347
+ for i, sentence in enumerate(sentences):
348
+ vader_score = vader_analyzer.polarity_scores(sentence)
349
+ sentence_sentiments.append({
350
+ 'Sentence': sentence,
351
+ 'Index': i + 1,
352
+ 'Compound': vader_score['compound'],
353
+ 'Positive': vader_score['pos'],
354
+ 'Negative': vader_score['neg'],
355
+ 'Neutral': vader_score['neu'],
356
+ 'Sentiment': 'Positive' if vader_score['compound'] >= 0.05 else 'Negative' if vader_score['compound'] <= -0.05 else 'Neutral'
357
+ })
358
+
359
+ # Create DataFrame
360
+ sent_df = pd.DataFrame(sentence_sentiments)
361
+
362
+ # Create line graph of sentiment flow
363
+ fig = plt.figure(figsize=(10, 6))
364
+ plt.plot(sent_df['Index'], sent_df['Compound'], 'o-', color='#1976D2', linewidth=2, markersize=8)
365
+ plt.axhline(y=0, color='#9E9E9E', linestyle='-', alpha=0.3)
366
+ plt.axhline(y=0.05, color='#4CAF50', linestyle='--', alpha=0.3)
367
+ plt.axhline(y=-0.05, color='#F44336', linestyle='--', alpha=0.3)
368
+
369
+ # Annotate with sentiment
370
+ for i, row in sent_df.iterrows():
371
+ if row['Sentiment'] == 'Positive':
372
+ color = '#4CAF50'
373
+ elif row['Sentiment'] == 'Negative':
374
+ color = '#F44336'
375
+ else:
376
+ color = '#9E9E9E'
377
+
378
+ plt.scatter(row['Index'], row['Compound'], color=color, s=100, zorder=5)
379
+
380
+ plt.grid(alpha=0.3)
381
+ plt.xlabel('Sentence Number')
382
+ plt.ylabel('Compound Sentiment Score')
383
+ plt.title('Sentiment Flow Through Text')
384
+ plt.ylim(-1.05, 1.05)
385
+ plt.tight_layout()
386
+
387
+ # Calculate statistics
388
+ positive_count = sum(1 for score in sent_df['Compound'] if score >= 0.05)
389
+ negative_count = sum(1 for score in sent_df['Compound'] if score <= -0.05)
390
+ neutral_count = len(sent_df) - positive_count - negative_count
391
+
392
+ # Chart section
393
+ output_html.append('<section class="sentence-chart-section">')
394
+ output_html.append('<div class="chart-container">')
395
+ output_html.append(fig_to_html(fig))
396
+ output_html.append('</div>')
397
+ output_html.append('</section>')
398
+
399
+ # Sentence analysis section
400
+ output_html.append('<section class="sentence-analysis-container">')
401
+
402
+ # Create sentence stats
403
+ output_html.append(f"""
404
+ <div class="row mb-3">
405
+ <div class="col-4">
406
+ <div class="card text-center">
407
+ <div class="card-body p-2">
408
+ <h5 class="text-success">{positive_count}</h5>
409
+ <small>Positive</small>
410
+ </div>
411
+ </div>
412
+ </div>
413
+ <div class="col-4">
414
+ <div class="card text-center">
415
+ <div class="card-body p-2">
416
+ <h5 class="text-warning">{neutral_count}</h5>
417
+ <small>Neutral</small>
418
+ </div>
419
+ </div>
420
+ </div>
421
+ <div class="col-4">
422
+ <div class="card text-center">
423
+ <div class="card-body p-2">
424
+ <h5 class="text-danger">{negative_count}</h5>
425
+ <small>Negative</small>
426
+ </div>
427
+ </div>
428
+ </div>
429
+ </div>
430
+ """)
431
+
432
+ # Display sentiment swings
433
+ sentiment_changes = 0
434
+ prev_sentiment = None
435
+ for sentiment in sent_df['Sentiment']:
436
+ if prev_sentiment is not None and sentiment != prev_sentiment:
437
+ sentiment_changes += 1
438
+ prev_sentiment = sentiment
439
+
440
+ if sentiment_changes > 0:
441
+ output_html.append(f"""
442
+ <div class="alert alert-success">
443
+ <p class="mb-0"><b>Sentiment Shifts:</b> {sentiment_changes}</p>
444
+ <p class="mb-0">The text shows {sentiment_changes} shifts in sentiment between sentences.</p>
445
+ </div>
446
+ """)
447
+
448
+ # Show sentence breakdown table
449
+ output_html.append('<h4>Sentence-by-Sentence Analysis</h4>')
450
+
451
+ # Custom HTML table for better formatting
452
+ output_html.append('<div class="table-responsive" style="max-height: 400px;">')
453
+ output_html.append('<table class="table table-striped">')
454
+ output_html.append('<thead><tr><th>#</th><th>Sentence</th><th>Sentiment</th></tr></thead>')
455
+ output_html.append('<tbody>')
456
+
457
+ for i, row in sent_df.iterrows():
458
+ if row['Sentiment'] == 'Positive':
459
+ bg_class = 'table-success'
460
+ sentiment_html = f"""
461
+ <div class="d-flex align-items-center">
462
+ <span class="me-2">😊</span>
463
+ <span class="text-success fw-bold">Positive</span>
464
+ <span class="ms-2 text-muted">({row['Compound']:.2f})</span>
465
+ </div>
466
+ """
467
+ elif row['Sentiment'] == 'Negative':
468
+ bg_class = 'table-danger'
469
+ sentiment_html = f"""
470
+ <div class="d-flex align-items-center">
471
+ <span class="me-2">😞</span>
472
+ <span class="text-danger fw-bold">Negative</span>
473
+ <span class="ms-2 text-muted">({row['Compound']:.2f})</span>
474
+ </div>
475
+ """
476
+ else:
477
+ bg_class = 'table-warning'
478
+ sentiment_html = f"""
479
+ <div class="d-flex align-items-center">
480
+ <span class="me-2">😐</span>
481
+ <span class="text-warning fw-bold">Neutral</span>
482
+ <span class="ms-2 text-muted">({row['Compound']:.2f})</span>
483
+ </div>
484
+ """
485
+
486
+ output_html.append(f'<tr class="{bg_class}">')
487
+ output_html.append(f'<td>{i+1}</td>')
488
+ output_html.append(f'<td>{row["Sentence"]}</td>')
489
+ output_html.append(f'<td>{sentiment_html}</td>')
490
+ output_html.append('</tr>')
491
+
492
+ output_html.append('</tbody></table>')
493
+ output_html.append('</div>')
494
+ output_html.append('</section>') # Close sentence analysis container
495
+ else:
496
+ output_html.append("""
497
+ <div class="alert alert-warning">
498
+ <p class="mb-0">Sentence-level analysis requires at least two sentences. The provided text doesn't have enough sentences for this analysis.</p>
499
+ </div>
500
+ """)
501
+
502
+ except Exception as e:
503
+ output_html.append(f"""
504
+ <div class="alert alert-danger">
505
+ <h3>Error</h3>
506
+ <p>Failed to analyze sentiment: {str(e)}</p>
507
+ </div>
508
+ """)
509
+
510
+ # About Sentiment Analysis section
511
+ output_html.append("""
512
+ <div class="card mt-4">
513
+ <div class="card-header">
514
+ <h4 class="mb-0">
515
+ <i class="fas fa-info-circle"></i>
516
+ About Sentiment Analysis
517
+ </h4>
518
+ </div>
519
+ <div class="card-body">
520
+ <h5>What is Sentiment Analysis?</h5>
521
+
522
+ <p>Sentiment Analysis (also known as opinion mining) is a natural language processing technique that identifies
523
+ and extracts subjective information from text. It determines whether a piece of text expresses positive, negative,
524
+ or neutral sentiment.</p>
525
+
526
+ <h5>Common Approaches:</h5>
527
+
528
+ <ol>
529
+ <li><b>Lexicon-based</b> (like VADER) - Uses dictionaries of words with pre-assigned sentiment scores</li>
530
+ <li><b>Machine learning</b> - Supervised techniques that learn from labeled data</li>
531
+ <li><b>Deep learning</b> (like our Transformer models) - Neural networks that can capture complex patterns and contexts</li>
532
+ </ol>
533
+
534
+ <h5>Applications:</h5>
535
+
536
+ <ul>
537
+ <li><b>Brand monitoring</b> - Track public perception of a brand</li>
538
+ <li><b>Customer feedback analysis</b> - Understand customer satisfaction</li>
539
+ <li><b>Market research</b> - Analyze product reviews and consumer opinions</li>
540
+ <li><b>Social media monitoring</b> - Track public sentiment on topics or events</li>
541
+ <li><b>Stock market prediction</b> - Analyze news sentiment to predict stock movements</li>
542
+ </ul>
543
+ </div>
544
+ </div>
545
+ """)
546
+
547
+ output_html.append('</div>') # Close result-area div
548
+
549
+ return '\n'.join(output_html)
components/summarization.py ADDED
@@ -0,0 +1,541 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib.pyplot as plt
2
+ import pandas as pd
3
+ import numpy as np
4
+ import nltk
5
+ from collections import Counter
6
+ import networkx as nx
7
+ from nltk.corpus import stopwords
8
+ from nltk.tokenize import sent_tokenize, word_tokenize
9
+ from nltk.stem import WordNetLemmatizer
10
+ import re
11
+ from sklearn.feature_extraction.text import TfidfVectorizer
12
+ from matplotlib_venn import venn2
13
+
14
+ from utils.model_loader import load_summarizer
15
+ from utils.helpers import fig_to_html, df_to_html_table
16
+
17
def summarization_handler(text_input, min_length=30, max_length=300, use_sampling=False):
    """Build an HTML report demonstrating abstractive and extractive summarization.

    The report contains, in order: an introduction, the original text, a
    neural (abstractive) summary with key-term statistics, a TextRank
    (extractive) summary with a sentence-importance chart, a comparison of the
    two summaries, and a static "About" card.

    Args:
        text_input: Text to summarize. It is treated as untrusted and is
            HTML-escaped before being embedded in the report.
        min_length: Minimum summary length forwarded to the summarizer.
        max_length: Maximum summary length forwarded to the summarizer.
        use_sampling: When true, the summarizer samples (temperature 0.7,
            top-p 0.9) instead of decoding deterministically.

    Returns:
        A single string of HTML fragments joined by newlines. Failures are
        reported inside the HTML as alert boxes; no exception is propagated.
    """
    # Local import keeps the block self-contained; everything interpolated
    # into the markup below that originates from the user, the model, or an
    # exception message goes through escape().
    from html import escape

    sentence_separator = "SENTBREAKOS.OS"  # placeholder emitted by textrank_summarize

    output_html = []

    # Add result area container
    output_html.append('<div class="result-area">')
    output_html.append('<h2 class="task-header">Text Summarization</h2>')

    output_html.append("""
    <div class="alert alert-info">
        <i class="fas fa-info-circle"></i>
        Text summarization condenses text to capture its main points, enabling quicker comprehension of large volumes of information.
    </div>
    """)

    # Model info
    output_html.append("""
    <div class="alert alert-info">
        <h4><i class="fas fa-tools"></i> Models & Techniques Used:</h4>
        <ul>
            <li><b>Extractive Summarization</b> - Selects important sentences from the original text</li>
            <li><b>Abstractive Summarization</b> - BART model fine-tuned on CNN/DM dataset to generate new summary text</li>
            <li><b>Performance</b> - ROUGE scores of approximately 40-45 on CNN/DM benchmark</li>
        </ul>
    </div>
    """)

    try:
        # Check if text is long enough for summarization
        sentences = nltk.sent_tokenize(text_input)
        word_count = len(text_input.split())

        if len(sentences) < 3 or word_count < 40:
            output_html.append(f"""
            <div class="alert alert-warning">
                <h3>Text Too Short for Summarization</h3>
                <p>The provided text contains only {len(sentences)} sentences and {word_count} words.
                For effective summarization, please provide a longer text (at least 3 sentences and 40 words).</p>
            </div>
            """)
        else:
            # Original Text Section
            output_html.append('<h3 class="task-subheader">Original Text</h3>')
            output_html.append(f"""
            <div class="card">
                <div class="card-body">
                    <div class="text-content" style="word-wrap: break-word; word-break: break-word; overflow-wrap: break-word; max-height: 500px; overflow-y: auto; padding: 15px; background-color: #f8f9fa; border-radius: 5px; border: 1px solid #e9ecef; line-height: 1.6;">{escape(text_input)}</div>
                </div>
            </div>
            <p>Length: {word_count} words.</p>
            """)

            # Neural Summarization Section
            output_html.append('<h3 class="task-subheader">Neural Abstractive Summarization</h3>')
            output_html.append('<p>Using BART model to generate a human-like summary</p>')

            # Parameter summary
            output_html.append(f"""
            <div class="alert alert-light">
                <span><strong>Parameters:</strong> Min Length: {escape(str(min_length))} | Max Length: {escape(str(max_length))} | Sampling: {'Enabled' if use_sampling else 'Disabled'}</span>
            </div>
            """)

            # Stays None when the model is unavailable or fails, so the
            # comparison section further down can be skipped cleanly instead
            # of raising NameError.
            abstractive_summary = None
            abstractive_word_count = 0
            abstractive_reduction = 0.0

            try:
                # Load summarizer model
                summarizer = load_summarizer()

                if summarizer is None:
                    output_html.append("""
                    <div class="alert alert-danger">
                        <p>Failed to load the abstractive summarization model. This may be due to memory constraints or missing dependencies.</p>
                    </div>
                    """)
                else:
                    # Check length limitations
                    max_token_limit = 1024  # BART typically has 1024 token limit

                    # Whitespace word count is only a rough proxy for tokens.
                    if word_count > max_token_limit:
                        output_html.append(f"""
                        <div class="alert alert-warning">
                            <p><b>⚠️ Note:</b> Text exceeds model's length limit. Only the first ~{max_token_limit} tokens will be used for summarization.</p>
                        </div>
                        """)

                    # Generate summary using the specified min_length and max_length.
                    # truncation=True makes the warning above true: over-long
                    # input is cut to the model's limit instead of failing.
                    # NOTE(review): assumes load_summarizer() returns a Hugging
                    # Face summarization pipeline - confirm.
                    abstractive_results = summarizer(
                        text_input,
                        max_length=max_length,
                        min_length=min_length,
                        do_sample=use_sampling,
                        temperature=0.7 if use_sampling else 1.0,
                        top_p=0.9 if use_sampling else 1.0,
                        length_penalty=2.0,
                        truncation=True,
                    )

                    abstractive_summary = abstractive_results[0]['summary_text']

                    # Calculate reduction statistics
                    abstractive_word_count = len(abstractive_summary.split())
                    abstractive_reduction = (1 - abstractive_word_count / word_count) * 100

                    # Summary Results
                    output_html.append(f"""
                    <div class="card">
                        <div class="card-header">
                            <h4 class="mb-0">Neural Summary</h4>
                        </div>
                        <div class="card-body">
                            <div style="line-height: 1.6;">
                                {escape(abstractive_summary)}
                            </div>
                        </div>
                    </div>

                    <div class="row mt-3">
                        <div class="col-md-4">
                            <div class="card text-center">
                                <div class="card-body">
                                    <h5 class="text-muted">Original Length</h5>
                                    <h3 class="text-primary">{word_count} words</h3>
                                </div>
                            </div>
                        </div>
                        <div class="col-md-4">
                            <div class="card text-center">
                                <div class="card-body">
                                    <h5 class="text-muted">Summary Length</h5>
                                    <h3 class="text-success">{abstractive_word_count} words</h3>
                                </div>
                            </div>
                        </div>
                        <div class="col-md-4">
                            <div class="card text-center">
                                <div class="card-body">
                                    <h5 class="text-muted">Compression</h5>
                                    <h3 class="text-info">{abstractive_reduction:.1f}%</h3>
                                </div>
                            </div>
                        </div>
                    </div>
                    """)

                    # Key Terms & Topics Section
                    output_html.append('<h3 class="task-subheader">Key Topics & Terms</h3>')

                    # Extract key terms with TF-IDF
                    key_terms = extract_key_terms(text_input, n=10)

                    # Create layout stacked vertically: table first, then chart
                    output_html.append('<div class="row">')

                    # Row 1: Key terms table (full width)
                    output_html.append('<div class="col-12">')
                    output_html.append('<h4>Key Terms</h4>')

                    # Create key terms table
                    terms_df = pd.DataFrame({
                        '#': range(1, len(key_terms) + 1),
                        'Keyword': [term[0] for term in key_terms],
                        'TF-IDF Score': [f"{term[1]:.4f}" for term in key_terms]
                    })

                    output_html.append(df_to_html_table(terms_df))
                    output_html.append('</div>')  # Close row 1 column
                    output_html.append('</div>')  # Close row 1

                    # Row 2: Term importance chart (full width)
                    output_html.append('<div class="row mt-3">')
                    output_html.append('<div class="col-12">')
                    output_html.append('<h4>Term Importance</h4>')

                    # Create horizontal bar chart of key terms
                    fig = plt.figure(figsize=(10, 8))

                    # Ascending sort so the highest score ends up as the top bar
                    sorted_data = sorted(key_terms, key=lambda pair: pair[1])
                    terms = [pair[0] for pair in sorted_data]
                    scores = [pair[1] for pair in sorted_data]

                    # Create horizontal bar chart
                    plt.barh(terms, scores, color='#1976D2')
                    plt.xlabel('TF-IDF Score')
                    plt.ylabel('Keyword')
                    plt.title('Key Terms by TF-IDF Score')
                    plt.tight_layout()

                    output_html.append(fig_to_html(fig))

                    output_html.append('</div>')  # Close row 2 column
                    output_html.append('</div>')  # Close row 2

            except Exception as e:
                output_html.append(f"""
                <div class="alert alert-danger">
                    <h4>Abstractive Summarization Error</h4>
                    <p>Failed to perform abstractive summarization: {escape(str(e))}</p>
                </div>
                """)

            # Extractive Summarization
            output_html.append('<h3 class="task-subheader">Extractive Summarization</h3>')
            output_html.append("""
            <div class="alert alert-light">
                <p class="mb-0">
                    Extractive summarization works by identifying important sentences in the text and extracting them to form a summary.
                    This implementation uses a variant of the TextRank algorithm, which is based on Google's PageRank.
                </p>
            </div>
            """)

            # Perform TextRank Summarization
            raw_extractive = textrank_summarize(
                text_input,
                num_sentences=min(3, max(1, len(sentences) // 3)),
            )

            # Record which sentences were picked BEFORE the separator is
            # stripped; once it is gone the selection cannot be recovered.
            selected_texts = set(raw_extractive.split(sentence_separator))

            # Clean up the placeholder separator
            extractive_summary = raw_extractive.replace(sentence_separator, " ")

            # Calculate reduction statistics
            extractive_word_count = len(extractive_summary.split())
            extractive_reduction = (1 - extractive_word_count / word_count) * 100

            output_html.append(f"""
            <div class="alert alert-success">
                <h4>Extractive Summary ({extractive_reduction:.1f}% reduction)</h4>
                <div style="line-height: 1.6;">
                    {escape(extractive_summary)}
                </div>
            </div>
            """)

            # Sentence importance visualization
            output_html.append('<h4>Sentence Importance</h4>')
            output_html.append('<p>The graph below shows the relative importance of each sentence based on the TextRank algorithm:</p>')

            # Get sentence scores from TextRank
            sentence_scores = textrank_sentence_scores(text_input)

            # Sort sentences by their original order ("sentence_<n>" keys)
            sentence_items = list(sentence_scores.items())
            sentence_items.sort(key=lambda x: int(x[0].split('_')[1]))

            # Create visualization
            fig = plt.figure(figsize=(10, 6))
            bars = plt.bar(
                [f"Sent {item[0].split('_')[1]}" for item in sentence_items],
                [item[1] for item in sentence_items],
                color='#1976D2'
            )

            # Highlight selected sentences. Matching is by sentence text, so a
            # sentence repeated verbatim in the input is highlighted at every
            # occurrence.
            selected_indices = {
                number
                for number, sentence in enumerate(sentences, start=1)
                if sentence in selected_texts
            }
            for i, bar in enumerate(bars):
                if i + 1 in selected_indices:
                    bar.set_color('#4CAF50')
                    plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.02,
                             'Selected', ha='center', va='bottom', fontsize=8, rotation=90)

            plt.xlabel('Sentence')
            plt.ylabel('Importance Score')
            plt.title('Sentence Importance Based on TextRank')
            plt.xticks(rotation=45)
            plt.tight_layout()

            output_html.append(fig_to_html(fig))

            # Compare the two approaches
            output_html.append('<h3 class="task-subheader">Summary Comparison</h3>')

            if abstractive_summary is None:
                # There is nothing to compare against; say so explicitly
                # rather than failing with a NameError.
                output_html.append("""
                <div class="alert alert-warning">
                    <p class="mb-0">Summary comparison is unavailable because the abstractive summary could not be generated.</p>
                </div>
                """)
            else:
                # Calculate overlap between summaries
                extractive_words = set(re.findall(r'\b\w+\b', extractive_summary.lower()))
                abstractive_words = set(re.findall(r'\b\w+\b', abstractive_summary.lower()))
                common_words = extractive_words & abstractive_words

                if extractive_words and abstractive_words:
                    overlap_percentage = len(common_words) / ((len(extractive_words) + len(abstractive_words)) / 2) * 100
                else:
                    overlap_percentage = 0

                extractive_sentence_count = len(nltk.sent_tokenize(extractive_summary))
                abstractive_sentence_count = len(nltk.sent_tokenize(abstractive_summary))

                # Create comparison table
                comparison_data = {
                    'Metric': ['Word Count', 'Reduction %', 'Sentences', 'Words per Sentence', 'Unique Words'],
                    'Extractive': [
                        extractive_word_count,
                        f"{extractive_reduction:.1f}%",
                        extractive_sentence_count,
                        f"{extractive_word_count / max(1, extractive_sentence_count):.1f}",
                        len(extractive_words)
                    ],
                    'Abstractive': [
                        abstractive_word_count,
                        f"{abstractive_reduction:.1f}%",
                        abstractive_sentence_count,
                        f"{abstractive_word_count / max(1, abstractive_sentence_count):.1f}",
                        len(abstractive_words)
                    ]
                }

                comparison_df = pd.DataFrame(comparison_data)

                output_html.append('<div class="row">')

                # Column 1: Comparison table
                output_html.append('<div class="col-md-6">')
                output_html.append('<h4>Summary Statistics</h4>')
                output_html.append(df_to_html_table(comparison_df))
                output_html.append('</div>')

                # Column 2: Venn diagram of word overlap
                output_html.append('<div class="col-md-6">')
                output_html.append('<h4>Word Overlap Visualization</h4>')

                # Create Venn diagram
                fig = plt.figure(figsize=(8, 6))
                venn = venn2(
                    subsets=(
                        len(extractive_words - abstractive_words),
                        len(abstractive_words - extractive_words),
                        len(common_words)
                    ),
                    set_labels=('Extractive', 'Abstractive')
                )

                # Set colors. A region with no members has no patch, so guard
                # against None before styling it.
                for region_id, region_color in (('10', '#4CAF50'), ('01', '#03A9F4'), ('11', '#9C27B0')):
                    patch = venn.get_patch_by_id(region_id)
                    if patch is not None:
                        patch.set_color(region_color)

                plt.title('Word Overlap Between Summaries')
                plt.text(0, -0.25, f"Overlap: {overlap_percentage:.1f}%", ha='center')

                output_html.append(fig_to_html(fig))

                # Show key shared and unique words (sorted so the report is
                # reproducible; set iteration order is arbitrary).
                shared_words_list = sorted(common_words)
                extractive_only = sorted(extractive_words - abstractive_words)
                abstractive_only = sorted(abstractive_words - extractive_words)

                # Limit the number of words shown
                max_words = 10

                shared_badges = ' '.join(
                    f'<span class="badge bg-primary">{word}</span>' for word in shared_words_list[:max_words]
                )
                extractive_badges = ' '.join(
                    f'<span class="badge bg-success">{word}</span>' for word in extractive_only[:max_words]
                )
                abstractive_badges = ' '.join(
                    f'<span class="badge bg-info">{word}</span>' for word in abstractive_only[:max_words]
                )

                output_html.append(f"""
                <div class="mt-3">
                    <h5>Key Shared Words ({min(max_words, len(shared_words_list))} of {len(shared_words_list)})</h5>
                    <div class="d-flex flex-wrap gap-1 mb-2">
                        {shared_badges}
                    </div>

                    <h5>Unique to Extractive ({min(max_words, len(extractive_only))} of {len(extractive_only)})</h5>
                    <div class="d-flex flex-wrap gap-1 mb-2">
                        {extractive_badges}
                    </div>

                    <h5>Unique to Abstractive ({min(max_words, len(abstractive_only))} of {len(abstractive_only)})</h5>
                    <div class="d-flex flex-wrap gap-1 mb-2">
                        {abstractive_badges}
                    </div>
                </div>
                """)

                output_html.append('</div>')  # Close column 2
                output_html.append('</div>')  # Close row

    except Exception as e:
        # Top-level boundary for the handler: report the failure in the page
        # instead of propagating it to the web framework.
        output_html.append(f"""
        <div class="alert alert-danger">
            <h3>Error</h3>
            <p>Failed to summarize text: {escape(str(e))}</p>
        </div>
        """)

    # About Text Summarization section
    output_html.append("""
    <div class="card mt-4">
        <div class="card-header">
            <h4 class="mb-0">
                <i class="fas fa-info-circle"></i>
                About Text Summarization
            </h4>
        </div>
        <div class="card-body">
            <h5>What is Text Summarization?</h5>

            <p>Text summarization is the process of creating a shorter version of a text while preserving its key information
            and meaning. It helps users quickly grasp the main points without reading the entire document.</p>

            <h5>Two Main Approaches:</h5>

            <ul>
                <li><b>Extractive Summarization:</b> Selects and extracts existing sentences from the source text based on their importance</li>
                <li><b>Abstractive Summarization:</b> Generates new sentences that capture the meaning of the source text (similar to how humans write summaries)</li>
            </ul>

            <h5>Applications:</h5>

            <ul>
                <li><b>News digests</b> - Quick summaries of news articles</li>
                <li><b>Research papers</b> - Condensing long academic papers</li>
                <li><b>Legal documents</b> - Summarizing complex legal text</li>
                <li><b>Meeting notes</b> - Extracting key points from discussions</li>
                <li><b>Content curation</b> - Creating snippets for content recommendations</li>
            </ul>
        </div>
    </div>
    """)

    output_html.append('</div>')  # Close result-area div

    return '\n'.join(output_html)
434
+
435
def extract_key_terms(text, n=10):
    """Return up to ``n`` (term, score) pairs ranked by TF-IDF weight.

    The text is lower-cased and tokenized; tokens that are not alphanumeric,
    are English stopwords, or have two or fewer characters are dropped, and
    the rest are lemmatized. The result is vectorized as a single document.

    On any failure the error is printed and ``n`` placeholder pairs
    ``("term", 0.0)`` are returned instead of raising.
    """
    try:
        english_stops = set(stopwords.words('english'))
        lemmatize = WordNetLemmatizer().lemmatize

        # Filter first, lemmatize second: the stopword/length checks apply to
        # the surface token, not to its lemma.
        kept_tokens = []
        for token in word_tokenize(text.lower()):
            if token.isalnum() and token not in english_stops and len(token) > 2:
                kept_tokens.append(lemmatize(token))

        # The whole cleaned text is treated as one document.
        tfidf = TfidfVectorizer(max_features=100)
        weight_matrix = tfidf.fit_transform([' '.join(kept_tokens)])

        vocabulary = tfidf.get_feature_names_out()
        weights = weight_matrix.toarray()[0]

        # Highest weight first; the sort is stable so ties keep vocabulary order.
        ranked = sorted(zip(vocabulary, weights), key=lambda pair: pair[1], reverse=True)
        return ranked[:n]
    except Exception as e:
        print(f"Error extracting key terms: {str(e)}")
        return [("term", 0.0) for _ in range(n)]  # Return empty placeholder
466
+
467
+ # TextRank extractive summarization algorithm
468
def textrank_summarize(text, num_sentences=3):
    """Build an extractive summary from the top-scoring TextRank sentences.

    Returns ``text`` unchanged when it has no more than ``num_sentences``
    sentences. Otherwise the ``num_sentences`` highest-scoring sentences are
    returned in their original order, joined by the literal placeholder
    ``"SENTBREAKOS.OS"`` (callers are expected to replace it).
    """
    parts = sent_tokenize(text)

    # Nothing to condense: the text is already at or below the target size.
    if num_sentences >= len(parts):
        return text

    weights = textrank_sentence_scores(text)

    # Pair each sentence with its score; scores are consumed in the order the
    # scoring function yields them, which is assumed to follow sentence order.
    scored = []
    for position, (sentence, weight) in enumerate(zip(parts, weights.values())):
        scored.append((weight, position, sentence))

    # Best first, then restore reading order among the winners.
    scored.sort(reverse=True)
    winners = scored[:num_sentences]
    winners.sort(key=lambda entry: entry[1])

    return "SENTBREAKOS.OS".join([entry[2] for entry in winners])
490
+
491
def textrank_sentence_scores(text):
    """Score every sentence of ``text`` with PageRank over a similarity graph.

    Each sentence becomes a node named ``"sentence_<k>"`` (1-based). Two nodes
    are linked when ``sentence_similarity`` of their lower-cased, alphanumeric,
    non-stopword tokens is positive, with that similarity as the edge weight.

    Returns:
        The mapping produced by ``nx.pagerank`` for the graph, keyed by node
        name.
    """
    sentence_list = sent_tokenize(text)
    node_names = [f"sentence_{number}" for number, _ in enumerate(sentence_list, start=1)]

    graph = nx.Graph()
    graph.add_nodes_from(node_names)

    # Reduce each sentence to its content words.
    english_stops = set(stopwords.words('english'))
    token_bags = []
    for sentence in sentence_list:
        lowered = (token.lower() for token in word_tokenize(sentence) if token.isalnum())
        token_bags.append([token for token in lowered if token not in english_stops])

    # Connect every pair of sentences that shares at least one content word.
    total = len(node_names)
    for first, first_name in enumerate(node_names):
        for second in range(first + 1, total):
            overlap = sentence_similarity(token_bags[first], token_bags[second])
            if overlap > 0:
                graph.add_edge(first_name, node_names[second], weight=overlap)

    return nx.pagerank(graph)
525
+
526
def sentence_similarity(words1, words2):
    """Return the Jaccard similarity of two token lists.

    The score is the number of distinct shared words divided by the number of
    distinct words overall. If either list is empty the result is ``0``.
    """
    if words1 and words2:
        left, right = set(words1), set(words2)
        combined = left | right
        if combined:
            return len(left & right) / len(combined)
    return 0
components/text_generation.py ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib.pyplot as plt
2
+ import pandas as pd
3
+ import nltk
4
+ import time
5
+
6
+ from utils.model_loader import load_text_generator
7
+ from utils.helpers import fig_to_html, df_to_html_table
8
+
9
+ def text_generation_handler(text_input, max_length=100, temperature=0.7, top_p=0.9, num_sequences=1):
10
+ """Show text generation capabilities."""
11
+ output_html = []
12
+
13
+ # Add result area container
14
+ output_html.append('<div class="result-area">')
15
+ output_html.append('<h2 class="task-header">Text Generation</h2>')
16
+
17
+ output_html.append("""
18
+ <div class="alert alert-info">
19
+ <i class="fas fa-info-circle"></i>
20
+ Text generation models can continue or expand on a given text prompt, creating new content that follows the style and context of the input.
21
+ </div>
22
+ """)
23
+
24
+ # Model info
25
+ output_html.append("""
26
+ <div class="alert alert-info">
27
+ <h4><i class="fas fa-tools"></i> Model Used:</h4>
28
+ <ul>
29
+ <li><b>GPT-2</b> - 124M parameter language model trained on a diverse corpus of internet text</li>
30
+ <li><b>Capabilities</b> - Can generate coherent text continuations and completions</li>
31
+ <li><b>Limitations</b> - May occasionally produce repetitive or nonsensical content</li>
32
+ </ul>
33
+ </div>
34
+ """)
35
+
36
+ try:
37
+ # Check text length and possibly truncate
38
+ MAX_PROMPT_LENGTH = 100 # tokens
39
+
40
+ # Count tokens (rough approximation)
41
+ token_count = len(text_input.split())
42
+
43
+ # Truncate if necessary
44
+ if token_count > MAX_PROMPT_LENGTH:
45
+ prompt_text = " ".join(text_input.split()[:MAX_PROMPT_LENGTH])
46
+ output_html.append("""
47
+ <div class="alert alert-warning">
48
+ <p class="mb-0">⚠️ Text truncated to approximately 100 tokens for better generation results.</p>
49
+ </div>
50
+ """)
51
+ else:
52
+ prompt_text = text_input
53
+
54
+ # Display prompt
55
+ output_html.append('<h3 class="task-subheader">Prompt</h3>')
56
+ output_html.append(f'<div class="card"><div class="card-body">{prompt_text}</div></div>')
57
+
58
+ # Load model
59
+ text_generator = load_text_generator()
60
+
61
+ # Set up generation parameters
62
+ generation_kwargs = {
63
+ "max_length": token_count + max_length,
64
+ "num_return_sequences": num_sequences,
65
+ "temperature": temperature,
66
+ "top_p": top_p,
67
+ "do_sample": True,
68
+ "no_repeat_ngram_size": 2,
69
+ "pad_token_id": 50256 # GPT-2's pad token ID
70
+ }
71
+
72
+ # Generate text
73
+ start_time = time.time()
74
+ result = text_generator(prompt_text, **generation_kwargs)
75
+ generation_time = time.time() - start_time
76
+
77
+ # Display results
78
+ output_html.append('<h3 class="task-subheader">Generated Text</h3>')
79
+
80
+ for i, sequence in enumerate(result):
81
+ generated_text = sequence['generated_text']
82
+ new_text = generated_text[len(prompt_text):]
83
+
84
+ # Display in a nice format with the prompt and generated text distinguished
85
+ if num_sequences > 1:
86
+ output_html.append(f'<h4>Version {i+1}</h4>')
87
+
88
+ output_html.append(f"""
89
+ <div class="card">
90
+ <div class="card-body">
91
+ <span class="text-muted">{prompt_text}</span>
92
+ <span class="text-primary fw-bold">{new_text}</span>
93
+ </div>
94
+ </div>
95
+ """)
96
+
97
+ # Generation stats for this sequence
98
+ prompt_tokens = len(prompt_text.split())
99
+ gen_tokens = len(new_text.split())
100
+
101
+ # Calculate average word length as a crude complexity metric
102
+ avg_word_len = sum(len(word) for word in new_text.split()) / max(1, len(new_text.split()))
103
+
104
+ output_html.append(f"""
105
+ <div class="alert alert-success">
106
+ <h4 class="mb-3">Generation Statistics</h4>
107
+ <div class="row">
108
+ <div class="col-md-6">
109
+ <p><b>Prompt length:</b> {prompt_tokens} tokens</p>
110
+ <p><b>Generated length:</b> {gen_tokens} tokens</p>
111
+ <p><b>Total length:</b> {prompt_tokens + gen_tokens} tokens</p>
112
+ </div>
113
+ <div class="col-md-6">
114
+ <p><b>Temperature:</b> {temperature}</p>
115
+ <p><b>Top-p:</b> {top_p}</p>
116
+ <p><b>Avg word length:</b> {avg_word_len:.2f} characters</p>
117
+ </div>
118
+ </div>
119
+ <p><b>Generation time:</b> {generation_time:.2f} seconds</p>
120
+ </div>
121
+ """)
122
+
123
+ # Option to see full text
124
+ output_html.append(f"""
125
+ <div class="card">
126
+ <div class="card-header">
127
+ <h5 class="mb-0">
128
+ <button class="btn btn-link" type="button" data-bs-toggle="collapse" data-bs-target="#fullText{i}" aria-expanded="false">
129
+ Show full text (copy-paste friendly)
130
+ </button>
131
+ </h5>
132
+ </div>
133
+ <div class="collapse" id="fullText{i}">
134
+ <div class="card-body">
135
+ <div class="text-content" style="word-wrap: break-word; word-break: break-word; overflow-wrap: break-word; max-height: 500px; overflow-y: auto; padding: 15px; background-color: #f8f9fa; border-radius: 5px; border: 1px solid #e9ecef; line-height: 1.6;">{generated_text}</div>
136
+ </div>
137
+ </div>
138
+ </div>
139
+ """)
140
+
141
+ # Generate a text complexity analysis
142
+ if len(result) > 0:
143
+ output_html.append('<h3 class="task-subheader">Text Analysis</h3>')
144
+
145
+ # Get the first generated text for analysis
146
+ full_text = result[0]['generated_text']
147
+ prompt_words = prompt_text.split()
148
+ full_words = full_text.split()
149
+ generated_words = full_words[len(prompt_words):]
150
+
151
+ # Analyze word length distribution
152
+ prompt_word_lengths = [len(word) for word in prompt_words]
153
+ generated_word_lengths = [len(word) for word in generated_words]
154
+
155
+ # Create comparison chart
156
+ fig, ax = plt.subplots(figsize=(10, 5))
157
+
158
+ # Plot histograms
159
+ bins = range(1, 16) # Word lengths from 1 to 15
160
+ ax.hist(prompt_word_lengths, bins=bins, alpha=0.7, label='Prompt', color='#1976D2')
161
+ ax.hist(generated_word_lengths, bins=bins, alpha=0.7, label='Generated', color='#4CAF50')
162
+
163
+ ax.set_xlabel('Word Length (characters)')
164
+ ax.set_ylabel('Frequency')
165
+ ax.set_title('Word Length Distribution: Prompt vs Generated')
166
+ ax.legend()
167
+ ax.grid(alpha=0.3)
168
+
169
+ output_html.append(fig_to_html(fig))
170
+
171
+ # Calculate some linguistic statistics
172
+ prompt_avg_word_len = sum(prompt_word_lengths) / len(prompt_word_lengths) if prompt_word_lengths else 0
173
+ generated_avg_word_len = sum(generated_word_lengths) / len(generated_word_lengths) if generated_word_lengths else 0
174
+
175
+ # Create comparison table
176
+ stats_data = {
177
+ 'Metric': ['Word count', 'Average word length', 'Unique words', 'Lexical diversity*'],
178
+ 'Prompt': [
179
+ len(prompt_words),
180
+ f"{prompt_avg_word_len:.2f}",
181
+ len(set(word.lower() for word in prompt_words)),
182
+ f"{len(set(word.lower() for word in prompt_words)) / len(prompt_words):.2f}" if prompt_words else "0"
183
+ ],
184
+ 'Generated': [
185
+ len(generated_words),
186
+ f"{generated_avg_word_len:.2f}",
187
+ len(set(word.lower() for word in generated_words)),
188
+ f"{len(set(word.lower() for word in generated_words)) / len(generated_words):.2f}" if generated_words else "0"
189
+ ]
190
+ }
191
+
192
+ stats_df = pd.DataFrame(stats_data)
193
+
194
+ output_html.append('<div class="mt-3">')
195
+ output_html.append(df_to_html_table(stats_df))
196
+ output_html.append('<p><small>*Lexical diversity = unique words / total words</small></p>')
197
+ output_html.append('</div>')
198
+
199
+ # Show tips for better results
200
+ output_html.append("""
201
+ <div class="alert alert-info">
202
+ <h4>Tips for Better Generation Results</h4>
203
+ <ul class="mb-0">
204
+ <li><b>Be specific</b> - More detailed prompts give the model better context</li>
205
+ <li><b>Format matters</b> - If you want a list, start with a list item; if you want dialogue, include dialogue format</li>
206
+ <li><b>Play with temperature</b> - Lower values (0.3-0.5) for focused, consistent text; higher values (0.7-1.0) for creative, varied output</li>
207
+ <li><b>Try multiple generations</b> - Generate several options to pick the best result</li>
208
+ </ul>
209
+ </div>
210
+ """)
211
+
212
+ except Exception as e:
213
+ output_html.append(f"""
214
+ <div class="alert alert-danger">
215
+ <h3>Error</h3>
216
+ <p>Failed to generate text: {str(e)}</p>
217
+ </div>
218
+ """)
219
+
220
+ # About Text Generation section
221
+ output_html.append("""
222
+ <div class="card mt-4">
223
+ <div class="card-header">
224
+ <h4 class="mb-0">
225
+ <i class="fas fa-info-circle"></i>
226
+ About Text Generation
227
+ </h4>
228
+ </div>
229
+ <div class="card-body">
230
+ <h5>What is Text Generation?</h5>
231
+
232
+ <p>Text generation is the task of creating human-like text using machine learning models. Modern text generation
233
+ systems use large neural networks trained on vast amounts of text data to predict the next tokens in a sequence.</p>
234
+
235
+ <h5>How It Works:</h5>
236
+
237
+ <ol>
238
+ <li><b>Training</b> - Models learn patterns in language by predicting the next word in billions of text examples</li>
239
+ <li><b>Prompting</b> - You provide a starting text that gives context and direction</li>
240
+ <li><b>Generation</b> - The model repeatedly predicts the most likely next token based on previous context</li>
241
+ <li><b>Sampling</b> - Various techniques (temperature, top-p) control the randomness and creativity of output</li>
242
+ </ol>
243
+
244
+ <h5>Applications:</h5>
245
+
246
+ <ul>
247
+ <li><b>Content creation</b> - Drafting articles, stories, and marketing copy</li>
248
+ <li><b>Assistive writing</b> - Helping with email drafting, summarization, and editing</li>
249
+ <li><b>Conversational AI</b> - Powering chatbots and digital assistants</li>
250
+ <li><b>Code generation</b> - Assisting developers with coding tasks</li>
251
+ <li><b>Creative writing</b> - Generating stories, poetry, and other creative content</li>
252
+ </ul>
253
+ </div>
254
+ </div>
255
+ """)
256
+
257
+ output_html.append('</div>') # Close result-area div
258
+
259
+ return '\n'.join(output_html)
components/tokenization.py ADDED
@@ -0,0 +1,460 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib.pyplot as plt
2
+ import pandas as pd
3
+ import nltk
4
+ import re
5
+ from collections import Counter
6
+ from nltk.tokenize import word_tokenize, sent_tokenize
7
+ import spacy
8
+
9
+ from utils.model_loader import load_spacy, download_nltk_resources
10
+ from utils.helpers import fig_to_html, df_to_html_table
11
+
12
def tokenization_handler(text_input):
    """Build the HTML report for the tokenization demo.

    The report shows NLTK word and sentence tokenization, a spaCy token table
    with a part-of-speech chart, and BERT WordPiece / GPT-2 BPE subword
    tokenization followed by comparison charts and summary statistics.

    Args:
        text_input: The user-supplied text to tokenize.

    Returns:
        A single HTML string (all fragments joined with newlines). Failures
        are rendered as alert boxes inside the report rather than raised.
    """
    # Function-scope import: all text derived from the user's input (and
    # exception messages) is escaped before being interpolated into markup.
    from html import escape

    output_html = []

    # Add result area container
    output_html.append('<div class="result-area">')
    output_html.append('<h2 class="task-header">Tokenization</h2>')

    output_html.append("""
    <div class="alert alert-info">
        <i class="fas fa-info-circle"></i>
        Tokenization is the process of breaking text into smaller units called tokens, which can be words, characters, or subwords.
    </div>
    """)

    # Model info
    output_html.append("""
    <div class="alert alert-info">
        <h4><i class="fas fa-tools"></i> Tools Used:</h4>
        <ul>
            <li><b>NLTK</b> - Natural Language Toolkit for basic word and sentence tokenization</li>
            <li><b>spaCy</b> - Advanced tokenization with linguistic features</li>
            <li><b>WordPiece</b> - Subword tokenization used by BERT and other transformers</li>
        </ul>
    </div>
    """)

    try:
        # Ensure NLTK resources are downloaded
        download_nltk_resources()

        # Original Text
        output_html.append('<h3 class="task-subheader">Original Text</h3>')
        output_html.append(f'<div class="card"><div class="card-body"><div class="text-content" style="word-wrap: break-word; word-break: break-word; overflow-wrap: break-word; max-height: 500px; overflow-y: auto; padding: 15px; background-color: #f8f9fa; border-radius: 5px; border: 1px solid #e9ecef; line-height: 1.6;">{escape(text_input)}</div></div></div>')

        # Word Tokenization
        output_html.append('<h3 class="task-subheader">Word Tokenization</h3>')
        output_html.append('<p>Breaking text into individual words and punctuation marks.</p>')

        # NLTK Word Tokenization
        nltk_tokens = word_tokenize(text_input)

        # Format tokens (escaped, since tokens such as "<" are valid input)
        token_html = "".join(
            f'<span class="token">{escape(token)}</span>' for token in nltk_tokens
        )

        output_html.append(f"""
        <div class="card">
            <div class="card-body">
                <div style="background-color: #f5f5f5; padding: 15px; border-radius: 5px; line-height: 2.5;">
                    {token_html}
                </div>
            </div>
        </div>
        <style>
            .token {{
                background-color: #E3F2FD;
                border: 1px solid #1976D2;
                border-radius: 4px;
                padding: 3px 6px;
                margin: 3px;
                display: inline-block;
            }}
        </style>
        """)

        # Token statistics
        token_count = len(nltk_tokens)
        unique_tokens = len({t.lower() for t in nltk_tokens})
        alpha_only = sum(1 for t in nltk_tokens if t.isalpha())
        numeric = sum(1 for t in nltk_tokens if t.isnumeric())
        punct = sum(1 for t in nltk_tokens if all(c in '.,;:!?-"\'()[]{}' for c in t))

        output_html.append(f"""
        <div class="row mt-3">
            <div class="col-md-2">
                <div class="card text-center">
                    <div class="card-body">
                        <h5 class="text-primary">{token_count}</h5>
                        <small>Total Tokens</small>
                    </div>
                </div>
            </div>
            <div class="col-md-2">
                <div class="card text-center">
                    <div class="card-body">
                        <h5 class="text-success">{unique_tokens}</h5>
                        <small>Unique Tokens</small>
                    </div>
                </div>
            </div>
            <div class="col-md-2">
                <div class="card text-center">
                    <div class="card-body">
                        <h5 class="text-info">{alpha_only}</h5>
                        <small>Alphabetic</small>
                    </div>
                </div>
            </div>
            <div class="col-md-2">
                <div class="card text-center">
                    <div class="card-body">
                        <h5 class="text-warning">{numeric}</h5>
                        <small>Numeric</small>
                    </div>
                </div>
            </div>
            <div class="col-md-2">
                <div class="card text-center">
                    <div class="card-body">
                        <h5 class="text-danger">{punct}</h5>
                        <small>Punctuation</small>
                    </div>
                </div>
            </div>
        </div>
        """)

        # Sentence Tokenization
        output_html.append('<h3 class="task-subheader">Sentence Tokenization</h3>')
        output_html.append('<p>Dividing text into individual sentences.</p>')

        # NLTK Sentence Tokenization
        nltk_sentences = sent_tokenize(text_input)

        # Format sentences
        sentence_html = "".join(
            f'<div class="sentence"><span class="sentence-num">{i}</span> {escape(sentence)}</div>'
            for i, sentence in enumerate(nltk_sentences, start=1)
        )

        output_html.append(f"""
        <div class="card">
            <div class="card-body">
                <div style="background-color: #f5f5f5; padding: 15px; border-radius: 5px;">
                    {sentence_html}
                </div>
            </div>
        </div>
        <style>
            .sentence {{
                background-color: #E1F5FE;
                border-left: 3px solid #03A9F4;
                padding: 10px;
                margin: 8px 0;
                border-radius: 0 5px 5px 0;
                position: relative;
            }}
            .sentence-num {{
                font-weight: bold;
                color: #0277BD;
                margin-right: 5px;
            }}
        </style>
        """)

        # Guard the average: text with no sentences (e.g. empty input) would
        # otherwise raise ZeroDivisionError and abort the whole report.
        tokens_per_sentence = token_count / len(nltk_sentences) if nltk_sentences else 0.0

        output_html.append(f'<p class="mt-3">Text contains {len(nltk_sentences)} sentences with an average of {tokens_per_sentence:.1f} tokens per sentence.</p>')

        # Advanced Tokenization with spaCy
        output_html.append('<h3 class="task-subheader">Linguistic Tokenization (spaCy)</h3>')
        output_html.append('<p>spaCy provides more linguistically-aware tokenization with additional token properties.</p>')

        # Load spaCy model
        nlp = load_spacy()
        doc = nlp(text_input)

        # Token table header
        output_html.append("""
        <div class="table-responsive">
            <table class="table table-striped table-hover">
                <thead class="table-primary sticky-top">
                    <tr>
                        <th>Token</th>
                        <th>Lemma</th>
                        <th>POS</th>
                        <th>Tag</th>
                        <th>Dependency</th>
                        <th>Properties</th>
                    </tr>
                </thead>
                <tbody>
        """)

        for token in doc:
            # Determine row color based on token type
            row_class = ""
            if token.is_stop:
                row_class = "table-danger"  # Light red for stopwords
            elif token.pos_ == "VERB":
                row_class = "table-success"  # Light green for verbs
            elif token.pos_ in ("NOUN", "PROPN"):
                row_class = "table-primary"  # Light blue for nouns
            elif token.pos_ == "ADJ":
                row_class = "table-warning"  # Light yellow for adjectives

            alpha_badge = 'bg-success' if token.is_alpha else 'bg-danger'
            alpha_label = 'Alpha' if token.is_alpha else 'Non-alpha'
            stop_badge = 'bg-danger' if token.is_stop else 'bg-success'
            stop_label = 'Stopword' if token.is_stop else 'Content'

            output_html.append(f"""
                    <tr class="{row_class}">
                        <td><strong>{escape(token.text)}</strong></td>
                        <td>{escape(token.lemma_)}</td>
                        <td>{escape(token.pos_)}</td>
                        <td>{escape(token.tag_)}</td>
                        <td>{escape(token.dep_)}</td>
                        <td>
                            <span class="badge {alpha_badge}">
                                {alpha_label}
                            </span>
                            <span class="badge {stop_badge}">
                                {stop_label}
                            </span>
                            <span class="badge bg-info">
                                Shape: {escape(token.shape_)}
                            </span>
                        </td>
                    </tr>
            """)

        output_html.append("""
                </tbody>
            </table>
        </div>
        """)

        # Create visualization for POS distribution
        pos_counts = Counter(token.pos_ for token in doc)

        # Create bar chart for POS distribution
        fig = plt.figure(figsize=(10, 6))
        plt.bar(pos_counts.keys(), pos_counts.values(), color='#1976D2')
        plt.xlabel('Part of Speech')
        plt.ylabel('Count')
        plt.title('Part-of-Speech Distribution')
        plt.xticks(rotation=45)
        plt.tight_layout()

        output_html.append('<h4>Token Distribution by Part of Speech</h4>')
        output_html.append(fig_to_html(fig))

        # Subword Tokenization
        output_html.append('<h3 class="task-subheader">Subword Tokenization (WordPiece/BPE)</h3>')
        output_html.append("""
        <div class="alert alert-light">
            <p>
                Subword tokenization breaks words into smaller units to handle rare words and morphologically rich languages.
                This technique is widely used in modern transformer models like BERT, GPT, etc.
            </p>
        </div>
        """)

        try:
            from transformers import BertTokenizer, GPT2Tokenizer

            # Load tokenizers
            bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
            gpt2_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

            # Tokenize with BERT
            bert_tokens = bert_tokenizer.tokenize(text_input)

            # Tokenize with GPT-2. tokenize() returns the raw BPE pieces,
            # which keep the "Ġ" leading-space marker the display below
            # relies on; decoding each id and stripping it loses that marker.
            gpt2_tokens = gpt2_tokenizer.tokenize(text_input)

            # BERT WordPiece Section
            output_html.append('<h4 class="bg-primary text-white p-3 rounded">BERT WordPiece</h4>')
            output_html.append('<p>BERT uses WordPiece tokenization which marks subword units with ##.</p>')

            # Create token display
            output_html.append('<div class="card"><div class="card-body">')
            output_html.append('<div style="background-color: #f5f5f5; padding: 15px; border-radius: 5px; line-height: 2.5;">')

            for token in bert_tokens:
                if token.startswith("##"):
                    # Continuation piece of a word: highlighted
                    output_html.append(f'<span class="token" style="background-color: #FFECB3; border-color: #FFA000;">{escape(token)}</span>')
                else:
                    output_html.append(f'<span class="token">{escape(token)}</span>')

            output_html.append('</div></div></div>')
            output_html.append(f'<p class="mt-2">Total BERT tokens: {len(bert_tokens)}</p>')

            # GPT-2 BPE Section
            output_html.append('<h4 class="bg-primary text-white p-3 rounded mt-4">GPT-2 BPE</h4>')
            output_html.append('<p>GPT-2 uses Byte-Pair Encoding (BPE) tokenization where Ġ represents a space before the token.</p>')

            output_html.append('<div class="card"><div class="card-body">')
            output_html.append('<div style="background-color: #f5f5f5; padding: 15px; border-radius: 5px; line-height: 2.5;">')

            for token in gpt2_tokens:
                if token.startswith("Ġ"):
                    output_html.append(f'<span class="token">{escape(token)}</span>')
                else:
                    # No leading-space marker: highlighted like a BERT continuation piece
                    output_html.append(f'<span class="token" style="background-color: #FFECB3; border-color: #FFA000;">{escape(token)}</span>')

            output_html.append('</div></div></div>')
            output_html.append(f'<p class="mt-2">Total GPT-2 tokens: {len(gpt2_tokens)}</p>')

            # Compare token counts
            output_html.append('<h4>Token Count Comparison</h4>')
            token_count_data = {
                'Tokenizer': ['Words (spaces)', 'NLTK', 'spaCy', 'BERT WordPiece', 'GPT-2 BPE'],
                'Token Count': [
                    len(text_input.split()),
                    len(nltk_tokens),
                    len(doc),
                    len(bert_tokens),
                    len(gpt2_tokens),
                ],
            }

            token_count_df = pd.DataFrame(token_count_data)

            # Create comparison chart
            fig = plt.figure(figsize=(10, 6))
            bars = plt.bar(token_count_df['Tokenizer'], token_count_df['Token Count'], color=['#BBDEFB', '#90CAF9', '#64B5F6', '#42A5F5', '#2196F3'])

            # Add value labels on top of bars
            for bar in bars:
                height = bar.get_height()
                plt.text(bar.get_x() + bar.get_width() / 2., height + 0.5,
                         f'{height}',
                         ha='center', va='bottom')

            plt.ylabel('Token Count')
            plt.title('Tokenization Comparison by Method')
            plt.ylim(0, max(token_count_df['Token Count']) * 1.1)  # Add some headroom for labels
            plt.tight_layout()

            output_html.append(fig_to_html(fig))

            # Add token length distribution analysis
            token_lengths = [len(token) for token in nltk_tokens]

            # Skip the histogram when there are no tokens: max() of an empty
            # list would raise and mislabel the failure as a tokenizer error.
            if token_lengths:
                output_html.append('<h4>Token Length Distribution</h4>')

                fig = plt.figure(figsize=(10, 6))
                plt.hist(token_lengths, bins=range(1, max(token_lengths) + 2), color='#4CAF50', alpha=0.7)
                plt.xlabel('Token Length')
                plt.ylabel('Frequency')
                plt.title('Token Length Distribution')
                plt.grid(axis='y', alpha=0.3)
                plt.tight_layout()

                output_html.append(fig_to_html(fig))

            # Add tokenization statistics summary
            avg_token_length = sum(token_lengths) / len(token_lengths) if token_lengths else 0
            output_html.append(f"""
            <h4>Tokenization Statistics</h4>
            <div class="row mt-3">
                <div class="col-md-4">
                    <div class="card text-center">
                        <div class="card-body">
                            <h3 class="text-success">{token_count}</h3>
                            <p class="mb-0">Total Tokens</p>
                        </div>
                    </div>
                </div>
                <div class="col-md-4">
                    <div class="card text-center">
                        <div class="card-body">
                            <h3 class="text-primary">{avg_token_length:.2f}</h3>
                            <p class="mb-0">Average Token Length</p>
                        </div>
                    </div>
                </div>
                <div class="col-md-4">
                    <div class="card text-center">
                        <div class="card-body">
                            <h3 class="text-warning">{tokens_per_sentence:.2f}</h3>
                            <p class="mb-0">Tokens per Sentence</p>
                        </div>
                    </div>
                </div>
            </div>
            """)

        except Exception as e:
            # Best-effort section: the rest of the report is still useful
            # when the transformer tokenizers cannot be loaded.
            output_html.append(f"""
            <div class="alert alert-warning">
                <h4>Subword Tokenization Error</h4>
                <p>Failed to load transformer tokenizers: {escape(str(e))}</p>
                <p>The transformers library may not be installed or there might be network issues when downloading models.</p>
            </div>
            """)

    except Exception as e:
        # Top-level boundary for the report: show the failure in the page.
        output_html.append(f"""
        <div class="alert alert-danger">
            <h3>Error</h3>
            <p>Failed to process tokenization: {escape(str(e))}</p>
        </div>
        """)

    # About Tokenization section
    output_html.append("""
    <div class="card mt-4">
        <div class="card-header">
            <h4 class="mb-0">
                <i class="fas fa-info-circle"></i>
                About Tokenization
            </h4>
        </div>
        <div class="card-body">
            <h5>What is Tokenization?</h5>

            <p>Tokenization is the process of breaking down text into smaller units called tokens.
            These tokens can be words, subwords, characters, or symbols, depending on the approach.
            It's typically the first step in most NLP pipelines.</p>

            <h5>Types of Tokenization:</h5>

            <ul>
                <li><b>Word Tokenization</b> - Splits text on whitespace and punctuation (with various rules)</li>
                <li><b>Sentence Tokenization</b> - Divides text into sentences using punctuation and other rules</li>
                <li><b>Subword Tokenization</b> - Splits words into meaningful subunits (WordPiece, BPE, SentencePiece)</li>
                <li><b>Character Tokenization</b> - Treats each character as a separate token</li>
            </ul>

            <h5>Why Subword Tokenization?</h5>

            <p>Modern NLP models use subword tokenization because:</p>
            <ul>
                <li>It handles out-of-vocabulary words better</li>
                <li>It represents rare words by decomposing them</li>
                <li>It works well for morphologically rich languages</li>
                <li>It balances vocabulary size and token length</li>
            </ul>
        </div>
    </div>
    """)

    output_html.append('</div>')  # Close result-area div

    return '\n'.join(output_html)
components/topic_analysis.py ADDED
@@ -0,0 +1,766 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib.pyplot as plt
2
+ import pandas as pd
3
+ import numpy as np
4
+ import nltk
5
+ from collections import Counter
6
+ import networkx as nx
7
+ from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
8
+ from sklearn.decomposition import LatentDirichletAllocation, NMF
9
+ import wordcloud
10
+ from nltk.corpus import stopwords
11
+ from nltk.tokenize import word_tokenize
12
+ from nltk.stem import WordNetLemmatizer
13
+ import matplotlib.colors as mcolors
14
+ import io
15
+ import base64
16
+
17
+ from utils.model_loader import download_nltk_resources
18
+ from utils.helpers import fig_to_html, df_to_html_table
19
+
20
def classify_topic(text_input):
    """Classify text into one of ten predefined topics by keyword counting.

    Args:
        text_input: The text to classify.

    Returns:
        A 4-tuple ``(main_topic, confidence, sorted_topics, topic_scores)``:
        ``main_topic`` is the name of the best-scoring topic (the first topic
        in definition order on ties, including when nothing matches);
        ``confidence`` is that topic's share of the summed scores as a
        percentage rounded to one decimal; ``sorted_topics`` is a list of
        ``(topic, score)`` pairs in descending score order; ``topic_scores``
        maps every topic to its score (keyword hits divided by the
        whitespace-separated word count).
    """
    # Define topic keywords
    topic_keywords = {
        'environment': ['climate', 'environment', 'weather', 'earth', 'temperature', 'pollution', 'warming', 'planet', 'ecosystem', 'sustainable'],
        'science': ['science', 'scientific', 'research', 'study', 'experiment', 'discovery', 'theory', 'laboratory', 'data'],
        'business': ['business', 'company', 'market', 'economy', 'economic', 'finance', 'industry', 'corporate', 'trade'],
        'education': ['education', 'school', 'student', 'learn', 'teach', 'academic', 'university', 'college', 'knowledge'],
        'health': ['health', 'medical', 'doctor', 'patient', 'disease', 'treatment', 'hospital', 'medicine', 'healthcare'],
        'technology': ['technology', 'tech', 'computer', 'digital', 'software', 'hardware', 'internet', 'device', 'innovation'],
        'politics': ['politics', 'government', 'policy', 'election', 'political', 'law', 'president', 'party', 'vote'],
        'sports': ['sport', 'game', 'team', 'player', 'competition', 'athlete', 'championship', 'tournament', 'coach'],
        'entertainment': ['entertainment', 'movie', 'music', 'film', 'television', 'celebrity', 'actor', 'actress', 'show'],
        'travel': ['travel', 'trip', 'vacation', 'tourist', 'destination', 'journey', 'adventure', 'flight', 'hotel'],
    }

    # Matching is case-insensitive
    text = text_input.lower()

    # The small offset keeps the normalization defined for empty input.
    normalizer = len(text.split()) + 0.001

    # NOTE: str.count matches substrings, so 'sport' also counts 'sports'
    # (and 'transport'); scores are a rough signal, not exact word counts.
    topic_scores = {
        topic: sum(text.count(keyword) for keyword in keywords) / normalizer
        for topic, keywords in topic_keywords.items()
    }

    # Get the main topic and confidence
    main_topic = max(topic_scores.items(), key=lambda x: x[1])
    total_score = sum(topic_scores.values())
    # No epsilon in the denominator: the scores are already divided by the
    # text length, so a fixed 0.001 is the same order of magnitude as the
    # scores themselves and would badly understate the confidence.
    confidence = main_topic[1] / total_score if total_score > 0 else 0.0
    confidence = round(confidence * 100, 1)  # Convert to percentage

    # Sort topics by score for visualization
    sorted_topics = sorted(topic_scores.items(), key=lambda x: x[1], reverse=True)

    return main_topic[0], confidence, sorted_topics, topic_scores
+
63
def extract_key_phrases(text_input, top_n=10):
    """Extract the most frequent key phrases from text.

    Candidates are bigrams and trigrams counted by ``CountVectorizer`` (with
    its English stop-word list) plus single nouns, verbs and adjectives
    selected by NLTK part-of-speech tags.

    Args:
        text_input: The text to analyse.
        top_n: Maximum number of phrases to return; also the number of
            single words taken into the candidate pool.

    Returns:
        A list of ``(phrase, count)`` tuples sorted by count in descending
        order, at most ``top_n`` long.
    """
    # Download required NLTK resources
    download_nltk_resources()

    # Define stop words
    stop_words = set(stopwords.words('english'))

    phrases = []

    # Collect bigrams, then trigrams, with the same procedure
    for n in (2, 3):
        vectorizer = CountVectorizer(ngram_range=(n, n), stop_words='english', max_features=100)
        try:
            matrix = vectorizer.fit_transform([text_input])
        except ValueError:
            # Raised for an empty vocabulary (text too short or only stop
            # words): there are simply no n-grams of this size to add.
            continue

        features = vectorizer.get_feature_names_out()
        counts = matrix.toarray()[0]
        phrases.extend(
            (phrase, int(count))
            for phrase, count in zip(features, counts)
            if count >= 1  # Must appear at least once
        )

    # Also extract single important words (nouns, verbs, adjectives)
    pos_tags = nltk.pos_tag(word_tokenize(text_input))

    important_words = [
        word.lower()
        for word, tag in pos_tags
        if tag.startswith(('NN', 'VB', 'JJ'))
        and word.lower() not in stop_words
        and len(word) > 2
    ]

    # Add the most frequent important single words to the candidates
    phrases.extend(Counter(important_words).most_common(top_n))

    # Sort by frequency; the sort is stable, so ties keep insertion order
    # (bigrams, then trigrams, then single words).
    sorted_phrases = sorted(phrases, key=lambda x: x[1], reverse=True)

    # Return top N phrases
    return sorted_phrases[:top_n]
+
127
def create_phrase_cloud(phrases):
    """Render a word cloud for the given phrases as an HTML fragment.

    Args:
        phrases: Iterable of ``(phrase, frequency)`` pairs.

    Returns:
        The HTML produced by ``fig_to_html`` for the word-cloud figure, or a
        short ``<p>`` message when the cloud cannot be generated.
    """
    fallback = "<p>Could not generate phrase cloud due to insufficient data.</p>"

    # Convert phrases to a dictionary of {phrase: frequency}
    phrase_freq = dict(phrases)
    if not phrase_freq:
        # Nothing to draw; skip building the word cloud entirely.
        return fallback

    # Create word cloud
    wc = wordcloud.WordCloud(
        background_color='white',
        width=600,
        height=400,
        colormap='viridis',
        max_words=50,
        prefer_horizontal=0.9,
        random_state=42,
    )

    fig = None
    try:
        # Generate word cloud from phrases
        wc.generate_from_frequencies(phrase_freq)

        # Create figure
        fig = plt.figure(figsize=(10, 6))
        plt.imshow(wc, interpolation='bilinear')
        plt.axis('off')
        plt.tight_layout()

        return fig_to_html(fig)
    except Exception:
        # Best-effort rendering: any failure degrades to the fallback text,
        # but KeyboardInterrupt/SystemExit are no longer swallowed.
        if fig is not None:
            plt.close(fig)  # do not leave a half-built figure open
        return fallback
+
157
def _preprocess_tokens(text, stop_words, lemmatizer):
    """Lowercase and tokenize *text*, drop stop words and non-alphabetic
    tokens, then lemmatize what is left."""
    tokens = word_tokenize(text.lower())
    return [
        lemmatizer.lemmatize(token)
        for token in tokens
        if token.isalpha() and token not in stop_words
    ]


def _append_topic_classification(out, text_input):
    """Append the classification summary, relevance chart and score table."""
    out.append('<h3 class="task-subheader">Topic Classification</h3>')

    # classify_topic yields four values; the last one is not needed here.
    main_topic, confidence, sorted_topics, _ = classify_topic(text_input)

    out.append(f"""
    <div class="alert alert-success">
        <p class="mb-0 fs-5">This text is primarily about <strong>{main_topic}</strong> with {confidence}% confidence</p>
    </div>
    """)

    # Row 1: topic relevance chart (full width)
    out.append('<div class="row">')
    out.append('<div class="col-12">')
    out.append('<h4>Topic Relevance</h4>')

    fig = plt.figure(figsize=(10, 6))
    topics = [topic for topic, _ in sorted_topics]
    scores = [score for _, score in sorted_topics]

    # Only show the top topics for clarity
    top_n = min(10, len(topics))
    y_pos = np.arange(top_n)
    colors = plt.cm.Blues(np.linspace(0.4, 0.8, top_n))
    bars = plt.barh(y_pos, [s * 100 for s in scores[:top_n]], color=colors)

    # Print the percentage just right of each bar
    for bar in bars:
        width = bar.get_width()
        plt.text(width + 0.5, bar.get_y() + bar.get_height() / 2,
                 f"{width:.1f}%",
                 va='center')

    plt.yticks(y_pos, topics[:top_n])
    plt.xlabel('Relevance')
    plt.title('Topic Scores')
    plt.tight_layout()

    out.append(fig_to_html(fig))
    out.append('</div>')
    out.append('</div>')  # Close row 1

    # Row 2: topic scores table (full width)
    out.append('<div class="row mt-3">')
    out.append('<div class="col-12">')
    out.append('<h4>Topic Scores</h4>')

    topic_scores_df = pd.DataFrame({
        'Rank': range(1, len(sorted_topics) + 1),
        'Topic': [topic.capitalize() for topic, _ in sorted_topics],
        'Confidence': [f"{score:.4f}" for _, score in sorted_topics],
    })

    out.append(df_to_html_table(topic_scores_df))
    out.append('</div>')
    out.append('</div>')  # Close row 2


def _append_key_phrases(out, text_input):
    """Append the key-phrase table and phrase cloud, or a short notice."""
    out.append('<h3 class="task-subheader">Key Phrases</h3>')

    key_phrases = extract_key_phrases(text_input)
    if not key_phrases:
        out.append("<p>No key phrases could be extracted from the text.</p>")
        return

    phrase_df = pd.DataFrame({
        'Phrase': [phrase for phrase, _ in key_phrases],
        'Frequency': [freq for _, freq in key_phrases],
    })

    # Row 1: key phrases table (full width)
    out.append('<div class="row">')
    out.append('<div class="col-12">')
    out.append(df_to_html_table(phrase_df))
    out.append('</div>')
    out.append('</div>')  # Close row 1

    # Row 2: phrase cloud (full width)
    out.append('<div class="row mt-3">')
    out.append('<div class="col-12">')
    out.append(create_phrase_cloud(key_phrases))
    out.append('</div>')
    out.append('</div>')  # Close row 2


def _append_term_frequencies(out, processed_tokens):
    """Append the term-frequency chart, top-terms table and word cloud."""
    out.append('<h3 class="task-subheader">Key Term Frequency Analysis</h3>')

    # most_common() orders by descending count and keeps first-seen order
    # for ties, matching a stable sort on the counts.
    sorted_word_freq = dict(Counter(processed_tokens).most_common())

    if not sorted_word_freq:
        # Without this guard the word cloud below raises on an empty table
        # and the remaining sections of the report would be lost.
        out.append("""
        <div class="alert alert-warning">
            <p class="mb-0">No meaningful terms were left after removing stop words, so term frequencies and the word cloud are skipped.</p>
        </div>
        """)
        return

    # Take the top 25 words for visualization
    top_n = 25
    top_words = list(sorted_word_freq)[:top_n]
    top_freqs = list(sorted_word_freq.values())[:top_n]

    fig = plt.figure(figsize=(10, 6))
    colors = plt.cm.viridis(np.linspace(0.3, 0.85, len(top_words)))
    bars = plt.bar(top_words, top_freqs, color=colors)
    plt.xlabel('Term')
    plt.ylabel('Frequency')
    # Report the number of bars actually drawn, which may be below top_n.
    plt.title(f'Top {len(top_words)} Term Frequencies')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()

    # Add value labels on top of bars
    for bar in bars:
        height = bar.get_height()
        plt.text(bar.get_x() + bar.get_width() / 2., height + 0.1,
                 f'{height}',
                 ha='center', va='bottom',
                 fontsize=8)

    # Row 1: chart (full width)
    out.append('<div class="row">')
    out.append('<div class="col-12">')
    out.append(fig_to_html(fig))
    out.append('</div>')
    out.append('</div>')  # Close row 1

    # Row 2: top terms table (full width)
    out.append('<div class="row mt-3">')
    out.append('<div class="col-12">')
    out.append('<h4>Top Terms</h4>')

    top_terms_df = pd.DataFrame({
        'Term': list(sorted_word_freq)[:15],
        'Frequency': list(sorted_word_freq.values())[:15],
    })

    out.append(df_to_html_table(top_terms_df))
    out.append('</div>')
    out.append('</div>')  # Close row 2

    # Word cloud visualization
    out.append('<h3 class="task-subheader">Word Cloud Visualization</h3>')
    out.append('<p>The size of each word represents its frequency in the text.</p>')

    wc = wordcloud.WordCloud(
        background_color='white',
        max_words=100,
        width=800,
        height=400,
        colormap='viridis',
        contour_width=1,
        contour_color='steelblue',
    )
    wc.generate_from_frequencies(sorted_word_freq)

    fig = plt.figure(figsize=(12, 6))
    plt.imshow(wc, interpolation='bilinear')
    plt.axis('off')
    plt.tight_layout()

    out.append(fig_to_html(fig))


def _append_tfidf_analysis(out, sentences):
    """Append the per-sentence TF-IDF table and, for short texts, a heatmap."""
    out.append('<h3 class="task-subheader">TF-IDF Analysis</h3>')
    out.append("""
    <div class="alert alert-light">
        <p class="mb-0">
            Term Frequency-Inverse Document Frequency (TF-IDF) identifies terms that are distinctive to parts of the text.
            In this case, we treat each sentence as a separate "document" for the analysis.
        </p>
    </div>
    """)

    # Only perform TF-IDF if there are enough sentences
    if len(sentences) < 3:
        out.append("""
        <div class="alert alert-warning">
            <p class="mb-0">TF-IDF analysis requires at least 3 sentences. The provided text doesn't have enough sentences for this analysis.</p>
        </div>
        """)
        return

    tfidf_vectorizer = TfidfVectorizer(
        max_features=100,
        stop_words='english',
        min_df=1,
    )

    try:
        tfidf_matrix = tfidf_vectorizer.fit_transform(sentences)
    except ValueError:
        # scikit-learn raises ValueError when no vocabulary can be built,
        # e.g. when every token is a stop word. Skip only this section.
        out.append("""
        <div class="alert alert-warning">
            <p class="mb-0">TF-IDF analysis could not build a vocabulary from the text (it may contain only stop words).</p>
        </div>
        """)
        return

    feature_names = tfidf_vectorizer.get_feature_names_out()
    # At most 100 features per sentence, so one dense copy is cheap and
    # serves both the table and the heatmap.
    dense_scores = tfidf_matrix.toarray()

    # Top TF-IDF terms for each sentence (max 5 sentences to avoid clutter)
    tfidf_data = []
    for i, sentence in enumerate(sentences[:5]):
        tfidf_scores = dense_scores[i]
        top_indices = np.argsort(tfidf_scores)[-5:][::-1]  # Top 5 terms

        # A zero score means the term does not occur in this sentence, so
        # listing it as "distinctive" would be misleading.
        ranked = [
            (feature_names[idx], tfidf_scores[idx])
            for idx in top_indices
            if tfidf_scores[idx] > 0
        ]
        formatted_terms = (
            ', '.join(f"{term} ({score:.3f})" for term, score in ranked)
            or 'No distinctive terms'
        )

        shortened_sentence = (sentence[:75] + '...') if len(sentence) > 75 else sentence

        tfidf_data.append({
            'Sentence': shortened_sentence,
            'Distinctive Terms (TF-IDF scores)': formatted_terms,
        })

    tfidf_df = pd.DataFrame(tfidf_data)

    out.append('<div class="mt-3">')
    out.append(df_to_html_table(tfidf_df))
    out.append('</div>')

    # Only create the heatmap for a reasonable number of sentences
    if len(sentences) > 10:
        return

    # Top 10 terms by mean score across all sentences
    mean_tfidf = np.mean(dense_scores, axis=0)
    top_indices = np.argsort(mean_tfidf)[-10:][::-1]
    top_terms = [feature_names[idx] for idx in top_indices]
    heatmap_data = dense_scores[:, top_indices]

    fig, _ = plt.subplots(figsize=(10, 6))
    plt.imshow(heatmap_data, cmap='viridis', aspect='auto')

    plt.yticks(range(len(sentences)), [f"Sent {i+1}" for i in range(len(sentences))])
    plt.xticks(range(len(top_terms)), top_terms, rotation=45, ha='right')

    plt.colorbar(label='TF-IDF Score')
    plt.xlabel('Terms')
    plt.ylabel('Sentences')
    plt.title('TF-IDF Heatmap: Term Importance by Sentence')
    plt.tight_layout()

    out.append('<h4>Term Importance Heatmap</h4>')
    out.append('<p>This heatmap shows which terms are most distinctive in each sentence.</p>')
    out.append(fig_to_html(fig))


def _append_topic_network(out, lda_model, feature_names, n_topics):
    """Append a graph linking each LDA topic to its highest-weighted terms."""
    out.append('<h4>Topic-Term Network</h4>')
    out.append('<p>This visualization shows the relationships between topics and their most important terms.</p>')

    G = nx.Graph()

    # Add topic nodes
    for i in range(n_topics):
        G.add_node(f"Topic {i+1}", type='topic', size=1000)

    # Add term nodes and edges
    for topic_idx, topic in enumerate(lda_model.components_):
        topic_name = f"Topic {topic_idx+1}"

        # Ten highest-weighted terms for this topic
        for i in topic.argsort()[:-11:-1]:
            term = feature_names[i]
            weight = topic[i]

            # Only add terms with significant weight
            if weight > 0.01:
                if not G.has_node(term):
                    G.add_node(term, type='term', size=300)
                G.add_edge(topic_name, term, weight=weight)

    fig = plt.figure(figsize=(10, 8))

    # Position nodes using spring layout (seeded for reproducible output)
    pos = nx.spring_layout(G, k=0.3, seed=42)

    topic_nodes = [node for node in G.nodes() if G.nodes[node]['type'] == 'topic']
    term_nodes = [node for node in G.nodes() if G.nodes[node]['type'] == 'term']

    # Draw topic nodes
    nx.draw_networkx_nodes(
        G, pos,
        nodelist=topic_nodes,
        node_color='#E53935',
        node_size=[G.nodes[node]['size'] for node in topic_nodes],
        alpha=0.8,
    )

    # Draw term nodes
    nx.draw_networkx_nodes(
        G, pos,
        nodelist=term_nodes,
        node_color='#1976D2',
        node_size=[G.nodes[node]['size'] for node in term_nodes],
        alpha=0.6,
    )

    # Draw edges with thickness proportional to the term weight
    edge_weights = [G[u][v]['weight'] * 5 for u, v in G.edges()]
    nx.draw_networkx_edges(
        G, pos,
        width=edge_weights,
        alpha=0.5,
        edge_color='gray',
    )

    # Draw labels
    nx.draw_networkx_labels(
        G, pos,
        font_size=10,
        font_weight='bold',
    )

    plt.axis('off')
    plt.tight_layout()

    out.append(fig_to_html(fig))


def _append_topic_modeling(out, text_input, sentences):
    """Append the LDA section: topic table, word clouds, distribution, network."""
    out.append('<h3 class="task-subheader">Topic Modeling</h3>')
    out.append("""
    <div class="alert alert-light">
        <p class="mb-0">
            Topic modeling uses statistical methods to discover abstract "topics" that occur in a collection of documents.
            Here, we use Latent Dirichlet Allocation (LDA) to identify potential topics.
        </p>
    </div>
    """)

    # Check if text is long enough for topic modeling
    if len(text_input.split()) < 50:
        out.append("""
        <div class="alert alert-warning">
            <p class="mb-0">Topic modeling works best with longer texts. The provided text is too short for reliable topic modeling.</p>
        </div>
        """)
        return

    # Each sentence is treated as one "document" of the corpus. Few sentences
    # only trigger a warning; modeling still proceeds.
    if len(sentences) < 4:
        out.append("""
        <div class="alert alert-warning">
            <p class="mb-0">Topic modeling works best with multiple documents or paragraphs. Since the provided text has few sentences,
            the topic modeling results may not be meaningful.</p>
        </div>
        """)

    vectorizer = CountVectorizer(
        max_features=1000,
        stop_words='english',
        min_df=1,
    )

    try:
        dtm = vectorizer.fit_transform(sentences)
    except ValueError:
        # scikit-learn raises ValueError when no vocabulary can be built,
        # e.g. when every token is a stop word. Skip only this section.
        out.append("""
        <div class="alert alert-warning">
            <p class="mb-0">Topic modeling could not build a vocabulary from the text (it may contain only stop words).</p>
        </div>
        """)
        return
    feature_names = vectorizer.get_feature_names_out()

    # Two or three topics, depending on the number of sentences
    n_topics = min(3, max(2, len(sentences) // 3))

    lda_model = LatentDirichletAllocation(
        n_components=n_topics,
        max_iter=10,
        learning_method='online',
        random_state=42,
    )
    lda_model.fit(dtm)

    # Top terms for each topic
    n_top_words = 10
    topic_terms = []
    for topic_idx, topic in enumerate(lda_model.components_):
        top_indices = topic.argsort()[:-n_top_words - 1:-1]
        top_terms = [feature_names[i] for i in top_indices]
        # Share of the topic's total weight carried by its top terms;
        # used as a rough indicator of topic "importance".
        topic_weight = topic[top_indices].sum() / topic.sum()
        topic_terms.append({
            "Topic": f"Topic {topic_idx + 1}",
            "Top Terms": ", ".join(top_terms),
            "Weight": f"{topic_weight:.2f}",
        })

    out.append('<h4>LDA Topic Model Results</h4>')
    out.append(df_to_html_table(pd.DataFrame(topic_terms)))

    # One word cloud per topic
    out.append('<h4>Topic Word Clouds</h4>')
    out.append('<div class="row">')

    for topic_idx, topic in enumerate(lda_model.components_):
        # Up to 50 highest-weighted words of the topic
        word_weights = {feature_names[i]: topic[i] for i in topic.argsort()[:-50 - 1:-1]}

        wc = wordcloud.WordCloud(
            background_color='white',
            max_words=30,
            width=400,
            height=300,
            colormap='plasma',
            contour_width=1,
            contour_color='steelblue',
        )
        wc.generate_from_frequencies(word_weights)

        fig = plt.figure(figsize=(6, 4))
        plt.imshow(wc, interpolation='bilinear')
        plt.axis('off')
        plt.title(f'Topic {topic_idx + 1}')
        plt.tight_layout()

        out.append('<div class="col-12 mb-3">')
        out.append(fig_to_html(fig))
        out.append('</div>')

    out.append('</div>')  # Close row for word clouds

    # Count how many sentences have each topic as their dominant one
    topic_distribution = lda_model.transform(dtm)
    dominant_topics = np.argmax(topic_distribution, axis=1)
    topic_counts = Counter(dominant_topics)

    topics = [f"Topic {i+1}" for i in range(n_topics)]
    counts = [topic_counts.get(i, 0) for i in range(n_topics)]

    fig = plt.figure(figsize=(8, 5))
    bars = plt.bar(topics, counts, color=plt.cm.plasma(np.linspace(0.15, 0.85, n_topics)))

    # Add value labels
    for bar in bars:
        height = bar.get_height()
        plt.text(bar.get_x() + bar.get_width() / 2., height + 0.1,
                 f'{height}',
                 ha='center', va='bottom')

    plt.xlabel('Topic')
    plt.ylabel('Number of Sentences')
    plt.title('Distribution of Dominant Topics Across Sentences')
    plt.tight_layout()

    out.append('<h4>Topic Distribution</h4>')
    out.append(fig_to_html(fig))

    _append_topic_network(out, lda_model, feature_names, n_topics)

    # Note about interpreting results
    out.append("""
    <div class="alert alert-info">
        <h4>Interpreting Topic Models</h4>
        <p>Topic modeling is an unsupervised technique that works best with large collections of documents.
        For a single text, especially shorter ones, topics may be less distinct or meaningful.
        The "topics" shown here represent clusters of words that frequently appear together in the text.</p>
        <p>For better topic modeling results:</p>
        <ul>
            <li>Use longer texts with at least several paragraphs</li>
            <li>Provide multiple related documents for analysis</li>
            <li>Consider domain-specific preprocessing</li>
        </ul>
    </div>
    """)


def topic_analysis_handler(text_input):
    """Build the HTML report for the "Topic Analysis" task.

    The report contains, in order: topic classification, key phrases, term
    frequencies with a word cloud, a per-sentence TF-IDF analysis and an LDA
    topic model. Each section appends to one shared list, so if a section
    fails, everything produced before it is kept and an error alert is
    appended in its place.

    Args:
        text_input: The text to analyse.

    Returns:
        A single HTML string wrapped in a ``result-area`` div.
    """
    import html  # stdlib; local import keeps this block self-contained

    output_html = []

    # Add result area container
    output_html.append('<div class="result-area">')
    output_html.append('<h2 class="task-header">Topic Analysis</h2>')

    output_html.append("""
    <div class="alert alert-info">
        <i class="fas fa-info-circle"></i>
        Topic analysis identifies the main themes and subjects in a text, helping to categorize content and understand what it's about.
    </div>
    """)

    # Model info
    output_html.append("""
    <div class="alert alert-info">
        <h4><i class="fas fa-tools"></i> Models & Techniques Used:</h4>
        <ul>
            <li><b>Zero-shot Classification</b> - BART model that can classify text without specific training</li>
            <li><b>TF-IDF Vectorizer</b> - Statistical method to identify important terms</li>
            <li><b>Word/Phrase Analysis</b> - Extraction of important n-grams</li>
        </ul>
    </div>
    """)

    try:
        # Ensure NLTK resources are downloaded
        download_nltk_resources()

        # Warn (but continue) when the text is short
        word_count = len(text_input.split())
        if word_count < 50:
            output_html.append(f"""
            <div class="alert alert-warning">
                <h3>Text Too Short for Full Topic Analysis</h3>
                <p>The provided text contains only {word_count} words.
                For meaningful topic analysis, please provide a longer text (at least 50 words).
                We'll still perform basic frequency analysis, but topic modeling results may not be reliable.</p>
            </div>
            """)

        # Text cleaning and preprocessing
        stop_words = set(stopwords.words('english'))
        lemmatizer = WordNetLemmatizer()
        processed_tokens = _preprocess_tokens(text_input, stop_words, lemmatizer)

        _append_topic_classification(output_html, text_input)
        _append_key_phrases(output_html, text_input)
        _append_term_frequencies(output_html, processed_tokens)

        # Sentences act as the "documents" for both TF-IDF and LDA
        sentences = nltk.sent_tokenize(text_input)
        _append_tfidf_analysis(output_html, sentences)
        _append_topic_modeling(output_html, text_input, sentences)

    except Exception as e:
        # Top-level boundary: report the failure inside the page. The message
        # is escaped because it can contain arbitrary text.
        output_html.append(f"""
        <div class="alert alert-danger">
            <h3>Error</h3>
            <p>Failed to analyze topics: {html.escape(str(e))}</p>
        </div>
        """)

    # About Topic Analysis section
    output_html.append("""
    <div class="card mt-4">
        <div class="card-header">
            <h4 class="mb-0">
                <i class="fas fa-info-circle"></i>
                About Topic Analysis
            </h4>
        </div>
        <div class="card-body">
            <h5>What is Topic Analysis?</h5>

            <p>Topic analysis, also known as topic modeling or topic extraction, is the process of identifying the main themes
            or topics that occur in a collection of documents. It uses statistical models to discover abstract topics based
            on word distributions throughout the texts.</p>

            <h5>Common Approaches:</h5>

            <ul>
                <li><b>Term Frequency Analysis</b> - Simple counting of terms to find the most common topics</li>
                <li><b>TF-IDF (Term Frequency-Inverse Document Frequency)</b> - Identifies terms that are distinctive to particular documents or sections</li>
                <li><b>LDA (Latent Dirichlet Allocation)</b> - A probabilistic model that assigns topic distributions to documents</li>
                <li><b>NMF (Non-negative Matrix Factorization)</b> - A linear-algebraic approach to topic discovery</li>
                <li><b>BERTopic</b> - A modern approach that uses BERT embeddings and clustering for topic modeling</li>
            </ul>

            <h5>Applications:</h5>

            <ul>
                <li><b>Content organization</b> - Categorizing documents by topic</li>
                <li><b>Trend analysis</b> - Tracking how topics evolve over time</li>
                <li><b>Content recommendation</b> - Suggesting related content based on topic similarity</li>
                <li><b>Customer feedback analysis</b> - Understanding main themes in reviews or feedback</li>
                <li><b>Research insights</b> - Identifying research themes in academic papers</li>
            </ul>
        </div>
    </div>
    """)

    output_html.append('</div>')  # Close result-area div

    return '\n'.join(output_html)
components/translation.py ADDED
@@ -0,0 +1,337 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib.pyplot as plt
2
+ import pandas as pd
3
+ import numpy as np
4
+ from collections import Counter
5
+ import time
6
+
7
+ from utils.model_loader import load_translator
8
+ from utils.helpers import fig_to_html, df_to_html_table
9
+
10
def translation_handler(text_input, source_lang="auto", target_lang="en"):
    """Translate *text_input* and build an HTML report about the result.

    The report shows the source text, the translation, word/character
    statistics, a length-comparison chart and general guidance on machine
    translation. Only the first 500 characters of the input are sent to the
    translator; longer input is truncated and a notice is shown.

    Args:
        text_input: Text to translate. ``None``, empty or whitespace-only
            input produces a "No Text Provided" notice.
        source_lang: Source language code, or ``"auto"``.
        target_lang: Target language code.

    Returns:
        A single HTML string wrapped in a ``result-area`` div. Failures in
        loading the model or translating are reported inside the HTML rather
        than raised.
    """
    import html  # stdlib; local import keeps this block self-contained

    output_html = []

    # Add result area container
    output_html.append('<div class="result-area">')
    output_html.append('<h2 class="task-header">Machine Translation</h2>')

    output_html.append("""
    <div class="alert alert-info">
        <i class="fas fa-language"></i>
        Machine translation converts text from one language to another while preserving meaning and context as accurately as possible.
    </div>
    """)

    # Model info
    output_html.append("""
    <div class="alert alert-info">
        <h4><i class="fas fa-tools"></i> Model Used:</h4>
        <ul>
            <li><b>Helsinki-NLP/opus-mt</b> - A collection of pre-trained neural machine translation models</li>
            <li><b>Capabilities</b> - Translates between various language pairs with good accuracy</li>
            <li><b>Architecture</b> - Transformer-based sequence-to-sequence model</li>
        </ul>
    </div>
    """)

    try:
        # Nothing to translate: show a notice and stop early
        if not text_input or not text_input.strip():
            output_html.append("""
            <div class="alert alert-warning">
                <h3>No Text Provided</h3>
                <p>Please enter some text to translate.</p>
            </div>
            """)
            output_html.append('</div>')  # Close result-area div
            return '\n'.join(output_html)

        # Display source text
        output_html.append('<h3 class="task-subheader">Source Text</h3>')

        # Language mapping for display
        language_names = {
            "auto": "Auto-detect",
            "en": "English",
            "es": "Spanish",
            "fr": "French",
            "de": "German",
            "ru": "Russian",
            "zh": "Chinese",
            "ar": "Arabic",
            "hi": "Hindi",
            "ja": "Japanese",
            "pt": "Portuguese",
            "it": "Italian",
        }

        # Unknown codes are shown as given
        source_lang_display = language_names.get(source_lang, source_lang)
        target_lang_display = language_names.get(target_lang, target_lang)

        # Everything below that originates from the request or the model is
        # escaped before it is written into the page.
        source_badge = html.escape(str(source_lang_display))
        target_badge = html.escape(str(target_lang_display))

        output_html.append(f"""
        <div class="mb-2">
            <span class="badge bg-primary">
                {source_badge}
            </span>
        </div>
        """)

        safe_source_text = html.escape(text_input)
        output_html.append(f'<div class="card"><div class="card-body">{safe_source_text}</div></div>')

        # Load translation model
        translator = load_translator(source_lang, target_lang)

        start_time = time.time()

        # Check text length and apply limit if needed
        MAX_TEXT_LENGTH = 500  # Characters
        truncated = len(text_input) > MAX_TEXT_LENGTH
        truncated_text = text_input[:MAX_TEXT_LENGTH] if truncated else text_input

        # Perform translation
        translation = translator(truncated_text)
        translated_text = translation[0]['translation_text']

        # Covers truncation and inference, not model loading
        translation_time = time.time() - start_time

        # Display translation results
        output_html.append('<h3 class="task-subheader">Translation</h3>')

        output_html.append(f"""
        <div class="mb-2">
            <span class="badge bg-success">
                {target_badge}
            </span>
        </div>
        """)

        safe_translated_text = html.escape(translated_text)
        output_html.append(f'<div class="card"><div class="card-body bg-light">{safe_translated_text}</div></div>')

        # Show truncation warning if needed
        if truncated:
            output_html.append(f"""
            <div class="alert alert-warning">
                <p class="mb-0"><b>⚠️ Note:</b> Your text was truncated to {MAX_TEXT_LENGTH} characters due to model limitations. Only the first part was translated.</p>
            </div>
            """)

        # Translation statistics
        output_html.append('<h3 class="task-subheader">Translation Analysis</h3>')

        # Measure the text that was actually translated, so the ratios stay
        # meaningful when the input was truncated.
        source_chars = len(truncated_text)
        source_words = len(truncated_text.split())
        target_chars = len(translated_text)
        target_words = len(translated_text.split())

        output_html.append(f"""
        <div class="row text-center mb-4">
            <div class="col-md-4">
                <div class="card">
                    <div class="card-body">
                        <div class="display-4 text-primary">{source_words}</div>
                        <div>Source Words</div>
                    </div>
                </div>
            </div>
            <div class="col-md-4">
                <div class="card">
                    <div class="card-body">
                        <div class="display-4 text-success">{target_words}</div>
                        <div>Translated Words</div>
                    </div>
                </div>
            </div>
            <div class="col-md-4">
                <div class="card">
                    <div class="card-body">
                        <div class="display-4 text-warning">{translation_time:.2f}s</div>
                        <div>Processing Time</div>
                    </div>
                </div>
            </div>
        </div>
        """)

        # Length comparison
        output_html.append('<h4>Length Comparison</h4>')

        fig, ax = plt.subplots(figsize=(10, 5))

        # Grouped bars: one group for words, one for characters
        x = np.arange(2)
        width = 0.35
        source_values = [source_words, source_chars]
        target_values = [target_words, target_chars]

        ax.bar(x - width / 2, source_values, width, label='Source Text', color='#1976D2')
        ax.bar(x + width / 2, target_values, width, label='Translated Text', color='#4CAF50')

        ax.set_xticks(x)
        ax.set_xticklabels(['Word Count', 'Character Count'])
        ax.legend()

        # Add value labels on top of bars
        for i, (src_value, tgt_value) in enumerate(zip(source_values, target_values)):
            ax.text(i - width / 2, src_value + 0.5, str(src_value), ha='center')
            ax.text(i + width / 2, tgt_value + 0.5, str(tgt_value), ha='center')

        plt.title('Source vs. Translation Length Comparison')
        plt.tight_layout()

        output_html.append(fig_to_html(fig))

        # Expansion/contraction ratio (0 when the source count is zero)
        word_ratio = target_words / source_words if source_words > 0 else 0
        char_ratio = target_chars / source_chars if source_chars > 0 else 0

        # Within +/-10% counts as "similar length"
        if word_ratio > 1.1:
            expansion_type = "expansion"
        elif word_ratio < 0.9:
            expansion_type = "contraction"
        else:
            expansion_type = "similar length"

        output_html.append(f"""
        <div class="alert alert-info">
            <h4>Translation Length Analysis</h4>
            <p>The translation shows <b>{expansion_type}</b> compared to the source text.</p>
            <ul>
                <li>Word ratio: {word_ratio:.2f} (target/source)</li>
                <li>Character ratio: {char_ratio:.2f} (target/source)</li>
            </ul>
            <p><small>Note: Different languages naturally have different word and character counts when expressing the same meaning.</small></p>
        </div>
        """)

        # Language characteristics comparison
        source_avg_word_len = source_chars / source_words if source_words > 0 else 0
        target_avg_word_len = target_chars / target_words if target_words > 0 else 0

        output_html.append('<h4>Language Characteristics</h4>')

        lang_data = {
            'Metric': ['Average Word Length', 'Words per Character', 'Characters per Word'],
            f'Source ({source_lang_display})': [
                f"{source_avg_word_len:.2f} chars",
                f"{source_words / source_chars:.3f}" if source_chars > 0 else "N/A",
                f"{source_chars / source_words:.2f}" if source_words > 0 else "N/A",
            ],
            f'Target ({target_lang_display})': [
                f"{target_avg_word_len:.2f} chars",
                f"{target_words / target_chars:.3f}" if target_chars > 0 else "N/A",
                f"{target_chars / target_words:.2f}" if target_words > 0 else "N/A",
            ],
        }

        lang_df = pd.DataFrame(lang_data)

        output_html.append(df_to_html_table(lang_df))

        # Alternative translations section
        output_html.append('<h3 class="task-subheader">Alternative Translation Options</h3>')
        output_html.append('<p>Machine translation models often have different ways of translating the same text. Here are some general tips for better translations:</p>')

        output_html.append("""
        <div class="alert alert-info">
            <h4>Tips for Better Machine Translation</h4>
            <ul class="mb-0">
                <li><b>Use clear, simple language</b> in your source text</li>
                <li><b>Avoid idioms and slang</b> that may not translate well across cultures</li>
                <li><b>Break up long, complex sentences</b> into simpler ones</li>
                <li><b>Provide context</b> when dealing with ambiguous terms</li>
                <li><b>Review and post-edit</b> machine translations for important documents</li>
            </ul>
        </div>
        """)

        # Common translation challenges
        output_html.append('<h4>Common Translation Challenges</h4>')

        challenge_data = {
            'Challenge': [
                'Ambiguity',
                'Idioms & Expressions',
                'Cultural References',
                'Technical Terminology',
                'Grammatical Differences',
            ],
            'Description': [
                'Words with multiple meanings may be incorrectly translated without proper context',
                'Expressions that are unique to a culture often lose meaning when translated literally',
                'References to culture-specific concepts may not have direct equivalents',
                'Specialized terminology may not translate accurately without domain-specific models',
                'Different languages have different grammatical structures that can affect translation',
            ],
            'Example': [
                '"Bank" could mean financial institution or river edge',
                '"It\'s raining cats and dogs" translated literally loses its meaning',
                'References to local holidays or customs may be confusing when translated',
                'Medical or legal terms often need specialized translation knowledge',
                'Languages differ in word order, gender agreement, verb tenses, etc.',
            ],
        }

        challenge_df = pd.DataFrame(challenge_data)

        output_html.append(df_to_html_table(challenge_df))

    except Exception as e:
        # Top-level boundary: report the failure inside the page. The message
        # is escaped because it can contain arbitrary text.
        output_html.append(f"""
        <div class="alert alert-danger">
            <h3>Translation Error</h3>
            <p>{html.escape(str(e))}</p>
            <p>This could be due to an unsupported language pair or an issue loading the translation model.</p>
        </div>
        """)

    # About Machine Translation section
    output_html.append("""
    <div class="card mt-4">
        <div class="card-header">
            <h4 class="mb-0">
                <i class="fas fa-info-circle"></i>
                About Machine Translation
            </h4>
        </div>
        <div class="card-body">
            <h5>What is Machine Translation?</h5>

            <p>Machine translation is the automated translation of text from one language to another using computer software.
            Modern machine translation systems use neural networks to understand and generate text, leading to significant
            improvements in fluency and accuracy compared to older rule-based or statistical systems.</p>

            <h5>Types of Machine Translation:</h5>

            <ul>
                <li><b>Rule-based MT</b> - Uses linguistic rules crafted by human experts</li>
                <li><b>Statistical MT</b> - Uses statistical models trained on parallel texts</li>
                <li><b>Neural MT</b> - Uses deep learning and neural networks (current state-of-the-art)</li>
                <li><b>Hybrid MT</b> - Combines multiple approaches for better results</li>
            </ul>

            <h5>Applications:</h5>

            <ul>
                <li><b>Website localization</b> - Translating web content for international audiences</li>
                <li><b>Document translation</b> - Quickly obtaining translations of documents</li>
                <li><b>Real-time communication</b> - Enabling conversations across language barriers</li>
                <li><b>E-commerce</b> - Making product listings available in multiple languages</li>
                <li><b>Content accessibility</b> - Making information available to speakers of different languages</li>
            </ul>
        </div>
    </div>
    """)

    output_html.append('</div>')  # Close result-area div

    return '\n'.join(output_html)
components/vector_embeddings.py ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib
2
+ matplotlib.use('Agg') # Use non-GUI backend
3
+ import matplotlib.pyplot as plt
4
+ import numpy as np
5
+ import pandas as pd
6
+ import spacy
7
+ import time
8
+ import faiss
9
+ from sentence_transformers import SentenceTransformer, util
10
+ from sklearn.decomposition import PCA
11
+ import textwrap
12
+ from sklearn.metrics.pairwise import cosine_similarity
13
+
14
+ from utils.model_loader import load_embedding_model
15
+ from utils.helpers import fig_to_html, df_to_html_table
16
+
17
def _split_into_word_chunks(text, target_chunks=3, min_chunk_size=10):
    """Split *text* into consecutive word chunks, keeping the trailing remainder."""
    words = text.split()
    chunk_size = max(min_chunk_size, len(words) // target_chunks)
    return [
        ' '.join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]


def vector_embeddings_handler(text_input, search_query=""):
    """Render an HTML report describing the sentence embeddings of *text_input*.

    The text is split into sentences with spaCy (falling back to fixed-size
    word chunks when fewer than three usable sentences are found), at most
    ten segments are encoded with the embedding model, and the result is
    returned as one HTML string containing processing statistics, the first
    six segments and the semantic-search widget.

    Args:
        text_input: Raw user text to analyse.
        search_query: Accepted for interface compatibility; not used here.
            The search itself is served by ``perform_semantic_search``.

    Returns:
        An HTML fragment wrapped in ``<div class="result-area">``. When the
        input is blank or embedding generation fails, the fragment contains
        only an error alert (never the success banner).
    """
    import html  # function-scope import: only needed for escaping below

    max_segments = 10           # cap to keep the report readable
    max_displayed_segments = 6  # segments listed in the "Text Segments" card

    output_html = [
        '<div class="result-area">',
        '<h2 class="task-header">Vector Embeddings Analysis Results</h2>',
    ]

    def _error_page(message):
        # Escape the message: it may echo user input or library internals.
        output_html.append(f"""
        <div class="alert alert-danger">
            <h4><i class="fas fa-exclamation-triangle me-2"></i>Error</h4>
            <p>Could not generate embeddings: {html.escape(message)}</p>
        </div>
        """)
        output_html.append('</div>')  # Close result-area div
        return '\n'.join(output_html)

    # Reject blank input before loading any model.
    if not text_input or not text_input.strip():
        return _error_page("no text was provided.")

    # Load model and create embeddings. Only the fallible work lives in the
    # try-block so the success banner is emitted strictly after it succeeded.
    try:
        model = load_embedding_model()

        nlp = spacy.load("en_core_web_sm")
        doc = nlp(text_input)
        sentences = [
            stripped
            for stripped in (sent.text.strip() for sent in doc.sents)
            if len(stripped) > 10
        ]

        # If we have too few sentences, create artificial chunks. The trailing
        # partial chunk is kept so short inputs still yield one segment.
        if len(sentences) < 3:
            sentences = _split_into_word_chunks(text_input)

        # Limit the number of segments to avoid overwhelming the visualization
        sentences = sentences[:max_segments]

        embeddings = model.encode(sentences)
        vector_count, vector_dims = embeddings.shape[0], embeddings.shape[1]
    except Exception as e:  # boundary: any failure is reported to the user
        return _error_page(str(e))

    output_html.append("""
    <div class="alert alert-success">
        <h4><i class="fas fa-check-circle me-2"></i>Embeddings Generated Successfully!</h4>
        <p class="mb-0">Your text has been processed and converted into high-dimensional vector representations.</p>
    </div>
    """)

    # Text Statistics
    output_html.append(f"""
    <div class="row mb-4">
        <div class="col-12">
            <div class="card">
                <div class="card-header bg-primary text-white">
                    <h4 class="mb-0"><i class="fas fa-chart-bar me-2"></i>Processing Statistics</h4>
                </div>
                <div class="card-body">
                    <div class="row text-center">
                        <div class="col-md-3">
                            <div class="stat-item">
                                <h3 class="text-primary">{len(text_input)}</h3>
                                <p class="text-muted mb-0">Characters</p>
                            </div>
                        </div>
                        <div class="col-md-3">
                            <div class="stat-item">
                                <h3 class="text-success">{len(sentences)}</h3>
                                <p class="text-muted mb-0">Text Segments</p>
                            </div>
                        </div>
                        <div class="col-md-3">
                            <div class="stat-item">
                                <h3 class="text-info">{vector_dims}</h3>
                                <p class="text-muted mb-0">Vector Dimensions</p>
                            </div>
                        </div>
                        <div class="col-md-3">
                            <div class="stat-item">
                                <h3 class="text-warning">{vector_count}</h3>
                                <p class="text-muted mb-0">Embedding Vectors</p>
                            </div>
                        </div>
                    </div>
                </div>
            </div>
        </div>
    </div>
    """)

    # Text Segments Display
    output_html.append("""
    <div class="row mb-4">
        <div class="col-12">
            <div class="card">
                <div class="card-header bg-info text-white">
                    <h4 class="mb-0"><i class="fas fa-list me-2"></i>Text Segments</h4>
                </div>
                <div class="card-body">
                    <div class="row">
    """)

    for position, sentence in enumerate(sentences[:max_displayed_segments], start=1):
        # The segment is user-supplied text: escape it before embedding in HTML.
        output_html.append(f"""
                        <div class="col-md-6 mb-3">
                            <div class="p-3 border rounded bg-light">
                                <h6 class="text-primary mb-2">Segment {position}</h6>
                                <p class="mb-0 small">{html.escape(sentence)}</p>
                            </div>
                        </div>
        """)

    output_html.append("""
                    </div>
                </div>
            </div>
        </div>
    </div>
    """)

    # Semantic Search Interface: example buttons share one template.
    example_queries = [
        ("fa-rocket", "space research"),
        ("fa-users", "scientific collaboration"),
        ("fa-globe", "international project"),
        ("fa-flask", "laboratory experiments"),
        ("fa-building", "space agencies"),
        ("fa-weight", "microgravity environment"),
    ]
    example_buttons = '\n'.join(
        f"""
                            <button onclick="document.getElementById('search-input').value = '{label}'; performSemanticSearch();"
                                    class="btn btn-outline-secondary btn-sm">
                                <i class="fas {icon} me-1"></i>{label}
                            </button>"""
        for icon, label in example_queries
    )

    output_html.append(f"""
    <div class="row mb-4">
        <div class="col-12">
            <div class="card border-warning">
                <div class="card-header bg-warning text-dark">
                    <h4 class="mb-0"><i class="fas fa-search me-2"></i>Semantic Search</h4>
                </div>
                <div class="card-body">
                    <p class="mb-3">Search for content by meaning, not just keywords. The system will find the most semantically similar text segments.</p>

                    <div class="row mb-3">
                        <div class="col-md-10">
                            <input type="text" id="search-input" class="form-control form-control-lg" placeholder="Enter a search query to find similar content...">
                        </div>
                        <div class="col-md-2">
                            <button onclick="performSemanticSearch()" class="btn btn-warning btn-lg w-100">
                                <i class="fas fa-search me-1"></i>Search
                            </button>
                        </div>
                    </div>

                    <div class="mb-3">
                        <h6 class="mb-2"><i class="fas fa-lightbulb me-2"></i>Try these example searches:</h6>
                        <div class="d-flex flex-wrap gap-2">{example_buttons}
                        </div>
                    </div>

                    <div id="search-results" style="display: none;">
                        <hr>
                        <h5><i class="fas fa-list-ol me-2"></i>Search Results:</h5>
                        <div id="results-container" class="border rounded p-3 bg-light" style="max-height: 400px; overflow-y: auto;">
                        </div>
                    </div>
                </div>
            </div>
        </div>
    </div>
    """)

    # Close result-area div
    output_html.append('</div>')
    return '\n'.join(output_html)
201
+
202
def perform_semantic_search(context, query, top_k=5):
    """Rank the sentences of *context* by semantic similarity to *query*.

    Args:
        context: Text to search in; it is split into sentences with spaCy.
        query: Free-text search query.
        top_k: Maximum number of results to return (defaults to 5).

    Returns:
        ``{"success": True, "results": [{"text": str, "score": float}, ...]}``
        with results ordered by descending cosine similarity, or
        ``{"success": False, "error": str}`` when the input is blank or the
        model/pipeline raises.
    """
    # Validate cheaply before loading any model so callers get a clear message.
    if not context or not context.strip():
        return {
            "success": False,
            "error": "No context text was provided to search."
        }
    if not query or not query.strip():
        return {
            "success": False,
            "error": "No search query was provided."
        }

    try:
        # Load model
        model = load_embedding_model()

        # Split context into sentences
        nlp = spacy.load("en_core_web_sm")
        doc = nlp(context)
        sentences = [
            stripped
            for stripped in (sent.text.strip() for sent in doc.sents)
            if len(stripped) > 5
        ]

        # Very short contexts may have no sentence passing the length filter;
        # search the whole text instead of encoding an empty list.
        if not sentences:
            sentences = [context.strip()]

        # Create embeddings
        sentence_embeddings = model.encode(sentences)
        query_embedding = model.encode([query])[0]

        # Calculate similarities
        similarities = util.pytorch_cos_sim(query_embedding, sentence_embeddings)[0].cpu().numpy()

        # Pair each sentence with its score and sort by similarity (descending)
        scored = [
            (sentence, float(score))
            for sentence, score in zip(sentences, similarities)
        ]
        scored.sort(key=lambda pair: pair[1], reverse=True)

        # Return top results
        return {
            "success": True,
            "results": [
                {"text": text, "score": score}
                for text, score in scored[:top_k]
            ]
        }

    except Exception as e:  # boundary: report any failure to the caller
        return {
            "success": False,
            "error": str(e)
        }
requirements.txt ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Core web framework
Flask==3.0.0
flask-cors==4.0.0

# NLP Core Libraries
transformers==4.36.2
torch==2.1.2
tokenizers==0.15.0
sentence-transformers==2.2.2
# sentence-transformers 2.2.2 imports `cached_download`, which newer
# huggingface-hub releases removed; pin a version that still ships it and
# satisfies transformers 4.36.2 (>=0.19.3,<1.0).
huggingface-hub==0.19.4
nltk==3.8.1
spacy==3.7.2

# Data Science Libraries
numpy==1.24.3
pandas==2.0.3
scikit-learn==1.3.2
scipy==1.11.4

# Text Processing
textblob==0.17.1
langdetect==1.0.9

# Visualization (lightweight versions)
matplotlib==3.7.5
plotly==5.17.0
seaborn==0.13.0
# networkx has no 3.1.1 release (3.1 is followed by 3.2); pin the real 3.1.
networkx==3.1
matplotlib-venn==0.11.9

# Utilities
requests==2.31.0
Pillow==10.1.0
faiss-cpu==1.7.4
wordcloud==1.9.3

# Memory optimization
psutil==5.9.6
static/css/components.css ADDED
@@ -0,0 +1,1756 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* Enhanced CSS for NLP Ultimate Tutorial - Component Specific Styles */
2
+
3
+ /* Vector Embeddings Page Enhancements */
4
+ .vector-embeddings-page {
5
+ background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
6
+ min-height: 100vh;
7
+ padding: 20px 0;
8
+ }
9
+
10
+ .model-info-cards .card {
11
+ transition: all 0.3s ease;
12
+ border: none;
13
+ box-shadow: 0 4px 8px rgba(0,0,0,0.1);
14
+ overflow: hidden;
15
+ position: relative;
16
+ }
17
+
18
+ .model-info-cards .card:hover {
19
+ transform: translateY(-5px);
20
+ box-shadow: 0 8px 25px rgba(0,0,0,0.15);
21
+ }
22
+
23
+ .model-info-cards .card::before {
24
+ content: '';
25
+ position: absolute;
26
+ top: 0;
27
+ left: 0;
28
+ right: 0;
29
+ height: 4px;
30
+ background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
31
+ }
32
+
33
+ .model-info-cards .card-header {
34
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
35
+ border: none;
36
+ padding: 20px;
37
+ position: relative;
38
+ }
39
+
40
+ .model-info-cards .card-header.bg-primary {
41
+ background: linear-gradient(135deg, #4e73df 0%, #224abe 100%) !important;
42
+ }
43
+
44
+ .model-info-cards .card-header.bg-success {
45
+ background: linear-gradient(135deg, #1cc88a 0%, #13855c 100%) !important;
46
+ }
47
+
48
+ .model-info-cards .card-header.bg-info {
49
+ background: linear-gradient(135deg, #36b9cc 0%, #258391 100%) !important;
50
+ }
51
+
52
+ .model-info-cards .card-body {
53
+ padding: 25px;
54
+ background: rgba(255,255,255,0.95);
55
+ }
56
+
57
+ .model-info-cards .list-unstyled li {
58
+ padding: 8px 0;
59
+ border-bottom: 1px solid rgba(0,0,0,0.05);
60
+ transition: all 0.2s ease;
61
+ }
62
+
63
+ .model-info-cards .list-unstyled li:hover {
64
+ background: rgba(0,0,0,0.02);
65
+ padding-left: 10px;
66
+ }
67
+
68
+ .model-info-cards .list-unstyled li:last-child {
69
+ border-bottom: none;
70
+ }
71
+
72
+ /* Visualization Cards */
73
+ .visualization-card {
74
+ background: linear-gradient(135deg, #ffffff 0%, #f8f9fa 100%);
75
+ border: none;
76
+ box-shadow: 0 6px 20px rgba(0,0,0,0.1);
77
+ border-radius: 15px;
78
+ overflow: hidden;
79
+ transition: all 0.3s ease;
80
+ }
81
+
82
+ .visualization-card:hover {
83
+ transform: translateY(-3px);
84
+ box-shadow: 0 10px 30px rgba(0,0,0,0.15);
85
+ }
86
+
87
+ .visualization-card .card-header {
88
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
89
+ border: none;
90
+ padding: 20px;
91
+ color: white;
92
+ }
93
+
94
+ .visualization-card .card-body {
95
+ padding: 30px;
96
+ }
97
+
98
+ /* Semantic Search Enhancement */
99
+ .semantic-search-card {
100
+ background: linear-gradient(135deg, #ffeaa7 0%, #fab1a0 100%);
101
+ border: none;
102
+ box-shadow: 0 8px 25px rgba(255,193,7,0.3);
103
+ border-radius: 15px;
104
+ overflow: hidden;
105
+ transition: all 0.3s ease;
106
+ }
107
+
108
+ .semantic-search-card:hover {
109
+ transform: translateY(-2px);
110
+ box-shadow: 0 12px 35px rgba(255,193,7,0.4);
111
+ }
112
+
113
+ .semantic-search-card .card-header {
114
+ background: linear-gradient(135deg, #fdcb6e 0%, #e17055 100%) !important;
115
+ border: none;
116
+ padding: 25px;
117
+ color: white;
118
+ }
119
+
120
+ .semantic-search-card .card-body {
121
+ background: rgba(255,255,255,0.9);
122
+ padding: 30px;
123
+ }
124
+
125
+ .semantic-search-card .form-control-lg {
126
+ border: 2px solid #fdcb6e;
127
+ border-radius: 10px;
128
+ transition: all 0.3s ease;
129
+ box-shadow: 0 2px 10px rgba(253,203,110,0.2);
130
+ }
131
+
132
+ .semantic-search-card .form-control-lg:focus {
133
+ border-color: #e17055;
134
+ box-shadow: 0 4px 15px rgba(225,112,85,0.3);
135
+ transform: translateY(-2px);
136
+ }
137
+
138
+ .semantic-search-card .btn-warning {
139
+ background: linear-gradient(135deg, #fdcb6e 0%, #e17055 100%);
140
+ border: none;
141
+ border-radius: 10px;
142
+ font-weight: 600;
143
+ transition: all 0.3s ease;
144
+ box-shadow: 0 4px 15px rgba(253,203,110,0.4);
145
+ }
146
+
147
+ .semantic-search-card .btn-warning:hover {
148
+ transform: translateY(-2px);
149
+ box-shadow: 0 6px 20px rgba(225,112,85,0.5);
150
+ }
151
+
152
+ /* Example Search Buttons */
153
+ .example-search-card {
154
+ background: linear-gradient(135deg, #a8edea 0%, #fed6e3 100%);
155
+ border: none;
156
+ box-shadow: 0 6px 20px rgba(168,237,234,0.3);
157
+ border-radius: 15px;
158
+ overflow: hidden;
159
+ }
160
+
161
+ .example-search-card .card-header {
162
+ background: linear-gradient(135deg, #74b9ff 0%, #0984e3 100%) !important;
163
+ border: none;
164
+ padding: 20px;
165
+ color: white;
166
+ }
167
+
168
+ .example-search-card .card-body {
169
+ background: rgba(255,255,255,0.8);
170
+ padding: 25px;
171
+ }
172
+
173
+ .example-search-card .btn-outline-secondary {
174
+ border: 2px solid #74b9ff;
175
+ color: #0984e3;
176
+ background: rgba(255,255,255,0.9);
177
+ border-radius: 25px;
178
+ padding: 10px 20px;
179
+ margin: 5px;
180
+ font-weight: 500;
181
+ transition: all 0.3s ease;
182
+ box-shadow: 0 2px 10px rgba(116,185,255,0.2);
183
+ }
184
+
185
+ .example-search-card .btn-outline-secondary:hover {
186
+ background: linear-gradient(135deg, #74b9ff 0%, #0984e3 100%);
187
+ color: white;
188
+ transform: translateY(-2px);
189
+ box-shadow: 0 4px 15px rgba(116,185,255,0.4);
190
+ }
191
+
192
+ /* Process Flow Cards */
193
+ .process-flow-card {
194
+ background: linear-gradient(135deg, #e8f4fd 0%, #d1ecf1 100%);
195
+ border: none;
196
+ border-radius: 15px;
197
+ box-shadow: 0 4px 15px rgba(0,0,0,0.1);
198
+ transition: all 0.3s ease;
199
+ overflow: hidden;
200
+ }
201
+
202
+ .process-flow-card:hover {
203
+ transform: translateY(-3px);
204
+ box-shadow: 0 8px 25px rgba(0,0,0,0.15);
205
+ }
206
+
207
+ .process-flow-card h4 {
208
+ color: #0984e3;
209
+ font-weight: 600;
210
+ margin-bottom: 15px;
211
+ }
212
+
213
+ .process-flow-card p {
214
+ color: #636e72;
215
+ line-height: 1.6;
216
+ }
217
+
218
+ /* Info Alert Enhancements */
219
+ .vector-embeddings-page .alert-info {
220
+ background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%);
221
+ border: none;
222
+ border-left: 5px solid #2196f3;
223
+ border-radius: 10px;
224
+ box-shadow: 0 3px 10px rgba(33,150,243,0.2);
225
+ }
226
+
227
+ .vector-embeddings-page .alert-success {
228
+ background: linear-gradient(135deg, #e8f5e8 0%, #c8e6c9 100%);
229
+ border: none;
230
+ border-left: 5px solid #4caf50;
231
+ border-radius: 10px;
232
+ box-shadow: 0 3px 10px rgba(76,175,80,0.2);
233
+ }
234
+
235
+ /* Chart Container Enhancements */
236
+ .chart-container {
237
+ background: white;
238
+ border-radius: 15px;
239
+ padding: 20px;
240
+ box-shadow: 0 4px 15px rgba(0,0,0,0.1);
241
+ margin: 20px 0;
242
+ transition: all 0.3s ease;
243
+ }
244
+
245
+ .chart-container:hover {
246
+ box-shadow: 0 8px 25px rgba(0,0,0,0.15);
247
+ }
248
+
249
+ /* Animation for page load */
250
+ @keyframes fadeInUp {
251
+ from {
252
+ opacity: 0;
253
+ transform: translateY(30px);
254
+ }
255
+ to {
256
+ opacity: 1;
257
+ transform: translateY(0);
258
+ }
259
+ }
260
+
261
+ .vector-embeddings-page .card {
262
+ animation: fadeInUp 0.6s ease forwards;
263
+ }
264
+
265
+ .vector-embeddings-page .card:nth-child(1) { animation-delay: 0.1s; }
266
+ .vector-embeddings-page .card:nth-child(2) { animation-delay: 0.2s; }
267
+ .vector-embeddings-page .card:nth-child(3) { animation-delay: 0.3s; }
268
+ .vector-embeddings-page .card:nth-child(4) { animation-delay: 0.4s; }
269
+
270
+ /* Enhanced Vector Embeddings Components */
271
+ .feature-list .d-flex {
272
+ transition: all 0.2s ease;
273
+ border-radius: 8px;
274
+ padding: 8px;
275
+ margin: 0 -8px 8px -8px;
276
+ }
277
+
278
+ .feature-list .d-flex:hover {
279
+ background: rgba(0,0,0,0.02);
280
+ transform: translateX(5px);
281
+ }
282
+
283
+ .process-step-icon {
284
+ transition: all 0.3s ease;
285
+ box-shadow: 0 4px 15px rgba(0,0,0,0.1);
286
+ }
287
+
288
+ .process-step-icon:hover {
289
+ transform: scale(1.1);
290
+ box-shadow: 0 8px 25px rgba(0,0,0,0.2);
291
+ }
292
+
293
+ .progress-bar {
294
+ transition: width 1s ease-in-out;
295
+ }
296
+
297
+ .bg-gradient-primary {
298
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
299
+ }
300
+
301
+ .bg-gradient-info {
302
+ background: linear-gradient(135deg, #36b9cc 0%, #258391 100%) !important;
303
+ }
304
+
305
+ /* Model Information Cards Enhancement */
306
+ .model-info-cards .card {
307
+ transition: all 0.4s cubic-bezier(0.25, 0.8, 0.25, 1);
308
+ border-radius: 12px;
309
+ }
310
+
311
+ .model-info-cards .card:hover {
312
+ transform: translateY(-8px) scale(1.02);
313
+ box-shadow: 0 15px 40px rgba(0,0,0,0.15);
314
+ }
315
+
316
+ .model-info-cards .card-header {
317
+ border-radius: 12px 12px 0 0;
318
+ }
319
+
320
+ /* Process Flow Enhancement */
321
+ .process-flow-card h5 {
322
+ position: relative;
323
+ }
324
+
325
+ .process-flow-card h5::after {
326
+ content: '';
327
+ position: absolute;
328
+ bottom: -8px;
329
+ left: 50%;
330
+ transform: translateX(-50%);
331
+ width: 40px;
332
+ height: 2px;
333
+ background: currentColor;
334
+ opacity: 0.3;
335
+ }
336
+
337
+ /* Interactive Elements */
338
+ .btn-outline-secondary:hover {
339
+ transform: translateY(-2px) scale(1.05);
340
+ box-shadow: 0 6px 20px rgba(0,0,0,0.15);
341
+ }
342
+
343
+ /* Chart Container Improvements */
344
+ .chart-container {
345
+ position: relative;
346
+ overflow: hidden;
347
+ }
348
+
349
+ .chart-container::before {
350
+ content: '';
351
+ position: absolute;
352
+ top: -2px;
353
+ left: -2px;
354
+ right: -2px;
355
+ bottom: -2px;
356
+ background: linear-gradient(45deg, #667eea, #764ba2, #36b9cc, #1cc88a);
357
+ border-radius: 17px;
358
+ z-index: -1;
359
+ opacity: 0;
360
+ transition: opacity 0.3s ease;
361
+ }
362
+
363
+ .chart-container:hover::before {
364
+ opacity: 0.1;
365
+ }
366
+
367
+ /* Responsive Improvements */
368
+ @media (max-width: 768px) {
369
+ .model-info-cards .card {
370
+ margin-bottom: 1.5rem;
371
+ }
372
+
373
+ .process-step-icon {
374
+ width: 50px !important;
375
+ height: 50px !important;
376
+ }
377
+
378
+ .feature-list .d-flex:hover {
379
+ transform: none;
380
+ }
381
+ }
382
+
383
+ /* POS Tagging specific styles */
384
+ .pos-token {
385
+ display: inline-block;
386
+ margin: 2px;
387
+ padding: 4px 8px;
388
+ border-radius: 4px;
389
+ font-size: 0.9em;
390
+ transition: all 0.2s ease;
391
+ }
392
+
393
+ .pos-token:hover {
394
+ transform: scale(1.05);
395
+ box-shadow: 0 2px 4px rgba(0,0,0,0.2);
396
+ }
397
+
398
+ /* Named Entity Recognition specific styles */
399
+ .entity-token {
400
+ display: inline-block;
401
+ margin: 2px;
402
+ padding: 4px 8px;
403
+ border-radius: 4px;
404
+ font-size: 0.9em;
405
+ transition: all 0.2s ease;
406
+ cursor: pointer;
407
+ }
408
+
409
+ .entity-token:hover {
410
+ transform: scale(1.05);
411
+ box-shadow: 0 2px 4px rgba(0,0,0,0.2);
412
+ }
413
+
414
+ /* Sentiment Analysis specific styles */
415
+ .sentiment-gauge {
416
+ position: relative;
417
+ width: 200px;
418
+ height: 200px;
419
+ margin: 0 auto;
420
+ }
421
+
422
+ .sentiment-score {
423
+ position: absolute;
424
+ top: 50%;
425
+ left: 50%;
426
+ transform: translate(-50%, -50%);
427
+ font-size: 2rem;
428
+ font-weight: bold;
429
+ }
430
+
431
+ .sentiment-label {
432
+ position: absolute;
433
+ bottom: -30px;
434
+ left: 50%;
435
+ transform: translateX(-50%);
436
+ font-size: 1.2rem;
437
+ font-weight: 600;
438
+ }
439
+
440
+ /* Text Generation specific styles */
441
+ .generated-text {
442
+ background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
443
+ border-left: 4px solid var(--primary-color);
444
+ padding: 15px;
445
+ margin: 10px 0;
446
+ border-radius: 8px;
447
+ font-family: 'Georgia', serif;
448
+ line-height: 1.6;
449
+ }
450
+
451
+ .prompt-text {
452
+ color: #666;
453
+ font-style: italic;
454
+ }
455
+
456
+ .generated-content {
457
+ color: var(--primary-color);
458
+ font-weight: 500;
459
+ }
460
+
461
+ /* Translation specific styles */
462
+ .translation-pair {
463
+ display: flex;
464
+ gap: 20px;
465
+ margin: 20px 0;
466
+ }
467
+
468
+ .source-text, .target-text {
469
+ flex: 1;
470
+ padding: 15px;
471
+ border-radius: 8px;
472
+ border: 2px solid #e0e0e0;
473
+ }
474
+
475
+ .source-text {
476
+ background: linear-gradient(135deg, #E3F2FD 0%, #BBDEFB 100%);
477
+ border-color: var(--primary-color);
478
+ }
479
+
480
+ .target-text {
481
+ background: linear-gradient(135deg, #E8F5E9 0%, #C8E6C9 100%);
482
+ border-color: var(--success-color);
483
+ }
484
+
485
+ .language-badge {
486
+ display: inline-block;
487
+ padding: 4px 12px;
488
+ border-radius: 20px;
489
+ font-size: 0.8em;
490
+ font-weight: 600;
491
+ margin-bottom: 10px;
492
+ }
493
+
494
+ /* Classification specific styles */
495
+ .classification-result {
496
+ display: flex;
497
+ align-items: center;
498
+ padding: 10px;
499
+ margin: 5px 0;
500
+ border-radius: 8px;
501
+ background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
502
+ transition: all 0.2s ease;
503
+ }
504
+
505
+ .classification-result:hover {
506
+ transform: translateX(5px);
507
+ box-shadow: 0 2px 8px rgba(0,0,0,0.1);
508
+ }
509
+
510
+ .classification-label {
511
+ flex: 1;
512
+ font-weight: 600;
513
+ }
514
+
515
+ .classification-score {
516
+ font-size: 1.2rem;
517
+ font-weight: bold;
518
+ margin-left: 10px;
519
+ }
520
+
521
+ /* Vector Embeddings specific styles */
522
+ .embedding-visualization {
523
+ text-align: center;
524
+ margin: 20px 0;
525
+ }
526
+
527
+ .similarity-matrix {
528
+ display: grid;
529
+ gap: 2px;
530
+ margin: 20px 0;
531
+ }
532
+
533
+ .similarity-cell {
534
+ padding: 10px;
535
+ text-align: center;
536
+ border-radius: 4px;
537
+ font-weight: 600;
538
+ transition: all 0.2s ease;
539
+ }
540
+
541
+ .similarity-cell:hover {
542
+ transform: scale(1.1);
543
+ z-index: 10;
544
+ position: relative;
545
+ }
546
+
547
+ /* Search results specific styles */
548
+ .search-result {
549
+ padding: 15px;
550
+ margin: 10px 0;
551
+ border-radius: 8px;
552
+ border-left: 4px solid var(--primary-color);
553
+ background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
554
+ transition: all 0.2s ease;
555
+ }
556
+
557
+ .search-result:hover {
558
+ transform: translateY(-2px);
559
+ box-shadow: 0 4px 12px rgba(0,0,0,0.1);
560
+ }
561
+
562
+ .search-score {
563
+ font-size: 0.9em;
564
+ color: var(--primary-color);
565
+ font-weight: 600;
566
+ margin-top: 5px;
567
+ }
568
+
569
+ /* Chart containers */
570
+ .chart-wrapper {
571
+ position: relative;
572
+ margin: 20px 0;
573
+ padding: 20px;
574
+ background: white;
575
+ border-radius: 10px;
576
+ box-shadow: 0 2px 8px rgba(0,0,0,0.1);
577
+ }
578
+
579
+ [data-theme="dark"] .chart-wrapper {
580
+ background: #2d2d2d;
581
+ }
582
+
583
+ /* Loading animations */
584
+ .loading-dots {
585
+ display: inline-block;
586
+ }
587
+
588
+ .loading-dots::after {
589
+ content: '';
590
+ animation: dots 1.5s steps(4, end) infinite;
591
+ }
592
+
593
+ @keyframes dots {
594
+ 0%, 20% { content: ''; }
595
+ 40% { content: '.'; }
596
+ 60% { content: '..'; }
597
+ 80%, 100% { content: '...'; }
598
+ }
599
+
600
+ /* Pulse animation for important elements */
601
+ .pulse {
602
+ animation: pulse 2s infinite;
603
+ }
604
+
605
+ @keyframes pulse {
606
+ 0% { transform: scale(1); }
607
+ 50% { transform: scale(1.05); }
608
+ 100% { transform: scale(1); }
609
+ }
610
+
611
+ /* Slide animations */
612
+ .slide-in-left {
613
+ animation: slideInLeft 0.5s ease-out;
614
+ }
615
+
616
+ .slide-in-right {
617
+ animation: slideInRight 0.5s ease-out;
618
+ }
619
+
620
+ @keyframes slideInLeft {
621
+ from { transform: translateX(-100%); opacity: 0; }
622
+ to { transform: translateX(0); opacity: 1; }
623
+ }
624
+
625
+ @keyframes slideInRight {
626
+ from { transform: translateX(100%); opacity: 0; }
627
+ to { transform: translateX(0); opacity: 1; }
628
+ }
629
+
630
+ /* Bounce animation for success states */
631
+ .bounce-in {
632
+ animation: bounceIn 0.6s ease-out;
633
+ }
634
+
635
+ @keyframes bounceIn {
636
+ 0% { transform: scale(0.3); opacity: 0; }
637
+ 50% { transform: scale(1.05); }
638
+ 70% { transform: scale(0.9); }
639
+ 100% { transform: scale(1); opacity: 1; }
640
+ }
641
+
642
+ /* Shake animation for error states */
643
+ .shake {
644
+ animation: shake 0.5s ease-in-out;
645
+ }
646
+
647
+ @keyframes shake {
648
+ 0%, 100% { transform: translateX(0); }
649
+ 10%, 30%, 50%, 70%, 90% { transform: translateX(-5px); }
650
+ 20%, 40%, 60%, 80% { transform: translateX(5px); }
651
+ }
652
+
653
+ /* Gradient text effects */
654
+ .gradient-text {
655
+ background: linear-gradient(135deg, var(--primary-color) 0%, var(--success-color) 100%);
656
+ -webkit-background-clip: text;
657
+ -webkit-text-fill-color: transparent;
658
+ background-clip: text;
659
+ font-weight: 700;
660
+ }
661
+
662
+ /* Custom scrollbar for specific containers */
663
+ .custom-scrollbar {
664
+ scrollbar-width: thin;
665
+ scrollbar-color: var(--primary-color) #f1f1f1;
666
+ }
667
+
668
+ .custom-scrollbar::-webkit-scrollbar {
669
+ width: 6px;
670
+ }
671
+
672
+ .custom-scrollbar::-webkit-scrollbar-track {
673
+ background: #f1f1f1;
674
+ border-radius: 3px;
675
+ }
676
+
677
+ .custom-scrollbar::-webkit-scrollbar-thumb {
678
+ background: var(--primary-color);
679
+ border-radius: 3px;
680
+ }
681
+
682
+ .custom-scrollbar::-webkit-scrollbar-thumb:hover {
683
+ background: var(--primary-hover);
684
+ }
685
+
686
+ /* Responsive design enhancements */
687
+ @media (max-width: 576px) {
688
+ .translation-pair {
689
+ flex-direction: column;
690
+ }
691
+
692
+ .classification-result {
693
+ flex-direction: column;
694
+ text-align: center;
695
+ }
696
+
697
+ .classification-score {
698
+ margin-left: 0;
699
+ margin-top: 5px;
700
+ }
701
+
702
+ .sentiment-gauge {
703
+ width: 150px;
704
+ height: 150px;
705
+ }
706
+
707
+ .sentiment-score {
708
+ font-size: 1.5rem;
709
+ }
710
+ }
711
+
712
+ /* Print styles */
713
+ @media print {
714
+ .navbar, .btn, .alert {
715
+ display: none !important;
716
+ }
717
+
718
+ .card {
719
+ border: 1px solid #000 !important;
720
+ box-shadow: none !important;
721
+ }
722
+
723
+ body {
724
+ background: white !important;
725
+ color: black !important;
726
+ }
727
+ }
728
+
729
+ /* High contrast mode support ("more" is the standard prefers-contrast value; "high" is not a recognized value and never matches) */
730
+ @media (prefers-contrast: more) {
731
+ :root {
732
+ --primary-color: #0000FF;
733
+ --success-color: #008000;
734
+ --warning-color: #FF8000;
735
+ --danger-color: #FF0000;
736
+ }
737
+
738
+ .card {
739
+ border: 2px solid #000;
740
+ }
741
+
742
+ .btn {
743
+ border: 2px solid #000;
744
+ }
745
+ }
746
+
747
+ /* Reduced motion support */
748
+ @media (prefers-reduced-motion: reduce) {
749
+ * {
750
+ animation-duration: 0.01ms !important;
751
+ animation-iteration-count: 1 !important;
752
+ transition-duration: 0.01ms !important;
753
+ }
754
+ }
755
+
756
+ /* Focus styles for accessibility */
757
+ .btn:focus,
758
+ .form-control:focus,
759
+ .form-select:focus {
760
+ outline: 3px solid var(--primary-color);
761
+ outline-offset: 2px;
762
+ }
763
+
764
+ /* Skip link for screen readers */
765
+ .skip-link {
766
+ position: absolute;
767
+ top: -40px;
768
+ left: 6px;
769
+ background: var(--primary-color);
770
+ color: white;
771
+ padding: 8px;
772
+ text-decoration: none;
773
+ border-radius: 4px;
774
+ z-index: 1000;
775
+ }
776
+
777
+ .skip-link:focus {
778
+ top: 6px;
779
+ }
780
+
781
+ /* ARIA live region for dynamic content */
782
+ .aria-live {
783
+ position: absolute;
784
+ left: -10000px;
785
+ width: 1px;
786
+ height: 1px;
787
+ overflow: hidden;
788
+ }
789
+
790
+ /* Prevent horizontal scrolling in text containers */
791
+ .card-body {
792
+ overflow-x: hidden;
793
+ max-width: 100%;
794
+ }
795
+
796
+ .text-content {
797
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
798
+ line-height: 1.6;
799
+ font-size: 14px;
800
+ color: #333;
801
+ word-wrap: break-word;
802
+ word-break: break-word;
803
+ overflow-wrap: break-word;
804
+ white-space: normal;
805
+ max-width: 100%;
806
+ overflow-x: hidden;
807
+ overflow-y: auto;
808
+ padding: 15px;
809
+ background-color: #f8f9fa;
810
+ border-radius: 5px;
811
+ border: 1px solid #e9ecef;
812
+ margin-bottom: 15px;
813
+ }
814
+
815
+ [data-theme="dark"] .text-content {
816
+ background-color: #2d2d2d !important;
817
+ color: #e0e0e0 !important;
818
+ border-color: #555 !important;
819
+ }
820
+
821
+ /* Responsive text containers */
822
+ .text-container {
823
+ max-width: 100%;
824
+ overflow-x: auto;
825
+ }
826
+
827
+ @media (max-width: 768px) {
828
+ .text-content {
829
+ font-size: 13px;
830
+ padding: 10px !important;
831
+ max-height: 150px !important;
832
+ }
833
+ }
834
+
835
+ /* Additional text wrapping rules */
836
+ * {
837
+ box-sizing: border-box;
838
+ }
839
+
840
+ .container, .container-fluid {
841
+ overflow-x: hidden;
842
+ max-width: 100%;
843
+ }
844
+
845
+ /* Ensure all text elements wrap properly */
846
+ p, div, span, pre, code {
847
+ word-wrap: break-word;
848
+ word-break: break-word;
849
+ overflow-wrap: break-word;
850
+ max-width: 100%;
851
+ }
852
+
853
+ /* Enhanced table styling for multi-column layouts */
854
+ .table-responsive {
855
+ border-radius: 8px;
856
+ overflow: hidden;
857
+ box-shadow: 0 2px 8px rgba(0,0,0,0.1);
858
+ }
859
+
860
+ .table th {
861
+ font-weight: 600;
862
+ font-size: 0.9em;
863
+ padding: 12px 8px;
864
+ text-align: center !important;
865
+ vertical-align: middle !important;
866
+ background-color: var(--primary-color) !important;
867
+ color: white !important;
868
+ border: none !important;
869
+ }
870
+
871
+ .table td {
872
+ padding: 10px 8px;
873
+ text-align: center !important;
874
+ vertical-align: middle !important;
875
+ font-size: 0.9em;
876
+ border: 1px solid #dee2e6;
877
+ }
878
+
879
+ .table td code {
880
+ background-color: #f8f9fa;
881
+ padding: 2px 6px;
882
+ border-radius: 4px;
883
+ font-size: 0.85em;
884
+ color: #e83e8c;
885
+ display: inline-block;
886
+ }
887
+
888
+ [data-theme="dark"] .table td code {
889
+ background-color: #2d2d2d;
890
+ color: #ff6b9d;
891
+ }
892
+
893
+ .table td .badge {
894
+ font-size: 0.8em;
895
+ padding: 4px 8px;
896
+ display: inline-block;
897
+ }
898
+
899
+ /* Fix table header alignment */
900
+ .table thead th {
901
+ text-align: center !important;
902
+ vertical-align: middle !important;
903
+ white-space: nowrap;
904
+ }
905
+
906
+ /* Ensure proper table structure */
907
+ .table {
908
+ margin-bottom: 0;
909
+ width: 100%;
910
+ table-layout: fixed;
911
+ }
912
+
913
+ .table tbody tr:nth-of-type(odd) {
914
+ background-color: rgba(0,0,0,.02);
915
+ }
916
+
917
+ .table tbody tr:hover {
918
+ background-color: rgba(0,0,0,.075);
919
+ }
920
+
921
+ /* Responsive table adjustments */
922
+ @media (max-width: 768px) {
923
+ .table th, .table td {
924
+ padding: 8px 4px;
925
+ font-size: 0.8em;
926
+ }
927
+
928
+ .table td code {
929
+ font-size: 0.75em;
930
+ padding: 1px 4px;
931
+ }
932
+
933
+ .table td .badge {
934
+ font-size: 0.7em;
935
+ padding: 2px 6px;
936
+ }
937
+ }
938
+
939
+ /* Additional table header fixes */
940
+ .table-primary th {
941
+ background-color: var(--primary-color) !important;
942
+ color: white !important;
943
+ text-align: center !important;
944
+ vertical-align: middle !important;
945
+ font-weight: 600 !important;
946
+ border: none !important;
947
+ padding: 12px 8px !important;
948
+ }
949
+
950
+ /* Override Bootstrap table styles */
951
+ .table thead th {
952
+ border-bottom: 2px solid #dee2e6 !important;
953
+ text-align: center !important;
954
+ vertical-align: middle !important;
955
+ }
956
+
957
+ /* Ensure consistent column widths */
958
+ .table th[style*="width"] {
959
+ text-align: center !important;
960
+ }
961
+
962
+ /* Fix any alignment issues */
963
+ .table td, .table th {
964
+ text-align: center !important;
965
+ vertical-align: middle !important;
966
+ }
967
+
968
+ /* ===== VERTICAL SPACING AND TITLE HIERARCHY ===== */
969
+
970
+ /* Main section headers */
971
+ .task-subheader {
972
+ font-size: 1.5rem;
973
+ font-weight: 600;
974
+ color: var(--primary-color);
975
+ margin-top: 2rem;
976
+ margin-bottom: 1.5rem;
977
+ padding-bottom: 0.5rem;
978
+ border-bottom: 2px solid var(--primary-color);
979
+ position: relative;
980
+ }
981
+
982
+ .task-subheader:first-child {
983
+ margin-top: 1rem;
984
+ }
985
+
986
+ .task-subheader::after {
987
+ content: '';
988
+ position: absolute;
989
+ bottom: -2px;
990
+ left: 0;
991
+ width: 50px;
992
+ height: 2px;
993
+ background-color: var(--accent-color);
994
+ }
995
+
996
+ /* Subsection headers: left accent bar on a light background (applies to every h4 element) */
997
+ h4 {
998
+ font-size: 1.25rem;
999
+ font-weight: 600;
1000
+ color: #2c3e50;
1001
+ margin-top: 1.5rem;
1002
+ margin-bottom: 1rem;
1003
+ padding-left: 0.5rem; /* NOTE: no effect, the `padding` shorthand later in this rule resets padding-left to 1rem */
1004
+ border-left: 4px solid var(--primary-color);
1005
+ background-color: #f8f9fa;
1006
+ padding: 0.75rem 1rem;
1007
+ border-radius: 0 5px 5px 0;
1008
+ }
1009
+
1010
+ [data-theme="dark"] h4 {
1011
+ color: #e0e0e0;
1012
+ background-color: #2d2d2d;
1013
+ border-left-color: var(--primary-color);
1014
+ }
1015
+
1016
+ /* Card spacing */
1017
+ .card {
1018
+ margin-bottom: 1.5rem;
1019
+ box-shadow: 0 2px 8px rgba(0,0,0,0.1);
1020
+ border: none;
1021
+ border-radius: 8px;
1022
+ }
1023
+
1024
+ .card-header {
1025
+ background-color: var(--primary-color);
1026
+ color: white;
1027
+ border-radius: 8px 8px 0 0 !important;
1028
+ padding: 1rem 1.25rem;
1029
+ border-bottom: none;
1030
+ }
1031
+
1032
+ .card-header h5 {
1033
+ margin: 0;
1034
+ font-weight: 600;
1035
+ font-size: 1.1rem;
1036
+ }
1037
+
1038
+ .card-body {
1039
+ padding: 1.25rem;
1040
+ background-color: white;
1041
+ border-radius: 0 0 8px 8px;
1042
+ }
1043
+
1044
+ [data-theme="dark"] .card-body {
1045
+ background-color: #2d2d2d;
1046
+ }
1047
+
1048
+ /* Row spacing */
1049
+ .row {
1050
+ margin-bottom: 1.5rem;
1051
+ }
1052
+
1053
+ .row:last-child {
1054
+ margin-bottom: 0;
1055
+ }
1056
+
1057
+ /* Alert boxes spacing */
1058
+ .alert {
1059
+ margin: 1.5rem 0;
1060
+ padding: 1rem 1.25rem;
1061
+ border-radius: 8px;
1062
+ border: none;
1063
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
1064
+ }
1065
+
1066
+ .alert h4 {
1067
+ margin-top: 0;
1068
+ margin-bottom: 0.75rem;
1069
+ font-size: 1.1rem;
1070
+ }
1071
+
1072
+ .alert ul {
1073
+ margin-bottom: 0;
1074
+ }
1075
+
1076
+ .alert li {
1077
+ margin-bottom: 0.5rem;
1078
+ }
1079
+
1080
+ /* Statistics boxes */
1081
+ .stats-container {
1082
+ margin: 1.5rem 0;
1083
+ }
1084
+
1085
+ .stats-container .col-md-4 {
1086
+ margin-bottom: 1rem;
1087
+ }
1088
+
1089
+ /* Base stats card layout */
1090
+ .stats-container .card {
1091
+ text-align: center;
1092
+ }
1093
+
1094
+ /* Dark-mode stats styling (gradient + white text) */
1095
+ [data-theme="dark"] .stats-container .card {
1096
+ background: linear-gradient(135deg, var(--primary-color), var(--accent-color)) !important;
1097
+ color: #fff !important;
1098
+ border: none !important;
1099
+ }
1100
+
1101
+ .stats-container .card-body {
1102
+ background: transparent !important;
1103
+ padding: 1.5rem 1rem;
1104
+ }
1105
+
1106
+ /* Text colors for dark-mode stats */
1107
+ [data-theme="dark"] .stats-container .card-body h3,
1108
+ [data-theme="dark"] .stats-container .card-body p,
1109
+ [data-theme="dark"] .stats-container .card-body h2,
1110
+ [data-theme="dark"] .stats-container .card-title {
1111
+ color: #fff !important;
1112
+ }
1113
+
1114
+ .stats-container .card-body h3 {
1115
+ font-size: 2rem;
1116
+ font-weight: 700;
1117
+ margin-bottom: 0.5rem;
1118
+ }
1119
+
1120
+ .stats-container .card-body p {
1121
+ margin: 0;
1122
+ font-size: 0.9rem;
1123
+ opacity: 0.9;
1124
+ }
1125
+
1126
+ /* Table spacing */
1127
+ .table-responsive {
1128
+ margin: 1rem 0 1.5rem 0;
1129
+ }
1130
+
1131
+ /* Chart spacing */
1132
+ .chart-container {
1133
+ margin: 1.5rem 0;
1134
+ padding: 1rem;
1135
+ background-color: white;
1136
+ border-radius: 8px;
1137
+ box-shadow: 0 2px 8px rgba(0,0,0,0.1);
1138
+ text-align: center;
1139
+ }
1140
+
1141
+ [data-theme="dark"] .chart-container {
1142
+ background-color: #2d2d2d;
1143
+ }
1144
+
1145
+ .chart-container img {
1146
+ max-width: 100%;
1147
+ height: auto;
1148
+ border-radius: 4px;
1149
+ }
1150
+
1151
+ /* Paragraph spacing */
1152
+ p {
1153
+ margin-bottom: 1rem;
1154
+ line-height: 1.6;
1155
+ }
1156
+
1157
+ p:last-child {
1158
+ margin-bottom: 0;
1159
+ }
1160
+
1161
+ /* Small text spacing */
1162
+ small {
1163
+ margin-top: 0.5rem;
1164
+ display: block;
1165
+ color: #6c757d;
1166
+ }
1167
+
1168
+ [data-theme="dark"] small {
1169
+ color: #adb5bd;
1170
+ }
1171
+
1172
+ /* Button spacing */
1173
+ .btn {
1174
+ margin: 0.25rem;
1175
+ border-radius: 6px;
1176
+ }
1177
+
1178
+ /* List spacing */
1179
+ ul, ol {
1180
+ margin-bottom: 1rem;
1181
+ padding-left: 1.5rem;
1182
+ }
1183
+
1184
+ ul li, ol li {
1185
+ margin-bottom: 0.5rem;
1186
+ line-height: 1.6;
1187
+ }
1188
+
1189
+ /* Collapsible content spacing */
1190
+ .collapse {
1191
+ margin-top: 0.5rem;
1192
+ }
1193
+
1194
+ /* Section dividers */
1195
+ .section-divider {
1196
+ height: 2px;
1197
+ background: linear-gradient(90deg, var(--primary-color), var(--accent-color), var(--primary-color));
1198
+ margin: 2rem 0;
1199
+ border-radius: 1px;
1200
+ }
1201
+
1202
+ /* Responsive spacing adjustments */
1203
+ @media (max-width: 768px) {
1204
+ .task-subheader {
1205
+ font-size: 1.3rem;
1206
+ margin-top: 1.5rem;
1207
+ margin-bottom: 1rem;
1208
+ }
1209
+
1210
+ h4 {
1211
+ font-size: 1.1rem;
1212
+ margin-top: 1.25rem;
1213
+ margin-bottom: 0.75rem;
1214
+ }
1215
+
1216
+ .card {
1217
+ margin-bottom: 1rem;
1218
+ }
1219
+
1220
+ .alert {
1221
+ margin: 1rem 0;
1222
+ }
1223
+
1224
+ .stats-container .card-body h3 {
1225
+ font-size: 1.5rem;
1226
+ }
1227
+ }
1228
+
1229
+ /* Quick Navigation styling (scoped) */
1230
+ .quick-nav.card {
1231
+ border: none;
1232
+ box-shadow: 0 6px 18px rgba(0,0,0,0.08);
1233
+ border-radius: 12px;
1234
+ overflow: hidden;
1235
+ }
1236
+ .quick-nav .card-header { /* NOTE: a later rule with this same selector ("Quick Nav title color fixes") also sets background and color with !important and wins */
1237
+ background: linear-gradient(135deg, var(--primary-color), var(--accent-color)) !important; /* overridden later by background: transparent !important */
1238
+ color: #fff !important; /* overridden later by color: #0d6efd !important */
1239
+ border-radius: 12px 12px 0 0 !important;
1240
+ padding: 0.875rem 1.25rem;
1241
+ }
1242
+ .quick-nav .card-header h5 { margin: 0; font-weight: 700; letter-spacing: .2px; }
1243
+
1244
+ .quick-nav h6 {
1245
+ color: #2c3e50;
1246
+ font-weight: 700;
1247
+ margin: .75rem 0 .5rem 0;
1248
+ display: flex; align-items: center; gap: .5rem;
1249
+ }
1250
+
1251
+ .quick-nav .btn {
1252
+ border-width: 2px;
1253
+ font-weight: 600;
1254
+ border-radius: 28px;
1255
+ padding: 10px 16px;
1256
+ display: inline-flex; align-items: center; gap: .35rem;
1257
+ transition: all .15s ease;
1258
+ }
1259
+ .quick-nav .btn i { margin: 0; font-size: .95rem; }
1260
+ .quick-nav .btn:hover { transform: translateY(-1px); box-shadow: 0 6px 12px rgba(0,0,0,.08); }
1261
+
1262
+ /* subtle background so outline buttons look like chips */
1263
+ .quick-nav .btn-outline-primary,
1264
+ .quick-nav .btn-outline-success,
1265
+ .quick-nav .btn-outline-info { background-color: #f8f9fa; }
1266
+
1267
+ /* layout tweaks */
1268
+ .quick-nav .row { row-gap: .75rem; }
1269
+
1270
+ @media (max-width: 768px) {
1271
+ .quick-nav .btn { width: 100%; justify-content: center; }
1272
+ }
1273
+
1274
+ /* WORD FREQUENCY OVERLAP FIX - Force vertical stacking */
1275
+ .wf-chart-section {
1276
+ display: block !important;
1277
+ width: 100% !important;
1278
+ float: none !important;
1279
+ clear: both !important;
1280
+ position: static !important;
1281
+ margin-bottom: 4rem !important;
1282
+ overflow: hidden !important;
1283
+ page-break-inside: avoid !important;
1284
+ }
1285
+
1286
+ .wf-table-container {
1287
+ display: block !important;
1288
+ width: 100% !important;
1289
+ float: none !important;
1290
+ clear: both !important;
1291
+ position: static !important;
1292
+ margin-top: 4rem !important;
1293
+ overflow: visible !important;
1294
+ page-break-inside: avoid !important;
1295
+ }
1296
+
1297
+ /* Force chart container to be completely separate */
1298
+ .wf-chart-section .chart-container {
1299
+ display: block !important;
1300
+ width: 100% !important;
1301
+ float: none !important;
1302
+ clear: both !important;
1303
+ position: static !important;
1304
+ margin: 0 !important;
1305
+ }
1306
+
1307
+ .chart-container {
1308
+ display: block !important;
1309
+ width: 100% !important;
1310
+ height: auto !important;
1311
+ min-height: auto !important;
1312
+ max-height: none !important;
1313
+ margin: 1.5rem 0 !important;
1314
+ position: static !important;
1315
+ z-index: auto !important;
1316
+ clear: both !important;
1317
+ overflow: visible !important;
1318
+ }
1319
+
1320
+ .chart-container img, .chart-container canvas, .chart-container svg {
1321
+ display: block !important;
1322
+ width: 100% !important;
1323
+ max-width: 100% !important;
1324
+ height: auto !important;
1325
+ margin: 0 auto !important;
1326
+ }
1327
+
1328
+ /* Kill all floats in word frequency section */
1329
+ .wf-table-container .table-responsive {
1330
+ float: none !important;
1331
+ clear: both !important;
1332
+ display: block !important;
1333
+ width: 100% !important;
1334
+ position: static !important;
1335
+ }
1336
+
1337
+ /* POS DISTRIBUTION OVERLAP FIX - Force vertical stacking */
1338
+ .pos-chart-section {
1339
+ display: block !important;
1340
+ width: 100% !important;
1341
+ float: none !important;
1342
+ clear: both !important;
1343
+ position: static !important;
1344
+ margin-bottom: 3rem !important;
1345
+ overflow: hidden !important;
1346
+ page-break-inside: avoid !important;
1347
+ }
1348
+
1349
+ .pos-table-container {
1350
+ display: block !important;
1351
+ width: 100% !important;
1352
+ float: none !important;
1353
+ clear: both !important;
1354
+ position: static !important;
1355
+ margin-top: 3rem !important;
1356
+ overflow: visible !important;
1357
+ page-break-inside: avoid !important;
1358
+ }
1359
+
1360
+ /* Force POS chart container to be completely separate */
1361
+ .pos-chart-section .chart-container {
1362
+ display: block !important;
1363
+ width: 100% !important;
1364
+ float: none !important;
1365
+ clear: both !important;
1366
+ position: static !important;
1367
+ margin: 0 !important;
1368
+ }
1369
+
1370
+ /* ENTITY DISTRIBUTION OVERLAP FIX - Force vertical stacking */
1371
+ .entity-chart-section {
1372
+ display: block !important;
1373
+ width: 100% !important;
1374
+ float: none !important;
1375
+ clear: both !important;
1376
+ position: static !important;
1377
+ margin-bottom: 3rem !important;
1378
+ overflow: hidden !important;
1379
+ page-break-inside: avoid !important;
1380
+ }
1381
+
1382
+ .entity-table-container {
1383
+ display: block !important;
1384
+ width: 100% !important;
1385
+ float: none !important;
1386
+ clear: both !important;
1387
+ position: static !important;
1388
+ margin-top: 3rem !important;
1389
+ overflow: visible !important;
1390
+ page-break-inside: avoid !important;
1391
+ }
1392
+
1393
+ /* Force entity chart container to be completely separate */
1394
+ .entity-chart-section .chart-container {
1395
+ display: block !important;
1396
+ width: 100% !important;
1397
+ float: none !important;
1398
+ clear: both !important;
1399
+ position: static !important;
1400
+ margin: 0 !important;
1401
+ }
1402
+
1403
+ /* Entity Badge Styling for Better Visibility */
1404
+ .entity-badge {
1405
+ display: inline-block !important;
1406
+ padding: 0.5rem 0.75rem !important;
1407
+ margin: 0.2rem !important;
1408
+ border-radius: 8px !important;
1409
+ font-size: 0.9rem !important;
1410
+ font-weight: 600 !important;
1411
+ text-shadow: 1px 1px 2px rgba(0,0,0,0.5) !important;
1412
+ border: 2px solid #ffffff !important;
1413
+ box-shadow: 0 3px 6px rgba(0,0,0,0.3) !important;
1414
+ transition: all 0.2s ease !important;
1415
+ cursor: help !important;
1416
+ }
1417
+
1418
+ .entity-badge:hover {
1419
+ transform: translateY(-1px) !important;
1420
+ box-shadow: 0 4px 8px rgba(0,0,0,0.4) !important;
1421
+ }
1422
+
1423
+ /* Ensure entity text container has clean white background */
1424
+ .entity-text-container {
1425
+ background-color: #ffffff !important;
1426
+ padding: 1.5rem !important;
1427
+ border-radius: 8px !important;
1428
+ border: 1px solid #dee2e6 !important;
1429
+ line-height: 2 !important;
1430
+ margin: 1rem 0 !important;
1431
+ box-shadow: 0 2px 4px rgba(0,0,0,0.05) !important;
1432
+ }
1433
+
1434
+ /* POS Legend Button Styling */
1435
+ .pos-legend-buttons {
1436
+ margin: 1rem 0;
1437
+ }
1438
+
1439
+ .pos-legend-buttons .btn {
1440
+ font-weight: 700 !important;
1441
+ border-radius: 8px !important;
1442
+ padding: 0.875rem 2.5rem !important;
1443
+ margin: 0 0.5rem;
1444
+ transition: all 0.3s ease;
1445
+ box-shadow: 0 4px 12px rgba(0,0,0,0.2) !important;
1446
+ font-size: 1.1rem !important;
1447
+ }
1448
+
1449
+ /* Active button - white background with dark text for high contrast */
1450
+ .pos-legend-buttons .btn.active {
1451
+ background-color: #ffffff !important;
1452
+ color: #1a365d !important;
1453
+ border: 3px solid #ffffff !important;
1454
+ transform: none;
1455
+ box-shadow: 0 4px 16px rgba(0,0,0,0.3) !important;
1456
+ }
1457
+
1458
+ /* Inactive button - dark background with white text */
1459
+ .pos-legend-buttons .btn:not(.active) {
1460
+ background-color: rgba(0,0,0,0.3) !important;
1461
+ color: #ffffff !important;
1462
+ border: 3px solid rgba(255,255,255,0.5) !important;
1463
+ }
1464
+
1465
+ .pos-legend-buttons .btn:hover {
1466
+ transform: translateY(-2px) !important;
1467
+ box-shadow: 0 6px 20px rgba(0,0,0,0.3) !important;
1468
+ }
1469
+
1470
+ /* Ensure text is always visible */
1471
+ .pos-legend-buttons .btn.active:hover {
1472
+ color: #1a365d !important;
1473
+ background-color: #f8f9fa !important;
1474
+ }
1475
+
1476
+ .pos-legend-buttons .btn:not(.active):hover {
1477
+ background-color: rgba(0,0,0,0.5) !important;
1478
+ color: #ffffff !important;
1479
+ border-color: #ffffff !important;
1480
+ }
1481
+
1482
+ .pos-tags-section {
1483
+ animation: fadeIn 0.3s ease-in;
1484
+ }
1485
+
1486
+ @keyframes fadeIn {
1487
+ from { opacity: 0; }
1488
+ to { opacity: 1; }
1489
+ }
1490
+
1491
+ /* SENTIMENT ANALYSIS METHOD CARDS - Modern Design */
1492
+ .sentiment-method-card {
1493
+ border: none !important;
1494
+ border-radius: 16px !important;
1495
+ box-shadow: 0 8px 32px rgba(0,0,0,0.1) !important;
1496
+ overflow: hidden !important;
1497
+ transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
1498
+ position: relative !important;
1499
+ }
1500
+
1501
+ .sentiment-method-card:hover {
1502
+ transform: translateY(-8px) !important;
1503
+ box-shadow: 0 16px 48px rgba(0,0,0,0.15) !important;
1504
+ }
1505
+
1506
+ /* Method Header with Gradients */
1507
+ .method-header {
1508
+ padding: 2rem 1.5rem 1rem !important;
1509
+ text-align: center !important;
1510
+ position: relative !important;
1511
+ color: white !important;
1512
+ }
1513
+
1514
+ .vader-card .method-header {
1515
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
1516
+ }
1517
+
1518
+ .distilbert-card .method-header {
1519
+ background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%) !important;
1520
+ }
1521
+
1522
+ .roberta-card .method-header {
1523
+ background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%) !important;
1524
+ }
1525
+
1526
+ .method-icon {
1527
+ width: 80px !important;
1528
+ height: 80px !important;
1529
+ border-radius: 50% !important;
1530
+ background: rgba(255,255,255,0.2) !important;
1531
+ display: flex !important;
1532
+ align-items: center !important;
1533
+ justify-content: center !important;
1534
+ margin: 0 auto 1rem !important;
1535
+ backdrop-filter: blur(10px) !important;
1536
+ }
1537
+
1538
+ .method-icon i {
1539
+ font-size: 2.5rem !important;
1540
+ color: white !important;
1541
+ }
1542
+
1543
+ .method-title {
1544
+ font-size: 1.5rem !important;
1545
+ font-weight: 700 !important;
1546
+ margin: 0 0 0.5rem 0 !important;
1547
+ color: white !important;
1548
+ text-shadow: 2px 2px 4px rgba(0,0,0,0.3) !important;
1549
+ }
1550
+
1551
+ .method-subtitle {
1552
+ font-size: 0.9rem !important;
1553
+ opacity: 1 !important;
1554
+ margin: 0 !important;
1555
+ font-weight: 500 !important;
1556
+ color: white !important;
1557
+ text-shadow: 1px 1px 2px rgba(0,0,0,0.3) !important;
1558
+ }
1559
+
1560
+ /* Method Body */
1561
+ .method-body {
1562
+ padding: 1.5rem !important;
1563
+ background: white !important;
1564
+ color: #333333 !important;
1565
+ }
1566
+
1567
+ .method-description {
1568
+ font-size: 0.95rem !important;
1569
+ color: #6c757d !important;
1570
+ margin-bottom: 1.5rem !important;
1571
+ line-height: 1.6 !important;
1572
+ }
1573
+
1574
+ .method-features {
1575
+ display: flex !important;
1576
+ flex-direction: column !important;
1577
+ gap: 0.75rem !important;
1578
+ }
1579
+
1580
+ .feature-item {
1581
+ display: flex !important;
1582
+ align-items: center !important;
1583
+ gap: 0.75rem !important;
1584
+ }
1585
+
1586
+ .feature-item i {
1587
+ color: #28a745 !important;
1588
+ font-size: 0.9rem !important;
1589
+ flex-shrink: 0 !important;
1590
+ }
1591
+
1592
+ .feature-item span {
1593
+ font-size: 0.9rem !important;
1594
+ color: #495057 !important;
1595
+ font-weight: 500 !important;
1596
+ }
1597
+
1598
+ /* Responsive adjustments */
1599
+ @media (max-width: 768px) {
1600
+ .method-header {
1601
+ padding: 1.5rem 1rem 0.75rem !important;
1602
+ }
1603
+
1604
+ .method-icon {
1605
+ width: 60px !important;
1606
+ height: 60px !important;
1607
+ }
1608
+
1609
+ .method-icon i {
1610
+ font-size: 2rem !important;
1611
+ }
1612
+
1613
+ .method-title {
1614
+ font-size: 1.25rem !important;
1615
+ }
1616
+ }
1617
+
1618
+ /* SENTIMENT ANALYSIS OVERLAP FIX - Force vertical stacking */
1619
+ .emotion-chart-section,
1620
+ .sentence-chart-section {
1621
+ display: block !important;
1622
+ width: 100% !important;
1623
+ float: none !important;
1624
+ clear: both !important;
1625
+ position: static !important;
1626
+ margin-bottom: 3rem !important;
1627
+ overflow: hidden !important;
1628
+ page-break-inside: avoid !important;
1629
+ }
1630
+
1631
+ .emotion-result-container,
1632
+ .sentence-analysis-container {
1633
+ display: block !important;
1634
+ width: 100% !important;
1635
+ float: none !important;
1636
+ clear: both !important;
1637
+ position: static !important;
1638
+ margin-top: 3rem !important;
1639
+ overflow: visible !important;
1640
+ page-break-inside: avoid !important;
1641
+ }
1642
+
1643
+ /* Force sentiment chart containers to be completely separate */
1644
+ .emotion-chart-section .chart-container,
1645
+ .sentence-chart-section .chart-container {
1646
+ display: block !important;
1647
+ width: 100% !important;
1648
+ float: none !important;
1649
+ clear: both !important;
1650
+ position: static !important;
1651
+ margin: 0 !important;
1652
+ }
1653
+
1654
+ /* Stats cards - high contrast in light mode */
1655
+ .stats-card {
1656
+ background: #eef5ff !important; /* light blue background for readability */
1657
+ border: 1px solid #d6e4ff !important;
1658
+ color: #0b3d91 !important;
1659
+ box-shadow: 0 4px 12px rgba(0,0,0,.06) !important;
1660
+ min-height: 110px;
1661
+ }
1662
+ /* Force dark text for all children in light mode */
1663
+ .stats-card *, .stats-card .card-body *, .stats-card .card-body {
1664
+ color: #0b3d91 !important;
1665
+ }
1666
+ .stats-card .card-body { background: transparent !important; display: flex; flex-direction: column; align-items: center; justify-content: center; }
1667
+ .stats-card h3 { font-size: 2rem !important; margin: 0 0 .25rem 0 !important; }
1668
+ .stats-card p, .stats-card .card-title { font-weight: 700 !important; margin: 0 !important; opacity: 1; }
1669
+
1670
+ /* Dark mode variant keeps white text on gradient */
1671
+ [data-theme="dark"] .stats-card {
1672
+ background: linear-gradient(135deg, var(--primary-color), var(--accent-color)) !important;
1673
+ border: none !important;
1674
+ color: #fff !important;
1675
+ }
1676
+ [data-theme="dark"] .stats-card *, [data-theme="dark"] .stats-card .card-body * { color: #fff !important; }
1677
+
1678
+ /* Quick Navigation button color fixes */
1679
+ .quick-nav .btn-outline-primary { color: #0d6efd; border-color: #0d6efd; }
1680
+ .quick-nav .btn-outline-primary:hover,
1681
+ .quick-nav .btn-outline-primary:focus,
1682
+ .quick-nav .btn-outline-primary:active { color: #fff; background-color: #0d6efd; border-color: #0d6efd; box-shadow: none; }
1683
+
1684
+ .quick-nav .btn-outline-success { color: #198754; border-color: #198754; }
1685
+ .quick-nav .btn-outline-success:hover,
1686
+ .quick-nav .btn-outline-success:focus,
1687
+ .quick-nav .btn-outline-success:active { color: #fff; background-color: #198754; border-color: #198754; box-shadow: none; }
1688
+
1689
+ .quick-nav .btn-outline-info { color: #0dcaf0; border-color: #0dcaf0; }
1690
+ .quick-nav .btn-outline-info:hover,
1691
+ .quick-nav .btn-outline-info:focus,
1692
+ .quick-nav .btn-outline-info:active { color: #0b2a2f; background-color: #0dcaf0; border-color: #0dcaf0; box-shadow: none; }
1693
+
1694
+ /* Ensure readable default text color on chip background. NOTE: this has the same specificity as the .quick-nav .btn-outline-* color rules above and is declared later, so it replaces their default-state text color; the :hover/:focus/:active rules and the !important dark-theme rules still win. */
1695
+ .quick-nav .btn { color: inherit; }
1696
+
1697
+ /* Quick Nav title color fixes */
1698
+ .quick-nav .card-header {
1699
+ background: transparent !important; /* keep card bg */
1700
+ color: #0d6efd !important; /* visible in light mode */
1701
+ border-bottom: 2px solid rgba(13,110,253,.15) !important;
1702
+ }
1703
+
1704
+ [data-theme="dark"] .quick-nav .card-header {
1705
+ color: #ffffff !important;
1706
+ border-bottom: 2px solid rgba(255,255,255,.2) !important;
1707
+ }
1708
+
1709
+ /* Dark mode quick navigation fixes: match the card when .quick-nav is on the card itself (.quick-nav.card) as well as a card nested inside .quick-nav */
1710
+ [data-theme="dark"] .quick-nav.card, [data-theme="dark"] .quick-nav .card {
1711
+ background-color: rgba(255,255,255,0.1) !important;
1712
+ border: 1px solid rgba(255,255,255,0.2) !important;
1713
+ }
1714
+
1715
+ [data-theme="dark"] .quick-nav .btn-outline-primary {
1716
+ color: #6ea8fe !important;
1717
+ border-color: #6ea8fe !important;
1718
+ background-color: rgba(110,168,254,0.1) !important;
1719
+ }
1720
+
1721
+ [data-theme="dark"] .quick-nav .btn-outline-primary:hover,
1722
+ [data-theme="dark"] .quick-nav .btn-outline-primary:focus,
1723
+ [data-theme="dark"] .quick-nav .btn-outline-primary:active {
1724
+ color: #000 !important;
1725
+ background-color: #6ea8fe !important;
1726
+ border-color: #6ea8fe !important;
1727
+ }
1728
+
1729
+ [data-theme="dark"] .quick-nav .btn-outline-success {
1730
+ color: #75b798 !important;
1731
+ border-color: #75b798 !important;
1732
+ background-color: rgba(117,183,152,0.1) !important;
1733
+ }
1734
+
1735
+ [data-theme="dark"] .quick-nav .btn-outline-success:hover,
1736
+ [data-theme="dark"] .quick-nav .btn-outline-success:focus,
1737
+ [data-theme="dark"] .quick-nav .btn-outline-success:active {
1738
+ color: #000 !important;
1739
+ background-color: #75b798 !important;
1740
+ border-color: #75b798 !important;
1741
+ }
1742
+
1743
+ [data-theme="dark"] .quick-nav .btn-outline-info {
1744
+ color: #6edff6 !important;
1745
+ border-color: #6edff6 !important;
1746
+ background-color: rgba(110,223,246,0.1) !important;
1747
+ }
1748
+
1749
+ [data-theme="dark"] .quick-nav .btn-outline-info:hover,
1750
+ [data-theme="dark"] .quick-nav .btn-outline-info:focus,
1751
+ [data-theme="dark"] .quick-nav .btn-outline-info:active {
1752
+ color: #000 !important;
1753
+ background-color: #6edff6 !important;
1754
+ border-color: #6edff6 !important;
1755
+ }
1756
+
static/css/style.css ADDED
@@ -0,0 +1,762 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* Custom CSS for NLP Ultimate Tutorial */
2
+
3
+ :root {
4
+ --primary-color: #1976D2;
5
+ --primary-hover: #1565C0;
6
+ --secondary-color: #424242;
7
+ --success-color: #4CAF50;
8
+ --info-color: #2196F3;
9
+ --warning-color: #FF9800;
10
+ --danger-color: #F44336;
11
+ --light-color: #f8f9fa;
12
+ --dark-color: #212529;
13
+ --gradient-subtle: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
14
+ }
15
+
16
+ /* Dark theme variables */
17
+ [data-theme="dark"] {
18
+ --primary-color: #64B5F6;
19
+ --primary-hover: #42A5F5;
20
+ --secondary-color: #e0e0e0;
21
+ --success-color: #81C784;
22
+ --info-color: #64B5F6;
23
+ --warning-color: #FFB74D;
24
+ --danger-color: #FF8A80;
25
+ --light-color: #2d2d2d;
26
+ --dark-color: #e0e0e0;
27
+ --gradient-subtle: linear-gradient(135deg, #2d2d2d 0%, #1a1a1a 100%);
28
+ }
29
+
30
+ /* Body and background */
31
+ body {
32
+ background: var(--gradient-subtle);
33
+ transition: all 0.3s ease;
34
+ overflow-x: hidden;
35
+ max-width: 100%;
36
+ }
37
+
38
+ /* Navbar styling */
39
+ .navbar {
40
+ background: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-hover) 100%) !important;
41
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
42
+ z-index: 1030 !important;
43
+ }
44
+
45
+ /* Ensure navbar container spans full width comfortably */
46
+ .navbar .container {
47
+ max-width: 100% !important;
48
+ padding-left: 1rem !important;
49
+ padding-right: 1rem !important;
50
+ }
51
+
52
+ /* Dropdown menus should overlay the page and scroll internally if too tall */
53
+ .navbar-nav .dropdown-menu {
54
+ z-index: 2000 !important; /* stay above navbar */
55
+ max-height: 70vh !important; /* avoid pushing page */
56
+ overflow-y: auto !important; /* scroll inside menu when needed */
57
+ margin-top: 0.5rem !important; /* small gap below trigger */
58
+ border-radius: 12px !important;
59
+ box-shadow: 0 12px 28px rgba(0,0,0,0.22) !important;
60
+ }
61
+
62
+ /* Navbar dropdown visual style */
63
+ .navbar-nav .dropdown-menu {
64
+ background: #ffffff !important;
65
+ border: 1px solid rgba(0,0,0,0.08) !important;
66
+ border-radius: 12px !important;
67
+ padding: 0.5rem 0 !important;
68
+ box-shadow: 0 12px 28px rgba(0,0,0,0.18) !important;
69
+ min-width: 220px !important;
70
+ }
71
+
72
+ .navbar-nav .dropdown-item {
73
+ padding: 0.6rem 1rem !important;
74
+ font-weight: 500 !important;
75
+ color: #2b2b2b !important;
76
+ border-radius: 8px !important;
77
+ margin: 0.1rem 0.5rem !important;
78
+ }
79
+
80
+ .navbar-nav .dropdown-item:hover,
81
+ .navbar-nav .dropdown-item:focus {
82
+ background: linear-gradient(135deg, var(--primary-color), var(--primary-hover)) !important;
83
+ color: #ffffff !important;
84
+ }
85
+
86
+ /* Dark mode dropdown contrast */
87
+ [data-theme="dark"] .navbar-nav .dropdown-menu {
88
+ background: #1f1f1f !important;
89
+ border-color: rgba(255,255,255,0.12) !important;
90
+ }
91
+
92
+ [data-theme="dark"] .navbar-nav .dropdown-item { color: #e0e0e0 !important; }
93
+ [data-theme="dark"] .navbar-nav .dropdown-item:hover, [data-theme="dark"] .navbar-nav .dropdown-item:focus { color: #ffffff !important; }
94
+
95
+ /* Mobile: let dropdowns be part of the flow inside the collapse */
96
@media (max-width: 991.98px) {
    /*
     * The extra `.navbar` ancestor is deliberate: it raises this selector's
     * specificity above the unscoped `.navbar-nav .dropdown-menu` rules that
     * appear later in this stylesheet. Those rules also use !important, so at
     * equal specificity they win on source order and force the collapsed
     * menu back to `position: absolute` with a 70vh scrolling box.
     * NOTE(review): assumes .navbar-nav is always nested inside .navbar
     * (standard Bootstrap markup) - confirm against the templates.
     */
    .navbar .navbar-nav .dropdown-menu {
        position: static !important;
        max-height: none !important;
        overflow: visible !important;
        width: 100% !important;
        margin: 0.25rem 0 !important;
        box-shadow: 0 6px 18px rgba(0,0,0,0.18) !important;
    }

    /* The collapsed navbar itself scrolls, so long menus never push the page. */
    .navbar-collapse {
        max-height: 80vh !important;
        overflow-y: auto !important;
        overflow-x: hidden !important;
        padding-bottom: 1rem !important;
    }
}
112
+
113
+ /* Anchor dropdowns directly under their toggles (desktop) */
114
+ .navbar-nav .dropdown { position: relative !important; }
115
+ .navbar-nav .dropdown-menu {
116
+ position: absolute !important;
117
+ top: 100% !important;
118
+ left: 0 !important;
119
+ right: auto !important;
120
+ transform: none !important;
121
+ }
122
+
123
+ /* Respect Bootstrap end-aligned menus */
124
+ .navbar-nav .dropdown-menu-end { left: auto !important; right: 0 !important; }
125
+
126
+ /* Constrain dropdown height so page doesn't vertically scroll when open */
127
+ .navbar-nav .dropdown-menu { max-height: 70vh !important; overflow-y: auto !important; }
128
+
129
+ .navbar-nav .dropdown-menu { overflow-x: hidden !important; }
130
+
131
+ .navbar-brand {
132
+ font-weight: 600;
133
+ font-size: 1.5rem;
134
+ }
135
+
136
+ /* Modern Footer Styling */
137
+ .modern-footer {
138
+ background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%);
139
+ color: #ffffff;
140
+ padding: 3rem 0 2rem;
141
+ margin-top: 4rem;
142
+ position: relative;
143
+ overflow: hidden;
144
+ }
145
+
146
+ .modern-footer::before {
147
+ content: '';
148
+ position: absolute;
149
+ top: 0;
150
+ left: 0;
151
+ right: 0;
152
+ height: 1px;
153
+ background: linear-gradient(90deg, transparent, rgba(255,255,255,0.3), transparent);
154
+ }
155
+
156
+ .footer-brand h5 {
157
+ color: #64B5F6;
158
+ font-weight: 700;
159
+ margin-bottom: 1rem;
160
+ font-size: 1.4rem;
161
+ }
162
+
163
+ .footer-description {
164
+ color: rgba(255,255,255,0.8);
165
+ font-size: 0.95rem;
166
+ line-height: 1.6;
167
+ margin: 0;
168
+ }
169
+
170
+ .footer-credit {
171
+ display: flex;
172
+ justify-content: center;
173
+ align-items: center;
174
+ }
175
+
176
+ .credit-badge {
177
+ background: rgba(255,255,255,0.1);
178
+ backdrop-filter: blur(10px);
179
+ border: 1px solid rgba(255,255,255,0.2);
180
+ border-radius: 12px;
181
+ padding: 1rem 1.5rem;
182
+ text-align: center;
183
+ box-shadow: 0 4px 15px rgba(0,0,0,0.1);
184
+ transition: all 0.3s ease;
185
+ }
186
+
187
+ .credit-badge:hover {
188
+ background: rgba(255,255,255,0.15);
189
+ transform: translateY(-2px);
190
+ box-shadow: 0 6px 20px rgba(0,0,0,0.15);
191
+ }
192
+
193
+ .credit-text {
194
+ display: block;
195
+ font-size: 0.85rem;
196
+ color: rgba(255,255,255,0.8);
197
+ margin-bottom: 0.25rem;
198
+ }
199
+
200
+ .developer-name {
201
+ display: block;
202
+ font-size: 1.1rem;
203
+ color: #64B5F6;
204
+ font-weight: 700;
205
+ }
206
+
207
+ .social-links {
208
+ display: flex;
209
+ gap: 1rem;
210
+ justify-content: flex-end;
211
+ align-items: center;
212
+ }
213
+
214
+ .social-link {
215
+ display: flex;
216
+ align-items: center;
217
+ gap: 0.5rem;
218
+ padding: 0.75rem 1rem;
219
+ border-radius: 8px;
220
+ text-decoration: none;
221
+ transition: all 0.3s ease;
222
+ border: 1px solid rgba(255,255,255,0.2);
223
+ background: rgba(255,255,255,0.05);
224
+ color: #ffffff;
225
+ font-weight: 500;
226
+ }
227
+
228
+ .social-link:hover {
229
+ color: #ffffff;
230
+ text-decoration: none;
231
+ transform: translateY(-2px);
232
+ box-shadow: 0 4px 12px rgba(0,0,0,0.2);
233
+ }
234
+
235
+ .github-link:hover {
236
+ background: #333333;
237
+ border-color: #333333;
238
+ }
239
+
240
+ .linkedin-link:hover {
241
+ background: #0077B5;
242
+ border-color: #0077B5;
243
+ }
244
+
245
+ .social-link i {
246
+ font-size: 1.2rem;
247
+ }
248
+
249
+ /* Dark mode footer adjustments */
250
+ [data-theme="dark"] .modern-footer {
251
+ background: #0d1117 !important; /* solid dark, no glassy gradient */
252
+ }
253
+
254
+ [data-theme="dark"] .modern-footer::before {
255
+ background: transparent !important; /* remove glossy top line */
256
+ }
257
+
258
+ [data-theme="dark"] .credit-badge {
259
+ background: rgba(255,255,255,0.08);
260
+ border-color: rgba(255,255,255,0.15);
261
+ }
262
+
263
+ [data-theme="dark"] .social-link {
264
+ background: rgba(255,255,255,0.03);
265
+ border-color: rgba(255,255,255,0.15);
266
+ }
267
+
268
+ /* Responsive footer */
269
+ @media (max-width: 768px) {
270
+ .modern-footer {
271
+ padding: 2rem 0 1.5rem;
272
+ text-align: center;
273
+ }
274
+
275
+ .social-links {
276
+ justify-content: center;
277
+ margin-top: 1rem;
278
+ }
279
+
280
+ .footer-credit {
281
+ margin: 1.5rem 0;
282
+ }
283
+ }
284
+
285
+ /* Card styling */
286
+ .card {
287
+ border: none;
288
+ box-shadow: 0 4px 6px rgba(0,0,0,0.1);
289
+ border-radius: 10px;
290
+ transition: transform 0.2s ease, box-shadow 0.2s ease;
291
+ }
292
+
293
+ .card:hover {
294
+ transform: translateY(-2px);
295
+ box-shadow: 0 8px 15px rgba(0,0,0,0.15);
296
+ }
297
+
298
+ .card-header {
299
+ background: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-hover) 100%);
300
+ color: white;
301
+ border-radius: 10px 10px 0 0 !important;
302
+ border: none;
303
+ }
304
+
305
+ /* Button styling */
306
+ .btn {
307
+ border-radius: 8px;
308
+ font-weight: 500;
309
+ transition: all 0.3s ease;
310
+ border: none;
311
+ }
312
+
313
+ .btn:hover {
314
+ transform: translateY(-1px);
315
+ box-shadow: 0 4px 8px rgba(0,0,0,0.2);
316
+ }
317
+
318
+ .btn-primary {
319
+ background: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-hover) 100%);
320
+ }
321
+
322
+ .btn-success {
323
+ background: linear-gradient(135deg, var(--success-color) 0%, #388E3C 100%);
324
+ }
325
+
326
+ .btn-info {
327
+ background: linear-gradient(135deg, var(--info-color) 0%, #1976D2 100%);
328
+ }
329
+
330
+ /* Form styling */
331
+ .form-control, .form-select {
332
+ border-radius: 8px;
333
+ border: 2px solid #e0e0e0;
334
+ transition: border-color 0.3s ease, box-shadow 0.3s ease;
335
+ }
336
+
337
+ .form-control:focus, .form-select:focus {
338
+ border-color: var(--primary-color);
339
+ box-shadow: 0 0 0 0.2rem rgba(25, 118, 210, 0.25);
340
+ }
341
+
342
+ /* Alert styling */
343
+ .alert {
344
+ border-radius: 10px;
345
+ border: none;
346
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
347
+ }
348
+
349
+ .alert-info {
350
+ background: linear-gradient(135deg, #E3F2FD 0%, #BBDEFB 100%);
351
+ color: #1565C0;
352
+ border-left: 5px solid var(--primary-color);
353
+ }
354
+
355
+ .alert-warning {
356
+ background: linear-gradient(135deg, #FFF8E1 0%, #FFECB3 100%);
357
+ color: #E65100;
358
+ border-left: 5px solid var(--warning-color);
359
+ }
360
+
361
+ .alert-success {
362
+ background: linear-gradient(135deg, #E8F5E9 0%, #C8E6C9 100%);
363
+ color: #2E7D32;
364
+ border-left: 5px solid var(--success-color);
365
+ }
366
+
367
+ .alert-danger {
368
+ background: linear-gradient(135deg, #FFEBEE 0%, #FFCDD2 100%);
369
+ color: #C62828;
370
+ border-left: 5px solid var(--danger-color);
371
+ }
372
+
373
+ /* Tab styling */
374
+ .nav-tabs {
375
+ border-bottom: 2px solid #e0e0e0;
376
+ }
377
+
378
+ .nav-tabs .nav-link {
379
+ border: none;
380
+ border-radius: 8px 8px 0 0;
381
+ margin-right: 5px;
382
+ color: var(--secondary-color);
383
+ font-weight: 500;
384
+ transition: all 0.3s ease;
385
+ }
386
+
387
+ .nav-tabs .nav-link:hover {
388
+ border-color: transparent;
389
+ background-color: rgba(25, 118, 210, 0.1);
390
+ }
391
+
392
+ .nav-tabs .nav-link.active {
393
+ background: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-hover) 100%);
394
+ color: white;
395
+ border-color: transparent;
396
+ }
397
+
398
+ /* Statistics cards */
399
+ .card.text-center {
400
+ background: white;
401
+ border: 2px solid #e0e0e0;
402
+ }
403
+
404
+ .card.text-center:hover {
405
+ border-color: var(--primary-color);
406
+ transform: scale(1.02);
407
+ }
408
+
409
+ /* Footer styling */
410
+ footer {
411
+ background: linear-gradient(135deg, var(--dark-color) 0%, #1a1a1a 100%) !important;
412
+ margin-top: auto;
413
+ }
414
+
415
+ /* Loading spinner */
416
+ .spinner-border {
417
+ color: var(--primary-color);
418
+ }
419
+
420
+ /* Results container */
421
+ #resultsContainer {
422
+ min-height: 200px;
423
+ }
424
+
425
+ /* Dark theme styles */
426
+ [data-theme="dark"] {
427
+ background: var(--gradient-subtle);
428
+ color: var(--dark-color);
429
+ }
430
+
431
+ [data-theme="dark"] .card {
432
+ background-color: #2d2d2d;
433
+ color: var(--dark-color);
434
+ }
435
+
436
+ [data-theme="dark"] .card-header {
437
+ background: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-hover) 100%);
438
+ }
439
+
440
+ [data-theme="dark"] .form-control,
441
+ [data-theme="dark"] .form-select {
442
+ background-color: #2d2d2d;
443
+ color: var(--dark-color);
444
+ border-color: #555;
445
+ }
446
+
447
+ [data-theme="dark"] .form-control:focus,
448
+ [data-theme="dark"] .form-select:focus {
449
+ background-color: #2d2d2d;
450
+ color: var(--dark-color);
451
+ border-color: var(--primary-color);
452
+ }
453
+
454
+ [data-theme="dark"] .nav-tabs {
455
+ border-bottom-color: #555;
456
+ }
457
+
458
+ [data-theme="dark"] .nav-tabs .nav-link {
459
+ color: var(--dark-color);
460
+ }
461
+
462
+ [data-theme="dark"] .nav-tabs .nav-link:hover {
463
+ background-color: rgba(100, 181, 246, 0.1);
464
+ }
465
+
466
+ [data-theme="dark"] .card.text-center {
467
+ background-color: #2d2d2d;
468
+ border-color: #555;
469
+ color: var(--dark-color);
470
+ }
471
+
472
+ [data-theme="dark"] .alert-info {
473
+ background: linear-gradient(135deg, #1a3c5a 0%, #0d2a3f 100%);
474
+ color: var(--primary-color);
475
+ }
476
+
477
+ [data-theme="dark"] .alert-warning {
478
+ background: linear-gradient(135deg, #3e2e00 0%, #2a1f00 100%);
479
+ color: var(--warning-color);
480
+ }
481
+
482
+ [data-theme="dark"] .alert-success {
483
+ background: linear-gradient(135deg, #1b3525 0%, #0f1f15 100%);
484
+ color: var(--success-color);
485
+ }
486
+
487
+ [data-theme="dark"] .alert-danger {
488
+ background: linear-gradient(135deg, #4A1515 0%, #2a0a0a 100%);
489
+ color: var(--danger-color);
490
+ }
491
+
492
+ /* Mobile responsiveness */
493
+ @media (max-width: 768px) {
494
+ .container-fluid {
495
+ padding: 10px;
496
+ }
497
+
498
+ .card {
499
+ margin-bottom: 15px;
500
+ }
501
+
502
+ .btn {
503
+ margin-bottom: 10px;
504
+ }
505
+
506
+ .display-4 {
507
+ font-size: 2rem;
508
+ }
509
+
510
+ .lead {
511
+ font-size: 1rem;
512
+ }
513
+ }
514
+
515
+ /* Animation classes */
516
+ .fade-in {
517
+ animation: fadeIn 0.5s ease-in;
518
+ }
519
+
520
+ @keyframes fadeIn {
521
+ from { opacity: 0; transform: translateY(20px); }
522
+ to { opacity: 1; transform: translateY(0); }
523
+ }
524
+
525
+ .slide-in {
526
+ animation: slideIn 0.3s ease-out;
527
+ }
528
+
529
+ @keyframes slideIn {
530
+ from { transform: translateX(-100%); }
531
+ to { transform: translateX(0); }
532
+ }
533
+
534
+ /* Custom scrollbar */
535
+ ::-webkit-scrollbar {
536
+ width: 8px;
537
+ }
538
+
539
+ ::-webkit-scrollbar-track {
540
+ background: #f1f1f1;
541
+ border-radius: 4px;
542
+ }
543
+
544
+ ::-webkit-scrollbar-thumb {
545
+ background: var(--primary-color);
546
+ border-radius: 4px;
547
+ }
548
+
549
+ ::-webkit-scrollbar-thumb:hover {
550
+ background: var(--primary-hover);
551
+ }
552
+
553
+ [data-theme="dark"] ::-webkit-scrollbar-track {
554
+ background: #2d2d2d;
555
+ }
556
+
557
+ [data-theme="dark"] ::-webkit-scrollbar-thumb {
558
+ background: var(--primary-color);
559
+ }
560
+
561
+ /* Table styling */
562
+ .table {
563
+ border-radius: 8px;
564
+ overflow: hidden;
565
+ }
566
+
567
+ .table thead th {
568
+ background: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-hover) 100%);
569
+ color: white;
570
+ border: none;
571
+ font-weight: 600;
572
+ }
573
+
574
+ .table tbody tr:hover {
575
+ background-color: rgba(25, 118, 210, 0.1);
576
+ }
577
+
578
+ [data-theme="dark"] .table {
579
+ color: var(--dark-color);
580
+ }
581
+
582
+ [data-theme="dark"] .table tbody tr:hover {
583
+ background-color: rgba(100, 181, 246, 0.1);
584
+ }
585
+
586
+ /* Chart container */
587
+ .chart-container {
588
+ position: relative;
589
+ height: 400px;
590
+ margin: 20px 0;
591
+ }
592
+
593
+ /* Code blocks */
594
+ pre {
595
+ background-color: #f8f9fa;
596
+ border: 1px solid #e9ecef;
597
+ border-radius: 8px;
598
+ padding: 15px;
599
+ overflow-x: auto;
600
+ }
601
+
602
+ [data-theme="dark"] pre {
603
+ background-color: #2d2d2d;
604
+ border-color: #555;
605
+ color: var(--dark-color);
606
+ }
607
+
608
+ /* Badge styling */
609
+ .badge {
610
+ font-size: 0.8em;
611
+ padding: 0.5em 0.75em;
612
+ border-radius: 6px;
613
+ }
614
+
615
+ /* Progress bar */
616
+ .progress {
617
+ height: 8px;
618
+ border-radius: 4px;
619
+ background-color: #e9ecef;
620
+ }
621
+
622
+ .progress-bar {
623
+ background: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-hover) 100%);
624
+ border-radius: 4px;
625
+ }
626
+
627
+ [data-theme="dark"] .progress {
628
+ background-color: #555;
629
+ }
630
+
631
+ /* Tooltip styling */
632
+ .tooltip {
633
+ font-size: 0.875rem;
634
+ }
635
+
636
+ .tooltip-inner {
637
+ background-color: var(--dark-color);
638
+ border-radius: 6px;
639
+ }
640
+
641
+ /* Modal styling */
642
+ .modal-content {
643
+ border-radius: 10px;
644
+ border: none;
645
+ box-shadow: 0 10px 30px rgba(0,0,0,0.3);
646
+ }
647
+
648
+ .modal-header {
649
+ background: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-hover) 100%);
650
+ color: white;
651
+ border-radius: 10px 10px 0 0;
652
+ }
653
+
654
+ [data-theme="dark"] .modal-content {
655
+ background-color: #2d2d2d;
656
+ color: var(--dark-color);
657
+ }
658
+
659
+ /* Utility classes */
660
+ .text-gradient {
661
+ background: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-hover) 100%);
662
+ -webkit-background-clip: text;
663
+ -webkit-text-fill-color: transparent;
664
+ background-clip: text;
665
+ }
666
+
667
+ .shadow-custom {
668
+ box-shadow: 0 8px 25px rgba(0,0,0,0.15);
669
+ }
670
+
671
+ .border-gradient {
672
+ border: 2px solid;
673
+ border-image: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-hover) 100%) 1;
674
+ }
675
+
676
+ /***** Global horizontal scroll guards *****/
677
+ html, body { overflow-x: hidden !important; max-width: 100% !important; }
678
+ .container-fluid, .container { overflow-x: hidden !important; max-width: 100% !important; }
679
+
680
+ /* Reset dropdown positioning to Bootstrap defaults */
681
+ .navbar-nav .dropdown { position: static !important; }
682
+ .navbar-nav .dropdown-menu { position: absolute !important; top: auto !important; left: auto !important; right: auto !important; }
683
+
684
+ /* Cap dropdown width to viewport and allow wrapping */
685
+ .navbar-nav .dropdown-menu { max-width: calc(100vw - 2rem) !important; overflow-wrap: anywhere; }
686
+
687
+ /* Ensure quick-nav and badges shadows don't trigger scroll */
688
+ .quick-nav, .entity-text-container, .card { overflow: visible !important; }
689
+
690
+ /* --- Dark mode global text legibility --- */
691
+ [data-theme="dark"] body,
692
+ [data-theme="dark"] .container,
693
+ [data-theme="dark"] .container-fluid {
694
+ color: #e6e6e6 !important;
695
+ }
696
+
697
+ /* Links in dark mode */
698
+ [data-theme="dark"] a { color: #82b1ff !important; }
699
+ [data-theme="dark"] a:hover { color: #b3c8ff !important; }
700
+
701
+ /* Cards */
702
+ [data-theme="dark"] .card { background-color: #1f1f1f !important; color: #e8e8e8 !important; border-color: rgba(255,255,255,0.08) !important; }
703
+ [data-theme="dark"] .card-header { background-color: #242424 !important; color: #e8e8e8 !important; border-bottom-color: rgba(255,255,255,0.08) !important; }
704
+ [data-theme="dark"] .card .text-muted { color: #b0b0b0 !important; }
705
+
706
+ /* Alerts */
707
+ [data-theme="dark"] .alert { background-color: #262626 !important; color: #f0f0f0 !important; border-color: rgba(255,255,255,0.12) !important; }
708
+ [data-theme="dark"] .alert-info { background-color: rgba(33,150,243,0.12) !important; color: #dbe9ff !important; border-color: rgba(33,150,243,0.35) !important; }
709
+ [data-theme="dark"] .alert-warning { background-color: rgba(255,193,7,0.12) !important; color: #ffe6a3 !important; border-color: rgba(255,193,7,0.35) !important; }
710
+
711
+ /* Forms */
712
+ [data-theme="dark"] .form-control,
713
+ [data-theme="dark"] .form-select,
714
+ [data-theme="dark"] textarea.form-control {
715
+ background-color: #1e1e1e !important;
716
+ color: #f0f0f0 !important;
717
+ border-color: rgba(255,255,255,0.15) !important;
718
+ }
719
+ [data-theme="dark"] .form-control:focus,
720
+ [data-theme="dark"] .form-select:focus { box-shadow: 0 0 0 0.25rem rgba(100,181,246,0.25) !important; border-color: #64B5F6 !important; }
721
+ [data-theme="dark"] ::placeholder { color: #b8b8b8 !important; opacity: 1 !important; }
722
+
723
+ /* Tables */
724
+ [data-theme="dark"] .table { color: #e6e6e6 !important; }
725
+ [data-theme="dark"] .table-striped>tbody>tr:nth-of-type(odd) { --bs-table-accent-bg: rgba(255,255,255,0.04) !important; color: #e6e6e6 !important; }
726
+ [data-theme="dark"] .table-hover tbody tr:hover { background-color: rgba(255,255,255,0.06) !important; }
727
+
728
+ /* Badges and small chips */
729
+ [data-theme="dark"] .badge { filter: brightness(1.05) contrast(1.05); }
730
+
731
+ /* Home route: improve dark mode contrast for CHOOSE AN OPERATION header */
732
+ [data-theme="dark"] .card-header.bg-primary,
733
+ [data-theme="dark"] .card-header.bg-primary * {
734
+ color: #ffffff !important;
735
+ }
736
+
737
+ [data-theme="dark"] .card-header.bg-primary { filter: brightness(1.05) contrast(1.1); }
738
+
739
+ /* Dark mode navbar text visibility */
740
+ [data-theme="dark"] .navbar .navbar-brand,
741
+ [data-theme="dark"] .navbar .nav-link,
742
+ [data-theme="dark"] .navbar .dropdown-toggle,
743
+ [data-theme="dark"] .navbar .navbar-toggler-icon::after {
744
+ color: #ffffff !important;
745
+ }
746
+
747
+ /* Strengthen gradient navbar contrast in dark mode */
748
+ [data-theme="dark"] .navbar { filter: brightness(1.05) contrast(1.15); }
749
+
750
+ /* Home: CHOOSE AN OPERATION title contrast in dark mode */
751
+ [data-theme="dark"] .card-header.bg-primary h2,
752
+ [data-theme="dark"] .card-header.bg-primary .mb-0 {
753
+ color: #ffffff !important;
754
+ text-shadow: 0 1px 2px rgba(0,0,0,0.35);
755
+ }
756
+
757
+ /* Dark mode: ensure button text is readable */
758
+ [data-theme="dark"] .btn,
759
+ [data-theme="dark"] .btn * {
760
+ color: #ffffff !important;
761
+ }
762
+ [data-theme="dark"] .btn { text-shadow: 0 1px 2px rgba(0,0,0,0.35); }
static/js/api.js ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // API utilities for NLP Ultimate Tutorial Flask Application
2
+
3
/**
 * Thin client for the JSON endpoints of the NLP Ultimate Tutorial backend.
 *
 * Every helper method resolves with the parsed JSON response and rejects
 * with an Error (message `HTTP error! status: <code>`) on a non-2xx reply,
 * or with whatever `fetch` throws on a transport failure.
 */
class NLPAPI {
    /**
     * @param {string} [baseUrl=''] Prefix prepended to every endpoint path.
     */
    constructor(baseUrl = '') {
        this.baseUrl = baseUrl;
        this.endpoints = {
            // Text processing endpoints
            preprocessing: '/api/preprocessing',
            tokenization: '/api/tokenization',
            posTagging: '/api/pos-tagging',
            namedEntity: '/api/named-entity',
            sentiment: '/api/sentiment',
            summarization: '/api/summarization',
            topicAnalysis: '/api/topic-analysis',
            questionAnswering: '/api/question-answering',
            textGeneration: '/api/text-generation',
            translation: '/api/translation',
            classification: '/api/classification',
            vectorEmbeddings: '/api/vector-embeddings',

            // Utility endpoints
            updateText: '/api/update_current_text',
            getText: '/api/get_current_text',
            textStatistics: '/api/text_statistics'
        };
    }

    /**
     * Generic API request method.
     *
     * @param {string} endpoint Path appended to `baseUrl`.
     * @param {Object} [data={}] Payload. Sent as a JSON body, except for
     *     GET/HEAD where it is encoded into the query string instead.
     * @param {string} [method='POST'] HTTP method.
     * @returns {Promise<*>} Parsed JSON response body.
     * @throws {Error} When the response status is not OK; transport errors
     *     from `fetch` are logged and re-thrown unchanged.
     */
    async request(endpoint, data = {}, method = 'POST') {
        try {
            let url = this.baseUrl + endpoint;
            const init = { method: method };
            const verb = String(method).toUpperCase();

            if (verb === 'GET' || verb === 'HEAD') {
                // fetch() rejects GET/HEAD requests that carry a body, so the
                // payload (if any) travels in the query string instead.
                const query = new URLSearchParams(data || {}).toString();
                if (query) {
                    url += (url.includes('?') ? '&' : '?') + query;
                }
            } else {
                init.headers = {
                    'Content-Type': 'application/json',
                };
                init.body = JSON.stringify(data);
            }

            const response = await fetch(url, init);

            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`);
            }

            return await response.json();
        } catch (error) {
            console.error(`API request failed for ${endpoint}:`, error);
            throw error;
        }
    }

    // Text preprocessing; `options` are merged into the request payload.
    async preprocessText(text, options = {}) {
        return await this.request(this.endpoints.preprocessing, {
            text: text,
            ...options
        });
    }

    // Tokenization
    async tokenizeText(text, tokenizerType = 'word') {
        return await this.request(this.endpoints.tokenization, {
            text: text,
            tokenizer_type: tokenizerType
        });
    }

    // POS Tagging
    async posTagText(text, taggerType = 'nltk') {
        return await this.request(this.endpoints.posTagging, {
            text: text,
            tagger_type: taggerType
        });
    }

    // Named Entity Recognition
    async recognizeEntities(text, modelType = 'spacy') {
        return await this.request(this.endpoints.namedEntity, {
            text: text,
            model_type: modelType
        });
    }

    // Sentiment Analysis
    async analyzeSentiment(text, analyzerType = 'vader') {
        return await this.request(this.endpoints.sentiment, {
            text: text,
            analyzer_type: analyzerType
        });
    }

    // Text Summarization; `options` are merged into the request payload.
    async summarizeText(text, method = 'extractive', options = {}) {
        return await this.request(this.endpoints.summarization, {
            text: text,
            method: method,
            ...options
        });
    }

    // Topic Analysis
    async analyzeTopics(text, method = 'lda') {
        return await this.request(this.endpoints.topicAnalysis, {
            text: text,
            method: method
        });
    }

    // Question Answering; `options` are merged into the request payload.
    async answerQuestion(context, question, options = {}) {
        return await this.request(this.endpoints.questionAnswering, {
            context: context,
            question: question,
            ...options
        });
    }

    // Text Generation; `options` are merged into the request payload.
    async generateText(prompt, options = {}) {
        return await this.request(this.endpoints.textGeneration, {
            prompt: prompt,
            ...options
        });
    }

    // Translation
    async translateText(text, sourceLang = 'auto', targetLang = 'en') {
        return await this.request(this.endpoints.translation, {
            text: text,
            source_lang: sourceLang,
            target_lang: targetLang
        });
    }

    // Classification; `options` are merged into the request payload.
    async classifyText(text, scenario = 'sentiment', options = {}) {
        return await this.request(this.endpoints.classification, {
            text: text,
            scenario: scenario,
            ...options
        });
    }

    // Vector Embeddings
    async getEmbeddings(text, query = '') {
        return await this.request(this.endpoints.vectorEmbeddings, {
            text: text,
            query: query
        });
    }

    // Utility methods
    async updateCurrentText(text) {
        return await this.request(this.endpoints.updateText, { text: text });
    }

    // Issued as a body-less GET.
    async getCurrentText() {
        return await this.request(this.endpoints.getText, {}, 'GET');
    }

    async getTextStatistics(text) {
        return await this.request(this.endpoints.textStatistics, { text: text });
    }
}
163
+
164
+ // Batch processing utility
165
/**
 * Runs queued tasks strictly one after another.
 *
 * A task is an object with an `execute()` method (may be async) and optional
 * `onSuccess(result)` / `onError(error)` callbacks.
 */
class BatchProcessor {
    /**
     * @param {*} api Stored on `this.api`; not used by the processor itself.
     */
    constructor(api) {
        this.api = api;
        this.queue = [];
        this.processing = false;
    }

    /**
     * Enqueue a task and start draining the queue if it is idle.
     * @param {{execute: Function, onSuccess?: Function, onError?: Function}} task
     */
    addTask(task) {
        this.queue.push(task);
        if (!this.processing) {
            // Nobody awaits this promise, so surface failures instead of
            // leaving an unhandled rejection.
            this.processQueue().catch(error => {
                console.error('Batch queue processing failed:', error);
            });
        }
    }

    /**
     * Drain the queue in FIFO order.
     *
     * The value `execute()` resolves with is stored on `task.result` and
     * handed to `onSuccess`. When `execute()` resolves with `undefined`, a
     * `task.result` that the task set on itself is kept.
     * An exception from `execute()` or `onSuccess` is routed to `onError`.
     */
    async processQueue() {
        // Guard against a second concurrent drain of the same queue.
        if (this.processing) return;
        this.processing = true;

        try {
            while (this.queue.length > 0) {
                const task = this.queue.shift();
                try {
                    const value = await task.execute();
                    if (value !== undefined) task.result = value;
                    if (task.onSuccess) task.onSuccess(task.result);
                } catch (error) {
                    if (task.onError) task.onError(error);
                }
            }
        } finally {
            // Always release the flag; otherwise one throwing handler would
            // leave the processor permanently "busy" and later tasks stuck.
            this.processing = false;
        }
    }
}
195
+
196
+ // Caching utility
197
/**
 * Small least-recently-used cache backed by a Map.
 *
 * The Map's insertion order doubles as the recency order: the first key is
 * the least recently used entry, the last key the most recently used.
 */
class APICache {
    /**
     * @param {number} [maxSize=100] Maximum number of entries kept. A value
     *     of 0 or less disables caching (nothing is stored).
     */
    constructor(maxSize = 100) {
        this.cache = new Map();
        this.maxSize = maxSize;
    }

    /**
     * Report whether `key` is cached without touching its recency.
     * Needed because `get()` returns null both for a miss and for a cached
     * null value.
     * @returns {boolean}
     */
    has(key) {
        return this.cache.has(key);
    }

    /**
     * Look up `key` and mark it as most recently used.
     * @returns {*} The cached value, or null when the key is absent.
     */
    get(key) {
        if (!this.cache.has(key)) {
            return null;
        }
        const item = this.cache.get(key);
        // Re-insert so the key moves to the end (most recently used).
        this.cache.delete(key);
        this.cache.set(key, item);
        return item;
    }

    /**
     * Store `value` under `key`, evicting least recently used entries as
     * needed to stay within `maxSize`.
     */
    set(key, value) {
        // Drop any previous entry so the key is re-inserted at the end.
        this.cache.delete(key);

        // A non-positive capacity means "cache nothing"; without this check
        // the cache would still hold one entry.
        if (this.maxSize <= 0) {
            return;
        }

        // Loop rather than evict once, in case maxSize was lowered later.
        while (this.cache.size >= this.maxSize) {
            const oldestKey = this.cache.keys().next().value;
            this.cache.delete(oldestKey);
        }
        this.cache.set(key, value);
    }

    /** Remove every entry. */
    clear() {
        this.cache.clear();
    }
}
229
+
230
+ // Rate limiting utility
231
/**
 * Sliding-window rate limiter: at most `requestsPerMinute` calls to
 * `waitIfNeeded()` are let through in any 60-second window.
 */
class RateLimiter {
    /**
     * @param {number} [requestsPerMinute=60] Calls allowed per 60 seconds.
     */
    constructor(requestsPerMinute = 60) {
        this.requestsPerMinute = requestsPerMinute;
        this.requests = [];   // timestamps (ms) of the calls let through
    }

    /**
     * Resolve as soon as the caller may proceed, recording the moment it is
     * actually released.
     *
     * The check is repeated after every sleep: the timestamp stored must be
     * the release time (not the time the call started waiting), and several
     * callers waking together must not all take the same freed slot.
     */
    async waitIfNeeded() {
        const windowMs = 60000;
        // A limit below 1 could never be satisfied; treat it as 1.
        const limit = Math.max(1, this.requestsPerMinute);

        for (;;) {
            const now = Date.now();

            // Remove requests that have left the window.
            this.requests = this.requests.filter(time => time > now - windowMs);

            if (!(this.requests.length >= limit)) {
                this.requests.push(now);
                return;
            }

            // Sleep until the oldest recorded request leaves the window.
            const oldestRequest = this.requests.reduce((a, b) => Math.min(a, b));
            const waitTime = Math.max(1, windowMs - (now - oldestRequest));
            await new Promise(resolve => setTimeout(resolve, waitTime));
        }
    }
}
255
+
256
+ // Error handling utility
257
/**
 * Turns thrown values into a uniform `{ success, error, context, timestamp }`
 * response object.
 */
class ErrorHandler {
    /**
     * Log `error` and convert it to an error response.
     *
     * Accepts anything that can be thrown or rejected with (Error objects,
     * strings, null, plain objects), so the handler itself never throws
     * while inspecting the error.
     *
     * @param {*} error The caught value.
     * @param {string} [context=''] Label describing where it happened.
     * @returns {{success: boolean, error: string, context: string, timestamp: string}}
     */
    static handle(error, context = '') {
        console.error(`Error in ${context}:`, error);

        const name = error != null ? error.name : undefined;
        const raw = typeof error === 'string'
            ? error
            : (error != null ? error.message : undefined);
        const detail = raw == null ? '' : String(raw);

        let message = 'An unexpected error occurred';

        if (name === 'TypeError' && detail.includes('fetch')) {
            message = 'Network error: Unable to connect to the server';
        } else if (detail.includes('HTTP error')) {
            message = `Server error: ${detail}`;
        } else if (detail) {
            message = detail;
        }

        // Referenced via the class name (not `this`) so the method still
        // works when passed around detached, e.g. `.catch(ErrorHandler.handle)`.
        return ErrorHandler.createErrorResponse(message, context);
    }

    /**
     * Build an error response from a ready-made message.
     *
     * @param {string} message Text stored in the `error` field.
     * @param {string} [context=''] Label describing where it happened.
     * @returns {{success: boolean, error: string, context: string, timestamp: string}}
     */
    static createErrorResponse(message, context = '') {
        return {
            success: false,
            error: message,
            context: context,
            timestamp: new Date().toISOString()
        };
    }
}
288
+
289
+ // Progress tracking utility
290
/**
 * Tracks progress towards a total and notifies subscribers on every change.
 *
 * Subscribers are called as `callback(percentage, progress, total)`, where
 * `percentage` is clamped to the range 0..100 (0 while the total is 0) and
 * `progress` / `total` are the raw stored values.
 */
class ProgressTracker {
    constructor() {
        this.progress = 0;
        this.total = 0;
        this.callbacks = [];
    }

    /** Set a new total and restart progress from 0. */
    setTotal(total) {
        this.total = total;
        this.progress = 0;
        this.notifyCallbacks();
    }

    /** Advance progress by `amount` (default 1). */
    increment(amount = 1) {
        this.progress += amount;
        this.notifyCallbacks();
    }

    /** Set progress to an absolute value. */
    setProgress(progress) {
        this.progress = progress;
        this.notifyCallbacks();
    }

    /**
     * Subscribe to progress changes.
     *
     * @param {Function} callback Called as (percentage, progress, total).
     * @returns {Function} Call it to remove this subscription.
     * @throws {TypeError} When `callback` is not a function. Rejecting it
     *     here avoids a crash on every later notification.
     */
    onProgress(callback) {
        if (typeof callback !== 'function') {
            throw new TypeError('onProgress callback must be a function');
        }
        this.callbacks.push(callback);
        return () => {
            const index = this.callbacks.indexOf(callback);
            if (index !== -1) this.callbacks.splice(index, 1);
        };
    }

    /** Call every subscriber with the current state. */
    notifyCallbacks() {
        const ratio = this.total > 0 ? (this.progress / this.total) * 100 : 0;
        // Overshooting or negative progress must not yield e.g. 150% or -50%.
        const percentage = Math.min(100, Math.max(0, ratio));
        // Iterate over a copy so a subscriber may unsubscribe while notified.
        this.callbacks.slice().forEach(callback => callback(percentage, this.progress, this.total));
    }

    /** Reset progress and total to 0. */
    reset() {
        this.progress = 0;
        this.total = 0;
        this.notifyCallbacks();
    }
}
328
+
329
+ // Export utilities
330
// Publish every utility class as a property of `window` so that scripts
// loaded after this file can reach them as globals.
Object.assign(window, {
    NLPAPI,
    BatchProcessor,
    APICache,
    RateLimiter,
    ErrorHandler,
    ProgressTracker
});
static/js/components.js ADDED
@@ -0,0 +1,380 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Component-specific JavaScript for NLP Ultimate Tutorial
2
+
3
+ // POS Tagging functionality
4
/**
 * Renders part-of-speech tagged tokens as coloured inline badges.
 */
class POSTagging {
    /**
     * Escape a value for safe interpolation into HTML text or a quoted attribute.
     * `null` and `undefined` become the empty string.
     */
    static _escapeHtml(value) {
        if (value === null || value === undefined) return '';
        const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
        return String(value).replace(/[&<>"']/g, ch => entities[ch]);
    }

    /**
     * Replace the content of the element with id `containerId` by one badge per token.
     *
     * @param {Array<{text: string, pos: string, explanation?: string}>} tokens
     *     Tokens to render; anything that is not an array renders nothing.
     * @param {string} containerId - Id of the target element; a missing element is a no-op.
     */
    static highlightTokens(tokens, containerId) {
        const container = document.getElementById(containerId);
        if (!container) return;

        const items = Array.isArray(tokens) ? tokens : [];
        container.innerHTML = items.map(token => {
            const color = this.getPOSColor(token.pos);
            // Token fields come from analysed user text, so they are escaped
            // before being placed into markup and into the title attribute.
            const text = POSTagging._escapeHtml(token.text);
            const pos = POSTagging._escapeHtml(token.pos);
            const tip = POSTagging._escapeHtml(token.explanation || '');
            return `<span class="pos-token" style="background-color: ${color};" title="${tip}">${text} <small>(${pos})</small></span>`;
        }).join(' ');
    }

    /**
     * Map a POS tag to its background colour; unknown tags get '#FAFAFA'.
     */
    static getPOSColor(pos) {
        const colors = {
            'NOUN': '#E3F2FD', 'PROPN': '#E3F2FD', 'VERB': '#E8F5E9',
            'ADJ': '#FFF8E1', 'ADV': '#F3E5F5', 'ADP': '#EFEBE9',
            'PRON': '#E8EAF6', 'DET': '#E0F7FA', 'CONJ': '#FBE9E7',
            'NUM': '#FFEBEE', 'PART': '#F1F8E9', 'INTJ': '#FFF3E0',
            'PUNCT': '#FAFAFA', 'SYM': '#FAFAFA', 'X': '#FAFAFA'
        };
        // Own-property check so tags such as "constructor" do not resolve to
        // members inherited from Object.prototype.
        return Object.prototype.hasOwnProperty.call(colors, pos) ? colors[pos] : '#FAFAFA';
    }
}
28
+
29
+ // Named Entity Recognition functionality
30
/**
 * Renders named entities as coloured inline badges.
 */
class NER {
    /**
     * Escape a value for safe interpolation into HTML text or a quoted attribute.
     * `null` and `undefined` become the empty string.
     */
    static _escapeHtml(value) {
        if (value === null || value === undefined) return '';
        const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
        return String(value).replace(/[&<>"']/g, ch => entities[ch]);
    }

    /**
     * Replace the content of the element with id `containerId` by one badge per entity.
     *
     * @param {Array<{text: string, type: string, explanation?: string}>} entities
     *     Entities to render; anything that is not an array renders nothing.
     * @param {string} containerId - Id of the target element; a missing element is a no-op.
     */
    static highlightEntities(entities, containerId) {
        const container = document.getElementById(containerId);
        if (!container) return;

        const items = Array.isArray(entities) ? entities : [];
        container.innerHTML = items.map(entity => {
            const color = this.getEntityColor(entity.type);
            // Entity fields originate from analysed user text; escape them
            // before they reach the markup or the title attribute.
            const text = NER._escapeHtml(entity.text);
            const type = NER._escapeHtml(entity.type);
            const tip = NER._escapeHtml(entity.explanation || '');
            return `<span class="entity-token" style="background-color: ${color};" title="${tip}">${text} <small>(${type})</small></span>`;
        }).join(' ');
    }

    /**
     * Map an entity type to its background colour; unknown types get '#FAFAFA'.
     */
    static getEntityColor(type) {
        const colors = {
            'PERSON': '#E3F2FD', 'ORG': '#E8F5E9', 'GPE': '#FFF8E1',
            'LOC': '#F3E5F5', 'PRODUCT': '#EFEBE9', 'EVENT': '#E8EAF6',
            'WORK_OF_ART': '#E0F7FA', 'LAW': '#FBE9E7', 'LANGUAGE': '#FFEBEE',
            'DATE': '#F1F8E9', 'TIME': '#FFF3E0', 'PERCENT': '#FAFAFA',
            'MONEY': '#FAFAFA', 'QUANTITY': '#FAFAFA', 'ORDINAL': '#FAFAFA',
            'CARDINAL': '#FAFAFA'
        };
        // Own-property check so a type such as "toString" does not resolve to
        // a member inherited from Object.prototype.
        return Object.prototype.hasOwnProperty.call(colors, type) ? colors[type] : '#FAFAFA';
    }
}
55
+
56
+ // Sentiment Analysis functionality
57
/**
 * Renders a sentiment score as a coloured numeric gauge with a textual label.
 */
class SentimentAnalysis {
    /**
     * Render the gauge into the element with id `containerId`.
     *
     * Numeric strings are converted to numbers. A missing, blank or
     * non-numeric score renders "N/A" / "Unknown" instead of throwing on
     * `toFixed`.
     *
     * @param {number|string} score - Sentiment score to display.
     * @param {string} containerId - Id of the target element; a missing element is a no-op.
     */
    static createGauge(score, containerId) {
        const container = document.getElementById(containerId);
        if (!container) return;

        // Number(null) and Number('') are both 0, which would display a
        // fabricated neutral score, so treat them as missing.
        const blank = score === null || score === undefined ||
            (typeof score === 'string' && score.trim() === '');
        const value = blank ? NaN : Number(score);
        const valid = Number.isFinite(value);

        const color = this.getSentimentColor(valid ? value : 0);
        const label = valid ? this.getSentimentLabel(value) : 'Unknown';
        const shown = valid ? value.toFixed(3) : 'N/A';

        container.innerHTML = `
            <div class="sentiment-gauge">
                <div class="sentiment-score" style="color: ${color};">${shown}</div>
                <div class="sentiment-label" style="color: ${color};">${label}</div>
            </div>
        `;
    }

    /** Green above 0.1, red below -0.1, orange in between (bounds inclusive). */
    static getSentimentColor(score) {
        if (score > 0.1) return '#4CAF50';
        if (score < -0.1) return '#F44336';
        return '#FF9800';
    }

    /** 'Positive' above 0.1, 'Negative' below -0.1, otherwise 'Neutral'. */
    static getSentimentLabel(score) {
        if (score > 0.1) return 'Positive';
        if (score < -0.1) return 'Negative';
        return 'Neutral';
    }
}
85
+
86
+ // Text Generation functionality
87
/**
 * UI helpers for the text-generation page: result display and sampling sliders.
 */
class TextGeneration {
    /**
     * Escape a value for safe interpolation into HTML text.
     * `null` and `undefined` become the empty string.
     */
    static _escapeHtml(value) {
        if (value === null || value === undefined) return '';
        const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
        return String(value).replace(/[&<>"']/g, ch => entities[ch]);
    }

    /**
     * Show the prompt followed by the generated continuation.
     *
     * Both strings are escaped: the prompt is user input and the continuation
     * is arbitrary text, so neither may be interpreted as markup.
     *
     * @param {string} prompt - Text the user supplied.
     * @param {string} generated - Generated continuation.
     * @param {string} containerId - Id of the target element; a missing element is a no-op.
     */
    static displayGeneratedText(prompt, generated, containerId) {
        const container = document.getElementById(containerId);
        if (!container) return;

        const safePrompt = TextGeneration._escapeHtml(prompt);
        const safeGenerated = TextGeneration._escapeHtml(generated);
        container.innerHTML = `
            <div class="generated-text">
                <span class="prompt-text">${safePrompt}</span>
                <span class="generated-content">${safeGenerated}</span>
            </div>
        `;
    }

    /**
     * Render the temperature / top-p / max-length sliders into `containerId`
     * and keep each slider's value label in sync while it is dragged.
     *
     * The sliders use the fixed ids `temperature`, `top-p` and `max-length`.
     */
    static createParameterControls(containerId) {
        const container = document.getElementById(containerId);
        if (!container) return;

        container.innerHTML = `
            <div class="row">
                <div class="col-md-4">
                    <label for="temperature" class="form-label">Temperature</label>
                    <input type="range" class="form-range" id="temperature" min="0.1" max="1.5" value="0.7" step="0.1">
                    <div class="d-flex justify-content-between">
                        <small>0.1</small>
                        <small id="temperature-value">0.7</small>
                        <small>1.5</small>
                    </div>
                </div>
                <div class="col-md-4">
                    <label for="top-p" class="form-label">Top-p</label>
                    <input type="range" class="form-range" id="top-p" min="0.1" max="1.0" value="0.9" step="0.1">
                    <div class="d-flex justify-content-between">
                        <small>0.1</small>
                        <small id="top-p-value">0.9</small>
                        <small>1.0</small>
                    </div>
                </div>
                <div class="col-md-4">
                    <label for="max-length" class="form-label">Max Length</label>
                    <input type="range" class="form-range" id="max-length" min="30" max="250" value="100" step="10">
                    <div class="d-flex justify-content-between">
                        <small>30</small>
                        <small id="max-length-value">100</small>
                        <small>250</small>
                    </div>
                </div>
            </div>
        `;

        // Add event listeners for parameter updates
        ['temperature', 'top-p', 'max-length'].forEach(param => {
            const slider = document.getElementById(param);
            const valueDisplay = document.getElementById(`${param}-value`);
            if (slider && valueDisplay) {
                slider.addEventListener('input', () => {
                    valueDisplay.textContent = slider.value;
                });
            }
        });
    }
}
148
+
149
+ // Translation functionality
150
/**
 * UI helpers for the translation page: result display and language pickers.
 */
class Translation {
    /**
     * Escape a value for safe interpolation into HTML text.
     * `null` and `undefined` become the empty string.
     */
    static _escapeHtml(value) {
        if (value === null || value === undefined) return '';
        const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
        return String(value).replace(/[&<>"']/g, ch => entities[ch]);
    }

    /**
     * Show the source text and its translation, each under a language badge.
     *
     * All four strings are escaped so that text containing `<`, `&` or quotes
     * is displayed literally instead of being parsed as markup.
     *
     * @param {string} sourceText - Original text.
     * @param {string} targetText - Translated text.
     * @param {string} sourceLang - Label shown on the source badge.
     * @param {string} targetLang - Label shown on the target badge.
     * @param {string} containerId - Id of the target element; a missing element is a no-op.
     */
    static displayTranslationPair(sourceText, targetText, sourceLang, targetLang, containerId) {
        const container = document.getElementById(containerId);
        if (!container) return;

        const esc = Translation._escapeHtml;
        container.innerHTML = `
            <div class="translation-pair">
                <div class="source-text">
                    <div class="language-badge" style="background-color: var(--primary-color); color: white;">
                        ${esc(sourceLang)}
                    </div>
                    <p>${esc(sourceText)}</p>
                </div>
                <div class="target-text">
                    <div class="language-badge" style="background-color: var(--success-color); color: white;">
                        ${esc(targetLang)}
                    </div>
                    <p>${esc(targetText)}</p>
                </div>
            </div>
        `;
    }

    /**
     * Render source and target language `<select>` elements into `containerId`.
     *
     * The source list starts with an "Auto-detect" option (value `auto`);
     * the target list pre-selects English.
     */
    static createLanguageSelector(containerId) {
        const container = document.getElementById(containerId);
        if (!container) return;

        const languages = [
            { code: 'en', name: 'English' },
            { code: 'es', name: 'Spanish' },
            { code: 'fr', name: 'French' },
            { code: 'de', name: 'German' },
            { code: 'ru', name: 'Russian' },
            { code: 'zh', name: 'Chinese' },
            { code: 'ar', name: 'Arabic' },
            { code: 'hi', name: 'Hindi' },
            { code: 'ja', name: 'Japanese' },
            { code: 'pt', name: 'Portuguese' },
            { code: 'it', name: 'Italian' }
        ];

        container.innerHTML = `
            <div class="row">
                <div class="col-md-6">
                    <label for="source-lang" class="form-label">Source Language</label>
                    <select id="source-lang" class="form-select">
                        <option value="auto">Auto-detect</option>
                        ${languages.map(lang => `<option value="${lang.code}">${lang.name}</option>`).join('')}
                    </select>
                </div>
                <div class="col-md-6">
                    <label for="target-lang" class="form-label">Target Language</label>
                    <select id="target-lang" class="form-select">
                        ${languages.map(lang => `<option value="${lang.code}" ${lang.code === 'en' ? 'selected' : ''}>${lang.name}</option>`).join('')}
                    </select>
                </div>
            </div>
        `;
    }
}
210
+
211
+ // Classification functionality
212
/**
 * Renders classification results as label / confidence rows.
 */
class Classification {
    /**
     * Escape a value for safe interpolation into HTML text.
     * `null` and `undefined` become the empty string.
     */
    static _escapeHtml(value) {
        if (value === null || value === undefined) return '';
        const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
        return String(value).replace(/[&<>"']/g, ch => entities[ch]);
    }

    /**
     * Replace the content of `containerId` with one row per result.
     *
     * @param {Array<{label: string, score: number}>} results
     *     Results to show; `score` is multiplied by 100 and printed with one
     *     decimal. Anything that is not an array renders nothing.
     * @param {string} containerId - Id of the target element; a missing element is a no-op.
     */
    static displayResults(results, containerId) {
        const container = document.getElementById(containerId);
        if (!container) return;

        const rows = Array.isArray(results) ? results : [];
        // Labels are escaped so that a label containing markup characters is
        // shown literally rather than parsed as HTML.
        container.innerHTML = rows.map(result => `
            <div class="classification-result">
                <div class="classification-label">${Classification._escapeHtml(result.label)}</div>
                <div class="classification-score" style="color: ${this.getScoreColor(result.score)};">
                    ${(result.score * 100).toFixed(1)}%
                </div>
            </div>
        `).join('');
    }

    /** Green above 0.7, orange above 0.4, red otherwise. */
    static getScoreColor(score) {
        if (score > 0.7) return '#4CAF50';
        if (score > 0.4) return '#FF9800';
        return '#F44336';
    }
}
233
+
234
+ // Vector Embeddings functionality
235
/**
 * Renders semantic-search hits with a similarity percentage and a progress bar.
 */
class VectorEmbeddings {
    /**
     * Escape a value for safe interpolation into HTML text.
     * `null` and `undefined` become the empty string.
     */
    static _escapeHtml(value) {
        if (value === null || value === undefined) return '';
        const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
        return String(value).replace(/[&<>"']/g, ch => entities[ch]);
    }

    /**
     * Convert a similarity score into a bar width in percent, limited to
     * [0, 100]; non-finite scores give 0. A negative or over-100 width is not
     * a usable CSS value for the bar.
     */
    static _barWidth(score) {
        const pct = score * 100;
        if (!Number.isFinite(pct)) return 0;
        return Math.min(100, Math.max(0, pct));
    }

    /**
     * Replace the content of `containerId` with one card per search hit.
     *
     * The printed similarity is the raw `score * 100`; only the bar width is
     * clamped.
     *
     * @param {Array<{text: string, score: number}>} results
     *     Hits to show; anything that is not an array renders nothing.
     * @param {string} containerId - Id of the target element; a missing element is a no-op.
     */
    static displaySearchResults(results, containerId) {
        const container = document.getElementById(containerId);
        if (!container) return;

        const hits = Array.isArray(results) ? results : [];
        container.innerHTML = hits.map(result => `
            <div class="search-result">
                <div class="result-text">${VectorEmbeddings._escapeHtml(result.text)}</div>
                <div class="search-score">Similarity: ${(result.score * 100).toFixed(1)}%</div>
                <div class="progress mt-2" style="height: 8px;">
                    <div class="progress-bar" role="progressbar"
                         style="width: ${VectorEmbeddings._barWidth(result.score)}%; background-color: ${this.getScoreColor(result.score)};">
                    </div>
                </div>
            </div>
        `).join('');
    }

    /** Green above 0.7, orange above 0.4, red otherwise. */
    static getScoreColor(score) {
        if (score > 0.7) return '#4CAF50';
        if (score > 0.4) return '#FF9800';
        return '#F44336';
    }
}
259
+
260
+ // Chart utilities
261
/**
 * Thin factories around the global `Chart` constructor, one per chart type.
 *
 * Each factory returns `null` when no element with the given id exists.
 * Caller options are shallow-merged over the defaults, so a caller-supplied
 * top-level key (for example `plugins`) replaces the default for that key
 * entirely.
 */
class ChartUtils {
    /** Look up the canvas and build a chart of `type`; shared by all factories. */
    static _render(type, canvasId, data, defaults, options) {
        const canvas = document.getElementById(canvasId);
        if (canvas) {
            return new Chart(canvas, {
                type,
                data,
                options: { ...defaults, ...options }
            });
        }
        return null;
    }

    /** Default options with the legend at `legendPosition` and, optionally, a zero-based y axis. */
    static _defaults(legendPosition, zeroBasedY) {
        const defaults = {
            responsive: true,
            maintainAspectRatio: false,
            plugins: { legend: { position: legendPosition } }
        };
        if (zeroBasedY) {
            defaults.scales = { y: { beginAtZero: true } };
        }
        return defaults;
    }

    /** Bar chart: legend on top, y axis starting at zero. */
    static createBarChart(canvasId, data, options = {}) {
        return ChartUtils._render('bar', canvasId, data, ChartUtils._defaults('top', true), options);
    }

    /** Pie chart: legend at the bottom, no axis defaults. */
    static createPieChart(canvasId, data, options = {}) {
        return ChartUtils._render('pie', canvasId, data, ChartUtils._defaults('bottom', false), options);
    }

    /** Line chart: legend on top, y axis starting at zero. */
    static createLineChart(canvasId, data, options = {}) {
        return ChartUtils._render('line', canvasId, data, ChartUtils._defaults('top', true), options);
    }
}
335
+
336
+ // Animation utilities
337
/**
 * CSS-transition based entrance animations.
 *
 * Each helper writes the starting style and a transition, then sets the final
 * style from a 10 ms timer so the change happens after the starting style has
 * been applied.
 */
class AnimationUtils {
    /** Fade `element` from opacity 0 to 1 over `duration` milliseconds. */
    static fadeIn(element, duration = 500) {
        const style = element.style;
        style.opacity = '0';
        style.transition = `opacity ${duration}ms ease-in`;

        setTimeout(() => { style.opacity = '1'; }, 10);
    }

    /**
     * Slide `element` in horizontally. `direction === 'left'` starts off-screen
     * to the left; any other value starts off-screen to the right.
     */
    static slideIn(element, direction = 'left', duration = 500) {
        const style = element.style;
        let start = 'translateX(100%)';
        if (direction === 'left') {
            start = 'translateX(-100%)';
        }
        style.transform = start;
        style.transition = `transform ${duration}ms ease-out`;

        setTimeout(() => { style.transform = 'translateX(0)'; }, 10);
    }

    /** Scale `element` from 0.3 to 1 while fading it in. */
    static bounceIn(element, duration = 600) {
        const style = element.style;
        style.transform = 'scale(0.3)';
        style.opacity = '0';
        style.transition = `all ${duration}ms ease-out`;

        setTimeout(() => {
            style.transform = 'scale(1)';
            style.opacity = '1';
        }, 10);
    }
}
368
+
369
+ // Export classes for global use
370
// Expose every component class of this file under one global namespace object.
const nlpComponents = {};
nlpComponents.POSTagging = POSTagging;
nlpComponents.NER = NER;
nlpComponents.SentimentAnalysis = SentimentAnalysis;
nlpComponents.TextGeneration = TextGeneration;
nlpComponents.Translation = Translation;
nlpComponents.Classification = Classification;
nlpComponents.VectorEmbeddings = VectorEmbeddings;
nlpComponents.ChartUtils = ChartUtils;
nlpComponents.AnimationUtils = AnimationUtils;
window.NLPComponents = nlpComponents;
static/js/main.js ADDED
@@ -0,0 +1,370 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Main JavaScript for NLP Ultimate Tutorial
2
+
3
+ // Theme management
4
/**
 * Flip the page between the light and dark themes.
 *
 * Writes the new value to the `data-theme` attribute of the root element,
 * stores it under the `theme` key in localStorage, and swaps the icon of the
 * `#theme-icon` element when that element exists.
 */
function toggleTheme() {
    const root = document.documentElement;
    const goingDark = root.getAttribute('data-theme') !== 'dark';
    const newTheme = goingDark ? 'dark' : 'light';

    root.setAttribute('data-theme', newTheme);
    localStorage.setItem('theme', newTheme);

    // The icon shows the theme the next click switches to: sun while dark.
    const icon = document.getElementById('theme-icon');
    if (!icon) return;
    icon.className = goingDark ? 'fas fa-sun' : 'fas fa-moon';
}
17
+
18
+ // Initialize theme on page load
19
/**
 * Apply the theme stored under the `theme` localStorage key ('light' when
 * nothing is stored) to the root element and update `#theme-icon` if present.
 */
function initializeTheme() {
    let savedTheme = localStorage.getItem('theme');
    if (!savedTheme) {
        savedTheme = 'light';
    }
    document.documentElement.setAttribute('data-theme', savedTheme);

    const icon = document.getElementById('theme-icon');
    if (!icon) return;
    if (savedTheme === 'dark') {
        icon.className = 'fas fa-sun';
    } else {
        icon.className = 'fas fa-moon';
    }
}
28
+
29
+ // Loading state management
30
/**
 * Replace the content of the element with id `elementId` by a spinner and a
 * "Processing your request..." caption. Does nothing when the element is missing.
 */
function showLoading(elementId) {
    const target = document.getElementById(elementId);
    if (!target) return;

    target.innerHTML = `
        <div class="text-center py-4">
            <div class="spinner-border text-primary" role="status">
                <span class="visually-hidden">Loading...</span>
            </div>
            <p class="mt-2">Processing your request...</p>
        </div>
    `;
}
43
+
44
/**
 * Clear the element with id `elementId`, but only while its markup still
 * contains the text `spinner-border`. Content that replaced the spinner
 * (results, error alerts) is therefore left in place.
 */
function hideLoading(elementId) {
    const target = document.getElementById(elementId);
    if (!target) return;

    const stillLoading = target.innerHTML.includes('spinner-border');
    if (stillLoading) {
        target.innerHTML = '';
    }
}
50
+
51
+ // Error handling
52
/**
 * Show a red Bootstrap alert with `message` inside the element with id
 * `elementId` (default `resultsContainer`). Does nothing when the element is
 * missing.
 *
 * The message is HTML-escaped before insertion: callers in this file pass
 * exception text and server-supplied error strings, which must be displayed
 * literally rather than parsed as markup.
 *
 * @param {string} message - Plain-text error description.
 * @param {string} [elementId='resultsContainer'] - Id of the target element.
 */
function showError(message, elementId = 'resultsContainer') {
    const element = document.getElementById(elementId);
    if (!element) return;

    const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    const safeMessage = String(message).replace(/[&<>"']/g, ch => entities[ch]);

    element.innerHTML = `
        <div class="alert alert-danger fade-in">
            <i class="fas fa-exclamation-triangle"></i>
            <strong>Error:</strong> ${safeMessage}
        </div>
    `;
}
63
+
64
+ // Success message
65
/**
 * Show a green Bootstrap alert with `message` inside the element with id
 * `elementId` (default `resultsContainer`). Does nothing when the element is
 * missing.
 *
 * The message is HTML-escaped before insertion so that text containing `<`,
 * `&` or quotes is displayed literally rather than parsed as markup.
 *
 * @param {string} message - Plain-text success description.
 * @param {string} [elementId='resultsContainer'] - Id of the target element.
 */
function showSuccess(message, elementId = 'resultsContainer') {
    const element = document.getElementById(elementId);
    if (!element) return;

    const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    const safeMessage = String(message).replace(/[&<>"']/g, ch => entities[ch]);

    element.innerHTML = `
        <div class="alert alert-success fade-in">
            <i class="fas fa-check-circle"></i>
            <strong>Success:</strong> ${safeMessage}
        </div>
    `;
}
76
+
77
+ // API request helper
78
/**
 * Send `data` as JSON to `url` and return the parsed JSON response.
 *
 * For `GET` and `HEAD` no request body is attached, because `fetch` rejects
 * such requests when a body is present; `data` is ignored for those methods.
 *
 * @param {string} url - Endpoint to call.
 * @param {*} data - Payload, serialised with `JSON.stringify`.
 * @param {string} [method='POST'] - HTTP method.
 * @returns {Promise<*>} Parsed JSON body of the response.
 * @throws {Error} `HTTP error! status: <code>` for a non-2xx response; network
 *     and JSON parsing errors are logged and re-thrown unchanged.
 */
async function makeApiRequest(url, data, method = 'POST') {
    try {
        const init = {
            method: method,
            headers: {
                'Content-Type': 'application/json',
            }
        };

        const verb = String(method).toUpperCase();
        if (verb !== 'GET' && verb !== 'HEAD') {
            init.body = JSON.stringify(data);
        }

        const response = await fetch(url, init);

        if (!response.ok) {
            throw new Error(`HTTP error! status: ${response.status}`);
        }

        return await response.json();
    } catch (error) {
        console.error('API request failed:', error);
        throw error;
    }
}
98
+
99
+ // Text processing functions
100
/**
 * Post `text` (plus any `additionalData` fields) to `endpoint` and render the
 * outcome in `#resultsContainer`.
 *
 * A spinner is shown while the request is in flight. A response with a truthy
 * `success` field has its `result` passed to `displayResults`; otherwise the
 * response's `error` (or a generic message) is shown. Request failures are
 * shown as "Failed to process text: ...". The function itself returns nothing
 * and does not wait for the request.
 */
function processText(endpoint, text, additionalData = {}) {
    const payload = { text: text, ...additionalData };

    showLoading('resultsContainer');

    (async () => {
        try {
            const response = await makeApiRequest(endpoint, payload);
            if (!response.success) {
                showError(response.error || 'An error occurred while processing the text');
                return;
            }
            displayResults(response.result);
        } catch (error) {
            showError('Failed to process text: ' + error.message);
        } finally {
            // Only clears the container if the spinner is still what it shows.
            hideLoading('resultsContainer');
        }
    })();
}
120
+
121
+ // Display results
122
/**
 * Put `result` into `#resultsContainer` as HTML and add the `fade-in` class.
 * Does nothing when the container is missing.
 *
 * NOTE(review): `result` is assigned to `innerHTML` unescaped, so it is
 * presumably trusted, server-rendered markup. Confirm against the API.
 */
function displayResults(result) {
    const target = document.getElementById('resultsContainer');
    if (!target) return;

    target.innerHTML = result;
    target.classList.add('fade-in');
}
129
+
130
+ // Copy to clipboard
131
/**
 * Copy `text` to the system clipboard and show a "Copied to clipboard!" toast
 * in the top-right corner for two seconds.
 *
 * When the asynchronous Clipboard API is not available (`navigator.clipboard`
 * is missing), the failure is logged instead of raising a TypeError from the
 * click handler.
 *
 * @param {string} text - Text to copy.
 */
function copyToClipboard(text) {
    const clipboard = typeof navigator !== 'undefined' ? navigator.clipboard : undefined;
    if (!clipboard || typeof clipboard.writeText !== 'function') {
        console.error('Failed to copy text: ', new Error('Clipboard API is not available'));
        return;
    }

    clipboard.writeText(text).then(() => {
        // Show temporary success message
        const toast = document.createElement('div');
        toast.className = 'alert alert-success position-fixed';
        toast.style.cssText = 'top: 20px; right: 20px; z-index: 9999; min-width: 200px;';
        toast.innerHTML = '<i class="fas fa-check"></i> Copied to clipboard!';
        document.body.appendChild(toast);

        setTimeout(() => {
            toast.remove();
        }, 2000);
    }).catch(err => {
        console.error('Failed to copy text: ', err);
    });
}
147
+
148
+ // Download text as file
149
/**
 * Offer `text` to the user as a plain-text file download.
 *
 * Creates a temporary object URL for a Blob of the text, clicks a temporary
 * `<a download>` element pointing at it, then removes the element and
 * revokes the URL.
 *
 * @param {string} text - File content.
 * @param {string} [filename='nlp_result.txt'] - Suggested file name.
 */
function downloadText(text, filename = 'nlp_result.txt') {
    const objectUrl = window.URL.createObjectURL(new Blob([text], { type: 'text/plain' }));

    const link = document.createElement('a');
    link.href = objectUrl;
    link.download = filename;

    const body = document.body;
    body.appendChild(link);
    link.click();
    body.removeChild(link);

    window.URL.revokeObjectURL(objectUrl);
}
160
+
161
+ // Format JSON for display
162
/**
 * Serialise `obj` as JSON indented with two spaces per nesting level.
 */
function formatJSON(obj) {
    const indentWidth = 2;
    return JSON.stringify(obj, null, indentWidth);
}
165
+
166
+ // Create data table
167
/**
 * Build the HTML for a striped, hoverable Bootstrap table.
 *
 * @param {Array<Array|Object>} data - One entry per row. Array rows contribute
 *     their elements as cells; any other row contributes `Object.values(row)`.
 * @param {Array} [headers] - Column titles; the `<thead>` is omitted when falsy.
 * @returns {string} Table markup wrapped in a `.table-responsive` div.
 *
 * NOTE(review): headers and cells are interpolated without escaping. Confirm
 * that callers only pass trusted values or pre-escaped markup.
 */
function createDataTable(data, headers) {
    const parts = ['<div class="table-responsive"><table class="table table-striped table-hover">'];

    // Header
    if (headers) {
        parts.push('<thead><tr>');
        headers.forEach(title => parts.push(`<th>${title}</th>`));
        parts.push('</tr></thead>');
    }

    // Body
    parts.push('<tbody>');
    data.forEach(row => {
        const cells = Array.isArray(row) ? row : Object.values(row);
        parts.push('<tr>');
        cells.forEach(cell => parts.push(`<td>${cell}</td>`));
        parts.push('</tr>');
    });
    parts.push('</tbody></table></div>');

    return parts.join('');
}
198
+
199
+ // Create chart
200
/**
 * Create a chart of the given `type` on the element with id `canvasId` using
 * the global `Chart` constructor.
 *
 * Caller `options` are shallow-merged over the defaults (responsive, free
 * aspect ratio, legend on top), so a caller-supplied top-level key replaces
 * the default for that key entirely.
 *
 * @returns {Chart|null} The new chart, or `null` when the element is missing.
 */
function createChart(canvasId, type, data, options = {}) {
    const canvas = document.getElementById(canvasId);
    if (!canvas) return null;

    const merged = {
        responsive: true,
        maintainAspectRatio: false,
        plugins: { legend: { position: 'top' } },
        ...options
    };

    return new Chart(canvas, { type, data, options: merged });
}
222
+
223
+ // Smooth scroll to element
224
/**
 * Smoothly scroll the element with id `elementId` to the top of the viewport.
 * Does nothing when the element is missing.
 */
function scrollToElement(elementId) {
    const target = document.getElementById(elementId);
    if (!target) return;

    const scrollOptions = { behavior: 'smooth', block: 'start' };
    target.scrollIntoView(scrollOptions);
}
233
+
234
+ // Debounce function for input handling
235
/**
 * Return a wrapper that postpones calling `func` until `wait` milliseconds
 * have passed without another call to the wrapper.
 *
 * Only the arguments of the most recent call are used. The wrapper forwards
 * its own `this` to `func`, so a debounced method keeps working when it is
 * attached to an object.
 *
 * @param {Function} func - Function to debounce.
 * @param {number} wait - Quiet period in milliseconds.
 * @returns {Function} Debounced wrapper; it returns nothing.
 */
function debounce(func, wait) {
    let timeout;
    return function executedFunction(...args) {
        const context = this;
        const later = () => {
            clearTimeout(timeout);
            func.apply(context, args);
        };
        clearTimeout(timeout);
        timeout = setTimeout(later, wait);
    };
}
246
+
247
+ // Throttle function for scroll handling
248
/**
 * Return a wrapper that calls `func` at most once per `limit` milliseconds.
 *
 * The first call goes through immediately with the wrapper's `this` and
 * arguments; calls made during the following `limit` ms are dropped, not
 * queued.
 */
function throttle(func, limit) {
    let coolingDown = false;
    return function (...args) {
        if (coolingDown) return;

        func.apply(this, args);
        coolingDown = true;
        setTimeout(() => { coolingDown = false; }, limit);
    };
}
260
+
261
+ // Local storage helpers
262
/**
 * Store `value` in localStorage under `key`, serialised as JSON.
 * Serialisation and storage errors are logged to the console, not thrown.
 */
function saveToStorage(key, value) {
    try {
        const serialized = JSON.stringify(value);
        localStorage.setItem(key, serialized);
    } catch (error) {
        console.error('Failed to save to localStorage:', error);
    }
}
269
+
270
/**
 * Read the JSON value stored in localStorage under `key`.
 *
 * @returns {*} The parsed value, or `defaultValue` when the key is absent,
 *     the stored string is empty, or reading/parsing fails (the failure is
 *     logged to the console).
 */
function loadFromStorage(key, defaultValue = null) {
    let value = defaultValue;
    try {
        const raw = localStorage.getItem(key);
        if (raw) {
            value = JSON.parse(raw);
        }
    } catch (error) {
        console.error('Failed to load from localStorage:', error);
    }
    return value;
}
279
+
280
+ // Session storage helpers
281
/**
 * Store `value` in sessionStorage under `key`, serialised as JSON.
 * Serialisation and storage errors are logged to the console, not thrown.
 */
function saveToSession(key, value) {
    try {
        const serialized = JSON.stringify(value);
        sessionStorage.setItem(key, serialized);
    } catch (error) {
        console.error('Failed to save to sessionStorage:', error);
    }
}
288
+
289
/**
 * Read the JSON value stored in sessionStorage under `key`.
 *
 * @returns {*} The parsed value, or `defaultValue` when the key is absent,
 *     the stored string is empty, or reading/parsing fails (the failure is
 *     logged to the console).
 */
function loadFromSession(key, defaultValue = null) {
    let value = defaultValue;
    try {
        const raw = sessionStorage.getItem(key);
        if (raw) {
            value = JSON.parse(raw);
        }
    } catch (error) {
        console.error('Failed to load from sessionStorage:', error);
    }
    return value;
}
298
+
299
+ // Initialize page
300
// Page bootstrap: runs once the DOM has been parsed.
document.addEventListener('DOMContentLoaded', function() {
    // Initialize theme
    initializeTheme();

    // Add fade-in animation to cards, staggered by 0.1 s per card
    const cards = document.querySelectorAll('.card');
    cards.forEach((card, index) => {
        card.style.animationDelay = `${index * 0.1}s`;
        card.classList.add('fade-in');
    });

    // Resolve the element carrying `selector` for a delegated click. Uses
    // closest() so that a click on an icon or label nested inside the button
    // still finds the button; returns null for non-element targets.
    const findTrigger = (event, selector) => {
        const target = event.target;
        return target && typeof target.closest === 'function' ? target.closest(selector) : null;
    };

    // Add click handlers for copy buttons (delegated, so buttons rendered
    // later are covered too)
    document.addEventListener('click', function(e) {
        const button = findTrigger(e, '.copy-btn');
        if (button) {
            const text = button.getAttribute('data-copy');
            if (text) {
                copyToClipboard(text);
            }
        }
    });

    // Add click handlers for download buttons
    document.addEventListener('click', function(e) {
        const button = findTrigger(e, '.download-btn');
        if (button) {
            const text = button.getAttribute('data-download');
            const filename = button.getAttribute('data-filename') || 'nlp_result.txt';
            if (text) {
                downloadText(text, filename);
            }
        }
    });

    // Handle form submissions
    // NOTE(review): this cancels the native submit of every form present at
    // load time and does nothing else; presumably each page wires its own
    // request logic. Confirm against the templates.
    const forms = document.querySelectorAll('form');
    forms.forEach(form => {
        form.addEventListener('submit', function(e) {
            e.preventDefault();
            // Handle form submission here
        });
    });

    // Add tooltips (relies on the global `bootstrap` object)
    const tooltipTriggerList = [].slice.call(document.querySelectorAll('[data-bs-toggle="tooltip"]'));
    tooltipTriggerList.map(function (tooltipTriggerEl) {
        return new bootstrap.Tooltip(tooltipTriggerEl);
    });
});
347
+
348
+ // Export functions for global use
349
// Publish the page helpers under a single global namespace object.
const nlpUtils = {};
nlpUtils.toggleTheme = toggleTheme;
nlpUtils.showLoading = showLoading;
nlpUtils.hideLoading = hideLoading;
nlpUtils.showError = showError;
nlpUtils.showSuccess = showSuccess;
nlpUtils.makeApiRequest = makeApiRequest;
nlpUtils.processText = processText;
nlpUtils.displayResults = displayResults;
nlpUtils.copyToClipboard = copyToClipboard;
nlpUtils.downloadText = downloadText;
nlpUtils.formatJSON = formatJSON;
nlpUtils.createDataTable = createDataTable;
nlpUtils.createChart = createChart;
nlpUtils.scrollToElement = scrollToElement;
nlpUtils.debounce = debounce;
nlpUtils.throttle = throttle;
nlpUtils.saveToStorage = saveToStorage;
nlpUtils.loadFromStorage = loadFromStorage;
nlpUtils.saveToSession = saveToSession;
nlpUtils.loadFromSession = loadFromSession;
window.NLPUtils = nlpUtils;
templates/_analysis_nav.html ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div class="card mb-4 quick-nav">
2
+ <div class="card-header">
3
+ <h5 class="mb-0"><i class="fas fa-compass me-2"></i>Quick Navigation</h5>
4
+ </div>
5
+ <div class="card-body">
6
+ <!-- Text Processing -->
7
+ <h6 class="mb-2"><i class="fas fa-edit me-2"></i>Text Processing</h6>
8
+ <div class="row mb-3">
9
+ <div class="col-md-3 mb-2">
10
+ <a href="{{ url_for('preprocessing') }}" class="btn btn-outline-primary w-100"><i class="fas fa-tools"></i> Preprocessing</a>
11
+ </div>
12
+ <div class="col-md-3 mb-2">
13
+ <a href="{{ url_for('tokenization') }}" class="btn btn-outline-primary w-100"><i class="fas fa-cut"></i> Tokenization</a>
14
+ </div>
15
+ <div class="col-md-3 mb-2">
16
+ <a href="{{ url_for('pos_tagging') }}" class="btn btn-outline-primary w-100"><i class="fas fa-tags"></i> POS</a>
17
+ </div>
18
+ <div class="col-md-3 mb-2">
19
+ <a href="{{ url_for('named_entity') }}" class="btn btn-outline-primary w-100"><i class="fas fa-user-tag"></i> NER</a>
20
+ </div>
21
+ </div>
22
+
23
+ <!-- Analysis -->
24
+ <h6 class="mb-2"><i class="fas fa-chart-line me-2"></i>Analysis</h6>
25
+ <div class="row mb-3">
26
+ <div class="col-md-4 mb-2">
27
+ <a href="{{ url_for('sentiment') }}" class="btn btn-outline-success w-100"><i class="fas fa-smile"></i> Sentiment</a>
28
+ </div>
29
+ <div class="col-md-4 mb-2">
30
+ <a href="{{ url_for('summarization') }}" class="btn btn-outline-success w-100"><i class="fas fa-compress"></i> Summarization</a>
31
+ </div>
32
+ <div class="col-md-4 mb-2">
33
+ <a href="{{ url_for('topic_analysis') }}" class="btn btn-outline-success w-100"><i class="fas fa-project-diagram"></i> Topics</a>
34
+ </div>
35
+ </div>
36
+
37
+ <!-- Advanced NLP -->
38
+ <h6 class="mb-2"><i class="fas fa-robot me-2"></i>Advanced NLP</h6>
39
+ <div class="row">
40
+ <div class="col-md-2 mb-2">
41
+ <a href="{{ url_for('question_answering') }}" class="btn btn-outline-info w-100"><i class="fas fa-question-circle"></i> QA</a>
42
+ </div>
43
+ <div class="col-md-2 mb-2">
44
+ <a href="{{ url_for('text_generation') }}" class="btn btn-outline-info w-100"><i class="fas fa-magic"></i> Generation</a>
45
+ </div>
46
+ <div class="col-md-2 mb-2">
47
+ <a href="{{ url_for('translation') }}" class="btn btn-outline-info w-100"><i class="fas fa-language"></i> Translate</a>
48
+ </div>
49
+ <div class="col-md-2 mb-2">
50
+ <a href="{{ url_for('classification') }}" class="btn btn-outline-info w-100"><i class="fas fa-sitemap"></i> Classify</a>
51
+ </div>
52
+ <div class="col-md-2 mb-2">
53
+ <a href="{{ url_for('vector_embeddings') }}" class="btn btn-outline-info w-100"><i class="fas fa-vector-square"></i> Embeddings</a>
54
+ </div>
55
+ </div>
56
+ </div>
57
+ </div>
58
+
59
+ <script>
60
+ // Remove Quick Nav functionality - let navbar handle all navigation
61
(function() {
    // Limit the lookup to the Quick Navigation card that directly precedes
    // this script. The script's parent is whatever element the including page
    // wraps the partial in, so querying the parent would also attach the
    // handler to unrelated links of that page.
    const script = document.currentScript;
    const navCard = script.previousElementSibling;
    const scope = navCard && navCard.classList.contains('quick-nav') ? navCard : script.parentElement;

    scope.querySelectorAll('a[href]').forEach(link => {
        link.addEventListener('click', function(event) {
            // Keep the click from bubbling to other listeners; the default
            // browser navigation still happens. No sessionStorage flags are set.
            event.stopPropagation();
        });
    });
})();
72
+ </script>
templates/base.html ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>{% block title %}NLP Ultimate Tutorial{% endblock %}</title>
7
+
8
+ <!-- Bootstrap CSS -->
9
+ <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
10
+
11
+ <!-- Custom CSS -->
12
+ <link rel="stylesheet" href="{{ url_for('static', filename='css/style.css') }}">
13
+ <link rel="stylesheet" href="{{ url_for('static', filename='css/components.css') }}">
14
+
15
+ <!-- Font Awesome -->
16
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
17
+
18
+ <!-- Chart.js -->
19
+ <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
20
+
21
+ {% block extra_head %}{% endblock %}
22
+ </head>
23
+ <body>
24
+ <!-- Navigation -->
25
+ <nav class="navbar navbar-expand-lg navbar-dark bg-primary">
26
+ <div class="container-fluid">
27
+ <a class="navbar-brand" href="{{ url_for('index') }}">
28
+ <i class="fas fa-brain"></i> NLP Ultimate Tutorial
29
+ </a>
30
+ <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#mainNavbar" aria-controls="mainNavbar" aria-expanded="false" aria-label="Toggle navigation">
31
+ <span class="navbar-toggler-icon"></span>
32
+ </button>
33
+ <div class="collapse navbar-collapse" id="mainNavbar">
34
+ <ul class="navbar-nav me-auto mb-2 mb-lg-0">
35
+ <li class="nav-item dropdown">
36
+ <a class="nav-link dropdown-toggle" href="#" id="navTextProcessing" role="button" data-bs-toggle="dropdown" aria-expanded="false">
37
+ <i class="fas fa-edit"></i> Text Processing
38
+ </a>
39
+ <ul class="dropdown-menu" aria-labelledby="navTextProcessing">
40
+ <li><a class="dropdown-item" href="{{ url_for('preprocessing') }}">Preprocessing</a></li>
41
+ <li><a class="dropdown-item" href="{{ url_for('tokenization') }}">Tokenization</a></li>
42
+ <li><a class="dropdown-item" href="{{ url_for('pos_tagging') }}">POS Tagging</a></li>
43
+ <li><a class="dropdown-item" href="{{ url_for('named_entity') }}">Named Entities</a></li>
44
+ </ul>
45
+ </li>
46
+ <li class="nav-item dropdown">
47
+ <a class="nav-link dropdown-toggle" href="#" id="navAnalysis" role="button" data-bs-toggle="dropdown" aria-expanded="false">
48
+ <i class="fas fa-chart-line"></i> Analysis
49
+ </a>
50
+ <ul class="dropdown-menu" aria-labelledby="navAnalysis">
51
+ <li><a class="dropdown-item" href="{{ url_for('sentiment') }}">Sentiment</a></li>
52
+ <li><a class="dropdown-item" href="{{ url_for('summarization') }}">Summarization</a></li>
53
+ <li><a class="dropdown-item" href="{{ url_for('topic_analysis') }}">Topic Analysis</a></li>
54
+ <li><a class="dropdown-item" href="{{ url_for('question_answering') }}">Question Answering</a></li>
55
+ </ul>
56
+ </li>
57
+ <li class="nav-item dropdown">
58
+ <a class="nav-link dropdown-toggle" href="#" id="navAdvanced" role="button" data-bs-toggle="dropdown" aria-expanded="false">
59
+ <i class="fas fa-robot"></i> Advanced NLP
60
+ </a>
61
+ <ul class="dropdown-menu dropdown-menu-end" aria-labelledby="navAdvanced">
62
+ <li><a class="dropdown-item" href="{{ url_for('text_generation') }}">Text Generation</a></li>
63
+ <li><a class="dropdown-item" href="{{ url_for('translation') }}">Translation</a></li>
64
+ <li><a class="dropdown-item" href="{{ url_for('classification') }}">Classification</a></li>
65
+ <li><a class="dropdown-item" href="{{ url_for('vector_embeddings') }}">Embeddings</a></li>
66
+ </ul>
67
+ </li>
68
+ </ul>
69
+ <div class="d-flex">
70
+ <button class="btn btn-outline-light btn-sm" onclick="toggleTheme()" title="Toggle theme">
71
+ <i class="fas fa-moon" id="theme-icon"></i>
72
+ </button>
73
+ </div>
74
+ </div>
75
+ </div>
76
+ </nav>
77
+
78
+ <!-- Main Content -->
79
+ <main class="container-fluid py-4">
80
+ {% block content %}{% endblock %}
81
+ </main>
82
+
83
+ <!-- Footer -->
84
+ <footer class="modern-footer">
85
+ <div class="container">
86
+ <div class="row align-items-center">
87
+ <div class="col-md-4">
88
+ <div class="footer-brand">
89
+ <h5><i class="fas fa-brain"></i> NLP Ultimate Tutorial</h5>
90
+ <p class="footer-description">Comprehensive guide to Natural Language Processing concepts and techniques.</p>
91
+ </div>
92
+ </div>
93
+ <div class="col-md-4 text-center">
94
+ <div class="footer-credit">
95
+ <div class="credit-badge">
96
+ <span class="credit-text">Designed and developed by</span>
97
+ <strong class="developer-name">Aradhya Pavan H S</strong>
98
+ </div>
99
+ </div>
100
+ </div>
101
+ <div class="col-md-4 text-md-end">
102
+ <div class="social-links">
103
+ <a href="https://github.com/aradhyapavan" target="_blank" rel="noopener noreferrer" class="social-link github-link">
104
+ <i class="fab fa-github"></i>
105
+ <span>GitHub</span>
106
+ </a>
107
+ <a href="https://www.linkedin.com/in/aradhya-pavan/" target="_blank" rel="noopener noreferrer" class="social-link linkedin-link">
108
+ <i class="fab fa-linkedin"></i>
109
+ <span>LinkedIn</span>
110
+ </a>
111
+ </div>
112
+ </div>
113
+ </div>
114
+ </div>
115
+ </footer>
116
+
117
+ <!-- Bootstrap JS -->
118
+ <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
119
+
120
+ <!-- Custom JS -->
121
+ <script src="{{ url_for('static', filename='js/main.js') }}"></script>
122
+
123
+ {% block extra_scripts %}{% endblock %}
124
+ </body>
125
+ </html>
templates/classification.html ADDED
@@ -0,0 +1,449 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends "base.html" %}
2
+
3
+ {% block title %}Zero-shot Classification - NLP Ultimate Tutorial{% endblock %}
4
+
5
+ {% block content %}
6
+ <div class="container">
7
+ <!-- Header Section -->
8
+ <div class="row mb-4">
9
+ <div class="col-12">
10
+ <div class="card">
11
+ <div class="card-header">
12
+ <h1 class="mb-0">
13
+ <i class="fas fa-tags"></i>
14
+ Zero-shot Classification
15
+ </h1>
16
+ </div>
17
+ <div class="card-body">
18
+ <p class="lead">Classify text into arbitrary categories without training on specific examples.</p>
19
+
20
+ <div class="alert alert-info">
21
+ <i class="fas fa-info-circle"></i>
22
+ <strong>About:</strong> Zero-shot classification can categorize text into arbitrary classes without having been specifically trained on those categories.
23
+ </div>
24
+ </div>
25
+ </div>
26
+ </div>
27
+ </div>
28
+
29
+ {% include "_analysis_nav.html" %}
30
+
31
+ <!-- Input Section -->
32
+ <div class="row mb-4">
33
+ <div class="col-12">
34
+ <div class="card">
35
+ <div class="card-header">
36
+ <h3 class="mb-0">
37
+ <i class="fas fa-keyboard"></i>
38
+ Enter text to classify:
39
+ </h3>
40
+ </div>
41
+ <div class="card-body">
42
+ <div class="row mb-3">
43
+ <div class="col-md-8">
44
+ <textarea id="textInput" class="form-control" rows="6" placeholder="Enter text here...">I absolutely love this new product! It's amazing and works perfectly.</textarea>
45
+ </div>
46
+ <div class="col-md-4">
47
+ <label for="sampleSelect" class="form-label">Or choose a sample:</label>
48
+ <select id="sampleSelect" class="form-select">
49
+ <option value="Custom">Custom</option>
50
+ <option value="Sentiment">Sentiment</option>
51
+ <option value="Emotion">Emotion</option>
52
+ <option value="Writing Style">Writing Style</option>
53
+ <option value="Intent">Intent</option>
54
+ </select>
55
+ </div>
56
+ </div>
57
+
58
+ <div class="d-flex justify-content-between align-items-center">
59
+ <div>
60
+ <button id="processBtn" class="btn btn-primary btn-lg">
61
+ <i class="fas fa-tags"></i>
62
+ Classify Text
63
+ </button>
64
+ </div>
65
+ <div>
66
+ <button id="clearBtn" class="btn btn-outline-secondary">
67
+ <i class="fas fa-trash"></i>
68
+ Clear
69
+ </button>
70
+ </div>
71
+ </div>
72
+ </div>
73
+ </div>
74
+ </div>
75
+ </div>
76
+
77
+ <!-- Classification Settings Section -->
78
+ <div class="row mb-4">
79
+ <div class="col-12">
80
+ <div class="card">
81
+ <div class="card-header">
82
+ <h3 class="mb-0">
83
+ <i class="fas fa-cog"></i>
84
+ Classification Settings
85
+ </h3>
86
+ </div>
87
+ <div class="card-body">
88
+ <div class="row">
89
+ <div class="col-md-6">
90
+ <label for="scenario" class="form-label">Classification Scenario</label>
91
+ <select id="scenario" class="form-select">
92
+ <option value="Sentiment" selected>Sentiment</option>
93
+ <option value="Emotion">Emotion</option>
94
+ <option value="Writing Style">Writing Style</option>
95
+ <option value="Intent">Intent</option>
96
+ <option value="Content Type">Content Type</option>
97
+ <option value="Audience Level">Audience Level</option>
98
+ <option value="Custom">Custom</option>
99
+ </select>
100
+ </div>
101
+ <div class="col-md-6">
102
+ <div class="form-check form-switch mt-4">
103
+ <input class="form-check-input" type="checkbox" id="multiLabel">
104
+ <label class="form-check-label" for="multiLabel">
105
+ Multi-label classification
106
+ </label>
107
+ <small class="form-text text-muted">Allow multiple categories</small>
108
+ </div>
109
+ </div>
110
+ </div>
111
+
112
+ <!-- Custom labels input (hidden by default) -->
113
+ <div id="customLabelsDiv" class="mt-3" style="display: none;">
114
+ <label for="customLabels" class="form-label">Custom Categories (one per line)</label>
115
+ <textarea id="customLabels" class="form-control" rows="4" placeholder="Enter custom categories here..."></textarea>
116
+ </div>
117
+ </div>
118
+ </div>
119
+ </div>
120
+ </div>
121
+
122
+ <!-- Model Info Section -->
123
+ <div class="row mb-4">
124
+ <div class="col-12">
125
+ <div class="card">
126
+ <div class="card-header">
127
+ <h3 class="mb-0">
128
+ <i class="fas fa-info-circle"></i>
129
+ Model Information
130
+ </h3>
131
+ </div>
132
+ <div class="card-body">
133
+ <div class="row">
134
+ <div class="col-md-4">
135
+ <div class="card h-100">
136
+ <div class="card-body text-center">
137
+ <i class="fas fa-brain fa-2x text-primary mb-2"></i>
138
+ <h5>BART-large-mnli</h5>
139
+ <p class="small">BART model fine-tuned on MultiNLI dataset</p>
140
+ <ul class="list-unstyled small text-start">
141
+ <li>• Zero-shot classification</li>
142
+ <li>• Arbitrary categories</li>
143
+ <li>• High accuracy</li>
144
+ </ul>
145
+ </div>
146
+ </div>
147
+ </div>
148
+ <div class="col-md-4">
149
+ <div class="card h-100">
150
+ <div class="card-body text-center">
151
+ <i class="fas fa-sliders-h fa-2x text-success mb-2"></i>
152
+ <h5>Flexible Classification</h5>
153
+ <p class="small">Classify into any user-defined categories</p>
154
+ <ul class="list-unstyled small text-start">
155
+ <li>• Pre-defined scenarios</li>
156
+ <li>• Custom categories</li>
157
+ <li>• Multi-label support</li>
158
+ </ul>
159
+ </div>
160
+ </div>
161
+ </div>
162
+ <div class="col-md-4">
163
+ <div class="card h-100">
164
+ <div class="card-body text-center">
165
+ <i class="fas fa-chart-bar fa-2x text-info mb-2"></i>
166
+ <h5>Confidence Scoring</h5>
167
+ <p class="small">Detailed confidence scores for each category</p>
168
+ <ul class="list-unstyled small text-start">
169
+ <li>• Confidence visualization</li>
170
+ <li>• Ranking by score</li>
171
+ <li>• Multiple category detection</li>
172
+ </ul>
173
+ </div>
174
+ </div>
175
+ </div>
176
+ </div>
177
+ </div>
178
+ </div>
179
+ </div>
180
+ </div>
181
+
182
+ <!-- Example Texts Section -->
183
+ <div class="row mb-4">
184
+ <div class="col-12">
185
+ <div class="card">
186
+ <div class="card-header">
187
+ <h3 class="mb-0">
188
+ <i class="fas fa-list"></i>
189
+ Example Texts
190
+ </h3>
191
+ </div>
192
+ <div class="card-body">
193
+ <div class="row">
194
+ <div class="col-md-6">
195
+ <button class="btn btn-outline-primary w-100 mb-2" onclick="setExample('I absolutely love this new product! It\'s amazing and works perfectly.', 'Sentiment')">
196
+ I absolutely love this new product! It's amazing and works perfectly.
197
+ </button>
198
+ </div>
199
+ <div class="col-md-6">
200
+ <button class="btn btn-outline-primary w-100 mb-2" onclick="setExample('I am so excited about this opportunity!', 'Emotion')">
201
+ I am so excited about this opportunity!
202
+ </button>
203
+ </div>
204
+ <div class="col-md-6">
205
+ <button class="btn btn-outline-primary w-100 mb-2" onclick="setExample('The implementation requires careful consideration of the underlying architecture.', 'Writing Style')">
206
+ The implementation requires careful consideration of the underlying architecture.
207
+ </button>
208
+ </div>
209
+ <div class="col-md-6">
210
+ <button class="btn btn-outline-primary w-100 mb-2" onclick="setExample('You should definitely buy this product because it will solve all your problems.', 'Intent')">
211
+ You should definitely buy this product because it will solve all your problems.
212
+ </button>
213
+ </div>
214
+ </div>
215
+ </div>
216
+ </div>
217
+ </div>
218
+ </div>
219
+
220
+ <!-- Results Section -->
221
+ <div class="row">
222
+ <div class="col-12">
223
+ <div class="card">
224
+ <div class="card-header">
225
+ <h3 class="mb-0">
226
+ <i class="fas fa-chart-bar"></i>
227
+ Classification Results
228
+ </h3>
229
+ </div>
230
+ <div class="card-body">
231
+ <div id="resultsContainer">
232
+ <div class="text-center text-muted py-5">
233
+ <i class="fas fa-arrow-up fa-2x mb-3"></i>
234
+ <p>Click "Classify Text" to see classification results</p>
235
+ </div>
236
+ </div>
237
+ </div>
238
+ </div>
239
+ </div>
240
+ </div>
241
+ </div>
242
+ {% endblock %}
243
+
244
+ {% block extra_scripts %}
245
+ <script>
246
// Initialize page: wire up every interactive control once the DOM is ready.
document.addEventListener('DOMContentLoaded', function() {
    // Only carry over when using Quick Nav; otherwise leave defaults
    const shouldCarry = sessionStorage.getItem('carryTextOnNextPage') === '1';
    if (shouldCarry) {
        const storedText = sessionStorage.getItem('analysisText');
        if (storedText) document.getElementById('textInput').value = storedText;
        // The flag is consumed here so it applies to a single navigation only.
        sessionStorage.removeItem('carryTextOnNextPage');
    }

    // Scenario change handler
    document.getElementById('scenario').addEventListener('change', function() {
        // The custom-labels textarea is only relevant for the "Custom" scenario.
        const customLabelsDiv = document.getElementById('customLabelsDiv');
        if (this.value === 'Custom') {
            customLabelsDiv.style.display = 'block';
        } else {
            customLabelsDiv.style.display = 'none';
        }

        // Update multi-label checkbox based on scenario.
        // It is switched on for these scenarios and otherwise left as the user set it.
        const multiLabelCheckbox = document.getElementById('multiLabel');
        if (['Emotion', 'Intent', 'Content Type'].includes(this.value)) {
            multiLabelCheckbox.checked = true;
        }
    });

    // Sample text dropdown handler
    document.getElementById('sampleSelect').addEventListener('change', function() {
        const sampleType = this.value;
        const textInput = document.getElementById('textInput');
        const scenario = document.getElementById('scenario');

        if (sampleType === 'Custom') {
            textInput.value = '';
        } else {
            // Set sample prompts based on type
            const samples = {
                'Sentiment': 'I absolutely love this new product! It\'s amazing and works perfectly.',
                'Emotion': 'I am so excited about this opportunity!',
                'Writing Style': 'The implementation requires careful consideration of the underlying architecture.',
                'Intent': 'You should definitely buy this product because it will solve all your problems.'
            };

            if (samples[sampleType]) {
                textInput.value = samples[sampleType];
                scenario.value = sampleType;
                // Assigning .value from script does not fire a "change" event, so the
                // custom-labels panel has to be synced by hand. None of the sample
                // scenarios is "Custom", so the panel is always hidden here.
                document.getElementById('customLabelsDiv').style.display = 'none';
            }
        }
    });

    // Process button handler
    document.getElementById('processBtn').addEventListener('click', function() {
        const button = this;
        const text = document.getElementById('textInput').value.trim();

        if (!text) {
            alert('Please enter text to classify.');
            return;
        }

        // Show loading state; the disabled button also blocks duplicate submissions.
        button.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Classifying...';
        button.disabled = true;

        const restoreButton = () => {
            button.innerHTML = '<i class="fas fa-tags"></i> Classify Text';
            button.disabled = false;
        };

        // Process classification. If the call hands back a promise, restore the
        // button when it settles; otherwise fall back to a fixed two-second delay.
        const pending = processClassification();
        if (pending && typeof pending.finally === 'function') {
            pending.finally(restoreButton);
        } else {
            setTimeout(restoreButton, 2000);
        }
    });

    // Clear button handler: empty the input and restore the results placeholder.
    document.getElementById('clearBtn').addEventListener('click', function() {
        document.getElementById('textInput').value = '';
        document.getElementById('resultsContainer').innerHTML = `
            <div class="text-center text-muted py-5">
                <i class="fas fa-arrow-up fa-2x mb-3"></i>
                <p>Click "Classify Text" to see classification results</p>
            </div>
        `;
    });

    // Keyboard shortcuts
    document.addEventListener('keydown', function(e) {
        // Ctrl+Enter to process
        if (e.ctrlKey && e.key === 'Enter') {
            document.getElementById('processBtn').click();
        }

        // Ctrl+L to clear (preventDefault stops the browser's own Ctrl+L action)
        if (e.ctrlKey && e.key === 'l') {
            e.preventDefault();
            document.getElementById('clearBtn').click();
        }
    });
});
344
+
345
/**
 * Load an example into the form.
 *
 * @param {string} text - Example text placed in the input textarea.
 * @param {string} scenario - Value selected in the scenario dropdown.
 */
function setExample(text, scenario) {
    const textArea = document.getElementById('textInput');
    textArea.value = text;

    const scenarioSelect = document.getElementById('scenario');
    scenarioSelect.value = scenario;

    // The custom-labels panel is shown only for the "Custom" scenario.
    const labelsPanel = document.getElementById('customLabelsDiv');
    labelsPanel.style.display = scenario === 'Custom' ? 'block' : 'none';
}
358
+
359
/**
 * Send the current text and classification settings to the server and render the outcome.
 *
 * Reads the text, scenario, multi-label flag and custom labels from the form,
 * POSTs them as JSON to /api/classification, and shows either the returned
 * result or an error message inside #resultsContainer.
 *
 * @returns {Promise<void>} Settles once the request has finished and the results
 *     area has been updated; already resolved when there is no text to classify.
 */
function processClassification() {
    const text = document.getElementById('textInput').value.trim();
    const scenario = document.getElementById('scenario').value;
    const multiLabel = document.getElementById('multiLabel').checked;
    const customLabels = document.getElementById('customLabels').value;

    if (!text) {
        alert('Please enter text to classify.');
        return Promise.resolve();
    }

    showLoading('resultsContainer');

    return fetch('/api/classification', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({
            text: text,
            scenario: scenario,
            multi_label: multiLabel,
            custom_labels: customLabels
        })
    })
    .then(response => response.json().catch(() => {
        // The body was not JSON (for example an HTML error page): report the
        // HTTP status instead of an opaque parse error.
        throw new Error(`Server returned ${response.status} ${response.statusText}`.trim());
    }))
    .then(data => {
        if (data.success) {
            displayResults(data.result);
        } else {
            showError(data.error || 'An error occurred while classifying text');
        }
    })
    .catch(error => {
        showError('Failed to classify text: ' + error.message);
    })
    .finally(() => {
        // Only clears the container if the spinner is still what it shows.
        hideLoading('resultsContainer');
    });
}
400
+
401
/**
 * Replace the content of an element with a spinner and a "Classifying text..." note.
 *
 * @param {string} elementId - Id of the element to fill; does nothing if it is not found.
 */
function showLoading(elementId) {
    const target = document.getElementById(elementId);
    if (!target) {
        return;
    }

    target.innerHTML = `
        <div class="text-center py-4">
            <div class="spinner-border text-primary" role="status">
                <span class="visually-hidden">Loading...</span>
            </div>
            <p class="mt-2">Classifying text...</p>
        </div>
    `;
}
415
+
416
/**
 * Empty an element, but only while its markup still contains the loading spinner.
 *
 * @param {string} elementId - Id of the element to clear; does nothing if it is not found.
 */
function hideLoading(elementId) {
    const target = document.getElementById(elementId);
    // Content that replaced the spinner (results or an error) must be left alone.
    const spinnerVisible = Boolean(target) && target.innerHTML.includes('spinner-border');
    if (spinnerVisible) {
        target.innerHTML = '';
    }
}
423
+
424
/**
 * Show an error alert inside an element, replacing its current content.
 *
 * The message is inserted as plain text, never as markup, so text coming from
 * the server or from an exception cannot inject HTML or script into the page.
 *
 * @param {*} message - Error description; converted to a string for display.
 * @param {string} [elementId='resultsContainer'] - Id of the element to fill;
 *     does nothing if it is not found.
 */
function showError(message, elementId = 'resultsContainer') {
    const element = document.getElementById(elementId);
    if (!element) {
        return;
    }

    const alertBox = document.createElement('div');
    alertBox.className = 'alert alert-danger fade-in';

    const icon = document.createElement('i');
    icon.className = 'fas fa-exclamation-triangle';

    const label = document.createElement('strong');
    label.textContent = 'Error:';

    alertBox.appendChild(icon);
    alertBox.appendChild(document.createTextNode(' '));
    alertBox.appendChild(label);
    // A text node keeps any "<" or "&" in the message literal.
    alertBox.appendChild(document.createTextNode(' ' + String(message)));

    element.innerHTML = '';
    element.appendChild(alertBox);
}
436
+
437
/**
 * Render a classification result in #resultsContainer and scroll it into view.
 *
 * @param {string} result - Markup assigned to the container's innerHTML.
 *     NOTE(review): presumably server-rendered HTML that is already escaped; confirm.
 */
function displayResults(result) {
    const resultsBox = document.getElementById('resultsContainer');
    if (!resultsBox) {
        return;
    }

    resultsBox.innerHTML = result;
    resultsBox.classList.add('fade-in');

    // Bring the freshly rendered results to the top of the viewport.
    resultsBox.scrollIntoView({ behavior: 'smooth', block: 'start' });
}
448
+ </script>
449
+ {% endblock %}
templates/index.html ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends "base.html" %}
2
+
3
+ {% block title %}NLP Ultimate Tutorial - Home{% endblock %}
4
+
5
+ {% block content %}
6
+ <div class="container">
7
+ <!-- Header Section -->
8
+ <div class="row mb-5">
9
+ <div class="col-12 text-center">
10
+ <h1 class="display-4 mb-3">
11
+ <i class="fas fa-brain text-primary"></i>
12
+ Natural Language Processing Demo
13
+ </h1>
14
+ <p class="lead">Explore the capabilities of modern NLP models and techniques. Enter your text and select a task to analyze.</p>
15
+
16
+ <div class="alert alert-info">
17
+ <i class="fas fa-info-circle"></i>
18
+ Ultimate guide to all the NLP concepts - Designed and developed by <strong>Aradhya Pavan</strong>
19
+ </div>
20
+ </div>
21
+ </div>
22
+
23
+ <!-- Text Input Section -->
24
+ <div class="row mb-4">
25
+ <div class="col-12">
26
+ <div class="card">
27
+ <div class="card-header">
28
+ <h3 class="mb-0">
29
+ <i class="fas fa-keyboard"></i>
30
+ Enter your text:
31
+ </h3>
32
+ </div>
33
+ <div class="card-body">
34
+ <div class="row mb-3">
35
+ <div class="col-md-8">
36
+ <textarea id="textInput" class="form-control" rows="6" placeholder="Enter or paste your text here...">Climate change is the long-term alteration of temperature and typical weather patterns in a place. The cause of current climate change is largely human activity, like burning fossil fuels, which adds heat-trapping gases to Earth's atmosphere. The consequences of changing climate are already being felt worldwide.</textarea>
37
+ </div>
38
+ <div class="col-md-4">
39
+ <label for="sampleSelect" class="form-label">Or choose a sample:</label>
40
+ <select id="sampleSelect" class="form-select">
41
+ <option value="Custom">Custom</option>
42
+ {% for key, value in sample_texts.items() %}
43
+ <option value="{{ key }}" {% if key == 'Scientific Text' %}selected{% endif %}>{{ key }}</option>
44
+ {% endfor %}
45
+ </select>
46
+ </div>
47
+ </div>
48
+
49
+ <!-- Text Statistics -->
50
+ <div id="textStats" class="row mb-3" style="display: none;">
51
+ <div class="col-md-4">
52
+ <div class="card text-center">
53
+ <div class="card-body">
54
+ <h5 class="card-title">Characters</h5>
55
+ <h2 class="text-primary" id="charCount">0</h2>
56
+ </div>
57
+ </div>
58
+ </div>
59
+ <div class="col-md-4">
60
+ <div class="card text-center">
61
+ <div class="card-body">
62
+ <h5 class="card-title">Words</h5>
63
+ <h2 class="text-primary" id="wordCount">0</h2>
64
+ </div>
65
+ </div>
66
+ </div>
67
+ <div class="col-md-4">
68
+ <div class="card text-center">
69
+ <div class="card-body">
70
+ <h5 class="card-title">Sentences</h5>
71
+ <h2 class="text-primary" id="sentenceCount">0</h2>
72
+ </div>
73
+ </div>
74
+ </div>
75
+ </div>
76
+
77
+ <!-- Warning -->
78
+ <div id="warningBox" class="alert alert-warning" style="display: none;">
79
+ <i class="fas fa-exclamation-triangle"></i>
80
+ <strong>Warning:</strong> Text exceeds 500 words. Some models may truncate the input or perform slower.
81
+ </div>
82
+ </div>
83
+ </div>
84
+ </div>
85
+ </div>
86
+
87
+ <!-- Instructions -->
88
+ <div class="row mb-4">
89
+ <div class="col-12">
90
+ <div class="alert alert-warning">
91
+ <div class="d-flex align-items-start">
92
+ <i class="fas fa-exclamation-triangle fa-2x me-3"></i>
93
+ <div>
94
+ <h5 class="alert-heading">Important Instructions:</h5>
95
+ <ul class="mb-0">
96
+ <li>When you change the text, please reselect your analysis method and task to refresh the results</li>
97
+ <li>Please wait a moment while processing your task - this may take a few seconds</li>
98
+ <li>Scroll down to see all processed results for your text</li>
99
+ </ul>
100
+ <hr>
101
+ <small><i class="fas fa-clock"></i> Processing may take longer for larger texts</small>
102
+ </div>
103
+ </div>
104
+ </div>
105
+ </div>
106
+ </div>
107
+
108
+ <!-- Analysis Methods (Simplified, no tabs) -->
109
+ <div class="row mb-4">
110
+ <div class="col-12">
111
+ <div class="card">
112
+ <div class="card-header bg-primary text-white text-center">
113
+ <h2 class="mb-0">CHOOSE AN OPERATION</h2>
114
+ </div>
115
+ <div class="card-body">
116
+ <!-- Text Processing -->
117
+ <h4 class="mb-3"><i class="fas fa-edit me-2"></i>Text Processing</h4>
118
+ <div class="row mb-4">
119
+ <div class="col-md-3 mb-2">
120
+ <a href="{{ url_for('preprocessing') }}" class="btn btn-primary w-100">
121
+ <i class="fas fa-tools"></i> Text Preprocessing
122
+ </a>
123
+ </div>
124
+ <div class="col-md-3 mb-2">
125
+ <a href="{{ url_for('tokenization') }}" class="btn btn-primary w-100">
126
+ <i class="fas fa-cut"></i> Tokenization
127
+ </a>
128
+ </div>
129
+ <div class="col-md-3 mb-2">
130
+ <a href="{{ url_for('pos_tagging') }}" class="btn btn-primary w-100">
131
+ <i class="fas fa-tags"></i> POS Tagging
132
+ </a>
133
+ </div>
134
+ <div class="col-md-3 mb-2">
135
+ <a href="{{ url_for('named_entity') }}" class="btn btn-primary w-100">
136
+ <i class="fas fa-user-tag"></i> Named Entities
137
+ </a>
138
+ </div>
139
+ </div>
140
+
141
+ <!-- Analysis -->
142
+ <h4 class="mb-3"><i class="fas fa-chart-line me-2"></i>Analysis</h4>
143
+ <div class="row mb-4">
144
+ <div class="col-md-4 mb-2">
145
+ <a href="{{ url_for('sentiment') }}" class="btn btn-success w-100">
146
+ <i class="fas fa-smile"></i> Sentiment Analysis
147
+ </a>
148
+ </div>
149
+ <div class="col-md-4 mb-2">
150
+ <a href="{{ url_for('summarization') }}" class="btn btn-success w-100">
151
+ <i class="fas fa-compress"></i> Text Summarization
152
+ </a>
153
+ </div>
154
+ <div class="col-md-4 mb-2">
155
+ <a href="{{ url_for('topic_analysis') }}" class="btn btn-success w-100">
156
+ <i class="fas fa-project-diagram"></i> Topic Analysis
157
+ </a>
158
+ </div>
159
+ </div>
160
+
161
+ <!-- Advanced NLP -->
162
+ <h4 class="mb-3"><i class="fas fa-robot me-2"></i>Advanced NLP</h4>
163
+ <div class="row">
164
+ <div class="col-md-2 mb-2">
165
+ <a href="{{ url_for('question_answering') }}" class="btn btn-info w-100">
166
+ <i class="fas fa-question-circle"></i> QA
167
+ </a>
168
+ </div>
169
+ <div class="col-md-2 mb-2">
170
+ <a href="{{ url_for('text_generation') }}" class="btn btn-info w-100">
171
+ <i class="fas fa-magic"></i> Generation
172
+ </a>
173
+ </div>
174
+ <div class="col-md-2 mb-2">
175
+ <a href="{{ url_for('translation') }}" class="btn btn-info w-100">
176
+ <i class="fas fa-language"></i> Translation
177
+ </a>
178
+ </div>
179
+ <div class="col-md-2 mb-2">
180
+ <a href="{{ url_for('classification') }}" class="btn btn-info w-100">
181
+ <i class="fas fa-sitemap"></i> Classification
182
+ </a>
183
+ </div>
184
+ <div class="col-md-2 mb-2">
185
+ <a href="{{ url_for('vector_embeddings') }}" class="btn btn-info w-100">
186
+ <i class="fas fa-vector-square"></i> Embeddings
187
+ </a>
188
+ </div>
189
+ </div>
190
+ </div>
191
+ </div>
192
+ </div>
193
+ </div>
194
+
195
+ <!-- Results Section -->
196
+ <div class="row">
197
+ <div class="col-12">
198
+ <div class="card">
199
+ <div class="card-header">
200
+ <h3 class="mb-0">
201
+ <i class="fas fa-chart-bar"></i>
202
+ Results
203
+ </h3>
204
+ </div>
205
+ <div class="card-body">
206
+ <div id="resultsContainer">
207
+ <div class="text-center text-muted">
208
+ <i class="fas fa-arrow-up fa-2x mb-3"></i>
209
+ <p>Select an analysis method above to see results here</p>
210
+ </div>
211
+ </div>
212
+ </div>
213
+ </div>
214
+ </div>
215
+ </div>
216
+ </div>
217
+ {% endblock %}
218
+
219
+ {% block extra_scripts %}
220
+ <script>
221
+ // Provide SAMPLE_TEXTS inline to avoid network for dropdown updates
222
+ const SAMPLE_TEXTS = {{ sample_texts | tojson | safe }};
223
+
224
+ // Initialize with default text statistics
225
+ document.addEventListener('DOMContentLoaded', function() {
226
+ // Ensure carry flag is cleared on home to avoid unintended persistence
227
+ sessionStorage.removeItem('carryTextOnNextPage');
228
+ // If a sample (not Custom) is selected by default, load it into the textarea
229
+ const select = document.getElementById('sampleSelect');
230
+ const textInput = document.getElementById('textInput');
231
+ if (select && select.value !== 'Custom' && SAMPLE_TEXTS[select.value]) {
232
+ textInput.value = SAMPLE_TEXTS[select.value];
233
+ }
234
+ updateTextStats();
235
+ });
236
+
237
// Sample dropdown: "Custom" empties the textarea, any other entry loads the
// matching sample text (or an empty string when there is none), then the
// statistics are refreshed.
const sampleSelectEl = document.getElementById('sampleSelect');
if (sampleSelectEl) {
    sampleSelectEl.addEventListener('change', (event) => {
        const chosenSample = event.currentTarget.value;
        const textArea = document.getElementById('textInput');
        textArea.value = chosenSample === 'Custom' ? '' : (SAMPLE_TEXTS[chosenSample] || '');
        updateTextStats();
    });
}
251
+
252
// Refresh the statistics whenever the textarea content changes.
const textAreaEl = document.getElementById('textInput');
if (textAreaEl) {
    textAreaEl.addEventListener('input', () => updateTextStats());
}
259
+
260
/**
 * Refresh the character / word / sentence counters for the current text.
 *
 * Hides the statistics row and the length warning when the text is blank.
 * Otherwise POSTs the text to /api/text-stats and displays the returned counts,
 * showing the warning when the word count exceeds 500.
 *
 * Each call supersedes earlier ones: a response that arrives after a newer
 * call was made is ignored, so slow responses cannot overwrite fresher
 * statistics or re-show the panel after the text was cleared.
 */
function updateTextStats() {
    const text = document.getElementById('textInput').value;

    // The counter lives on the function itself so no extra global is needed.
    const requestId = (updateTextStats.latestRequestId || 0) + 1;
    updateTextStats.latestRequestId = requestId;

    if (!text.trim()) {
        document.getElementById('textStats').style.display = 'none';
        document.getElementById('warningBox').style.display = 'none';
        return;
    }

    fetch('/api/text-stats', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({text: text})
    })
    .then(response => {
        // Without this check an error payload would render "undefined" in the counters.
        if (!response.ok) {
            throw new Error('Text statistics request failed with status ' + response.status);
        }
        return response.json();
    })
    .then(data => {
        if (requestId !== updateTextStats.latestRequestId) {
            // A newer call has been made since this request was sent.
            return;
        }

        document.getElementById('charCount').textContent = data.chars;
        document.getElementById('wordCount').textContent = data.words;
        document.getElementById('sentenceCount').textContent = data.sentences;

        document.getElementById('textStats').style.display = 'flex';

        // Show warning if text is too long
        if (data.words > 500) {
            document.getElementById('warningBox').style.display = 'block';
        } else {
            document.getElementById('warningBox').style.display = 'none';
        }
    })
    .catch(error => {
        console.error('Error:', error);
    });
}
296
+
297
/**
 * Save the textarea content under the "analysisText" session-storage key.
 */
function storeTextForAnalysis() {
    const currentText = document.getElementById('textInput').value;
    sessionStorage.setItem('analysisText', currentText);
}
302
+
303
// Attach storeTextForAnalysis to every link whose URL contains one of the
// analysis page paths, so the text is saved before navigating away.
{
    const analysisPaths = [
        '/preprocessing',
        '/tokenization',
        '/pos-tagging',
        '/named-entity',
        '/sentiment',
        '/summarization',
        '/topic-analysis',
        '/question-answering',
        '/text-generation',
        '/translation',
        '/classification',
        '/vector-embeddings'
    ];

    for (const link of document.querySelectorAll('a[href*="/"]')) {
        const pointsToAnalysisPage = analysisPaths.some(path => link.href.includes(path));
        if (pointsToAnalysisPage) {
            link.addEventListener('click', storeTextForAnalysis);
        }
    }
}
321
+ </script>
322
+ {% endblock %}
templates/named_entity.html ADDED
@@ -0,0 +1,362 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends "base.html" %}
2
+
3
+ {% block title %}Named Entity Recognition - NLP Ultimate Tutorial{% endblock %}
4
+
5
+ {% block content %}
6
+ <div class="container">
7
+ <!-- Header Section -->
8
+ <div class="row mb-4">
9
+ <div class="col-12">
10
+ <div class="card">
11
+ <div class="card-header">
12
+ <h1 class="mb-0">
13
+ <i class="fas fa-user-tag"></i>
14
+ Named Entity Recognition
15
+ </h1>
16
+ </div>
17
+ <div class="card-body">
18
+ <p class="lead">Identify and classify key information in text such as people, organizations, locations, and more.</p>
19
+
20
+ <div class="alert alert-info">
21
+ <i class="fas fa-info-circle"></i>
22
+ <strong>About:</strong> Named Entity Recognition identifies and classifies key information in text into pre-defined categories such as person names, organizations, locations, etc.
23
+ </div>
24
+ </div>
25
+ </div>
26
+ </div>
27
+ </div>
28
+
29
+ {% include "_analysis_nav.html" %}
30
+
31
+ <!-- Text Input Section -->
32
+ <div class="row mb-4">
33
+ <div class="col-12">
34
+ <div class="card">
35
+ <div class="card-header">
36
+ <h3 class="mb-0">
37
+ <i class="fas fa-keyboard"></i>
38
+ Enter your text:
39
+ </h3>
40
+ </div>
41
+ <div class="card-body">
42
+ <div class="row mb-3">
43
+ <div class="col-md-8">
44
+ <textarea id="textInput" class="form-control" rows="6" placeholder="Enter or paste your text here...">Apple Inc. is planning to open a new campus in Austin, Texas next July. CEO Tim Cook announced the plan yesterday.</textarea>
45
+ </div>
46
+ <div class="col-md-4">
47
+ <label for="sampleSelect" class="form-label">Or choose a sample:</label>
48
+ <select id="sampleSelect" class="form-select">
49
+ <option value="Custom">Custom</option>
50
+ <option value="News Article">News Article</option>
51
+ <option value="Product Review">Product Review</option>
52
+ <option value="Scientific Text">Scientific Text</option>
53
+ <option value="Literary Text">Literary Text</option>
54
+ </select>
55
+ </div>
56
+ </div>
57
+
58
+ <div class="d-flex justify-content-between align-items-center">
59
+ <div>
60
+ <button id="processBtn" class="btn btn-primary btn-lg">
61
+ <i class="fas fa-user-tag"></i>
62
+ Identify Entities
63
+ </button>
64
+ </div>
65
+ <div>
66
+ <button id="clearBtn" class="btn btn-outline-secondary">
67
+ <i class="fas fa-trash"></i>
68
+ Clear
69
+ </button>
70
+ </div>
71
+ </div>
72
+ </div>
73
+ </div>
74
+ </div>
75
+ </div>
76
+
77
+ <!-- Entity Types Info -->
78
+ <div class="row mb-4">
79
+ <div class="col-12">
80
+ <div class="card">
81
+ <div class="card-header">
82
+ <h3 class="mb-0">
83
+ <i class="fas fa-info-circle"></i>
84
+ Entity Types Detected
85
+ </h3>
86
+ </div>
87
+ <div class="card-body">
88
+ <div class="row">
89
+ <div class="col-md-3">
90
+ <div class="card h-100">
91
+ <div class="card-body text-center">
92
+ <i class="fas fa-user fa-2x text-danger mb-2"></i>
93
+ <h5>PERSON</h5>
94
+ <p class="small">People, including fictional characters</p>
95
+ </div>
96
+ </div>
97
+ </div>
98
+ <div class="col-md-3">
99
+ <div class="card h-100">
100
+ <div class="card-body text-center">
101
+ <i class="fas fa-building fa-2x text-success mb-2"></i>
102
+ <h5>ORG</h5>
103
+ <p class="small">Organizations, companies, institutions</p>
104
+ </div>
105
+ </div>
106
+ </div>
107
+ <div class="col-md-3">
108
+ <div class="card h-100">
109
+ <div class="card-body text-center">
110
+ <i class="fas fa-map-marker-alt fa-2x text-primary mb-2"></i>
111
+ <h5>GPE</h5>
112
+ <p class="small">Countries, cities, states</p>
113
+ </div>
114
+ </div>
115
+ </div>
116
+ <div class="col-md-3">
117
+ <div class="card h-100">
118
+ <div class="card-body text-center">
119
+ <i class="fas fa-calendar fa-2x text-warning mb-2"></i>
120
+ <h5>DATE</h5>
121
+ <p class="small">Absolute or relative dates</p>
122
+ </div>
123
+ </div>
124
+ </div>
125
+ </div>
126
+
127
+ <div class="row mt-3">
128
+ <div class="col-md-3">
129
+ <div class="card h-100">
130
+ <div class="card-body text-center">
131
+ <i class="fas fa-dollar-sign fa-2x text-info mb-2"></i>
132
+ <h5>MONEY</h5>
133
+ <p class="small">Monetary values</p>
134
+ </div>
135
+ </div>
136
+ </div>
137
+ <div class="col-md-3">
138
+ <div class="card h-100">
139
+ <div class="card-body text-center">
140
+ <i class="fas fa-percentage fa-2x text-secondary mb-2"></i>
141
+ <h5>PERCENT</h5>
142
+ <p class="small">Percentage values</p>
143
+ </div>
144
+ </div>
145
+ </div>
146
+ <div class="col-md-3">
147
+ <div class="card h-100">
148
+ <div class="card-body text-center">
149
+ <i class="fas fa-cube fa-2x text-purple mb-2"></i>
150
+ <h5>PRODUCT</h5>
151
+ <p class="small">Products, objects, vehicles</p>
152
+ </div>
153
+ </div>
154
+ </div>
155
+ <div class="col-md-3">
156
+ <div class="card h-100">
157
+ <div class="card-body text-center">
158
+ <i class="fas fa-users fa-2x text-dark mb-2"></i>
159
+ <h5>NORP</h5>
160
+ <p class="small">Nationalities, religious groups</p>
161
+ </div>
162
+ </div>
163
+ </div>
164
+ </div>
165
+ </div>
166
+ </div>
167
+ </div>
168
+ </div>
169
+
170
+ <!-- Results Section -->
171
+ <div class="row">
172
+ <div class="col-12">
173
+ <div class="card">
174
+ <div class="card-header">
175
+ <h3 class="mb-0">
176
+ <i class="fas fa-chart-bar"></i>
177
+ Entity Recognition Results
178
+ </h3>
179
+ </div>
180
+ <div class="card-body">
181
+ <div id="resultsContainer">
182
+ <div class="text-center text-muted py-5">
183
+ <i class="fas fa-arrow-up fa-2x mb-3"></i>
184
+ <p>Click "Identify Entities" to see named entity recognition results</p>
185
+ </div>
186
+ </div>
187
+ </div>
188
+ </div>
189
+ </div>
190
+ </div>
191
+ </div>
192
+ {% endblock %}
193
+
194
+ {% block extra_scripts %}
195
+ <script>
196
+ // Initialize page
197
+ document.addEventListener('DOMContentLoaded', function() {
198
+ // Only carry over when using Quick Nav; otherwise leave defaults
199
+ const shouldCarry = sessionStorage.getItem('carryTextOnNextPage') === '1';
200
+ if (shouldCarry) {
201
+ const sampleSel = document.getElementById('sampleSelect');
202
+ if (sampleSel) sampleSel.value = 'Custom';
203
+ const storedText = sessionStorage.getItem('analysisText');
204
+ if (storedText) document.getElementById('textInput').value = storedText;
205
+ sessionStorage.removeItem('carryTextOnNextPage');
206
+ }
207
+
208
+ // Sample text dropdown handler
209
+ document.getElementById('sampleSelect').addEventListener('change', function() {
210
+ const sampleType = this.value;
211
+ const textInput = document.getElementById('textInput');
212
+
213
+ if (sampleType === 'Custom') {
214
+ textInput.value = '';
215
+ } else {
216
+ // Get sample text from server
217
+ fetch('/api/sample-text', {
218
+ method: 'POST',
219
+ headers: {
220
+ 'Content-Type': 'application/json',
221
+ },
222
+ body: JSON.stringify({sample_type: sampleType})
223
+ })
224
+ .then(response => response.json())
225
+ .then(data => {
226
+ textInput.value = data.text;
227
+ });
228
+ }
229
+ });
230
+
231
+ // Process button handler
232
+ document.getElementById('processBtn').addEventListener('click', function() {
233
+ const text = document.getElementById('textInput').value.trim();
234
+
235
+ if (!text) {
236
+ alert('Please enter some text to analyze.');
237
+ return;
238
+ }
239
+
240
+ // Show loading state
241
+ this.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Processing...';
242
+ this.disabled = true;
243
+
244
+ // Process text
245
+ processNamedEntities();
246
+
247
+ // Reset button after a fixed 2-second delay (NOTE(review): not tied to completion of the request started above)
248
+ setTimeout(() => {
249
+ this.innerHTML = '<i class="fas fa-user-tag"></i> Identify Entities';
250
+ this.disabled = false;
251
+ }, 2000);
252
+ });
253
+
254
+ // Clear button handler
255
+ document.getElementById('clearBtn').addEventListener('click', function() {
256
+ document.getElementById('textInput').value = '';
257
+ document.getElementById('resultsContainer').innerHTML = `
258
+ <div class="text-center text-muted py-5">
259
+ <i class="fas fa-arrow-up fa-2x mb-3"></i>
260
+ <p>Click "Identify Entities" to see named entity recognition results</p>
261
+ </div>
262
+ `;
263
+ });
264
+
265
+ // Keyboard shortcuts
266
+ document.addEventListener('keydown', function(e) {
267
+ // Ctrl+Enter to process
268
+ if (e.ctrlKey && e.key === 'Enter') {
269
+ document.getElementById('processBtn').click();
270
+ }
271
+
272
+ // Ctrl+L to clear
273
+ if (e.ctrlKey && e.key === 'l') {
274
+ e.preventDefault();
275
+ document.getElementById('clearBtn').click();
276
+ }
277
+ });
278
+ });
279
+
280
+ // Process named entity recognition
281
+ function processNamedEntities() {
282
+ const text = document.getElementById('textInput').value.trim();
283
+
284
+ if (!text) {
285
+ alert('Please enter some text to analyze.');
286
+ return;
287
+ }
288
+
289
+ showLoading('resultsContainer');
290
+
291
+ fetch('/api/named-entity', {
292
+ method: 'POST',
293
+ headers: {
294
+ 'Content-Type': 'application/json',
295
+ },
296
+ body: JSON.stringify({text: text})
297
+ })
298
+ .then(response => response.json())
299
+ .then(data => {
300
+ if (data.success) {
301
+ displayResults(data.result);
302
+ } else {
303
+ showError(data.error || 'An error occurred while processing the text');
304
+ }
305
+ })
306
+ .catch(error => {
307
+ showError('Failed to process text: ' + error.message);
308
+ })
309
+ .finally(() => {
310
+ hideLoading('resultsContainer');
311
+ });
312
+ }
313
+
314
+ // Show loading state
315
+ function showLoading(elementId) {
316
+ const element = document.getElementById(elementId);
317
+ if (element) {
318
+ element.innerHTML = `
319
+ <div class="text-center py-4">
320
+ <div class="spinner-border text-primary" role="status">
321
+ <span class="visually-hidden">Loading...</span>
322
+ </div>
323
+ <p class="mt-2">Identifying entities...</p>
324
+ </div>
325
+ `;
326
+ }
327
+ }
328
+
329
+ // Hide loading state
330
+ function hideLoading(elementId) {
331
+ const element = document.getElementById(elementId);
332
+ if (element && element.innerHTML.includes('spinner-border')) {
333
+ element.innerHTML = '';
334
+ }
335
+ }
336
+
337
+ // Show error message
338
+ function showError(message, elementId = 'resultsContainer') {
339
+ const element = document.getElementById(elementId);
340
+ if (element) {
341
+ element.innerHTML = `
342
+ <div class="alert alert-danger fade-in">
343
+ <i class="fas fa-exclamation-triangle"></i>
344
+ <strong>Error:</strong> ${message}
345
+ </div>
346
+ `;
347
+ }
348
+ }
349
+
350
+ // Display results
351
+ function displayResults(result) {
352
+ const container = document.getElementById('resultsContainer');
353
+ if (container) {
354
+ container.innerHTML = result;
355
+ container.classList.add('fade-in');
356
+
357
+ // Scroll to results
358
+ container.scrollIntoView({ behavior: 'smooth', block: 'start' });
359
+ }
360
+ }
361
+ </script>
362
+ {% endblock %}
templates/pos_tagging.html ADDED
@@ -0,0 +1,397 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends "base.html" %}
2
+
3
+ {% block title %}Part-of-Speech Tagging - NLP Ultimate Tutorial{% endblock %}
4
+
5
+ {% block content %}
6
+ <div class="container">
7
+ <!-- Header Section -->
8
+ <div class="row mb-4">
9
+ <div class="col-12">
10
+ <div class="card">
11
+ <div class="card-header">
12
+ <h1 class="mb-0">
13
+ <i class="fas fa-tags"></i>
14
+ Part-of-Speech Tagging
15
+ </h1>
16
+ </div>
17
+ <div class="card-body">
18
+ <p class="lead">Analyze the grammatical structure of text by identifying parts of speech for each word.</p>
19
+
20
+ <div class="alert alert-info">
21
+ <i class="fas fa-info-circle"></i>
22
+ <strong>About:</strong> Part-of-Speech (POS) tagging is the process of marking up words in text according to their grammatical categories such as noun, verb, adjective, etc.
23
+ </div>
24
+ </div>
25
+ </div>
26
+ </div>
27
+ </div>
28
+
29
+ {% include "_analysis_nav.html" %}
30
+
31
+ <!-- Text Input Section -->
32
+ <div class="row mb-4">
33
+ <div class="col-12">
34
+ <div class="card">
35
+ <div class="card-header">
36
+ <h3 class="mb-0">
37
+ <i class="fas fa-keyboard"></i>
38
+ Enter your text:
39
+ </h3>
40
+ </div>
41
+ <div class="card-body">
42
+ <div class="row mb-3">
43
+ <div class="col-md-8">
44
+ <textarea id="textInput" class="form-control" rows="6" placeholder="Enter or paste your text here...">The quick brown fox jumps over the lazy dog.</textarea>
45
+ </div>
46
+ <div class="col-md-4">
47
+ <label for="sampleSelect" class="form-label">Or choose a sample:</label>
48
+ <select id="sampleSelect" class="form-select">
49
+ <option value="Custom">Custom</option>
50
+ <option value="News Article">News Article</option>
51
+ <option value="Product Review">Product Review</option>
52
+ <option value="Scientific Text">Scientific Text</option>
53
+ <option value="Literary Text">Literary Text</option>
54
+ </select>
55
+ </div>
56
+ </div>
57
+
58
+ <div class="d-flex justify-content-between align-items-center">
59
+ <div>
60
+ <button id="processBtn" class="btn btn-primary btn-lg">
61
+ <i class="fas fa-tags"></i>
62
+ Analyze POS Tags
63
+ </button>
64
+ </div>
65
+ <div>
66
+ <button id="clearBtn" class="btn btn-outline-secondary">
67
+ <i class="fas fa-trash"></i>
68
+ Clear
69
+ </button>
70
+ </div>
71
+ </div>
72
+ </div>
73
+ </div>
74
+ </div>
75
+ </div>
76
+
77
+ <!-- POS Tagging Methods Info -->
78
+ <div class="row mb-4">
79
+ <div class="col-12">
80
+ <div class="card">
81
+ <div class="card-header">
82
+ <h3 class="mb-0">
83
+ <i class="fas fa-info-circle"></i>
84
+ POS Tagging Methods
85
+ </h3>
86
+ </div>
87
+ <div class="card-body">
88
+ <div class="row g-4">
89
+ <div class="col-md-6">
90
+ <div class="card h-100 border-primary shadow-sm">
91
+ <div class="card-header bg-primary text-white text-center">
92
+ <i class="fas fa-brain fa-3x mb-2"></i>
93
+ <h4 class="mb-0">NLTK (Penn Treebank)</h4>
94
+ </div>
95
+ <div class="card-body">
96
+ <p class="text-center mb-3">Uses the Perceptron tagger trained on the Penn Treebank corpus with detailed grammatical categories.</p>
97
+ <ul class="list-group list-group-flush">
98
+ <li class="list-group-item d-flex align-items-center">
99
+ <i class="fas fa-tags text-primary me-2"></i>
100
+ <strong>36+ detailed tags</strong>
101
+ </li>
102
+ <li class="list-group-item d-flex align-items-center">
103
+ <i class="fas fa-flag-usa text-primary me-2"></i>
104
+ <strong>English-specific</strong>
105
+ </li>
106
+ <li class="list-group-item d-flex align-items-center">
107
+ <i class="fas fa-book text-primary me-2"></i>
108
+ <strong>Traditional NLP approach</strong>
109
+ </li>
110
+ </ul>
111
+ </div>
112
+ </div>
113
+ </div>
114
+ <div class="col-md-6">
115
+ <div class="card h-100 border-success shadow-sm">
116
+ <div class="card-header bg-success text-white text-center">
117
+ <i class="fas fa-globe fa-3x mb-2"></i>
118
+ <h4 class="mb-0">spaCy (Universal)</h4>
119
+ </div>
120
+ <div class="card-body">
121
+ <p class="text-center mb-3">Uses the en_core_web_sm model with Universal POS tags for cross-linguistic consistency.</p>
122
+ <ul class="list-group list-group-flush">
123
+ <li class="list-group-item d-flex align-items-center">
124
+ <i class="fas fa-tags text-success me-2"></i>
125
+ <strong>17 universal tags</strong>
126
+ </li>
127
+ <li class="list-group-item d-flex align-items-center">
128
+ <i class="fas fa-globe text-success me-2"></i>
129
+ <strong>Cross-linguistic</strong>
130
+ </li>
131
+ <li class="list-group-item d-flex align-items-center">
132
+ <i class="fas fa-rocket text-success me-2"></i>
133
+ <strong>Modern NLP approach</strong>
134
+ </li>
135
+ </ul>
136
+ </div>
137
+ </div>
138
+ </div>
139
+ </div>
140
+ </div>
141
+ </div>
142
+ </div>
143
+ </div>
144
+
145
+ <!-- Results Section -->
146
+ <div class="row">
147
+ <div class="col-12">
148
+ <div class="card">
149
+ <div class="card-header">
150
+ <h3 class="mb-0">
151
+ <i class="fas fa-chart-bar"></i>
152
+ POS Tagging Results
153
+ </h3>
154
+ </div>
155
+ <div class="card-body">
156
+ <div id="resultsContainer">
157
+ <div class="text-center text-muted py-5">
158
+ <i class="fas fa-arrow-up fa-2x mb-3"></i>
159
+ <p>Click "Analyze POS Tags" to see grammatical analysis results</p>
160
+ </div>
161
+ </div>
162
+ </div>
163
+ </div>
164
+ </div>
165
+ </div>
166
+ </div>
167
+ {% endblock %}
168
+
169
+ {% block extra_scripts %}
170
+ <script>
171
+ // Initialize page
172
+ document.addEventListener('DOMContentLoaded', function() {
173
+ const textInput = document.getElementById('textInput');
174
+ const sampleSel = document.getElementById('sampleSelect');
175
+ const routeKey = 'customTextBackup:' + window.location.pathname;
176
+ const shouldCarry = sessionStorage.getItem('carryTextOnNextPage') === '1';
177
+
178
+ if (shouldCarry) {
179
+ // When carrying text via Quick Nav, use the carried text and set to Custom
180
+ const carriedText = sessionStorage.getItem('analysisText') || '';
181
+ if (sampleSel) sampleSel.value = 'Custom';
182
+ if (textInput) textInput.value = carriedText;
183
+ // Save as this route's custom backup
184
+ sessionStorage.setItem(routeKey, carriedText);
185
+ sessionStorage.removeItem('carryTextOnNextPage');
186
+ } else {
187
+ // Normal page load - restore any existing custom backup for this route
188
+ const routeBackup = sessionStorage.getItem(routeKey);
189
+ if (routeBackup !== null && sampleSel && sampleSel.value === 'Custom') {
190
+ if (textInput) textInput.value = routeBackup;
191
+ }
192
+ }
193
+
194
+ // Save custom input changes to route-specific backup
195
+ if (textInput) {
196
+ textInput.addEventListener('input', function() {
197
+ if (sampleSel && sampleSel.value === 'Custom') {
198
+ sessionStorage.setItem(routeKey, textInput.value);
199
+ }
200
+ });
201
+ }
202
+
203
+ // Sample text dropdown handler
204
+ document.getElementById('sampleSelect').addEventListener('change', function() {
205
+ const sampleType = this.value;
206
+ const textInput = document.getElementById('textInput');
207
+ const routeKey = 'customTextBackup:' + window.location.pathname;
208
+
209
+ if (sampleType === 'Custom') {
210
+ // Restore custom backup for this route
211
+ const routeBackup = sessionStorage.getItem(routeKey);
212
+ textInput.value = routeBackup || '';
213
+ } else {
214
+ // Before switching to sample, save current custom input
215
+ if (this.previousValue === 'Custom' && textInput.value.trim()) {
216
+ sessionStorage.setItem(routeKey, textInput.value);
217
+ }
218
+
219
+ // Get sample text from server
220
+ fetch('/api/sample-text', {
221
+ method: 'POST',
222
+ headers: {
223
+ 'Content-Type': 'application/json',
224
+ },
225
+ body: JSON.stringify({sample_type: sampleType})
226
+ })
227
+ .then(response => response.json())
228
+ .then(data => {
229
+ textInput.value = data.text;
230
+ });
231
+ }
232
+
233
+ // Remember previous value for next change
234
+ this.previousValue = sampleType;
235
+ });
236
+
237
+ // Process button handler
238
+ document.getElementById('processBtn').addEventListener('click', function() {
239
+ const text = document.getElementById('textInput').value.trim();
240
+
241
+ if (!text) {
242
+ alert('Please enter some text to analyze.');
243
+ return;
244
+ }
245
+
246
+ // Show loading state
247
+ this.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Processing...';
248
+ this.disabled = true;
249
+
250
+ // Process text
251
+ processPOSTagging();
252
+
253
+ // Reset button after a fixed 2-second delay (NOTE(review): not tied to completion of the request started above)
254
+ setTimeout(() => {
255
+ this.innerHTML = '<i class="fas fa-tags"></i> Analyze POS Tags';
256
+ this.disabled = false;
257
+ }, 2000);
258
+ });
259
+
260
+ // Clear button handler
261
+ document.getElementById('clearBtn').addEventListener('click', function() {
262
+ document.getElementById('textInput').value = '';
263
+ document.getElementById('resultsContainer').innerHTML = `
264
+ <div class="text-center text-muted py-5">
265
+ <i class="fas fa-arrow-up fa-2x mb-3"></i>
266
+ <p>Click "Analyze POS Tags" to see grammatical analysis results</p>
267
+ </div>
268
+ `;
269
+ });
270
+
271
+ // Keyboard shortcuts
272
+ document.addEventListener('keydown', function(e) {
273
+ // Ctrl+Enter to process
274
+ if (e.ctrlKey && e.key === 'Enter') {
275
+ document.getElementById('processBtn').click();
276
+ }
277
+
278
+ // Ctrl+L to clear
279
+ if (e.ctrlKey && e.key === 'l') {
280
+ e.preventDefault();
281
+ document.getElementById('clearBtn').click();
282
+ }
283
+ });
284
+ });
285
+
286
+ // Process POS tagging
287
+ function processPOSTagging() {
288
+ const text = document.getElementById('textInput').value.trim();
289
+
290
+ if (!text) {
291
+ alert('Please enter some text to analyze.');
292
+ return;
293
+ }
294
+
295
+ showLoading('resultsContainer');
296
+
297
+ fetch('/api/pos-tagging', {
298
+ method: 'POST',
299
+ headers: {
300
+ 'Content-Type': 'application/json',
301
+ },
302
+ body: JSON.stringify({text: text})
303
+ })
304
+ .then(response => response.json())
305
+ .then(data => {
306
+ if (data.success) {
307
+ displayResults(data.result);
308
+ } else {
309
+ showError(data.error || 'An error occurred while processing the text');
310
+ }
311
+ })
312
+ .catch(error => {
313
+ showError('Failed to process text: ' + error.message);
314
+ })
315
+ .finally(() => {
316
+ hideLoading('resultsContainer');
317
+ });
318
+ }
319
+
320
+ // Show loading state
321
+ function showLoading(elementId) {
322
+ const element = document.getElementById(elementId);
323
+ if (element) {
324
+ element.innerHTML = `
325
+ <div class="text-center py-4">
326
+ <div class="spinner-border text-primary" role="status">
327
+ <span class="visually-hidden">Loading...</span>
328
+ </div>
329
+ <p class="mt-2">Analyzing grammatical structure...</p>
330
+ </div>
331
+ `;
332
+ }
333
+ }
334
+
335
+ // Hide loading state
336
+ function hideLoading(elementId) {
337
+ const element = document.getElementById(elementId);
338
+ if (element && element.innerHTML.includes('spinner-border')) {
339
+ element.innerHTML = '';
340
+ }
341
+ }
342
+
343
+ // Show error message
344
+ function showError(message, elementId = 'resultsContainer') {
345
+ const element = document.getElementById(elementId);
346
+ if (element) {
347
+ element.innerHTML = `
348
+ <div class="alert alert-danger fade-in">
349
+ <i class="fas fa-exclamation-triangle"></i>
350
+ <strong>Error:</strong> ${message}
351
+ </div>
352
+ `;
353
+ }
354
+ }
355
+
356
+ // Display results
357
+ function displayResults(result) {
358
+ const container = document.getElementById('resultsContainer');
359
+ if (container) {
360
+ container.innerHTML = result;
361
+ container.classList.add('fade-in');
362
+
363
+ // Scroll to results
364
+ container.scrollIntoView({ behavior: 'smooth', block: 'start' });
365
+ }
366
+ }
367
+
368
+ // Function to switch between POS tag types
369
+ function showPOSTags(type) {
370
+ // Hide all sections
371
+ const sections = document.querySelectorAll('.pos-tags-section');
372
+ sections.forEach(section => {
373
+ section.style.display = 'none';
374
+ });
375
+
376
+ // Remove active class from all buttons
377
+ const buttons = document.querySelectorAll('.pos-legend-buttons .btn');
378
+ buttons.forEach(btn => {
379
+ btn.classList.remove('btn-primary', 'active');
380
+ btn.classList.add('btn-outline-primary');
381
+ });
382
+
383
+ // Show selected section
384
+ const targetSection = document.getElementById(type + '-tags');
385
+ if (targetSection) {
386
+ targetSection.style.display = 'block';
387
+ }
388
+
389
+ // Activate selected button
390
+ const targetButton = document.getElementById(type + '-btn');
391
+ if (targetButton) {
392
+ targetButton.classList.remove('btn-outline-primary');
393
+ targetButton.classList.add('btn-primary', 'active');
394
+ }
395
+ }
396
+ </script>
397
+ {% endblock %}
templates/preprocessing.html ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends "base.html" %}
2
+
3
+ {% block title %}Text Preprocessing - NLP Ultimate Tutorial{% endblock %}
4
+
5
+ {% block content %}
6
+ <div class="container">
7
+ <!-- Header Section -->
8
+ <div class="row mb-4">
9
+ <div class="col-12">
10
+ <div class="card">
11
+ <div class="card-header">
12
+ <h1 class="mb-0">
13
+ <i class="fas fa-tools"></i>
14
+ Text Preprocessing
15
+ </h1>
16
+ </div>
17
+ <div class="card-body">
18
+ <p class="lead">Clean and transform raw text into a format suitable for NLP analysis.</p>
19
+
20
+ <div class="alert alert-info">
21
+ <i class="fas fa-info-circle"></i>
22
+ <strong>About:</strong> Text preprocessing is the first step in NLP pipelines that transforms raw text into a clean, structured format suitable for analysis.
23
+ </div>
24
+ </div>
25
+ </div>
26
+ </div>
27
+ </div>
28
+
29
+ {% include "_analysis_nav.html" %}
30
+
31
+ <!-- Text Input Section -->
32
+ <div class="row mb-4">
33
+ <div class="col-12">
34
+ <div class="card">
35
+ <div class="card-header">
36
+ <h3 class="mb-0">
37
+ <i class="fas fa-keyboard"></i>
38
+ Enter your text:
39
+ </h3>
40
+ </div>
41
+ <div class="card-body">
42
+ <div class="row mb-3">
43
+ <div class="col-md-8">
44
+ <textarea id="textInput" class="form-control" rows="6" placeholder="Enter or paste your text here...">Climate change is the long-term alteration of temperature and typical weather patterns in a place. The cause of current climate change is largely human activity, like burning fossil fuels, which adds heat-trapping gases to Earth's atmosphere. The consequences of changing climate are already being felt worldwide.</textarea>
45
+ </div>
46
+ <div class="col-md-4">
47
+ <label for="sampleSelect" class="form-label">Or choose a sample:</label>
48
+ <select id="sampleSelect" class="form-select">
49
+ <option value="Custom">Custom</option>
50
+ <option value="News Article">News Article</option>
51
+ <option value="Product Review">Product Review</option>
52
+ <option value="Scientific Text" selected>Scientific Text</option>
53
+ <option value="Literary Text">Literary Text</option>
54
+ </select>
55
+ </div>
56
+ </div>
57
+
58
+ <div class="d-flex justify-content-between align-items-center">
59
+ <div>
60
+ <button id="processBtn" class="btn btn-primary btn-lg">
61
+ <i class="fas fa-cogs"></i>
62
+ Process Text
63
+ </button>
64
+ </div>
65
+ <div>
66
+ <button id="clearBtn" class="btn btn-outline-secondary">
67
+ <i class="fas fa-trash"></i>
68
+ Clear
69
+ </button>
70
+ </div>
71
+ </div>
72
+ </div>
73
+ </div>
74
+ </div>
75
+ </div>
76
+
77
+ <!-- Results Section -->
78
+ <div class="row">
79
+ <div class="col-12">
80
+ <div class="card">
81
+ <div class="card-header">
82
+ <h3 class="mb-0">
83
+ <i class="fas fa-chart-bar"></i>
84
+ Preprocessing Results
85
+ </h3>
86
+ </div>
87
+ <div class="card-body">
88
+ <div id="resultsContainer">
89
+ <div class="text-center text-muted py-5">
90
+ <i class="fas fa-arrow-up fa-2x mb-3"></i>
91
+ <p>Click "Process Text" to see preprocessing results</p>
92
+ </div>
93
+ </div>
94
+ </div>
95
+ </div>
96
+ </div>
97
+ </div>
98
+ </div>
99
+ {% endblock %}
100
+
101
+ {% block extra_scripts %}
102
+ <script>
103
+ // Initialize page
104
+ document.addEventListener('DOMContentLoaded', function() {
105
+ // Only carry when requested
106
+ const shouldCarry = sessionStorage.getItem('carryTextOnNextPage') === '1';
107
+ if (shouldCarry) {
108
+ const sampleSel = document.getElementById('sampleSelect');
109
+ if (sampleSel) sampleSel.value = 'Custom';
110
+ const storedText = sessionStorage.getItem('analysisText');
111
+ if (storedText) document.getElementById('textInput').value = storedText;
112
+ sessionStorage.removeItem('carryTextOnNextPage');
113
+ }
114
+
115
+ // Sample text dropdown handler
116
+ document.getElementById('sampleSelect').addEventListener('change', function() {
117
+ const sampleType = this.value;
118
+ const textInput = document.getElementById('textInput');
119
+
120
+ if (sampleType === 'Custom') {
121
+ textInput.value = '';
122
+ } else {
123
+ // Get sample text from server
124
+ fetch('/api/sample-text', {
125
+ method: 'POST',
126
+ headers: {
127
+ 'Content-Type': 'application/json',
128
+ },
129
+ body: JSON.stringify({sample_type: sampleType})
130
+ })
131
+ .then(response => response.json())
132
+ .then(data => {
133
+ textInput.value = data.text;
134
+ });
135
+ }
136
+ });
137
+
138
+ // Process button handler
139
+ document.getElementById('processBtn').addEventListener('click', function() {
140
+ const text = document.getElementById('textInput').value.trim();
141
+
142
+ if (!text) {
143
+ alert('Please enter some text to process.');
144
+ return;
145
+ }
146
+
147
+ // Show loading state
148
+ this.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Processing...';
149
+ this.disabled = true;
150
+
151
+ // Process text
152
+ NLPUtils.processText('/api/preprocessing', text);
153
+
154
+ // Reset button after a fixed 2-second delay (NOTE(review): not tied to completion of the request started above)
155
+ setTimeout(() => {
156
+ this.innerHTML = '<i class="fas fa-cogs"></i> Process Text';
157
+ this.disabled = false;
158
+ }, 2000);
159
+ });
160
+
161
+ // Clear button handler
162
+ document.getElementById('clearBtn').addEventListener('click', function() {
163
+ document.getElementById('textInput').value = '';
164
+ document.getElementById('resultsContainer').innerHTML = `
165
+ <div class="text-center text-muted py-5">
166
+ <i class="fas fa-arrow-up fa-2x mb-3"></i>
167
+ <p>Click "Process Text" to see preprocessing results</p>
168
+ </div>
169
+ `;
170
+ });
171
+
172
+ // Keyboard shortcuts
173
+ document.addEventListener('keydown', function(e) {
174
+ // Ctrl+Enter to process
175
+ if (e.ctrlKey && e.key === 'Enter') {
176
+ document.getElementById('processBtn').click();
177
+ }
178
+
179
+ // Ctrl+L to clear
180
+ if (e.ctrlKey && e.key === 'l') {
181
+ e.preventDefault();
182
+ document.getElementById('clearBtn').click();
183
+ }
184
+ });
185
+ });
186
+
187
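+ // Preprocessing-specific request helper (NOTE(review): the Process button above calls NLPUtils.processText('/api/preprocessing', text) instead, and nothing in this template calls this function - confirm it is still needed)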
188
+ function processPreprocessing() {
189
+ const text = document.getElementById('textInput').value.trim();
190
+
191
+ if (!text) {
192
+ alert('Please enter some text to process.');
193
+ return;
194
+ }
195
+
196
+ showLoading('resultsContainer');
197
+
198
+ fetch('/api/preprocessing', {
199
+ method: 'POST',
200
+ headers: {
201
+ 'Content-Type': 'application/json',
202
+ },
203
+ body: JSON.stringify({text: text})
204
+ })
205
+ .then(response => response.json())
206
+ .then(data => {
207
+ if (data.success) {
208
+ displayResults(data.result);
209
+ } else {
210
+ showError(data.error || 'An error occurred while processing the text');
211
+ }
212
+ })
213
+ .catch(error => {
214
+ showError('Failed to process text: ' + error.message);
215
+ })
216
+ .finally(() => {
217
+ hideLoading('resultsContainer');
218
+ });
219
+ }
220
+
221
+ // Show loading state
222
+ function showLoading(elementId) {
223
+ const element = document.getElementById(elementId);
224
+ if (element) {
225
+ element.innerHTML = `
226
+ <div class="text-center py-4">
227
+ <div class="spinner-border text-primary" role="status">
228
+ <span class="visually-hidden">Loading...</span>
229
+ </div>
230
+ <p class="mt-2">Processing your text...</p>
231
+ </div>
232
+ `;
233
+ }
234
+ }
235
+
236
+ // Hide loading state
237
+ function hideLoading(elementId) {
238
+ const element = document.getElementById(elementId);
239
+ if (element && element.innerHTML.includes('spinner-border')) {
240
+ element.innerHTML = '';
241
+ }
242
+ }
243
+
244
+ // Show error message
245
+ function showError(message, elementId = 'resultsContainer') {
246
+ const element = document.getElementById(elementId);
247
+ if (element) {
248
+ element.innerHTML = `
249
+ <div class="alert alert-danger fade-in">
250
+ <i class="fas fa-exclamation-triangle"></i>
251
+ <strong>Error:</strong> ${message}
252
+ </div>
253
+ `;
254
+ }
255
+ }
256
+
257
+ // Display results
258
+ function displayResults(result) {
259
+ const container = document.getElementById('resultsContainer');
260
+ if (container) {
261
+ container.innerHTML = result;
262
+ container.classList.add('fade-in');
263
+
264
+ // Scroll to results
265
+ container.scrollIntoView({ behavior: 'smooth', block: 'start' });
266
+ }
267
+ }
268
+ </script>
269
+ {% endblock %}
templates/question_answering.html ADDED
@@ -0,0 +1,442 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends "base.html" %}
2
+
3
+ {% block title %}Question Answering - NLP Ultimate Tutorial{% endblock %}
4
+
5
+ {% block content %}
6
+ <div class="container">
7
+ <!-- Header Section -->
8
+ <div class="row mb-4">
9
+ <div class="col-12">
10
+ <div class="card">
11
+ <div class="card-header">
12
+ <h1 class="mb-0">
13
+ <i class="fas fa-question-circle"></i>
14
+ Question Answering System
15
+ </h1>
16
+ </div>
17
+ <div class="card-body">
18
+ <p class="lead">Ask questions about any text context and get intelligent answers with confidence scores.</p>
19
+
20
+ <div class="alert alert-info">
21
+ <i class="fas fa-info-circle"></i>
22
+ <strong>About:</strong> Question Answering (QA) systems extract or generate answers to questions based on a given context or knowledge base.
23
+ </div>
24
+ </div>
25
+ </div>
26
+ </div>
27
+ </div>
28
+
29
+ {% include "_analysis_nav.html" %}
30
+
31
+ <!-- Input Section -->
32
+ <div class="row mb-4">
33
+ <div class="col-12">
34
+ <div class="card">
35
+ <div class="card-header">
36
+ <h3 class="mb-0">
37
+ <i class="fas fa-keyboard"></i>
38
+ Enter Context and Question:
39
+ </h3>
40
+ </div>
41
+ <div class="card-body">
42
+ <div class="row mb-3">
43
+ <div class="col-md-8">
44
+ <label for="contextInput" class="form-label">Context Text:</label>
45
+ <textarea id="contextInput" class="form-control" rows="8" placeholder="Enter the text context here...">The Amazon rainforest is a moist broadleaf tropical rainforest in the Amazon biome that covers most of the Amazon basin of South America. This basin encompasses 7,000,000 km2, of which 5,500,000 km2 are covered by the rainforest. The majority of the forest is contained within Brazil, with 60% of the rainforest, followed by Peru with 13%, Colombia with 10%, and with minor amounts in Bolivia, Ecuador, French Guiana, Guyana, Suriname, and Venezuela.</textarea>
46
+ </div>
47
+ <div class="col-md-4">
48
+ <label for="sampleSelect" class="form-label">Or choose a sample:</label>
49
+ <select id="sampleSelect" class="form-select">
50
+ <option value="Custom">Custom</option>
51
+ <option value="News Article">News Article</option>
52
+ <option value="Product Review">Product Review</option>
53
+ <option value="Scientific Text">Scientific Text</option>
54
+ <option value="Literary Text">Literary Text</option>
55
+ </select>
56
+ </div>
57
+ </div>
58
+
59
+ <div class="row mb-3">
60
+ <div class="col-md-8">
61
+ <label for="questionInput" class="form-label">Your Question:</label>
62
+ <input type="text" id="questionInput" class="form-control" placeholder="What would you like to know about the context?" value="What percentage of the Amazon rainforest is in Brazil?">
63
+ </div>
64
+ <div class="col-md-4">
65
+ <label for="confidenceThreshold" class="form-label">Confidence Threshold:</label>
66
+ <input type="range" class="form-range" id="confidenceThreshold" min="0.0" max="1.0" value="0.5" step="0.1">
67
+ <div class="d-flex justify-content-between">
68
+ <small>0.0</small>
69
+ <small id="confidenceValue">0.5</small>
70
+ <small>1.0</small>
71
+ </div>
72
+ </div>
73
+ </div>
74
+
75
+ <div class="d-flex justify-content-between align-items-center">
76
+ <div>
77
+ <button id="processBtn" class="btn btn-primary btn-lg">
78
+ <i class="fas fa-question-circle"></i>
79
+ Get Answer
80
+ </button>
81
+ </div>
82
+ <div>
83
+ <button id="clearBtn" class="btn btn-outline-secondary">
84
+ <i class="fas fa-trash"></i>
85
+ Clear
86
+ </button>
87
+ </div>
88
+ </div>
89
+ </div>
90
+ </div>
91
+ </div>
92
+ </div>
93
+
94
+ <!-- Tips and Features Section -->
95
+ <div class="row mb-4">
96
+ <div class="col-md-6">
97
+ <div class="card">
98
+ <div class="card-header">
99
+ <h3 class="mb-0">
100
+ <i class="fas fa-lightbulb"></i>
101
+ Tips for Better Results
102
+ </h3>
103
+ </div>
104
+ <div class="card-body">
105
+ <ul class="list-unstyled">
106
+ <li><i class="fas fa-check text-success"></i> <strong>Context:</strong> Provide relevant text containing the answer</li>
107
+ <li><i class="fas fa-check text-success"></i> <strong>Question:</strong> Be specific and clear</li>
108
+ <li><i class="fas fa-check text-success"></i> <strong>Extractive:</strong> Finds exact spans from the text</li>
109
+ <li><i class="fas fa-check text-success"></i> <strong>Confidence:</strong> Higher threshold = more selective answers</li>
110
+ </ul>
111
+ </div>
112
+ </div>
113
+ </div>
114
+ <div class="col-md-6">
115
+ <div class="card">
116
+ <div class="card-header">
117
+ <h3 class="mb-0">
118
+ <i class="fas fa-chart-line"></i>
119
+ System Features
120
+ </h3>
121
+ </div>
122
+ <div class="card-body">
123
+ <ul class="list-unstyled">
124
+ <li><i class="fas fa-brain text-primary"></i> <strong>Transformer QA:</strong> RoBERTa-SQuAD2 model</li>
125
+ <li><i class="fas fa-calculator text-info"></i> <strong>TF-IDF Matching:</strong> Similarity-based answer finding</li>
126
+ <li><i class="fas fa-chart-bar text-success"></i> <strong>Confidence Scoring:</strong> Reliability assessment</li>
127
+ <li><i class="fas fa-highlighter text-warning"></i> <strong>Context Highlighting:</strong> Visual answer location</li>
128
+ </ul>
129
+ </div>
130
+ </div>
131
+ </div>
132
+ </div>
133
+
134
+ <!-- Example Questions Section -->
135
+ <div class="row mb-4">
136
+ <div class="col-12">
137
+ <div class="card">
138
+ <div class="card-header">
139
+ <h3 class="mb-0">
140
+ <i class="fas fa-list"></i>
141
+ Example Questions
142
+ </h3>
143
+ </div>
144
+ <div class="card-body">
145
+ <div class="row">
146
+ <div class="col-md-4">
147
+ <button class="btn btn-outline-primary w-100 mb-2" onclick="setExample('What percentage of the Amazon rainforest is in Brazil?')">
148
+ What percentage of the Amazon rainforest is in Brazil?
149
+ </button>
150
+ </div>
151
+ <div class="col-md-4">
152
+ <button class="btn btn-outline-primary w-100 mb-2" onclick="setExample('Which countries contain parts of the Amazon rainforest?')">
153
+ Which countries contain parts of the Amazon rainforest?
154
+ </button>
155
+ </div>
156
+ <div class="col-md-4">
157
+ <button class="btn btn-outline-primary w-100 mb-2" onclick="setExample('How large is the Amazon basin?')">
158
+ How large is the Amazon basin?
159
+ </button>
160
+ </div>
161
+ </div>
162
+ </div>
163
+ </div>
164
+ </div>
165
+ </div>
166
+
167
+ <!-- Results Section -->
168
+ <div class="row">
169
+ <div class="col-12">
170
+ <div class="card">
171
+ <div class="card-header">
172
+ <h3 class="mb-0">
173
+ <i class="fas fa-chart-bar"></i>
174
+ Question Answering Results
175
+ </h3>
176
+ </div>
177
+ <div class="card-body">
178
+ <div id="resultsContainer">
179
+ <div class="text-center text-muted py-5">
180
+ <i class="fas fa-arrow-up fa-2x mb-3"></i>
181
+ <p>Click "Get Answer" to see question answering results</p>
182
+ </div>
183
+ </div>
184
+ </div>
185
+ </div>
186
+ </div>
187
+ </div>
188
+ </div>
189
+ {% endblock %}
190
+
191
+ {% block extra_scripts %}
192
+ <script>
193
+ // Initialize page
194
+ document.addEventListener('DOMContentLoaded', function() {
195
+ // Prefill only when explicitly navigating via quick-nav or same-route refresh
196
+ const shouldCarry = sessionStorage.getItem('carryTextOnNextPage') === '1';
197
+ if (shouldCarry) {
198
+ const storedText = sessionStorage.getItem('analysisText');
199
+ const storedQuestion = sessionStorage.getItem('qaQuestion');
200
+ if (storedText) document.getElementById('contextInput').value = storedText;
201
+ if (storedQuestion) document.getElementById('questionInput').value = storedQuestion;
202
+ // Reset flag so normal navigation doesn't keep stale inputs
203
+ sessionStorage.removeItem('carryTextOnNextPage');
204
+ } else {
205
+ // Fresh route load: clear inputs
206
+ document.getElementById('contextInput').value = document.getElementById('contextInput').defaultValue || '';
207
+ document.getElementById('questionInput').value = '';
208
+ sessionStorage.removeItem('qaQuestion');
209
+ }
210
+
211
+ // Update confidence threshold value
212
+ document.getElementById('confidenceThreshold').addEventListener('input', function() {
213
+ document.getElementById('confidenceValue').textContent = this.value;
214
+ });
215
+
216
+ // Sample text dropdown handler: keep context/question in sync and restore Custom
217
+ document.getElementById('sampleSelect').addEventListener('change', function() {
218
+ const sampleType = this.value;
219
+ const contextInput = document.getElementById('contextInput');
220
+ const questionInput = document.getElementById('questionInput');
221
+ const routeKey = 'customTextBackup:' + (window.location.pathname || '/question-answering');
222
+
223
+ const SAMPLE_QUESTIONS = {
224
+ 'News Article': 'What action did the commission take?',
225
+ 'Product Review': 'What is the reviewer\'s overall opinion?',
226
+ 'Scientific Text': 'What is the primary cause of current climate change?',
227
+ 'Literary Text': 'Who is the main character?'
228
+ };
229
+
230
+ if (sampleType === 'Custom') {
231
+ // Restore previously typed Custom text
232
+ const backup = sessionStorage.getItem(routeKey);
233
+ if (backup !== null) contextInput.value = backup;
234
+ // Clear question to avoid mismatch
235
+ questionInput.value = '';
236
+ sessionStorage.setItem('qaQuestion', '');
237
+ return;
238
+ }
239
+
240
+ // Save current custom text before switching away
241
+ sessionStorage.setItem(routeKey, contextInput.value);
242
+
243
+ fetch('/api/sample-text', {
244
+ method: 'POST',
245
+ headers: {
246
+ 'Content-Type': 'application/json',
247
+ },
248
+ body: JSON.stringify({ sample_type: sampleType })
249
+ })
250
+ .then(response => response.json())
251
+ .then(data => {
252
+ contextInput.value = data.text || '';
253
+ // Update question to match selected sample for consistency
254
+ questionInput.value = SAMPLE_QUESTIONS[sampleType] || '';
255
+ // Persist to session storage
256
+ sessionStorage.setItem('analysisText', contextInput.value);
257
+ sessionStorage.setItem('qaQuestion', questionInput.value);
258
+ });
259
+ });
260
+
261
+ // Persist inputs while typing
262
+ document.getElementById('contextInput').addEventListener('input', function() {
263
+ sessionStorage.setItem('analysisText', this.value);
264
+ sessionStorage.setItem('customTextBackup:' + (window.location.pathname || '/question-answering'), this.value);
265
+ // If user starts editing context while a sample-default question is set, clear it to avoid mismatch
266
+ const questionInput = document.getElementById('questionInput');
267
+ const SAMPLE_DEFAULTS = new Set([
268
+ 'What action did the commission\'s take?',
269
+ 'What action did the commission take?',
270
+ 'What is the reviewer\'s overall opinion?',
271
+ 'What is the reviewer\'s overall opinion?',
272
+ 'What is the primary cause of current climate change?',
273
+ 'Who is the main character?'
274
+ ]);
275
+ if (SAMPLE_DEFAULTS.has(questionInput.value)) {
276
+ questionInput.value = '';
277
+ sessionStorage.setItem('qaQuestion', '');
278
+ }
279
+ });
280
+ document.getElementById('questionInput').addEventListener('input', function() {
281
+ sessionStorage.setItem('qaQuestion', this.value);
282
+ });
283
+
284
+ // Process button handler
285
+ document.getElementById('processBtn').addEventListener('click', function() {
286
+ const context = document.getElementById('contextInput').value.trim();
287
+ const question = document.getElementById('questionInput').value.trim();
288
+
289
+ if (!context) {
290
+ alert('Please provide context text.');
291
+ return;
292
+ }
293
+
294
+ if (!question) {
295
+ alert('Please enter a question.');
296
+ return;
297
+ }
298
+
299
+ // Show loading state
300
+ this.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Processing...';
301
+ this.disabled = true;
302
+
303
+ // Persist before processing
304
+ sessionStorage.setItem('analysisText', context);
305
+ sessionStorage.setItem('qaQuestion', question);
306
+
307
+ // Process question
308
+ processQuestionAnswering();
309
+
310
+ // Reset button after a delay
311
+ setTimeout(() => {
312
+ this.innerHTML = '<i class="fas fa-question-circle"></i> Get Answer';
313
+ this.disabled = false;
314
+ }, 2000);
315
+ });
316
+
317
+ // Clear button handler
318
+ document.getElementById('clearBtn').addEventListener('click', function() {
319
+ document.getElementById('contextInput').value = '';
320
+ document.getElementById('questionInput').value = '';
321
+ document.getElementById('resultsContainer').innerHTML = `
322
+ <div class="text-center text-muted py-5">
323
+ <i class="fas fa-arrow-up fa-2x mb-3"></i>
324
+ <p>Click "Get Answer" to see question answering results</p>
325
+ </div>
326
+ `;
327
+ });
328
+
329
+ // Keyboard shortcuts
330
+ document.addEventListener('keydown', function(e) {
331
+ // Ctrl+Enter to process
332
+ if (e.ctrlKey && e.key === 'Enter') {
333
+ document.getElementById('processBtn').click();
334
+ }
335
+
336
+ // Ctrl+L to clear
337
+ if (e.ctrlKey && e.key === 'l') {
338
+ e.preventDefault();
339
+ document.getElementById('clearBtn').click();
340
+ }
341
+ });
342
+ });
343
+
344
+ // Set example question
345
+ function setExample(question) {
346
+ document.getElementById('questionInput').value = question;
347
+ }
348
+
349
+ // Process question answering
350
+ function processQuestionAnswering() {
351
+ const context = document.getElementById('contextInput').value.trim();
352
+ const question = document.getElementById('questionInput').value.trim();
353
+ const confidenceThreshold = parseFloat(document.getElementById('confidenceThreshold').value);
354
+
355
+ if (!context) {
356
+ alert('Please provide context text.');
357
+ return;
358
+ }
359
+
360
+ if (!question) {
361
+ alert('Please enter a question.');
362
+ return;
363
+ }
364
+
365
+ showLoading('resultsContainer');
366
+
367
+ fetch('/api/question-answering', {
368
+ method: 'POST',
369
+ headers: {
370
+ 'Content-Type': 'application/json',
371
+ },
372
+ body: JSON.stringify({
373
+ context: context,
374
+ question: question,
375
+ confidence_threshold: confidenceThreshold
376
+ })
377
+ })
378
+ .then(response => response.json())
379
+ .then(data => {
380
+ if (data.success) {
381
+ displayResults(data.result);
382
+ } else {
383
+ showError(data.error || 'An error occurred while processing the question');
384
+ }
385
+ })
386
+ .catch(error => {
387
+ showError('Failed to process question: ' + error.message);
388
+ })
389
+ .finally(() => {
390
+ hideLoading('resultsContainer');
391
+ });
392
+ }
393
+
394
+ // Show loading state
395
+ function showLoading(elementId) {
396
+ const element = document.getElementById(elementId);
397
+ if (element) {
398
+ element.innerHTML = `
399
+ <div class="text-center py-4">
400
+ <div class="spinner-border text-primary" role="status">
401
+ <span class="visually-hidden">Loading...</span>
402
+ </div>
403
+ <p class="mt-2">Processing your question...</p>
404
+ </div>
405
+ `;
406
+ }
407
+ }
408
+
409
+ // Hide loading state
410
+ function hideLoading(elementId) {
411
+ const element = document.getElementById(elementId);
412
+ if (element && element.innerHTML.includes('spinner-border')) {
413
+ element.innerHTML = '';
414
+ }
415
+ }
416
+
417
+ // Show error message
418
+ function showError(message, elementId = 'resultsContainer') {
419
+ const element = document.getElementById(elementId);
420
+ if (element) {
421
+ element.innerHTML = `
422
+ <div class="alert alert-danger fade-in">
423
+ <i class="fas fa-exclamation-triangle"></i>
424
+ <strong>Error:</strong> ${message}
425
+ </div>
426
+ `;
427
+ }
428
+ }
429
+
430
+ // Display results
431
+ function displayResults(result) {
432
+ const container = document.getElementById('resultsContainer');
433
+ if (container) {
434
+ container.innerHTML = result;
435
+ container.classList.add('fade-in');
436
+
437
+ // Scroll to results
438
+ container.scrollIntoView({ behavior: 'smooth', block: 'start' });
439
+ }
440
+ }
441
+ </script>
442
+ {% endblock %}
templates/sentiment.html ADDED
@@ -0,0 +1,430 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends "base.html" %}
2
+
3
+ {% block title %}Sentiment Analysis - NLP Ultimate Tutorial{% endblock %}
4
+
5
+ {% block content %}
6
+ <div class="container">
7
+ <!-- Header Section -->
8
+ <div class="row mb-4">
9
+ <div class="col-12">
10
+ <div class="card">
11
+ <div class="card-header">
12
+ <h1 class="mb-0">
13
+ <i class="fas fa-heart"></i>
14
+ Sentiment Analysis
15
+ </h1>
16
+ </div>
17
+ <div class="card-body">
18
+ <p class="lead">Analyze the emotional tone and sentiment of text using multiple advanced models.</p>
19
+
20
+ <div class="alert alert-info">
21
+ <i class="fas fa-info-circle"></i>
22
+ <strong>About:</strong> Sentiment analysis determines the emotional tone behind text to identify if it expresses positive, negative, or neutral sentiment.
23
+ </div>
24
+ </div>
25
+ </div>
26
+ </div>
27
+ </div>
28
+
29
+ {% include "_analysis_nav.html" %}
30
+
31
+ <!-- Text Input Section -->
32
+ <div class="row mb-4">
33
+ <div class="col-12">
34
+ <div class="card">
35
+ <div class="card-header">
36
+ <h3 class="mb-0">
37
+ <i class="fas fa-keyboard"></i>
38
+ Enter your text:
39
+ </h3>
40
+ </div>
41
+ <div class="card-body">
42
+ <div class="row mb-3">
43
+ <div class="col-md-8">
44
+ <textarea id="textInput" class="form-control" rows="6" placeholder="Enter or paste your text here...">I absolutely loved this movie! The acting was superb and the plot kept me on the edge of my seat.</textarea>
45
+ </div>
46
+ <div class="col-md-4">
47
+ <label for="sampleSelect" class="form-label">Or choose a sample:</label>
48
+ <select id="sampleSelect" class="form-select">
49
+ <option value="Custom">Custom</option>
50
+ <option value="News Article">News Article</option>
51
+ <option value="Product Review">Product Review</option>
52
+ <option value="Scientific Text">Scientific Text</option>
53
+ <option value="Literary Text">Literary Text</option>
54
+ </select>
55
+ </div>
56
+ </div>
57
+
58
+ <div class="d-flex justify-content-between align-items-center">
59
+ <div>
60
+ <button id="processBtn" class="btn btn-primary btn-lg">
61
+ <i class="fas fa-heart"></i>
62
+ Analyze Sentiment
63
+ </button>
64
+ </div>
65
+ <div>
66
+ <button id="clearBtn" class="btn btn-outline-secondary">
67
+ <i class="fas fa-trash"></i>
68
+ Clear
69
+ </button>
70
+ </div>
71
+ </div>
72
+ </div>
73
+ </div>
74
+ </div>
75
+ </div>
76
+
77
+ <!-- Sentiment Analysis Methods Info -->
78
+ <div class="row mb-4">
79
+ <div class="col-12">
80
+ <div class="card">
81
+ <div class="card-header">
82
+ <h3 class="mb-0">
83
+ <i class="fas fa-info-circle"></i>
84
+ Sentiment Analysis Methods
85
+ </h3>
86
+ </div>
87
+ <div class="card-body p-4">
88
+ <div class="row g-4">
89
+ <!-- VADER Card -->
90
+ <div class="col-lg-4 col-md-6">
91
+ <div class="sentiment-method-card h-100 vader-card">
92
+ <div class="method-header">
93
+ <div class="method-icon">
94
+ <i class="fas fa-book"></i>
95
+ </div>
96
+ <h4 class="method-title">VADER</h4>
97
+ <p class="method-subtitle">Rule-based Analyzer</p>
98
+ </div>
99
+ <div class="method-body">
100
+ <p class="method-description">Rule-based sentiment analyzer specifically tuned for social media text with compound scoring.</p>
101
+ <div class="method-features">
102
+ <div class="feature-item">
103
+ <i class="fas fa-check-circle"></i>
104
+ <span>Lexicon-based approach</span>
105
+ </div>
106
+ <div class="feature-item">
107
+ <i class="fas fa-check-circle"></i>
108
+ <span>Social media optimized</span>
109
+ </div>
110
+ <div class="feature-item">
111
+ <i class="fas fa-check-circle"></i>
112
+ <span>Fast and reliable</span>
113
+ </div>
114
+ </div>
115
+ </div>
116
+ </div>
117
+ </div>
118
+
119
+ <!-- DistilBERT Card -->
120
+ <div class="col-lg-4 col-md-6">
121
+ <div class="sentiment-method-card h-100 distilbert-card">
122
+ <div class="method-header">
123
+ <div class="method-icon">
124
+ <i class="fas fa-brain"></i>
125
+ </div>
126
+ <h4 class="method-title">DistilBERT</h4>
127
+ <p class="method-subtitle">Transformer Model</p>
128
+ </div>
129
+ <div class="method-body">
130
+ <p class="method-description">Transformer model fine-tuned on Stanford Sentiment Treebank dataset with high accuracy.</p>
131
+ <div class="method-features">
132
+ <div class="feature-item">
133
+ <i class="fas fa-check-circle"></i>
134
+ <span>Deep learning approach</span>
135
+ </div>
136
+ <div class="feature-item">
137
+ <i class="fas fa-check-circle"></i>
138
+ <span>~91% accuracy</span>
139
+ </div>
140
+ <div class="feature-item">
141
+ <i class="fas fa-check-circle"></i>
142
+ <span>Context-aware</span>
143
+ </div>
144
+ </div>
145
+ </div>
146
+ </div>
147
+ </div>
148
+
149
+ <!-- RoBERTa Emotion Card -->
150
+ <div class="col-lg-4 col-md-6">
151
+ <div class="sentiment-method-card h-100 roberta-card">
152
+ <div class="method-header">
153
+ <div class="method-icon">
154
+ <i class="fas fa-smile"></i>
155
+ </div>
156
+ <h4 class="method-title">RoBERTa Emotion</h4>
157
+ <p class="method-subtitle">Multi-label Emotion</p>
158
+ </div>
159
+ <div class="method-body">
160
+ <p class="method-description">Multi-label emotion detection model identifying specific emotions like joy, anger, sadness, etc.</p>
161
+ <div class="method-features">
162
+ <div class="feature-item">
163
+ <i class="fas fa-check-circle"></i>
164
+ <span>Emotion classification</span>
165
+ </div>
166
+ <div class="feature-item">
167
+ <i class="fas fa-check-circle"></i>
168
+ <span>Multi-label detection</span>
169
+ </div>
170
+ <div class="feature-item">
171
+ <i class="fas fa-check-circle"></i>
172
+ <span>Detailed emotional analysis</span>
173
+ </div>
174
+ </div>
175
+ </div>
176
+ </div>
177
+ </div>
178
+ </div>
179
+ </div>
180
+ </div>
181
+ </div>
182
+ </div>
183
+
184
+ <!-- Sentiment Scale Info -->
185
+ <div class="row mb-4">
186
+ <div class="col-12">
187
+ <div class="card">
188
+ <div class="card-header">
189
+ <h3 class="mb-0">
190
+ <i class="fas fa-chart-line"></i>
191
+ Sentiment Scale
192
+ </h3>
193
+ </div>
194
+ <div class="card-body">
195
+ <div class="row">
196
+ <div class="col-md-3">
197
+ <div class="card text-center">
198
+ <div class="card-body">
199
+ <i class="fas fa-frown fa-2x text-danger mb-2"></i>
200
+ <h5>Negative</h5>
201
+ <p class="small mb-0">Score: -1.0 to -0.05</p>
202
+ </div>
203
+ </div>
204
+ </div>
205
+ <div class="col-md-3">
206
+ <div class="card text-center">
207
+ <div class="card-body">
208
+ <i class="fas fa-meh fa-2x text-warning mb-2"></i>
209
+ <h5>Neutral</h5>
210
+ <p class="small mb-0">Score: -0.05 to 0.05</p>
211
+ </div>
212
+ </div>
213
+ </div>
214
+ <div class="col-md-3">
215
+ <div class="card text-center">
216
+ <div class="card-body">
217
+ <i class="fas fa-smile fa-2x text-success mb-2"></i>
218
+ <h5>Positive</h5>
219
+ <p class="small mb-0">Score: 0.05 to 1.0</p>
220
+ </div>
221
+ </div>
222
+ </div>
223
+ <div class="col-md-3">
224
+ <div class="card text-center">
225
+ <div class="card-body">
226
+ <i class="fas fa-heart fa-2x text-info mb-2"></i>
227
+ <h5>Emotions</h5>
228
+ <p class="small mb-0">Joy, Anger, Sadness, Fear, etc.</p>
229
+ </div>
230
+ </div>
231
+ </div>
232
+ </div>
233
+ </div>
234
+ </div>
235
+ </div>
236
+ </div>
237
+
238
+ <!-- Results Section -->
239
+ <div class="row">
240
+ <div class="col-12">
241
+ <div class="card">
242
+ <div class="card-header">
243
+ <h3 class="mb-0">
244
+ <i class="fas fa-chart-bar"></i>
245
+ Sentiment Analysis Results
246
+ </h3>
247
+ </div>
248
+ <div class="card-body">
249
+ <div id="resultsContainer">
250
+ <div class="text-center text-muted py-5">
251
+ <i class="fas fa-arrow-up fa-2x mb-3"></i>
252
+ <p>Click "Analyze Sentiment" to see sentiment analysis results</p>
253
+ </div>
254
+ </div>
255
+ </div>
256
+ </div>
257
+ </div>
258
+ </div>
259
+ </div>
260
+ {% endblock %}
261
+
262
+ {% block extra_scripts %}
263
+ <script>
264
+ // Initialize page
265
+ document.addEventListener('DOMContentLoaded', function() {
266
+ // Only carry over when using Quick Nav; otherwise leave defaults
267
+ const shouldCarry = sessionStorage.getItem('carryTextOnNextPage') === '1';
268
+ if (shouldCarry) {
269
+ const sampleSel = document.getElementById('sampleSelect');
270
+ if (sampleSel) sampleSel.value = 'Custom';
271
+ const storedText = sessionStorage.getItem('analysisText');
272
+ if (storedText) document.getElementById('textInput').value = storedText;
273
+ sessionStorage.removeItem('carryTextOnNextPage');
274
+ }
275
+
276
+ // Sample text dropdown handler
277
+ document.getElementById('sampleSelect').addEventListener('change', function() {
278
+ const sampleType = this.value;
279
+ const textInput = document.getElementById('textInput');
280
+
281
+ if (sampleType === 'Custom') {
282
+ textInput.value = '';
283
+ } else {
284
+ // Get sample text from server
285
+ fetch('/api/sample-text', {
286
+ method: 'POST',
287
+ headers: {
288
+ 'Content-Type': 'application/json',
289
+ },
290
+ body: JSON.stringify({sample_type: sampleType})
291
+ })
292
+ .then(response => response.json())
293
+ .then(data => {
294
+ textInput.value = data.text;
295
+ });
296
+ }
297
+ });
298
+
299
+ // Process button handler
300
+ document.getElementById('processBtn').addEventListener('click', function() {
301
+ const text = document.getElementById('textInput').value.trim();
302
+
303
+ if (!text) {
304
+ alert('Please enter some text to analyze.');
305
+ return;
306
+ }
307
+
308
+ // Show loading state
309
+ this.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Processing...';
310
+ this.disabled = true;
311
+
312
+ // Process text
313
+ processSentiment();
314
+
315
+ // Reset button after a delay
316
+ setTimeout(() => {
317
+ this.innerHTML = '<i class="fas fa-heart"></i> Analyze Sentiment';
318
+ this.disabled = false;
319
+ }, 2000);
320
+ });
321
+
322
+ // Clear button handler
323
+ document.getElementById('clearBtn').addEventListener('click', function() {
324
+ document.getElementById('textInput').value = '';
325
+ document.getElementById('resultsContainer').innerHTML = `
326
+ <div class="text-center text-muted py-5">
327
+ <i class="fas fa-arrow-up fa-2x mb-3"></i>
328
+ <p>Click "Analyze Sentiment" to see sentiment analysis results</p>
329
+ </div>
330
+ `;
331
+ });
332
+
333
+ // Keyboard shortcuts
334
+ document.addEventListener('keydown', function(e) {
335
+ // Ctrl+Enter to process
336
+ if (e.ctrlKey && e.key === 'Enter') {
337
+ document.getElementById('processBtn').click();
338
+ }
339
+
340
+ // Ctrl+L to clear
341
+ if (e.ctrlKey && e.key === 'l') {
342
+ e.preventDefault();
343
+ document.getElementById('clearBtn').click();
344
+ }
345
+ });
346
+ });
347
+
348
+ // Process sentiment analysis
349
+ function processSentiment() {
350
+ const text = document.getElementById('textInput').value.trim();
351
+
352
+ if (!text) {
353
+ alert('Please enter some text to analyze.');
354
+ return;
355
+ }
356
+
357
+ showLoading('resultsContainer');
358
+
359
+ fetch('/api/sentiment', {
360
+ method: 'POST',
361
+ headers: {
362
+ 'Content-Type': 'application/json',
363
+ },
364
+ body: JSON.stringify({text: text})
365
+ })
366
+ .then(response => response.json())
367
+ .then(data => {
368
+ if (data.success) {
369
+ displayResults(data.result);
370
+ } else {
371
+ showError(data.error || 'An error occurred while processing the text');
372
+ }
373
+ })
374
+ .catch(error => {
375
+ showError('Failed to process text: ' + error.message);
376
+ })
377
+ .finally(() => {
378
+ hideLoading('resultsContainer');
379
+ });
380
+ }
381
+
382
+ // Show loading state
383
+ function showLoading(elementId) {
384
+ const element = document.getElementById(elementId);
385
+ if (element) {
386
+ element.innerHTML = `
387
+ <div class="text-center py-4">
388
+ <div class="spinner-border text-primary" role="status">
389
+ <span class="visually-hidden">Loading...</span>
390
+ </div>
391
+ <p class="mt-2">Analyzing sentiment and emotions...</p>
392
+ </div>
393
+ `;
394
+ }
395
+ }
396
+
397
+ // Hide loading state
398
+ function hideLoading(elementId) {
399
+ const element = document.getElementById(elementId);
400
+ if (element && element.innerHTML.includes('spinner-border')) {
401
+ element.innerHTML = '';
402
+ }
403
+ }
404
+
405
+ // Show error message
406
+ function showError(message, elementId = 'resultsContainer') {
407
+ const element = document.getElementById(elementId);
408
+ if (element) {
409
+ element.innerHTML = `
410
+ <div class="alert alert-danger fade-in">
411
+ <i class="fas fa-exclamation-triangle"></i>
412
+ <strong>Error:</strong> ${message}
413
+ </div>
414
+ `;
415
+ }
416
+ }
417
+
418
+ // Display results
419
+ function displayResults(result) {
420
+ const container = document.getElementById('resultsContainer');
421
+ if (container) {
422
+ container.innerHTML = result;
423
+ container.classList.add('fade-in');
424
+
425
+ // Scroll to results
426
+ container.scrollIntoView({ behavior: 'smooth', block: 'start' });
427
+ }
428
+ }
429
+ </script>
430
+ {% endblock %}
templates/summarization.html ADDED
@@ -0,0 +1,396 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends "base.html" %}
2
+
3
+ {% block title %}Text Summarization - NLP Ultimate Tutorial{% endblock %}
4
+
5
+ {% block content %}
6
+ <div class="container">
7
+ <!-- Header Section -->
8
+ <div class="row mb-4">
9
+ <div class="col-12">
10
+ <div class="card">
11
+ <div class="card-header">
12
+ <h1 class="mb-0">
13
+ <i class="fas fa-compress-alt"></i>
14
+ Text Summarization
15
+ </h1>
16
+ </div>
17
+ <div class="card-body">
18
+ <p class="lead">Condense text to capture its main points using both extractive and abstractive summarization techniques.</p>
19
+
20
+ <div class="alert alert-info">
21
+ <i class="fas fa-info-circle"></i>
22
+ <strong>About:</strong> Text summarization condenses text to capture its main points, enabling quicker comprehension of large volumes of information.
23
+ </div>
24
+ </div>
25
+ </div>
26
+ </div>
27
+ </div>
28
+
29
+ {% include "_analysis_nav.html" %}
30
+
31
+ <!-- Text Input Section -->
32
+ <div class="row mb-4">
33
+ <div class="col-12">
34
+ <div class="card">
35
+ <div class="card-header">
36
+ <h3 class="mb-0">
37
+ <i class="fas fa-keyboard"></i>
38
+ Enter your text:
39
+ </h3>
40
+ </div>
41
+ <div class="card-body">
42
+ <div class="row mb-3">
43
+ <div class="col-md-8">
44
+ <textarea id="textInput" class="form-control" rows="8" placeholder="Enter or paste your text here... (minimum 3 sentences, 40 words)">Climate change is the long-term alteration of temperature and typical weather patterns in a place. Climate change is already affecting our planet in many ways - from rising sea levels to more frequent extreme weather events. The primary cause of current climate change is largely human activity, like burning fossil fuels, which adds heat-trapping gases to Earth's atmosphere. The consequences of changing climate are already being felt worldwide, affecting ecosystems, agriculture, and human settlements. Addressing climate change requires immediate action to reduce greenhouse gas emissions and adapt to the changes that are already occurring.</textarea>
45
+ </div>
46
+ <div class="col-md-4">
47
+ <label for="sampleSelect" class="form-label">Or choose a sample:</label>
48
+ <select id="sampleSelect" class="form-select">
49
+ <option value="Custom">Custom</option>
50
+ <option value="News Article">News Article</option>
51
+ <option value="Product Review">Product Review</option>
52
+ <option value="Scientific Text">Scientific Text</option>
53
+ <option value="Literary Text">Literary Text</option>
54
+ </select>
55
+ </div>
56
+ </div>
57
+
58
+ <div class="d-flex justify-content-between align-items-center">
59
+ <div>
60
+ <button id="processBtn" class="btn btn-primary btn-lg">
61
+ <i class="fas fa-compress-alt"></i>
62
+ Generate Summary
63
+ </button>
64
+ </div>
65
+ <div>
66
+ <button id="clearBtn" class="btn btn-outline-secondary">
67
+ <i class="fas fa-trash"></i>
68
+ Clear
69
+ </button>
70
+ </div>
71
+ </div>
72
+ </div>
73
+ </div>
74
+ </div>
75
+ </div>
76
+
77
+ <!-- Summary Settings Section -->
78
+ <div class="row mb-4">
79
+ <div class="col-12">
80
+ <div class="card">
81
+ <div class="card-header">
82
+ <h3 class="mb-0">
83
+ <i class="fas fa-cog"></i>
84
+ Summary Settings
85
+ </h3>
86
+ </div>
87
+ <div class="card-body">
88
+ <div class="row">
89
+ <div class="col-md-4">
90
+ <label for="minLength" class="form-label">Minimum Length</label>
91
+ <input type="range" class="form-range" id="minLength" min="10" max="100" value="30" step="5">
92
+ <div class="d-flex justify-content-between">
93
+ <small>10</small>
94
+ <small id="minLengthValue">30</small>
95
+ <small>100</small>
96
+ </div>
97
+ </div>
98
+ <div class="col-md-4">
99
+ <label for="maxLength" class="form-label">Maximum Length</label>
100
+ <input type="range" class="form-range" id="maxLength" min="50" max="350" value="300" step="10">
101
+ <div class="d-flex justify-content-between">
102
+ <small>50</small>
103
+ <small id="maxLengthValue">300</small>
104
+ <small>350</small>
105
+ </div>
106
+ </div>
107
+ <div class="col-md-4">
108
+ <div class="form-check mt-4">
109
+ <input class="form-check-input" type="checkbox" id="useSampling">
110
+ <label class="form-check-label" for="useSampling">
111
+ Use sampling for diverse summaries
112
+ </label>
113
+ <div class="form-text">Enable to generate more creative summaries</div>
114
+ </div>
115
+ </div>
116
+ </div>
117
+ </div>
118
+ </div>
119
+ </div>
120
+ </div>
121
+
122
+ <!-- Summarization Methods Info -->
123
+ <div class="row mb-4">
124
+ <div class="col-12">
125
+ <div class="card">
126
+ <div class="card-header">
127
+ <h3 class="mb-0">
128
+ <i class="fas fa-info-circle"></i>
129
+ Summarization Methods
130
+ </h3>
131
+ </div>
132
+ <div class="card-body">
133
+ <div class="row">
134
+ <div class="col-md-6">
135
+ <div class="card h-100">
136
+ <div class="card-body text-center">
137
+ <i class="fas fa-cut fa-2x text-primary mb-2"></i>
138
+ <h5>Extractive Summarization</h5>
139
+ <p class="small">Selects important sentences from the original text using TextRank algorithm.</p>
140
+ <ul class="list-unstyled small text-start">
141
+ <li>• Preserves original wording</li>
142
+ <li>• Fast and reliable</li>
143
+ <li>• Based on sentence importance</li>
144
+ </ul>
145
+ </div>
146
+ </div>
147
+ </div>
148
+ <div class="col-md-6">
149
+ <div class="card h-100">
150
+ <div class="card-body text-center">
151
+ <i class="fas fa-brain fa-2x text-success mb-2"></i>
152
+ <h5>Abstractive Summarization</h5>
153
+ <p class="small">Generates new sentences using BART model fine-tuned on CNN/DM dataset.</p>
154
+ <ul class="list-unstyled small text-start">
155
+ <li>• Creates human-like summaries</li>
156
+ <li>• More natural language</li>
157
+ <li>• Higher compression ratios</li>
158
+ </ul>
159
+ </div>
160
+ </div>
161
+ </div>
162
+ </div>
163
+ </div>
164
+ </div>
165
+ </div>
166
+ </div>
167
+
168
+ <!-- Results Section -->
169
+ <div class="row">
170
+ <div class="col-12">
171
+ <div class="card">
172
+ <div class="card-header">
173
+ <h3 class="mb-0">
174
+ <i class="fas fa-chart-bar"></i>
175
+ Summarization Results
176
+ </h3>
177
+ </div>
178
+ <div class="card-body">
179
+ <div id="resultsContainer">
180
+ <div class="text-center text-muted py-5">
181
+ <i class="fas fa-arrow-up fa-2x mb-3"></i>
182
+ <p>Click "Generate Summary" to see summarization results</p>
183
+ </div>
184
+ </div>
185
+ </div>
186
+ </div>
187
+ </div>
188
+ </div>
189
+ </div>
190
+ {% endblock %}
191
+
192
+ {% block extra_scripts %}
193
+ <script>
194
+ // Initialize page
195
// Page wiring for the summarization view: text carry-over, slider labels,
// sample dropdown, process/clear buttons and keyboard shortcuts.
document.addEventListener('DOMContentLoaded', function() {
    const textInput = document.getElementById('textInput');
    const sampleSelect = document.getElementById('sampleSelect');
    const processBtn = document.getElementById('processBtn');
    const clearBtn = document.getElementById('clearBtn');
    const routeKey = 'customTextBackup:' + (window.location.pathname || '/summarization');

    // Only carry over text when explicitly requested by quick-nav;
    // on a fresh route the default sample text in the textarea is kept.
    if (sessionStorage.getItem('carryTextOnNextPage') === '1') {
        const storedText = sessionStorage.getItem('analysisText');
        if (storedText) textInput.value = storedText;
        sessionStorage.removeItem('carryTextOnNextPage');
    }

    // Mirror each slider position into its value label
    ['minLength', 'maxLength'].forEach(function(sliderId) {
        document.getElementById(sliderId).addEventListener('input', function() {
            document.getElementById(sliderId + 'Value').textContent = this.value;
        });
    });

    // Sample text dropdown handler with Custom restore.
    // previousSample remembers what was selected before the change so that the
    // custom-text backup is only written when leaving "Custom"; hopping from
    // one sample to another must not overwrite it with the earlier sample.
    let previousSample = sampleSelect.value;
    sampleSelect.addEventListener('change', function() {
        const sampleType = this.value;
        const leavingCustom = previousSample === 'Custom';
        previousSample = sampleType;

        if (sampleType === 'Custom') {
            // Restore previously typed custom text if any
            const backup = sessionStorage.getItem(routeKey);
            if (backup !== null) {
                textInput.value = backup;
            }
            return;
        }

        if (leavingCustom) {
            sessionStorage.setItem(routeKey, textInput.value);
        }

        // Get sample text from server
        fetch('/api/sample-text', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({sample_type: sampleType})
        })
        .then(response => response.json())
        .then(data => {
            // A slow response must not clobber a newer selection
            if (sampleSelect.value !== sampleType) return;
            textInput.value = data.text || '';
        })
        .catch(error => {
            // Keep the current text; just report why the sample did not load
            console.error('Failed to load sample text:', error);
        });
    });

    // While typing, keep a backup of custom text for this route
    textInput.addEventListener('input', function() {
        sessionStorage.setItem(routeKey, textInput.value);
    });

    // Process button handler
    processBtn.addEventListener('click', function() {
        const text = textInput.value.trim();

        if (!text) {
            alert('Please enter some text to summarize.');
            return;
        }

        // Show loading state
        processBtn.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Processing...';
        processBtn.disabled = true;

        const restoreButton = function() {
            processBtn.innerHTML = '<i class="fas fa-compress-alt"></i> Generate Summary';
            processBtn.disabled = false;
        };

        // Re-enable the button when the request settles if the processor
        // exposes a promise; otherwise fall back to the fixed 2 s delay.
        const pending = processSummarization();
        if (pending && typeof pending.finally === 'function') {
            pending.finally(restoreButton);
        } else {
            setTimeout(restoreButton, 2000);
        }
    });

    // Clear button handler
    clearBtn.addEventListener('click', function() {
        textInput.value = '';
        document.getElementById('resultsContainer').innerHTML = `
            <div class="text-center text-muted py-5">
                <i class="fas fa-arrow-up fa-2x mb-3"></i>
                <p>Click "Generate Summary" to see summarization results</p>
            </div>
        `;
    });

    // Keyboard shortcuts
    document.addEventListener('keydown', function(e) {
        // Ctrl+Enter to process
        if (e.ctrlKey && e.key === 'Enter') {
            processBtn.click();
        }

        // Ctrl+L to clear (preventDefault stops the browser's own Ctrl+L action)
        if (e.ctrlKey && e.key === 'l') {
            e.preventDefault();
            clearBtn.click();
        }
    });
});
304
+
305
+ // Process summarization
306
/**
 * Send the current text and summary settings to /api/summarization and
 * render the outcome in #resultsContainer.
 *
 * Reads #textInput, #minLength, #maxLength and #useSampling from the page.
 * Stops with an alert when the text is empty, and with an inline error when
 * the minimum length exceeds the maximum length (the two sliders overlap
 * between 50 and 100, so that combination is reachable from the UI).
 *
 * @returns {Promise<void>} Settles once the request has been handled, or
 *     immediately when validation stopped it. It never rejects: failures are
 *     reported through showError.
 */
function processSummarization() {
    const text = document.getElementById('textInput').value.trim();

    if (!text) {
        alert('Please enter some text to summarize.');
        return Promise.resolve();
    }

    const minLength = parseInt(document.getElementById('minLength').value, 10);
    const maxLength = parseInt(document.getElementById('maxLength').value, 10);
    const useSampling = document.getElementById('useSampling').checked;

    if (minLength > maxLength) {
        showError('Minimum length cannot be greater than maximum length.');
        return Promise.resolve();
    }

    showLoading('resultsContainer');

    return fetch('/api/summarization', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({
            text: text,
            min_length: minLength,
            max_length: maxLength,
            use_sampling: useSampling
        })
    })
    // An error page is not guaranteed to be JSON; report the HTTP status
    // instead of a raw JSON parse error in that case.
    .then(response => response.json().catch(() => {
        throw new Error('unexpected server response (HTTP ' + response.status + ')');
    }))
    .then(data => {
        if (data.success) {
            displayResults(data.result);
        } else {
            showError(data.error || 'An error occurred while processing the text');
        }
    })
    .catch(error => {
        showError('Failed to process text: ' + error.message);
    })
    .finally(() => {
        // Only clears the container if the spinner is still showing
        hideLoading('resultsContainer');
    });
}
347
+
348
+ // Show loading state
349
/**
 * Swap the contents of the given element for a centered spinner with the
 * "Generating summaries..." caption.
 * Silently does nothing when no element with that id exists.
 *
 * @param {string} elementId - Id of the element that should show the spinner.
 */
function showLoading(elementId) {
    const target = document.getElementById(elementId);
    if (!target) {
        return;
    }

    target.innerHTML = `
        <div class="text-center py-4">
            <div class="spinner-border text-primary" role="status">
                <span class="visually-hidden">Loading...</span>
            </div>
            <p class="mt-2">Generating summaries...</p>
        </div>
    `;
}
362
+
363
+ // Hide loading state
364
/**
 * Clear the given element, but only while it still shows the loading spinner.
 * Results or an error alert that already replaced the spinner are kept,
 * since they no longer contain the 'spinner-border' marker.
 *
 * @param {string} elementId - Id of the element that may hold the spinner.
 */
function hideLoading(elementId) {
    const target = document.getElementById(elementId);
    const spinnerVisible = Boolean(target) && target.innerHTML.includes('spinner-border');
    if (spinnerVisible) {
        target.innerHTML = '';
    }
}
370
+
371
+ // Show error message
372
/**
 * Render a Bootstrap danger alert containing `message` inside the target element.
 *
 * The message is HTML-escaped before it is interpolated: callers pass
 * server-supplied error strings and exception messages, and writing those
 * into innerHTML verbatim would let any markup they contain be parsed and run.
 *
 * @param {string} message - Text shown after the "Error:" label.
 * @param {string} [elementId='resultsContainer'] - Id of the element whose
 *     contents are replaced. Nothing happens if no such element exists.
 */
function showError(message, elementId = 'resultsContainer') {
    const element = document.getElementById(elementId);
    if (!element) {
        return;
    }

    const htmlEntities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    const safeMessage = String(message).replace(/[&<>"']/g, ch => htmlEntities[ch]);

    element.innerHTML = `
        <div class="alert alert-danger fade-in">
            <i class="fas fa-exclamation-triangle"></i>
            <strong>Error:</strong> ${safeMessage}
        </div>
    `;
}
383
+
384
+ // Display results
385
/**
 * Show server-rendered summary markup in #resultsContainer, apply the
 * 'fade-in' class and scroll the container into view.
 * Does nothing when the container is not on the page.
 *
 * @param {string} result - HTML markup to place inside the container.
 */
function displayResults(result) {
    const panel = document.getElementById('resultsContainer');
    if (!panel) {
        return;
    }

    panel.innerHTML = result;
    panel.classList.add('fade-in');

    // Bring the freshly rendered results to the top of the viewport
    panel.scrollIntoView({ behavior: 'smooth', block: 'start' });
}
395
+ </script>
396
+ {% endblock %}
templates/text_generation.html ADDED
@@ -0,0 +1,469 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends "base.html" %}
2
+
3
+ {% block title %}Text Generation - NLP Ultimate Tutorial{% endblock %}
4
+
5
+ {% block content %}
6
+ <div class="container">
7
+ <!-- Header Section -->
8
+ <div class="row mb-4">
9
+ <div class="col-12">
10
+ <div class="card">
11
+ <div class="card-header">
12
+ <h1 class="mb-0">
13
+ <i class="fas fa-magic"></i>
14
+ Text Generation
15
+ </h1>
16
+ </div>
17
+ <div class="card-body">
18
+ <p class="lead">Generate human-like text continuations using advanced language models.</p>
19
+
20
+ <div class="alert alert-info">
21
+ <i class="fas fa-info-circle"></i>
22
+ <strong>About:</strong> Text generation models can continue or expand on a given text prompt, creating new content that follows the style and context of the input.
23
+ </div>
24
+ </div>
25
+ </div>
26
+ </div>
27
+ </div>
28
+
29
+ {% include "_analysis_nav.html" %}
30
+
31
+ <!-- Input Section -->
32
+ <div class="row mb-4">
33
+ <div class="col-12">
34
+ <div class="card">
35
+ <div class="card-header">
36
+ <h3 class="mb-0">
37
+ <i class="fas fa-keyboard"></i>
38
+ Enter your prompt:
39
+ </h3>
40
+ </div>
41
+ <div class="card-body">
42
+ <div class="row mb-3">
43
+ <div class="col-md-8">
44
+ <textarea id="textInput" class="form-control" rows="6" placeholder="Enter a prompt to continue or expand on...">Once upon a time in a magical forest,</textarea>
45
+ </div>
46
+ <div class="col-md-4">
47
+ <label for="sampleSelect" class="form-label">Or choose a sample:</label>
48
+ <select id="sampleSelect" class="form-select">
49
+ <option value="Custom">Custom</option>
50
+ <option value="Story">Story</option>
51
+ <option value="Technical">Technical</option>
52
+ <option value="Email">Email</option>
53
+ <option value="Recipe">Recipe</option>
54
+ </select>
55
+ </div>
56
+ </div>
57
+
58
+ <div class="d-flex justify-content-between align-items-center">
59
+ <div>
60
+ <button id="processBtn" class="btn btn-primary btn-lg">
61
+ <i class="fas fa-magic"></i>
62
+ Generate Text
63
+ </button>
64
+ </div>
65
+ <div>
66
+ <button id="clearBtn" class="btn btn-outline-secondary">
67
+ <i class="fas fa-trash"></i>
68
+ Clear
69
+ </button>
70
+ </div>
71
+ </div>
72
+ </div>
73
+ </div>
74
+ </div>
75
+ </div>
76
+
77
+ <!-- Generation Settings Section -->
78
+ <div class="row mb-4">
79
+ <div class="col-12">
80
+ <div class="card">
81
+ <div class="card-header">
82
+ <h3 class="mb-0">
83
+ <i class="fas fa-cog"></i>
84
+ Generation Settings
85
+ </h3>
86
+ </div>
87
+ <div class="card-body">
88
+ <div class="row">
89
+ <div class="col-md-3">
90
+ <label for="maxLength" class="form-label">Output Length</label>
91
+ <input type="range" class="form-range" id="maxLength" min="30" max="250" value="100" step="10">
92
+ <div class="d-flex justify-content-between">
93
+ <small>30</small>
94
+ <small id="maxLengthValue">100</small>
95
+ <small>250</small>
96
+ </div>
97
+ <small class="text-muted">Maximum number of tokens to generate</small>
98
+ </div>
99
+ <div class="col-md-3">
100
+ <label for="temperature" class="form-label">Temperature</label>
101
+ <input type="range" class="form-range" id="temperature" min="0.1" max="1.5" value="0.7" step="0.1">
102
+ <div class="d-flex justify-content-between">
103
+ <small>0.1</small>
104
+ <small id="temperatureValue">0.7</small>
105
+ <small>1.5</small>
106
+ </div>
107
+ <small class="text-muted">Higher values make output more random</small>
108
+ </div>
109
+ <div class="col-md-3">
110
+ <label for="topP" class="form-label">Top-p Sampling</label>
111
+ <input type="range" class="form-range" id="topP" min="0.1" max="1.0" value="0.9" step="0.1">
112
+ <div class="d-flex justify-content-between">
113
+ <small>0.1</small>
114
+ <small id="topPValue">0.9</small>
115
+ <small>1.0</small>
116
+ </div>
117
+ <small class="text-muted">Controls diversity via nucleus sampling</small>
118
+ </div>
119
+ <div class="col-md-3">
120
+ <label for="numSequences" class="form-label">Number of Generations</label>
121
+ <input type="range" class="form-range" id="numSequences" min="1" max="3" value="1" step="1">
122
+ <div class="d-flex justify-content-between">
123
+ <small>1</small>
124
+ <small id="numSequencesValue">1</small>
125
+ <small>3</small>
126
+ </div>
127
+ <small class="text-muted">Generate multiple versions to choose from</small>
128
+ </div>
129
+ </div>
130
+ </div>
131
+ </div>
132
+ </div>
133
+ </div>
134
+
135
+ <!-- Model Info Section -->
136
+ <div class="row mb-4">
137
+ <div class="col-12">
138
+ <div class="card">
139
+ <div class="card-header">
140
+ <h3 class="mb-0">
141
+ <i class="fas fa-info-circle"></i>
142
+ Model Information
143
+ </h3>
144
+ </div>
145
+ <div class="card-body">
146
+ <div class="row">
147
+ <div class="col-md-4">
148
+ <div class="card h-100">
149
+ <div class="card-body text-center">
150
+ <i class="fas fa-brain fa-2x text-primary mb-2"></i>
151
+ <h5>GPT-2 Model</h5>
152
+ <p class="small">124M parameter language model trained on diverse internet text</p>
153
+ <ul class="list-unstyled small text-start">
154
+ <li>• Coherent text continuations</li>
155
+ <li>• Style-aware generation</li>
156
+ <li>• Context understanding</li>
157
+ </ul>
158
+ </div>
159
+ </div>
160
+ </div>
161
+ <div class="col-md-4">
162
+ <div class="card h-100">
163
+ <div class="card-body text-center">
164
+ <i class="fas fa-sliders-h fa-2x text-success mb-2"></i>
165
+ <h5>Generation Controls</h5>
166
+ <p class="small">Fine-tune output characteristics with advanced parameters</p>
167
+ <ul class="list-unstyled small text-start">
168
+ <li>• Temperature control</li>
169
+ <li>• Top-p sampling</li>
170
+ <li>• Length management</li>
171
+ </ul>
172
+ </div>
173
+ </div>
174
+ </div>
175
+ <div class="col-md-4">
176
+ <div class="card h-100">
177
+ <div class="card-body text-center">
178
+ <i class="fas fa-chart-line fa-2x text-info mb-2"></i>
179
+ <h5>Text Analysis</h5>
180
+ <p class="small">Comprehensive analysis of generated text quality and characteristics</p>
181
+ <ul class="list-unstyled small text-start">
182
+ <li>• Word length distribution</li>
183
+ <li>• Lexical diversity</li>
184
+ <li>• Generation statistics</li>
185
+ </ul>
186
+ </div>
187
+ </div>
188
+ </div>
189
+ </div>
190
+ </div>
191
+ </div>
192
+ </div>
193
+ </div>
194
+
195
+ <!-- Example Prompts Section -->
196
+ <div class="row mb-4">
197
+ <div class="col-12">
198
+ <div class="card">
199
+ <div class="card-header">
200
+ <h3 class="mb-0">
201
+ <i class="fas fa-list"></i>
202
+ Example Prompts
203
+ </h3>
204
+ </div>
205
+ <div class="card-body">
206
+ <div class="row">
207
+ <div class="col-md-3">
208
+ <button class="btn btn-outline-primary w-100 mb-2" onclick="setExample('Once upon a time in a magical forest,')">
209
+ Once upon a time in a magical forest,
210
+ </button>
211
+ </div>
212
+ <div class="col-md-3">
213
+ <button class="btn btn-outline-primary w-100 mb-2" onclick="setExample('The research findings indicate that')">
214
+ The research findings indicate that
215
+ </button>
216
+ </div>
217
+ <div class="col-md-3">
218
+ <button class="btn btn-outline-primary w-100 mb-2" onclick="setExample('Dear customer service team, I am writing regarding')">
219
+ Dear customer service team, I am writing regarding
220
+ </button>
221
+ </div>
222
+ <div class="col-md-3">
223
+ <button class="btn btn-outline-primary w-100 mb-2" onclick="setExample('The recipe for the perfect chocolate cake requires')">
224
+ The recipe for the perfect chocolate cake requires
225
+ </button>
226
+ </div>
227
+ </div>
228
+ </div>
229
+ </div>
230
+ </div>
231
+ </div>
232
+
233
+ <!-- Results Section -->
234
+ <div class="row">
235
+ <div class="col-12">
236
+ <div class="card">
237
+ <div class="card-header">
238
+ <h3 class="mb-0">
239
+ <i class="fas fa-chart-bar"></i>
240
+ Generated Text Results
241
+ </h3>
242
+ </div>
243
+ <div class="card-body">
244
+ <div id="resultsContainer">
245
+ <div class="text-center text-muted py-5">
246
+ <i class="fas fa-arrow-up fa-2x mb-3"></i>
247
+ <p>Click "Generate Text" to see text generation results</p>
248
+ </div>
249
+ </div>
250
+ </div>
251
+ </div>
252
+ </div>
253
+ </div>
254
+ </div>
255
+ {% endblock %}
256
+
257
+ {% block extra_scripts %}
258
+ <script>
259
+ // Initialize page
260
// Page wiring for the text generation view: prompt carry-over, slider labels,
// sample dropdown, generate/clear buttons and keyboard shortcuts.
document.addEventListener('DOMContentLoaded', function() {
    const textInput = document.getElementById('textInput');
    const sampleSelect = document.getElementById('sampleSelect');
    const processBtn = document.getElementById('processBtn');
    const clearBtn = document.getElementById('clearBtn');
    const routeKey = 'customTextBackup:' + (window.location.pathname || '/text-generation');

    // Only carry over when navigating via Quick Nav; otherwise reset
    if (sessionStorage.getItem('carryTextOnNextPage') === '1') {
        const storedText = sessionStorage.getItem('analysisText');
        if (storedText) textInput.value = storedText;
        sessionStorage.removeItem('carryTextOnNextPage');
    } else {
        // Fresh route: reset to defaults
        textInput.value = textInput.defaultValue || '';
        if (sampleSelect) sampleSelect.value = 'Custom';
    }

    // Mirror each slider position into its "<id>Value" label
    ['maxLength', 'temperature', 'topP', 'numSequences'].forEach(function(sliderId) {
        document.getElementById(sliderId).addEventListener('input', function() {
            document.getElementById(sliderId + 'Value').textContent = this.value;
        });
    });

    // Built-in prompts offered by the sample dropdown
    const samples = {
        'Story': 'Once upon a time in a magical forest,',
        'Technical': 'The research findings indicate that',
        'Email': 'Dear customer service team, I am writing regarding',
        'Recipe': 'The recipe for the perfect chocolate cake requires'
    };

    // Sample text dropdown handler with Custom restore.
    // previousSample remembers what was selected before the change so that the
    // custom-prompt backup is only written when leaving "Custom"; hopping from
    // one sample to another must not overwrite it with the earlier sample.
    let previousSample = sampleSelect.value;
    sampleSelect.addEventListener('change', function() {
        const sampleType = this.value;
        const leavingCustom = previousSample === 'Custom';
        previousSample = sampleType;

        if (sampleType === 'Custom') {
            const backup = sessionStorage.getItem(routeKey);
            if (backup !== null) textInput.value = backup;
            return;
        }

        if (leavingCustom) {
            sessionStorage.setItem(routeKey, textInput.value);
        }

        textInput.value = samples[sampleType] || '';
        sessionStorage.setItem('analysisText', textInput.value);
    });

    // While typing, keep backups
    textInput.addEventListener('input', function() {
        sessionStorage.setItem('analysisText', textInput.value);
        sessionStorage.setItem(routeKey, textInput.value);
    });

    // Process button handler
    processBtn.addEventListener('click', function() {
        const text = textInput.value.trim();

        if (!text) {
            alert('Please enter a prompt text.');
            return;
        }

        // Show loading state
        processBtn.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Generating...';
        processBtn.disabled = true;

        const restoreButton = function() {
            processBtn.innerHTML = '<i class="fas fa-magic"></i> Generate Text';
            processBtn.disabled = false;
        };

        // Re-enable the button when the request settles if the processor
        // exposes a promise; otherwise fall back to the fixed 2 s delay.
        const pending = processTextGeneration();
        if (pending && typeof pending.finally === 'function') {
            pending.finally(restoreButton);
        } else {
            setTimeout(restoreButton, 2000);
        }
    });

    // Clear button handler
    clearBtn.addEventListener('click', function() {
        textInput.value = '';
        document.getElementById('resultsContainer').innerHTML = `
            <div class="text-center text-muted py-5">
                <i class="fas fa-arrow-up fa-2x mb-3"></i>
                <p>Click "Generate Text" to see text generation results</p>
            </div>
        `;
    });

    // Keyboard shortcuts
    document.addEventListener('keydown', function(e) {
        // Ctrl+Enter to process
        if (e.ctrlKey && e.key === 'Enter') {
            processBtn.click();
        }

        // Ctrl+L to clear (preventDefault stops the browser's own Ctrl+L action)
        if (e.ctrlKey && e.key === 'l') {
            e.preventDefault();
            clearBtn.click();
        }
    });
});
371
+
372
+ // Set example prompt
373
/**
 * Put one of the example prompts into the prompt textarea.
 *
 * Assigning `.value` from script does not fire an 'input' event, so the
 * stored 'analysisText' is refreshed here explicitly, the same way the sample
 * dropdown handler does after it fills in a sample prompt.
 *
 * @param {string} prompt - Prompt text to place in #textInput.
 */
function setExample(prompt) {
    document.getElementById('textInput').value = prompt;
    sessionStorage.setItem('analysisText', prompt);
}
376
+
377
+ // Process text generation
378
/**
 * Send the prompt and generation settings to /api/text-generation and render
 * the outcome in #resultsContainer.
 *
 * Reads #textInput, #maxLength, #temperature, #topP and #numSequences from
 * the page. Stops with an alert when the prompt is empty.
 *
 * @returns {Promise<void>} Settles once the request has been handled, or
 *     immediately when validation stopped it. It never rejects: failures are
 *     reported through showError.
 */
function processTextGeneration() {
    const text = document.getElementById('textInput').value.trim();
    const maxLength = parseInt(document.getElementById('maxLength').value, 10);
    const temperature = parseFloat(document.getElementById('temperature').value);
    const topP = parseFloat(document.getElementById('topP').value);
    const numSequences = parseInt(document.getElementById('numSequences').value, 10);

    if (!text) {
        alert('Please enter a prompt text.');
        return Promise.resolve();
    }

    showLoading('resultsContainer');

    return fetch('/api/text-generation', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({
            text: text,
            max_length: maxLength,
            temperature: temperature,
            top_p: topP,
            num_sequences: numSequences
        })
    })
    // An error page is not guaranteed to be JSON; report the HTTP status
    // instead of a raw JSON parse error in that case.
    .then(response => response.json().catch(() => {
        throw new Error('unexpected server response (HTTP ' + response.status + ')');
    }))
    .then(data => {
        if (data.success) {
            displayResults(data.result);
        } else {
            showError(data.error || 'An error occurred while generating text');
        }
    })
    .catch(error => {
        showError('Failed to generate text: ' + error.message);
    })
    .finally(() => {
        // Only clears the container if the spinner is still showing
        hideLoading('resultsContainer');
    });
}
420
+
421
+ // Show loading state
422
/**
 * Swap the contents of the given element for a centered spinner with the
 * "Generating text..." caption.
 * Silently does nothing when no element with that id exists.
 *
 * @param {string} elementId - Id of the element that should show the spinner.
 */
function showLoading(elementId) {
    const target = document.getElementById(elementId);
    if (!target) {
        return;
    }

    target.innerHTML = `
        <div class="text-center py-4">
            <div class="spinner-border text-primary" role="status">
                <span class="visually-hidden">Loading...</span>
            </div>
            <p class="mt-2">Generating text...</p>
        </div>
    `;
}
435
+
436
+ // Hide loading state
437
/**
 * Clear the given element, but only while it still shows the loading spinner.
 * Results or an error alert that already replaced the spinner are kept,
 * since they no longer contain the 'spinner-border' marker.
 *
 * @param {string} elementId - Id of the element that may hold the spinner.
 */
function hideLoading(elementId) {
    const target = document.getElementById(elementId);
    const spinnerVisible = Boolean(target) && target.innerHTML.includes('spinner-border');
    if (spinnerVisible) {
        target.innerHTML = '';
    }
}
443
+
444
+ // Show error message
445
/**
 * Render a Bootstrap danger alert containing `message` inside the target element.
 *
 * The message is HTML-escaped before it is interpolated: callers pass
 * server-supplied error strings and exception messages, and writing those
 * into innerHTML verbatim would let any markup they contain be parsed and run.
 *
 * @param {string} message - Text shown after the "Error:" label.
 * @param {string} [elementId='resultsContainer'] - Id of the element whose
 *     contents are replaced. Nothing happens if no such element exists.
 */
function showError(message, elementId = 'resultsContainer') {
    const element = document.getElementById(elementId);
    if (!element) {
        return;
    }

    const htmlEntities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    const safeMessage = String(message).replace(/[&<>"']/g, ch => htmlEntities[ch]);

    element.innerHTML = `
        <div class="alert alert-danger fade-in">
            <i class="fas fa-exclamation-triangle"></i>
            <strong>Error:</strong> ${safeMessage}
        </div>
    `;
}
456
+
457
+ // Display results
458
/**
 * Show server-rendered generation markup in #resultsContainer, apply the
 * 'fade-in' class and scroll the container into view.
 * Does nothing when the container is not on the page.
 *
 * @param {string} result - HTML markup to place inside the container.
 */
function displayResults(result) {
    const panel = document.getElementById('resultsContainer');
    if (!panel) {
        return;
    }

    panel.innerHTML = result;
    panel.classList.add('fade-in');

    // Bring the freshly rendered results to the top of the viewport
    panel.scrollIntoView({ behavior: 'smooth', block: 'start' });
}
468
+ </script>
469
+ {% endblock %}
templates/tokenization.html ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends "base.html" %}
2
+
3
+ {% block title %}Tokenization - NLP Ultimate Tutorial{% endblock %}
4
+
5
+ {% block content %}
6
+ <div class="container">
7
+ <!-- Header Section -->
8
+ <div class="row mb-4">
9
+ <div class="col-12">
10
+ <div class="card">
11
+ <div class="card-header">
12
+ <h1 class="mb-0">
13
+ <i class="fas fa-cut"></i>
14
+ Tokenization
15
+ </h1>
16
+ </div>
17
+ <div class="card-body">
18
+ <p class="lead">Break text into smaller units called tokens using various tokenization methods.</p>
19
+
20
+ <div class="alert alert-info">
21
+ <i class="fas fa-info-circle"></i>
22
+ <strong>About:</strong> Tokenization is the process of breaking text into smaller units called tokens, which can be words, characters, or subwords.
23
+ </div>
24
+ </div>
25
+ </div>
26
+ </div>
27
+ </div>
28
+
29
+ {% include "_analysis_nav.html" %}
30
+
31
+ <!-- Text Input Section -->
32
+ <div class="row mb-4">
33
+ <div class="col-12">
34
+ <div class="card">
35
+ <div class="card-header">
36
+ <h3 class="mb-0">
37
+ <i class="fas fa-keyboard"></i>
38
+ Enter your text:
39
+ </h3>
40
+ </div>
41
+ <div class="card-body">
42
+ <div class="row mb-3">
43
+ <div class="col-md-8">
44
+ <textarea id="textInput" class="form-control" rows="6" placeholder="Enter or paste your text here...">The quick brown fox jumps over the lazy dog. It was a beautiful day in May of 2023!</textarea>
45
+ </div>
46
+ <div class="col-md-4">
47
+ <label for="sampleSelect" class="form-label">Or choose a sample:</label>
48
+ <select id="sampleSelect" class="form-select">
49
+ <option value="Custom">Custom</option>
50
+ <option value="News Article">News Article</option>
51
+ <option value="Product Review">Product Review</option>
52
+ <option value="Scientific Text">Scientific Text</option>
53
+ <option value="Literary Text">Literary Text</option>
54
+ </select>
55
+ </div>
56
+ </div>
57
+
58
+ <div class="d-flex justify-content-between align-items-center">
59
+ <div>
60
+ <button id="processBtn" class="btn btn-primary btn-lg">
61
+ <i class="fas fa-cut"></i>
62
+ Analyze Tokens
63
+ </button>
64
+ </div>
65
+ <div>
66
+ <button id="clearBtn" class="btn btn-outline-secondary">
67
+ <i class="fas fa-trash"></i>
68
+ Clear
69
+ </button>
70
+ </div>
71
+ </div>
72
+ </div>
73
+ </div>
74
+ </div>
75
+ </div>
76
+
77
+ <!-- Tokenization Methods Info -->
78
+ <div class="row mb-4">
79
+ <div class="col-12">
80
+ <div class="card">
81
+ <div class="card-header">
82
+ <h3 class="mb-0">
83
+ <i class="fas fa-info-circle"></i>
84
+ Tokenization Methods
85
+ </h3>
86
+ </div>
87
+ <div class="card-body">
88
+ <div class="row">
89
+ <div class="col-md-3">
90
+ <div class="card h-100">
91
+ <div class="card-body text-center">
92
+ <i class="fas fa-font fa-2x text-primary mb-2"></i>
93
+ <h5>Word Tokenization</h5>
94
+ <p class="small">Splits text into individual words and punctuation marks using NLTK.</p>
95
+ </div>
96
+ </div>
97
+ </div>
98
+ <div class="col-md-3">
99
+ <div class="card h-100">
100
+ <div class="card-body text-center">
101
+ <i class="fas fa-paragraph fa-2x text-success mb-2"></i>
102
+ <h5>Sentence Tokenization</h5>
103
+ <p class="small">Divides text into sentences using punctuation and linguistic rules.</p>
104
+ </div>
105
+ </div>
106
+ </div>
107
+ <div class="col-md-3">
108
+ <div class="card h-100">
109
+ <div class="card-body text-center">
110
+ <i class="fas fa-brain fa-2x text-info mb-2"></i>
111
+ <h5>Linguistic Tokenization</h5>
112
+ <p class="small">Advanced tokenization with spaCy including POS tags and dependencies.</p>
113
+ </div>
114
+ </div>
115
+ </div>
116
+ <div class="col-md-3">
117
+ <div class="card h-100">
118
+ <div class="card-body text-center">
119
+ <i class="fas fa-puzzle-piece fa-2x text-warning mb-2"></i>
120
+ <h5>Subword Tokenization</h5>
121
+ <p class="small">Breaks words into smaller units using BERT WordPiece and GPT-2 BPE.</p>
122
+ </div>
123
+ </div>
124
+ </div>
125
+ </div>
126
+ </div>
127
+ </div>
128
+ </div>
129
+ </div>
130
+
131
+ <!-- Results Section -->
132
+ <div class="row">
133
+ <div class="col-12">
134
+ <div class="card">
135
+ <div class="card-header">
136
+ <h3 class="mb-0">
137
+ <i class="fas fa-chart-bar"></i>
138
+ Tokenization Results
139
+ </h3>
140
+ </div>
141
+ <div class="card-body">
142
+ <div id="resultsContainer">
143
+ <div class="text-center text-muted py-5">
144
+ <i class="fas fa-arrow-up fa-2x mb-3"></i>
145
+ <p>Click "Analyze Tokens" to see tokenization results</p>
146
+ </div>
147
+ </div>
148
+ </div>
149
+ </div>
150
+ </div>
151
+ </div>
152
+ </div>
153
+ {% endblock %}
154
+
155
+ {% block extra_scripts %}
156
+ <script>
157
+ // Initialize page
158
/**
 * Page bootstrap for the tokenization view: restores carried-over text and
 * wires the sample dropdown, the process/clear buttons and keyboard shortcuts.
 */
document.addEventListener('DOMContentLoaded', function() {
    const textInput = document.getElementById('textInput');
    const sampleSelect = document.getElementById('sampleSelect');
    const processBtn = document.getElementById('processBtn');
    const clearBtn = document.getElementById('clearBtn');

    // Only carry over when using Quick Nav; otherwise leave defaults
    const shouldCarry = sessionStorage.getItem('carryTextOnNextPage') === '1';
    if (shouldCarry) {
        if (sampleSelect) sampleSelect.value = 'Custom';
        const storedText = sessionStorage.getItem('analysisText');
        if (storedText) textInput.value = storedText;
        sessionStorage.removeItem('carryTextOnNextPage');
    }

    // Sample text dropdown handler
    sampleSelect.addEventListener('change', function() {
        const sampleType = sampleSelect.value;

        if (sampleType === 'Custom') {
            textInput.value = '';
            return;
        }

        // Get sample text from server
        fetch('/api/sample-text', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({sample_type: sampleType})
        })
            .then(response => response.json())
            .then(data => {
                // Drop a late response if the user picked something else meanwhile.
                if (sampleSelect.value !== sampleType) return;
                // A payload without `text` must not show up as the string "undefined".
                textInput.value = data.text || '';
            })
            .catch(error => {
                // Keep the current text; just avoid an unhandled rejection.
                console.error('Failed to load sample text:', error);
            });
    });

    // Process button handler
    processBtn.addEventListener('click', function() {
        const text = textInput.value.trim();

        if (!text) {
            alert('Please enter some text to tokenize.');
            return;
        }

        // Show loading state
        processBtn.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Processing...';
        processBtn.disabled = true;

        // Restore the button when the request settles instead of after a fixed
        // delay, so it cannot be re-clicked while a request is still in flight.
        processTokenization().finally(() => {
            processBtn.innerHTML = '<i class="fas fa-cut"></i> Analyze Tokens';
            processBtn.disabled = false;
        });
    });

    // Clear button handler
    clearBtn.addEventListener('click', function() {
        textInput.value = '';
        document.getElementById('resultsContainer').innerHTML = `
            <div class="text-center text-muted py-5">
                <i class="fas fa-arrow-up fa-2x mb-3"></i>
                <p>Click "Analyze Tokens" to see tokenization results</p>
            </div>
        `;
    });

    // Keyboard shortcuts
    document.addEventListener('keydown', function(e) {
        // Ctrl+Enter to process
        if (e.ctrlKey && e.key === 'Enter') {
            processBtn.click();
        }

        // Ctrl+L to clear
        if (e.ctrlKey && e.key === 'l') {
            e.preventDefault();
            clearBtn.click();
        }
    });
});

/**
 * Send the textarea content to /api/tokenization and render the outcome.
 *
 * Shows the spinner while the request is pending, then either the returned
 * result markup or an error alert.
 *
 * @returns {Promise<void>} settles once the request has been handled; it is
 *   already resolved when the textarea is empty (an alert is shown instead).
 */
function processTokenization() {
    const text = document.getElementById('textInput').value.trim();

    if (!text) {
        alert('Please enter some text to tokenize.');
        return Promise.resolve();
    }

    showLoading('resultsContainer');

    return fetch('/api/tokenization', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({text: text})
    })
        .then(response => response.json())
        .then(data => {
            if (data.success) {
                displayResults(data.result);
            } else {
                showError(data.error || 'An error occurred while processing the text');
            }
        })
        .catch(error => {
            showError('Failed to process text: ' + error.message);
        })
        .finally(() => {
            // No-op when results or an error have already replaced the spinner.
            hideLoading('resultsContainer');
        });
}
274
+
275
+ // Show loading state
276
/**
 * Replace the content of the given element with a centered loading spinner
 * and the caption "Analyzing tokens...".
 *
 * @param {string} elementId - id of the element that receives the spinner.
 *   Nothing happens when no element with that id exists.
 */
function showLoading(elementId) {
    const host = document.getElementById(elementId);
    if (!host) {
        return;
    }

    host.innerHTML = `
        <div class="text-center py-4">
            <div class="spinner-border text-primary" role="status">
                <span class="visually-hidden">Loading...</span>
            </div>
            <p class="mt-2">Analyzing tokens...</p>
        </div>
    `;
}
289
+
290
+ // Hide loading state
291
/**
 * Empty the given element, but only while its markup still contains the
 * `spinner-border` marker written by showLoading(). Content without that
 * marker is left untouched.
 *
 * @param {string} elementId - id of the element to clear.
 */
function hideLoading(elementId) {
    const host = document.getElementById(elementId);
    if (!host) {
        return;
    }

    const showsSpinner = host.innerHTML.includes('spinner-border');
    if (showsSpinner) {
        host.innerHTML = '';
    }
}
297
+
298
+ // Show error message
299
/**
 * Render an error alert inside the given element, replacing its content.
 *
 * The message is inserted as a text node rather than interpolated into
 * innerHTML, so any markup contained in a server- or exception-provided
 * message is displayed literally instead of being parsed as HTML.
 *
 * @param {*} message - text to show after the "Error:" label (stringified).
 * @param {string} [elementId='resultsContainer'] - id of the host element.
 *   Nothing happens when no element with that id exists.
 */
function showError(message, elementId = 'resultsContainer') {
    const element = document.getElementById(elementId);
    if (!element) {
        return;
    }

    const alertBox = document.createElement('div');
    alertBox.className = 'alert alert-danger fade-in';
    // Static, trusted markup only; the dynamic part is appended as text below.
    alertBox.innerHTML = '<i class="fas fa-exclamation-triangle"></i> <strong>Error:</strong> ';
    alertBox.appendChild(document.createTextNode(String(message)));

    element.innerHTML = '';
    element.appendChild(alertBox);
}
310
+
311
+ // Display results
312
/**
 * Show a result inside #resultsContainer and scroll it into view.
 *
 * @param {string} result - markup assigned to innerHTML as-is (not escaped);
 *   presumably trusted server-rendered HTML — verify against the API.
 */
function displayResults(result) {
    const resultsBox = document.getElementById('resultsContainer');
    if (!resultsBox) {
        return;
    }

    resultsBox.innerHTML = result;
    resultsBox.classList.add('fade-in');

    // Bring the freshly rendered results to the top of the viewport.
    resultsBox.scrollIntoView({ behavior: 'smooth', block: 'start' });
}
322
+ </script>
323
+ {% endblock %}
templates/topic_analysis.html ADDED
@@ -0,0 +1,399 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends "base.html" %}
2
+
3
+ {% block title %}Topic Analysis - NLP Ultimate Tutorial{% endblock %}
4
+
5
+ {% block content %}
6
+ <div class="container">
7
+ <!-- Header Section -->
8
+ <div class="row mb-4">
9
+ <div class="col-12">
10
+ <div class="card">
11
+ <div class="card-header">
12
+ <h1 class="mb-0">
13
+ <i class="fas fa-project-diagram"></i>
14
+ Topic Analysis
15
+ </h1>
16
+ </div>
17
+ <div class="card-body">
18
+ <p class="lead">Identify main themes and subjects in text using advanced topic modeling techniques.</p>
19
+
20
+ <div class="alert alert-info">
21
+ <i class="fas fa-info-circle"></i>
22
+ <strong>About:</strong> Topic analysis identifies the main themes and subjects in a text, helping to categorize content and understand what it's about.
23
+ </div>
24
+ </div>
25
+ </div>
26
+ </div>
27
+ </div>
28
+
29
+ {% include "_analysis_nav.html" %}
30
+
31
+ <!-- Text Input Section -->
32
+ <div class="row mb-4">
33
+ <div class="col-12">
34
+ <div class="card">
35
+ <div class="card-header">
36
+ <h3 class="mb-0">
37
+ <i class="fas fa-keyboard"></i>
38
+ Enter your text:
39
+ </h3>
40
+ </div>
41
+ <div class="card-body">
42
+ <div class="row mb-3">
43
+ <div class="col-md-8">
44
+ <textarea id="textInput" class="form-control" rows="10" placeholder="Enter or paste your text here... (minimum 50 words for best results)">Artificial intelligence (AI) is intelligence demonstrated by machines, as opposed to the natural intelligence displayed by animals including humans. AI research has been defined as the field of study of intelligent agents, which refers to any system that perceives its environment and takes actions that maximize its chance of achieving its goals. The term "artificial intelligence" had previously been used to describe machines that mimic and display "human" cognitive skills that are associated with the human mind, such as "learning" and "problem-solving". This definition has since been rejected by major AI researchers who now describe AI in terms of rationality and acting rationally, which does not limit how intelligence can be articulated. AI applications include advanced web search engines, recommendation systems (used by YouTube, Amazon and Netflix), understanding human speech (such as Siri or Alexa), self-driving cars, and competing at the highest level in strategic game systems (such as chess and Go). As machines become increasingly capable, tasks considered to require "intelligence" are often removed from the definition of AI, a phenomenon known as the AI effect. For instance, optical character recognition is frequently excluded from things considered to be AI.</textarea>
45
+ </div>
46
+ <div class="col-md-4">
47
+ <label for="sampleSelect" class="form-label">Or choose a sample:</label>
48
+ <select id="sampleSelect" class="form-select">
49
+ <option value="Custom">Custom</option>
50
+ <option value="News Article">News Article</option>
51
+ <option value="Product Review">Product Review</option>
52
+ <option value="Scientific Text">Scientific Text</option>
53
+ <option value="Literary Text">Literary Text</option>
54
+ </select>
55
+ </div>
56
+ </div>
57
+
58
+ <div class="d-flex justify-content-between align-items-center">
59
+ <div>
60
+ <button id="processBtn" class="btn btn-primary btn-lg">
61
+ <i class="fas fa-project-diagram"></i>
62
+ Analyze Topics
63
+ </button>
64
+ </div>
65
+ <div>
66
+ <button id="clearBtn" class="btn btn-outline-secondary">
67
+ <i class="fas fa-trash"></i>
68
+ Clear
69
+ </button>
70
+ </div>
71
+ </div>
72
+ </div>
73
+ </div>
74
+ </div>
75
+ </div>
76
+
77
+ <!-- Topic Analysis Methods Info -->
78
+ <div class="row mb-4">
79
+ <div class="col-12">
80
+ <div class="card">
81
+ <div class="card-header">
82
+ <h3 class="mb-0">
83
+ <i class="fas fa-info-circle"></i>
84
+ Topic Analysis Methods
85
+ </h3>
86
+ </div>
87
+ <div class="card-body">
88
+ <div class="row">
89
+ <div class="col-md-3">
90
+ <div class="card h-100">
91
+ <div class="card-body text-center">
92
+ <i class="fas fa-tags fa-2x text-primary mb-2"></i>
93
+ <h5>Topic Classification</h5>
94
+ <p class="small">Identifies predefined topic categories using keyword matching and scoring.</p>
95
+ <ul class="list-unstyled small text-start">
96
+ <li>• Environment, Science, Business</li>
97
+ <li>• Health, Technology, Politics</li>
98
+ <li>• Sports, Entertainment, Travel</li>
99
+ </ul>
100
+ </div>
101
+ </div>
102
+ </div>
103
+ <div class="col-md-3">
104
+ <div class="card h-100">
105
+ <div class="card-body text-center">
106
+ <i class="fas fa-chart-bar fa-2x text-success mb-2"></i>
107
+ <h5>Term Frequency</h5>
108
+ <p class="small">Analyzes word frequencies to identify the most important terms and concepts.</p>
109
+ <ul class="list-unstyled small text-start">
110
+ <li>• Word frequency analysis</li>
111
+ <li>• Key phrase extraction</li>
112
+ <li>• N-gram analysis</li>
113
+ </ul>
114
+ </div>
115
+ </div>
116
+ </div>
117
+ <div class="col-md-3">
118
+ <div class="card h-100">
119
+ <div class="card-body text-center">
120
+ <i class="fas fa-calculator fa-2x text-info mb-2"></i>
121
+ <h5>TF-IDF Analysis</h5>
122
+ <p class="small">Identifies distinctive terms using Term Frequency-Inverse Document Frequency.</p>
123
+ <ul class="list-unstyled small text-start">
124
+ <li>• Sentence-level analysis</li>
125
+ <li>• Distinctive term identification</li>
126
+ <li>• Heatmap visualization</li>
127
+ </ul>
128
+ </div>
129
+ </div>
130
+ </div>
131
+ <div class="col-md-3">
132
+ <div class="card h-100">
133
+ <div class="card-body text-center">
134
+ <i class="fas fa-brain fa-2x text-warning mb-2"></i>
135
+ <h5>LDA Topic Modeling</h5>
136
+ <p class="small">Uses Latent Dirichlet Allocation to discover abstract topics in text.</p>
137
+ <ul class="list-unstyled small text-start">
138
+ <li>• Probabilistic modeling</li>
139
+ <li>• Topic distribution</li>
140
+ <li>• Network visualization</li>
141
+ </ul>
142
+ </div>
143
+ </div>
144
+ </div>
145
+ </div>
146
+ </div>
147
+ </div>
148
+ </div>
149
+ </div>
150
+
151
+ <!-- Analysis Features Info -->
152
+ <div class="row mb-4">
153
+ <div class="col-12">
154
+ <div class="card">
155
+ <div class="card-header">
156
+ <h3 class="mb-0">
157
+ <i class="fas fa-chart-line"></i>
158
+ Analysis Features
159
+ </h3>
160
+ </div>
161
+ <div class="card-body">
162
+ <div class="row">
163
+ <div class="col-md-4">
164
+ <div class="card text-center">
165
+ <div class="card-body">
166
+ <i class="fas fa-cloud fa-2x text-primary mb-2"></i>
167
+ <h5>Word Clouds</h5>
168
+ <p class="small mb-0">Visual representation of term frequencies</p>
169
+ </div>
170
+ </div>
171
+ </div>
172
+ <div class="col-md-4">
173
+ <div class="card text-center">
174
+ <div class="card-body">
175
+ <i class="fas fa-network-wired fa-2x text-success mb-2"></i>
176
+ <h5>Network Graphs</h5>
177
+ <p class="small mb-0">Topic-term relationship visualization</p>
178
+ </div>
179
+ </div>
180
+ </div>
181
+ <div class="col-md-4">
182
+ <div class="card text-center">
183
+ <div class="card-body">
184
+ <i class="fas fa-fire fa-2x text-danger mb-2"></i>
185
+ <h5>Heatmaps</h5>
186
+ <p class="small mb-0">TF-IDF term importance visualization</p>
187
+ </div>
188
+ </div>
189
+ </div>
190
+ </div>
191
+ </div>
192
+ </div>
193
+ </div>
194
+ </div>
195
+
196
+ <!-- Results Section -->
197
+ <div class="row">
198
+ <div class="col-12">
199
+ <div class="card">
200
+ <div class="card-header">
201
+ <h3 class="mb-0">
202
+ <i class="fas fa-chart-bar"></i>
203
+ Topic Analysis Results
204
+ </h3>
205
+ </div>
206
+ <div class="card-body">
207
+ <div id="resultsContainer">
208
+ <div class="text-center text-muted py-5">
209
+ <i class="fas fa-arrow-up fa-2x mb-3"></i>
210
+ <p>Click "Analyze Topics" to see topic analysis results</p>
211
+ </div>
212
+ </div>
213
+ </div>
214
+ </div>
215
+ </div>
216
+ </div>
217
+ </div>
218
+ {% endblock %}
219
+
220
+ {% block extra_scripts %}
221
+ <script>
222
+ // Initialize page
223
/**
 * Page bootstrap for the topic-analysis view: restores carried-over text and
 * wires the sample dropdown (with custom-text backup), the process/clear
 * buttons and keyboard shortcuts.
 */
document.addEventListener('DOMContentLoaded', function() {
    const textInput = document.getElementById('textInput');
    const sampleSelect = document.getElementById('sampleSelect');
    const processBtn = document.getElementById('processBtn');
    const clearBtn = document.getElementById('clearBtn');
    // Per-route sessionStorage key holding the user's own ("Custom") text.
    const routeKey = 'customTextBackup:' + (window.location.pathname || '/topic-analysis');

    // Only carry over when requested
    const shouldCarry = sessionStorage.getItem('carryTextOnNextPage') === '1';
    if (shouldCarry) {
        const storedText = sessionStorage.getItem('analysisText');
        if (storedText) textInput.value = storedText;
        sessionStorage.removeItem('carryTextOnNextPage');
    }

    // Remember the previous dropdown choice so the backup is written only when
    // leaving "Custom"; switching sample -> sample must not overwrite it with
    // the previous sample's text.
    let previousSample = sampleSelect.value;

    // Sample text dropdown handler with Custom restore
    sampleSelect.addEventListener('change', function() {
        const sampleType = sampleSelect.value;
        const cameFromCustom = previousSample === 'Custom';
        previousSample = sampleType;

        if (sampleType === 'Custom') {
            const backup = sessionStorage.getItem(routeKey);
            if (backup !== null) textInput.value = backup;
            return;
        }

        if (cameFromCustom) {
            sessionStorage.setItem(routeKey, textInput.value);
        }

        fetch('/api/sample-text', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({sample_type: sampleType})
        })
            .then(response => response.json())
            .then(data => {
                // Drop a late response if the user picked something else meanwhile.
                if (sampleSelect.value !== sampleType) return;
                textInput.value = data.text || '';
            })
            .catch(error => {
                // Keep the current text; just avoid an unhandled rejection.
                console.error('Failed to load sample text:', error);
            });
    });

    // Keep backup while typing
    textInput.addEventListener('input', function() {
        sessionStorage.setItem(routeKey, textInput.value);
    });

    // Process button handler
    processBtn.addEventListener('click', function() {
        const text = textInput.value.trim();

        if (!text) {
            alert('Please enter some text to analyze.');
            return;
        }

        // Show loading state
        processBtn.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Processing...';
        processBtn.disabled = true;

        // Restore the button when the request settles instead of after a fixed
        // delay, so it cannot be re-clicked while a request is still in flight.
        processTopicAnalysis().finally(() => {
            processBtn.innerHTML = '<i class="fas fa-project-diagram"></i> Analyze Topics';
            processBtn.disabled = false;
        });
    });

    // Clear button handler
    clearBtn.addEventListener('click', function() {
        textInput.value = '';
        document.getElementById('resultsContainer').innerHTML = `
            <div class="text-center text-muted py-5">
                <i class="fas fa-arrow-up fa-2x mb-3"></i>
                <p>Click "Analyze Topics" to see topic analysis results</p>
            </div>
        `;
    });

    // Keyboard shortcuts
    document.addEventListener('keydown', function(e) {
        // Ctrl+Enter to process
        if (e.ctrlKey && e.key === 'Enter') {
            processBtn.click();
        }

        // Ctrl+L to clear
        if (e.ctrlKey && e.key === 'l') {
            e.preventDefault();
            clearBtn.click();
        }
    });
});

/**
 * Send the textarea content to /api/topic-analysis and render the outcome.
 *
 * Shows the spinner while the request is pending, then either the returned
 * result markup or an error alert.
 *
 * @returns {Promise<void>} settles once the request has been handled; it is
 *   already resolved when the textarea is empty (an alert is shown instead).
 */
function processTopicAnalysis() {
    const text = document.getElementById('textInput').value.trim();

    if (!text) {
        alert('Please enter some text to analyze.');
        return Promise.resolve();
    }

    showLoading('resultsContainer');

    return fetch('/api/topic-analysis', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({text: text})
    })
        .then(response => response.json())
        .then(data => {
            if (data.success) {
                displayResults(data.result);
            } else {
                showError(data.error || 'An error occurred while processing the text');
            }
        })
        .catch(error => {
            showError('Failed to process text: ' + error.message);
        })
        .finally(() => {
            // No-op when results or an error have already replaced the spinner.
            hideLoading('resultsContainer');
        });
}
350
+
351
+ // Show loading state
352
/**
 * Replace the content of the given element with a centered loading spinner
 * and the caption "Analyzing topics and themes...".
 *
 * @param {string} elementId - id of the element that receives the spinner.
 *   Nothing happens when no element with that id exists.
 */
function showLoading(elementId) {
    const panel = document.getElementById(elementId);
    if (!panel) {
        return;
    }

    panel.innerHTML = `
        <div class="text-center py-4">
            <div class="spinner-border text-primary" role="status">
                <span class="visually-hidden">Loading...</span>
            </div>
            <p class="mt-2">Analyzing topics and themes...</p>
        </div>
    `;
}
365
+
366
+ // Hide loading state
367
/**
 * Empty the given element, but only while its markup still contains the
 * `spinner-border` marker written by showLoading(). Content without that
 * marker is left untouched.
 *
 * @param {string} elementId - id of the element to clear.
 */
function hideLoading(elementId) {
    const panel = document.getElementById(elementId);
    if (!panel) {
        return;
    }

    const hasSpinner = panel.innerHTML.includes('spinner-border');
    if (hasSpinner) {
        panel.innerHTML = '';
    }
}
373
+
374
+ // Show error message
375
/**
 * Render an error alert inside the given element, replacing its content.
 *
 * The message is inserted as a text node rather than interpolated into
 * innerHTML, so any markup contained in a server- or exception-provided
 * message is displayed literally instead of being parsed as HTML.
 *
 * @param {*} message - text to show after the "Error:" label (stringified).
 * @param {string} [elementId='resultsContainer'] - id of the host element.
 *   Nothing happens when no element with that id exists.
 */
function showError(message, elementId = 'resultsContainer') {
    const element = document.getElementById(elementId);
    if (!element) {
        return;
    }

    const alertBox = document.createElement('div');
    alertBox.className = 'alert alert-danger fade-in';
    // Static, trusted markup only; the dynamic part is appended as text below.
    alertBox.innerHTML = '<i class="fas fa-exclamation-triangle"></i> <strong>Error:</strong> ';
    alertBox.appendChild(document.createTextNode(String(message)));

    element.innerHTML = '';
    element.appendChild(alertBox);
}
386
+
387
+ // Display results
388
/**
 * Show a result inside #resultsContainer and scroll it into view.
 *
 * @param {string} result - markup assigned to innerHTML as-is (not escaped);
 *   presumably trusted server-rendered HTML — verify against the API.
 */
function displayResults(result) {
    const resultsBox = document.getElementById('resultsContainer');
    if (!resultsBox) {
        return;
    }

    resultsBox.innerHTML = result;
    resultsBox.classList.add('fade-in');

    // Bring the freshly rendered results to the top of the viewport.
    resultsBox.scrollIntoView({ behavior: 'smooth', block: 'start' });
}
398
+ </script>
399
+ {% endblock %}
templates/translation.html ADDED
@@ -0,0 +1,468 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends "base.html" %}
2
+
3
+ {% block title %}Machine Translation - NLP Ultimate Tutorial{% endblock %}
4
+
5
+ {% block content %}
6
+ <div class="container">
7
+ <!-- Header Section -->
8
+ <div class="row mb-4">
9
+ <div class="col-12">
10
+ <div class="card">
11
+ <div class="card-header">
12
+ <h1 class="mb-0">
13
+ <i class="fas fa-language"></i>
14
+ Machine Translation
15
+ </h1>
16
+ </div>
17
+ <div class="card-body">
18
+ <p class="lead">Translate text between multiple languages using advanced neural machine translation models.</p>
19
+
20
+ <div class="alert alert-info">
21
+ <i class="fas fa-info-circle"></i>
22
+ <strong>About:</strong> Machine translation converts text from one language to another while preserving meaning and context as accurately as possible.
23
+ </div>
24
+ </div>
25
+ </div>
26
+ </div>
27
+ </div>
28
+
29
+ {% include "_analysis_nav.html" %}
30
+
31
+ <!-- Input Section -->
32
+ <div class="row mb-4">
33
+ <div class="col-12">
34
+ <div class="card">
35
+ <div class="card-header">
36
+ <h3 class="mb-0">
37
+ <i class="fas fa-keyboard"></i>
38
+ Enter text to translate:
39
+ </h3>
40
+ </div>
41
+ <div class="card-body">
42
+ <div class="row mb-3">
43
+ <div class="col-md-8">
44
+ <textarea id="textInput" class="form-control" rows="6" placeholder="Enter text here to translate...">Hello, how are you today? I hope you're doing well.</textarea>
45
+ </div>
46
+ <div class="col-md-4">
47
+ <label for="sampleSelect" class="form-label">Or choose a sample:</label>
48
+ <select id="sampleSelect" class="form-select">
49
+ <option value="Custom">Custom</option>
50
+ <option value="English">English</option>
51
+ <option value="Spanish">Spanish</option>
52
+ <option value="French">French</option>
53
+ <option value="German">German</option>
54
+ </select>
55
+ </div>
56
+ </div>
57
+
58
+ <div class="d-flex justify-content-between align-items-center">
59
+ <div>
60
+ <button id="processBtn" class="btn btn-primary btn-lg">
61
+ <i class="fas fa-language"></i>
62
+ Translate
63
+ </button>
64
+ </div>
65
+ <div>
66
+ <button id="clearBtn" class="btn btn-outline-secondary">
67
+ <i class="fas fa-trash"></i>
68
+ Clear
69
+ </button>
70
+ </div>
71
+ </div>
72
+ </div>
73
+ </div>
74
+ </div>
75
+ </div>
76
+
77
+ <!-- Language Selection Section -->
78
+ <div class="row mb-4">
79
+ <div class="col-12">
80
+ <div class="card">
81
+ <div class="card-header">
82
+ <h3 class="mb-0">
83
+ <i class="fas fa-cog"></i>
84
+ Language Selection
85
+ </h3>
86
+ </div>
87
+ <div class="card-body">
88
+ <div class="row">
89
+ <div class="col-md-6">
90
+ <label for="sourceLang" class="form-label">Source Language</label>
91
+ <select id="sourceLang" class="form-select">
92
+ <option value="auto" selected>Auto-detect</option>
93
+ <option value="en">English</option>
94
+ <option value="es">Spanish</option>
95
+ <option value="fr">French</option>
96
+ <option value="de">German</option>
97
+ <option value="ru">Russian</option>
98
+ <option value="zh">Chinese</option>
99
+ <option value="ar">Arabic</option>
100
+ <option value="hi">Hindi</option>
101
+ <option value="ja">Japanese</option>
102
+ <option value="pt">Portuguese</option>
103
+ <option value="it">Italian</option>
104
+ </select>
105
+ </div>
106
+ <div class="col-md-6">
107
+ <label for="targetLang" class="form-label">Target Language</label>
108
+ <select id="targetLang" class="form-select">
109
+ <option value="en" selected>English</option>
110
+ <option value="es">Spanish</option>
111
+ <option value="fr">French</option>
112
+ <option value="de">German</option>
113
+ <option value="ru">Russian</option>
114
+ <option value="zh">Chinese</option>
115
+ <option value="ar">Arabic</option>
116
+ <option value="hi">Hindi</option>
117
+ <option value="ja">Japanese</option>
118
+ <option value="pt">Portuguese</option>
119
+ <option value="it">Italian</option>
120
+ </select>
121
+ </div>
122
+ </div>
123
+ </div>
124
+ </div>
125
+ </div>
126
+ </div>
127
+
128
+ <!-- Model Info Section -->
129
+ <div class="row mb-4">
130
+ <div class="col-12">
131
+ <div class="card">
132
+ <div class="card-header">
133
+ <h3 class="mb-0">
134
+ <i class="fas fa-info-circle"></i>
135
+ Model Information
136
+ </h3>
137
+ </div>
138
+ <div class="card-body">
139
+ <div class="row">
140
+ <div class="col-md-4">
141
+ <div class="card h-100">
142
+ <div class="card-body text-center">
143
+ <i class="fas fa-brain fa-2x text-primary mb-2"></i>
144
+ <h5>Helsinki-NLP/opus-mt</h5>
145
+ <p class="small">Pre-trained neural machine translation models</p>
146
+ <ul class="list-unstyled small text-start">
147
+ <li>• Transformer-based architecture</li>
148
+ <li>• Multiple language pairs</li>
149
+ <li>• High accuracy translations</li>
150
+ </ul>
151
+ </div>
152
+ </div>
153
+ </div>
154
+ <div class="col-md-4">
155
+ <div class="card h-100">
156
+ <div class="card-body text-center">
157
+ <i class="fas fa-globe fa-2x text-success mb-2"></i>
158
+ <h5>Language Support</h5>
159
+ <p class="small">Comprehensive language pair coverage</p>
160
+ <ul class="list-unstyled small text-start">
161
+ <li>• 12+ languages supported</li>
162
+ <li>• Auto-detection capability</li>
163
+ <li>• Bidirectional translation</li>
164
+ </ul>
165
+ </div>
166
+ </div>
167
+ </div>
168
+ <div class="col-md-4">
169
+ <div class="card h-100">
170
+ <div class="card-body text-center">
171
+ <i class="fas fa-chart-line fa-2x text-info mb-2"></i>
172
+ <h5>Translation Analysis</h5>
173
+ <p class="small">Comprehensive analysis of translation quality</p>
174
+ <ul class="list-unstyled small text-start">
175
+ <li>• Length comparison</li>
176
+ <li>• Language characteristics</li>
177
+ <li>• Translation statistics</li>
178
+ </ul>
179
+ </div>
180
+ </div>
181
+ </div>
182
+ </div>
183
+ </div>
184
+ </div>
185
+ </div>
186
+ </div>
187
+
188
+ <!-- Example Texts Section -->
189
+ <div class="row mb-4">
190
+ <div class="col-12">
191
+ <div class="card">
192
+ <div class="card-header">
193
+ <h3 class="mb-0">
194
+ <i class="fas fa-list"></i>
195
+ Example Texts
196
+ </h3>
197
+ </div>
198
+ <div class="card-body">
199
+ <div class="row">
200
+ <div class="col-md-6">
201
+ <button class="btn btn-outline-primary w-100 mb-2" onclick="setExample('Hello, how are you today? I hope you\'re doing well.', 'auto', 'es')">
202
+ Hello, how are you today? I hope you're doing well.
203
+ </button>
204
+ </div>
205
+ <div class="col-md-6">
206
+ <button class="btn btn-outline-primary w-100 mb-2" onclick="setExample('La vie est belle et pleine de surprises.', 'auto', 'en')">
207
+ La vie est belle et pleine de surprises.
208
+ </button>
209
+ </div>
210
+ <div class="col-md-6">
211
+ <button class="btn btn-outline-primary w-100 mb-2" onclick="setExample('Der schnelle braune Fuchs springt über den faulen Hund.', 'auto', 'fr')">
212
+ Der schnelle braune Fuchs springt über den faulen Hund.
213
+ </button>
214
+ </div>
215
+ <div class="col-md-6">
216
+ <button class="btn btn-outline-primary w-100 mb-2" onclick="setExample('Me gustaría reservar una mesa para dos personas esta noche.', 'auto', 'en')">
217
+ Me gustaría reservar una mesa para dos personas esta noche.
218
+ </button>
219
+ </div>
220
+ </div>
221
+ </div>
222
+ </div>
223
+ </div>
224
+ </div>
225
+
226
+ <!-- Results Section -->
227
+ <div class="row">
228
+ <div class="col-12">
229
+ <div class="card">
230
+ <div class="card-header">
231
+ <h3 class="mb-0">
232
+ <i class="fas fa-chart-bar"></i>
233
+ Translation Results
234
+ </h3>
235
+ </div>
236
+ <div class="card-body">
237
+ <div id="resultsContainer">
238
+ <div class="text-center text-muted py-5">
239
+ <i class="fas fa-arrow-up fa-2x mb-3"></i>
240
+ <p>Click "Translate" to see translation results</p>
241
+ </div>
242
+ </div>
243
+ </div>
244
+ </div>
245
+ </div>
246
+ </div>
247
+ </div>
248
+ {% endblock %}
249
+
250
+ {% block extra_scripts %}
251
+ <script>
252
+ // Initialize page
253
document.addEventListener('DOMContentLoaded', function() {
    // Page wiring for the translation view: restores carried-over text,
    // handles the sample dropdown, and binds the Translate / Clear buttons
    // and their keyboard shortcuts.
    const shouldCarry = sessionStorage.getItem('carryTextOnNextPage') === '1';
    const textInput = document.getElementById('textInput');
    const sampleSelect = document.getElementById('sampleSelect');
    const sourceLang = document.getElementById('sourceLang');
    const targetLang = document.getElementById('targetLang');
    const routeKey = 'customTextBackup:' + (window.location.pathname || '/translation');

    // Persist the current text and language selection for this route.
    function saveBackup() {
        sessionStorage.setItem(routeKey, JSON.stringify({
            text: textInput.value,
            source: sourceLang.value,
            target: targetLang.value
        }));
    }

    if (shouldCarry) {
        // Only carry inputs when navigating via Quick Nav
        const storedText = sessionStorage.getItem('analysisText');
        if (storedText) textInput.value = storedText;
        sessionStorage.removeItem('carryTextOnNextPage');
    } else {
        // Fresh route: reset to defaults
        textInput.value = textInput.defaultValue || '';
        if (sampleSelect) sampleSelect.value = 'Custom';
        if (sourceLang) sourceLang.value = sourceLang.querySelector('option[selected]')?.value || 'auto';
        if (targetLang) targetLang.value = targetLang.querySelector('option[selected]')?.value || 'en';
    }

    // Sample prompts offered by the dropdown
    const samples = {
        'English': { text: 'Hello, how are you today? I hope you\'re doing well.', source: 'auto', target: 'es' },
        'Spanish': { text: 'La vida es bella y llena de sorpresas.', source: 'auto', target: 'en' },
        'French': { text: 'La vie est belle et pleine de surprises.', source: 'auto', target: 'en' },
        'German': { text: 'Der schnelle braune Fuchs springt über den faulen Hund.', source: 'auto', target: 'fr' }
    };

    // Sample text dropdown handler with Custom restore.
    // The dropdown is guarded here the same way it is guarded above.
    if (sampleSelect) {
        // Remember which entry we are leaving so that a sample -> sample
        // switch does not overwrite the user's custom backup with sample text.
        let previousSample = sampleSelect.value;

        sampleSelect.addEventListener('change', function() {
            const sampleType = this.value;
            const leavingCustom = previousSample === 'Custom';
            previousSample = sampleType;

            if (sampleType === 'Custom') {
                const backupJson = sessionStorage.getItem(routeKey);
                if (backupJson) {
                    try {
                        const backup = JSON.parse(backupJson);
                        textInput.value = backup.text || '';
                        if (backup.source) sourceLang.value = backup.source;
                        if (backup.target) targetLang.value = backup.target;
                    } catch {
                        // Corrupt backup: keep whatever is currently in the form.
                    }
                }
                return;
            }

            // Save current custom input before switching away from it
            if (leavingCustom) saveBackup();

            const sample = samples[sampleType];
            if (sample) {
                textInput.value = sample.text;
                sourceLang.value = sample.source;
                targetLang.value = sample.target;
                sessionStorage.setItem('analysisText', textInput.value);
            }
        });
    }

    // While typing or changing languages, keep a backup and analysisText
    textInput.addEventListener('input', function() {
        sessionStorage.setItem('analysisText', textInput.value);
        saveBackup();
    });
    sourceLang.addEventListener('change', saveBackup);
    targetLang.addEventListener('change', saveBackup);

    // Process button handler
    document.getElementById('processBtn').addEventListener('click', function() {
        const text = document.getElementById('textInput').value.trim();

        if (!text) {
            alert('Please enter text to translate.');
            return;
        }

        // Show loading state
        this.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Translating...';
        this.disabled = true;

        // Process translation
        processTranslation();

        // Reset button after a delay
        setTimeout(() => {
            this.innerHTML = '<i class="fas fa-language"></i> Translate';
            this.disabled = false;
        }, 2000);
    });

    // Clear button handler
    document.getElementById('clearBtn').addEventListener('click', function() {
        document.getElementById('textInput').value = '';
        document.getElementById('resultsContainer').innerHTML = `
            <div class="text-center text-muted py-5">
                <i class="fas fa-arrow-up fa-2x mb-3"></i>
                <p>Click "Translate" to see translation results</p>
            </div>
        `;
    });

    // Keyboard shortcuts
    document.addEventListener('keydown', function(e) {
        // Ctrl+Enter to process
        if (e.ctrlKey && e.key === 'Enter') {
            document.getElementById('processBtn').click();
        }

        // Ctrl+L to clear (preventDefault stops the browser's own Ctrl+L action)
        if (e.ctrlKey && e.key === 'l') {
            e.preventDefault();
            document.getElementById('clearBtn').click();
        }
    });
});
372
+
373
+ // Set example text and language settings
374
function setExample(text, sourceLang, targetLang) {
    // Fill the form from one of the "Example Texts" buttons:
    // the text area plus both language dropdowns.
    const fieldValues = {
        textInput: text,
        sourceLang: sourceLang,
        targetLang: targetLang
    };
    for (const [fieldId, value] of Object.entries(fieldValues)) {
        document.getElementById(fieldId).value = value;
    }
}
379
+
380
+ // Process translation
381
function processTranslation() {
    // Send the current text and language pair to /api/translation and render
    // the outcome in #resultsContainer.
    // Returns a Promise that settles once the request has been handled, so
    // callers may chain on completion (existing callers can ignore it).
    const text = document.getElementById('textInput').value.trim();
    const sourceLang = document.getElementById('sourceLang').value;
    const targetLang = document.getElementById('targetLang').value;

    if (!text) {
        alert('Please enter text to translate.');
        return Promise.resolve();
    }

    showLoading('resultsContainer');

    return fetch('/api/translation', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({
            text: text,
            source_lang: sourceLang,
            target_lang: targetLang
        })
    })
    .then(response => response.json().catch(() => {
        // The body was not JSON (for example an HTML error page): report the
        // HTTP status instead of a cryptic parser message.
        throw new Error('Unexpected server response (HTTP ' + response.status + ')');
    }))
    .then(data => {
        if (data.success) {
            displayResults(data.result);
        } else {
            showError(data.error || 'An error occurred while translating text');
        }
    })
    .catch(error => {
        showError('Failed to translate text: ' + error.message);
    })
    .finally(() => {
        // Only clears the container if the spinner is still showing.
        hideLoading('resultsContainer');
    });
}
419
+
420
+ // Show loading state
421
function showLoading(elementId) {
    // Replace the target element's content with a spinner and status line.
    const target = document.getElementById(elementId);
    if (!target) {
        return;
    }
    target.innerHTML = `
        <div class="text-center py-4">
            <div class="spinner-border text-primary" role="status">
                <span class="visually-hidden">Loading...</span>
            </div>
            <p class="mt-2">Translating text...</p>
        </div>
    `;
}
434
+
435
+ // Hide loading state
436
function hideLoading(elementId) {
    // Clear the element only while it still shows the spinner, so results or
    // error markup rendered in the meantime are left untouched.
    const target = document.getElementById(elementId);
    if (!target) {
        return;
    }
    const spinnerVisible = target.innerHTML.includes('spinner-border');
    if (spinnerVisible) {
        target.innerHTML = '';
    }
}
442
+
443
+ // Show error message
444
function showError(message, elementId = 'resultsContainer') {
    // Render an error alert inside the given element (default: the results
    // container). The message is inserted via textContent so that server- or
    // exception-supplied text cannot inject markup into the page.
    const element = document.getElementById(elementId);
    if (!element) {
        return;
    }
    element.innerHTML = `
        <div class="alert alert-danger fade-in">
            <i class="fas fa-exclamation-triangle"></i>
            <strong>Error:</strong> <span class="error-message"></span>
        </div>
    `;
    element.querySelector('.error-message').textContent = String(message);
}
455
+
456
+ // Display results
457
function displayResults(result) {
    // Inject the HTML returned by the API into the results container and
    // bring it into view.
    const resultsArea = document.getElementById('resultsContainer');
    if (!resultsArea) {
        return;
    }

    resultsArea.innerHTML = result;
    resultsArea.classList.add('fade-in');

    // Scroll to results
    resultsArea.scrollIntoView({ behavior: 'smooth', block: 'start' });
}
467
+ </script>
468
+ {% endblock %}
templates/vector_embeddings.html ADDED
@@ -0,0 +1,499 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends "base.html" %}
2
+
3
+ {% block title %}Vector Embeddings & Semantic Search - NLP Ultimate Tutorial{% endblock %}
4
+
5
+ {% block content %}
6
+ <div class="container">
7
+ <!-- Header Section -->
8
+ <div class="row mb-4">
9
+ <div class="col-12">
10
+ <div class="card">
11
+ <div class="card-header">
12
+ <h1 class="mb-0">
13
+ <i class="fas fa-project-diagram"></i>
14
+ Vector Embeddings & Semantic Search
15
+ </h1>
16
+ </div>
17
+ <div class="card-body">
18
+ <p class="lead">Convert text into numerical representations and perform semantic search to find meaningfully similar content.</p>
19
+
20
+ <div class="alert alert-info">
21
+ <i class="fas fa-info-circle"></i>
22
+ <strong>About:</strong> Vector embeddings convert text into numerical representations where similar texts are placed closer together in a high-dimensional space.
23
+ </div>
24
+ </div>
25
+ </div>
26
+ </div>
27
+ </div>
28
+
29
+ {% include "_analysis_nav.html" %}
30
+
31
+ <!-- Input Section -->
32
+ <div class="row mb-4">
33
+ <div class="col-12">
34
+ <div class="card">
35
+ <div class="card-header">
36
+ <h3 class="mb-0">
37
+ <i class="fas fa-keyboard"></i>
38
+ Enter text to analyze:
39
+ </h3>
40
+ </div>
41
+ <div class="card-body">
42
+ <div class="row mb-3">
43
+ <div class="col-md-8">
44
+ <textarea id="textInput" class="form-control" rows="6" placeholder="Enter text to analyze with vector embeddings...">The International Space Station (ISS) is a modular space station in low Earth orbit. It is a multinational collaborative project involving five space agencies: NASA (United States), Roscosmos (Russia), JAXA (Japan), ESA (Europe), and CSA (Canada). The ownership and use of the space station is established by intergovernmental treaties and agreements. The ISS serves as a microgravity and space environment research laboratory in which scientific research is conducted in astrobiology, astronomy, meteorology, physics, and other fields.</textarea>
45
+ </div>
46
+ <div class="col-md-4">
47
+ <label for="sampleSelect" class="form-label">Or choose a sample:</label>
48
+ <select id="sampleSelect" class="form-select">
49
+ <option value="Custom">Custom</option>
50
+ <option value="Space Station">Space Station</option>
51
+ <option value="Python">Python</option>
52
+ <option value="Climate">Climate</option>
53
+ </select>
54
+ </div>
55
+ </div>
56
+
57
+ <div class="d-flex justify-content-between align-items-center">
58
+ <div>
59
+ <button id="processBtn" class="btn btn-primary btn-lg">
60
+ <i class="fas fa-project-diagram"></i>
61
+ Generate Embeddings
62
+ </button>
63
+ </div>
64
+ <div>
65
+ <button id="clearBtn" class="btn btn-outline-secondary">
66
+ <i class="fas fa-trash"></i>
67
+ Clear
68
+ </button>
69
+ </div>
70
+ </div>
71
+ </div>
72
+ </div>
73
+ </div>
74
+ </div>
75
+
76
+
77
+
78
+
79
+ <!-- Results Section -->
80
+ <div class="row">
81
+ <div class="col-12">
82
+ <div class="card">
83
+ <div class="card-header">
84
+ <h3 class="mb-0">
85
+ <i class="fas fa-chart-bar"></i>
86
+ Embedding Analysis Results
87
+ </h3>
88
+ </div>
89
+ <div class="card-body">
90
+ <div id="resultsContainer">
91
+ <div class="text-center text-muted py-5">
92
+ <i class="fas fa-arrow-up fa-2x mb-3"></i>
93
+ <p>Click "Generate Embeddings" to see vector analysis results</p>
94
+ </div>
95
+ </div>
96
+ </div>
97
+ </div>
98
+ </div>
99
+ </div>
100
+ </div>
101
+ {% endblock %}
102
+
103
+ {% block extra_scripts %}
104
+ <script>
105
+ // Initialize page
106
document.addEventListener('DOMContentLoaded', function() {
    // Page wiring for the vector-embeddings view.
    const textInput = document.getElementById('textInput');
    // The search button/input are optional: this template does not render
    // them, so every access below is guarded. An unguarded access would throw
    // and stop the Clear handler and keyboard shortcuts from being registered.
    const searchBtn = document.getElementById('searchBtn');
    const searchInput = document.getElementById('searchInput');

    // Only carry over when using Quick Nav; otherwise leave defaults
    const shouldCarry = sessionStorage.getItem('carryTextOnNextPage') === '1';
    if (shouldCarry) {
        const storedText = sessionStorage.getItem('analysisText');
        if (storedText) textInput.value = storedText;
        sessionStorage.removeItem('carryTextOnNextPage');
    }

    // Sample prompts offered by the dropdown
    const samples = {
        'Space Station': 'The International Space Station (ISS) is a modular space station in low Earth orbit. It is a multinational collaborative project involving five space agencies: NASA (United States), Roscosmos (Russia), JAXA (Japan), ESA (Europe), and CSA (Canada). The ownership and use of the space station is established by intergovernmental treaties and agreements. The ISS serves as a microgravity and space environment research laboratory in which scientific research is conducted in astrobiology, astronomy, meteorology, physics, and other fields.',
        'Python': 'Python is a high-level, general-purpose programming language. Its design philosophy emphasizes code readability with the use of significant indentation. Python is dynamically typed and garbage-collected. It supports multiple programming paradigms, including structured, object-oriented and functional programming. It is often described as a \'batteries included\' language due to its comprehensive standard library.',
        'Climate': 'Climate change refers to long-term shifts in global temperatures and weather patterns. While climate variations are natural, human activities have been the main driver of climate change since the 1800s, primarily due to burning fossil fuels, which generates heat-trapping gases. The effects of climate change include rising sea levels, more frequent extreme weather events, and changes in precipitation patterns.'
    };

    // Sample text dropdown handler
    document.getElementById('sampleSelect').addEventListener('change', function() {
        const sampleType = this.value;

        if (sampleType === 'Custom') {
            textInput.value = '';
        } else if (samples[sampleType]) {
            textInput.value = samples[sampleType];
        }
    });

    // Process button handler
    document.getElementById('processBtn').addEventListener('click', function() {
        const text = textInput.value.trim();

        if (!text) {
            alert('Please enter text to analyze.');
            return;
        }

        // Show loading state
        this.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Generating...';
        this.disabled = true;

        // Process embeddings
        processEmbeddings();

        // Reset button after a delay
        setTimeout(() => {
            this.innerHTML = '<i class="fas fa-project-diagram"></i> Generate Embeddings';
            this.disabled = false;
        }, 2000);
    });

    // Search button handler (only when the page actually has a search button)
    if (searchBtn) {
        searchBtn.addEventListener('click', function() {
            const text = textInput.value.trim();
            const query = searchInput ? searchInput.value.trim() : '';

            if (!text) {
                alert('Please enter text to analyze first.');
                return;
            }

            if (!query) {
                alert('Please enter a search query.');
                return;
            }

            // Show loading state
            this.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Searching...';
            this.disabled = true;

            // Process search
            processSearch();

            // Reset button after a delay
            setTimeout(() => {
                this.innerHTML = '<i class="fas fa-search"></i> Search';
                this.disabled = false;
            }, 2000);
        });
    }

    // Clear button handler
    document.getElementById('clearBtn').addEventListener('click', function() {
        textInput.value = '';
        if (searchInput) searchInput.value = '';
        document.getElementById('resultsContainer').innerHTML = `
            <div class="text-center text-muted py-5">
                <i class="fas fa-arrow-up fa-2x mb-3"></i>
                <p>Click "Generate Embeddings" to see vector analysis results</p>
            </div>
        `;
    });

    // Keyboard shortcuts
    document.addEventListener('keydown', function(e) {
        // Ctrl+Enter to process
        if (e.ctrlKey && e.key === 'Enter') {
            document.getElementById('processBtn').click();
        }

        // Ctrl+L to clear (preventDefault stops the browser's own Ctrl+L action)
        if (e.ctrlKey && e.key === 'l') {
            e.preventDefault();
            document.getElementById('clearBtn').click();
        }
    });
});
214
+
215
+ // Set example text
216
function setExample(text) {
    // Put the given example text into the main text area.
    const inputArea = document.getElementById('textInput');
    inputArea.value = text;
}
219
+
220
+ // Process embeddings
221
function processEmbeddings() {
    // Post the current text to /api/vector-embeddings and render the returned
    // HTML in #resultsContainer.
    // Returns a Promise that settles once the request has been handled
    // (existing callers can ignore it).
    const text = document.getElementById('textInput').value.trim();

    if (!text) {
        alert('Please enter text to analyze.');
        return Promise.resolve();
    }

    showLoading('resultsContainer');

    return fetch('/api/vector-embeddings', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({
            text: text
        })
    })
    .then(response => response.json().catch(() => {
        // Non-JSON body (for example an HTML error page): report the HTTP
        // status instead of a cryptic parser message.
        throw new Error('Unexpected server response (HTTP ' + response.status + ')');
    }))
    .then(data => {
        if (data.success) {
            displayResults(data.result);
        } else {
            console.error('API Error:', data.error);
            showError(data.error || 'An error occurred while generating embeddings');
        }
    })
    .catch(error => {
        showError('Failed to generate embeddings: ' + error.message);
    })
    .finally(() => {
        // Only clears the container if the spinner is still showing.
        hideLoading('resultsContainer');
    });
}
258
+
259
+ // Process search
260
function processSearch() {
    // Post the text plus a search query to /api/vector-embeddings and render
    // the matches via displaySearchResults.
    // Returns a Promise that settles once the request has been handled.
    const text = document.getElementById('textInput').value.trim();
    // The query field may be absent from the page; treat that as "no query"
    // rather than throwing on a null element.
    const searchInput = document.getElementById('searchInput');
    const query = searchInput ? searchInput.value.trim() : '';

    if (!text) {
        alert('Please enter text to analyze first.');
        return Promise.resolve();
    }

    if (!query) {
        alert('Please enter a search query.');
        return Promise.resolve();
    }

    showLoading('resultsContainer');

    return fetch('/api/vector-embeddings', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({
            text: text,
            query: query
        })
    })
    .then(response => response.json().catch(() => {
        throw new Error('Unexpected server response (HTTP ' + response.status + ')');
    }))
    .then(data => {
        if (data.success) {
            displaySearchResults(data.results);
        } else {
            showError(data.error || 'An error occurred while searching');
        }
    })
    .catch(error => {
        showError('Failed to perform search: ' + error.message);
    })
    .finally(() => {
        hideLoading('resultsContainer');
    });
}
301
+
302
+ // Show loading state
303
function showLoading(elementId) {
    // Replace the target element's content with a spinner and status line.
    const target = document.getElementById(elementId);
    if (!target) {
        return;
    }
    target.innerHTML = `
        <div class="text-center py-4">
            <div class="spinner-border text-primary" role="status">
                <span class="visually-hidden">Loading...</span>
            </div>
            <p class="mt-2">Processing...</p>
        </div>
    `;
}
316
+
317
+ // Hide loading state
318
function hideLoading(elementId) {
    // Clear the element only while it still shows the spinner, so results or
    // error markup rendered in the meantime are left untouched.
    const target = document.getElementById(elementId);
    if (!target) {
        return;
    }
    const spinnerVisible = target.innerHTML.includes('spinner-border');
    if (spinnerVisible) {
        target.innerHTML = '';
    }
}
324
+
325
+ // Show error message
326
function showError(message, elementId = 'resultsContainer') {
    // Render an error alert inside the given element (default: the results
    // container). The message is inserted via textContent so that server- or
    // exception-supplied text cannot inject markup into the page.
    const element = document.getElementById(elementId);
    if (!element) {
        return;
    }
    element.innerHTML = `
        <div class="alert alert-danger fade-in">
            <i class="fas fa-exclamation-triangle"></i>
            <strong>Error:</strong> <span class="error-message"></span>
        </div>
    `;
    element.querySelector('.error-message').textContent = String(message);
}
337
+
338
+ // Display results
339
function displayResults(result) {
    // Inject the HTML returned by the API into the results container and
    // bring it into view.
    // The former debug logging called result.substring(), which throws for a
    // non-string result before anything is rendered; it has been removed.
    const container = document.getElementById('resultsContainer');
    if (!container) {
        return;
    }

    container.innerHTML = result;
    container.classList.add('fade-in');

    // Scroll to results
    container.scrollIntoView({ behavior: 'smooth', block: 'start' });
}
352
+
353
+ // Display search results
354
// NOTE(review): a second `function displaySearchResults` is declared further
// down in this same script. Function declarations are hoisted and the later
// one wins, so this version is currently shadowed. Confirm which renderer is
// intended and consolidate.
function displaySearchResults(results) {
    // Render search hits (text plus similarity score) as cards inside
    // #resultsContainer.
    const container = document.getElementById('resultsContainer');
    if (!container) {
        return;
    }

    // Escape user/server supplied text before it is placed into innerHTML.
    const escapeHtml = (value) => {
        const scratch = document.createElement('div');
        scratch.textContent = String(value);
        return scratch.innerHTML;
    };

    let html = '<h4>Search Results:</h4>';

    if (results && results.length > 0) {
        results.forEach((result) => {
            // Clamp to 0-100 so a negative or >1 score cannot produce an
            // invalid progress-bar width.
            const rawPercent = Math.round(result.score * 100);
            const scorePercent = Math.min(100, Math.max(0, rawPercent || 0));
            html += `
                <div class="card mb-2">
                    <div class="card-body">
                        <div class="row">
                            <div class="col-md-8">
                                <p class="mb-1">${escapeHtml(result.text)}</p>
                            </div>
                            <div class="col-md-4">
                                <div class="text-end">
                                    <span class="badge bg-primary">${scorePercent}%</span>
                                </div>
                                <div class="progress mt-1" style="height: 8px;">
                                    <div class="progress-bar" role="progressbar" style="width: ${scorePercent}%"></div>
                                </div>
                            </div>
                        </div>
                    </div>
                </div>
            `;
        });
    } else {
        html += '<div class="alert alert-info">No relevant results found. Try different search terms.</div>';
    }

    container.innerHTML = html;
    container.classList.add('fade-in');

    // Scroll to results
    container.scrollIntoView({ behavior: 'smooth', block: 'start' });
}
393
+
394
+ // Semantic search function (called from the generated search interface)
395
function performSemanticSearch() {
    // Semantic search entry point, called from the search interface that is
    // injected into the page together with the embedding results. Sends the
    // query and the analysed text to /api/semantic-search and renders the hits.
    const searchInput = document.getElementById('search-input');
    const textInput = document.getElementById('textInput');

    if (!searchInput || !textInput) {
        alert('Please generate embeddings first, then try searching.');
        return;
    }

    const query = searchInput.value.trim();
    const context = textInput.value.trim();

    if (!query) {
        alert('Please enter a search query.');
        return;
    }

    if (!context) {
        alert('Please enter text to analyze first.');
        return;
    }

    // Show loading. Both elements come from generated markup and may be
    // missing, so each is checked on its own before being touched.
    const resultsDiv = document.getElementById('search-results');
    const resultsContainer = document.getElementById('results-container');

    if (resultsDiv) {
        resultsDiv.style.display = 'block';
    }
    if (resultsContainer) {
        resultsContainer.innerHTML = `
            <div class="text-center py-3">
                <div class="spinner-border text-warning" role="status">
                    <span class="visually-hidden">Searching...</span>
                </div>
                <p class="mt-2">Searching for semantically similar content...</p>
            </div>
        `;
    }

    // Perform search
    fetch('/api/semantic-search', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({
            context: context,
            query: query
        })
    })
    .then(response => response.json().catch(() => {
        // Non-JSON body (for example an HTML error page): report the HTTP
        // status instead of a cryptic parser message.
        throw new Error('Unexpected server response (HTTP ' + response.status + ')');
    }))
    .then(data => {
        if (data.success) {
            displaySearchResults(data.results);
        } else {
            showSearchError(data.error || 'Search failed');
        }
    })
    .catch(error => {
        showSearchError('Failed to perform search: ' + error.message);
    });
}
456
+
457
function displaySearchResults(results) {
    // Render semantic-search hits. Prefers the generated #results-container
    // and falls back to the page's main #resultsContainer so that callers
    // running before the search UI exists still get visible output.
    const resultsContainer =
        document.getElementById('results-container') ||
        document.getElementById('resultsContainer');
    if (!resultsContainer) {
        return;
    }

    if (!results || results.length === 0) {
        resultsContainer.innerHTML = `
            <div class="text-center py-4">
                <i class="fas fa-search fa-2x text-muted mb-3"></i>
                <p class="text-muted">No similar content found.</p>
            </div>
        `;
        return;
    }

    // Escape user/server supplied text before it is placed into innerHTML.
    const escapeHtml = (value) => {
        const scratch = document.createElement('div');
        scratch.textContent = String(value);
        return scratch.innerHTML;
    };

    let html = '';
    results.forEach((result, index) => {
        const percentage = (result.score * 100).toFixed(1);
        // Badge colour thresholds: > 0.8 success, > 0.6 warning, otherwise secondary.
        const badgeClass = result.score > 0.8 ? 'bg-success' : result.score > 0.6 ? 'bg-warning' : 'bg-secondary';

        html += `
            <div class="mb-3 p-3 border rounded bg-white">
                <div class="d-flex justify-content-between align-items-start mb-2">
                    <h6 class="mb-0 text-primary">Result ${index + 1}</h6>
                    <span class="badge ${badgeClass}">${percentage}% match</span>
                </div>
                <p class="mb-0">${escapeHtml(result.text)}</p>
            </div>
        `;
    });

    resultsContainer.innerHTML = html;
}
488
+
489
function showSearchError(message) {
    // Show a search failure. Prefers the generated #results-container and
    // falls back to the main #resultsContainer; does nothing if neither
    // exists. The message is set via textContent so it cannot inject markup.
    const resultsContainer =
        document.getElementById('results-container') ||
        document.getElementById('resultsContainer');
    if (!resultsContainer) {
        return;
    }
    resultsContainer.innerHTML = `
        <div class="alert alert-danger">
            <i class="fas fa-exclamation-triangle me-2"></i>
            <strong>Search Error:</strong> <span class="error-message"></span>
        </div>
    `;
    resultsContainer.querySelector('.error-message').textContent = String(message);
}
498
+ </script>
499
+ {% endblock %}
utils/__init__.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Utils package for NLP Ultimate Tutorial Flask Application
2
+
3
+ from .helpers import (
4
+ fig_to_html,
5
+ df_to_html_table,
6
+ text_statistics,
7
+ create_text_length_chart,
8
+ format_pos_token,
9
+ create_entity_span,
10
+ create_sentiment_color,
11
+ format_sentiment_score,
12
+ create_progress_bar,
13
+ create_confidence_gauge
14
+ )
15
+
16
+ from .model_loader import (
17
+ download_nltk_resources,
18
+ load_spacy,
19
+ load_sentiment_analyzer,
20
+ load_emotion_classifier,
21
+ load_summarizer,
22
+ load_qa_pipeline,
23
+ load_translator,
24
+ load_text_generator,
25
+ load_zero_shot,
26
+ load_embedding_model,
27
+ initialize_all_models,
28
+ get_model_status,
29
+ clear_models
30
+ )
31
+
32
+ from .visualization import (
33
+ setup_mpl_style,
34
+ create_bar_chart,
35
+ create_horizontal_bar_chart,
36
+ create_pie_chart,
37
+ create_line_chart,
38
+ create_scatter_plot,
39
+ create_heatmap,
40
+ create_word_cloud_placeholder,
41
+ create_network_graph,
42
+ create_gauge_chart,
43
+ create_comparison_chart
44
+ )
45
+
46
+ __all__ = [
47
+ # Helpers
48
+ 'fig_to_html',
49
+ 'df_to_html_table',
50
+ 'text_statistics',
51
+ 'create_text_length_chart',
52
+ 'format_pos_token',
53
+ 'create_entity_span',
54
+ 'create_sentiment_color',
55
+ 'format_sentiment_score',
56
+ 'create_progress_bar',
57
+ 'create_confidence_gauge',
58
+
59
+ # Model Loader
60
+ 'download_nltk_resources',
61
+ 'load_spacy',
62
+ 'load_sentiment_analyzer',
63
+ 'load_emotion_classifier',
64
+ 'load_summarizer',
65
+ 'load_qa_pipeline',
66
+ 'load_translator',
67
+ 'load_text_generator',
68
+ 'load_zero_shot',
69
+ 'load_embedding_model',
70
+ 'initialize_all_models',
71
+ 'get_model_status',
72
+ 'clear_models',
73
+
74
+ # Visualization
75
+ 'setup_mpl_style',
76
+ 'create_bar_chart',
77
+ 'create_horizontal_bar_chart',
78
+ 'create_pie_chart',
79
+ 'create_line_chart',
80
+ 'create_scatter_plot',
81
+ 'create_heatmap',
82
+ 'create_word_cloud_placeholder',
83
+ 'create_network_graph',
84
+ 'create_gauge_chart',
85
+ 'create_comparison_chart'
86
+ ]
utils/helpers.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import matplotlib.pyplot as plt
3
+ import pandas as pd
4
+ from io import BytesIO
5
+ import plotly.graph_objects as go
6
+ import nltk
7
+
8
def fig_to_html(fig, width=None):
    """Render a figure as an inline, base64-encoded PNG ``<img>`` tag.

    Args:
        fig: Object exposing ``savefig`` (a matplotlib figure).
        width: Optional CSS width (e.g. ``"80%"``). When given, the image is
            styled with that width and capped at ``max-width: 100%``.

    Returns:
        The ``<img>`` element as an HTML string.
    """
    png_buffer = BytesIO()
    fig.savefig(png_buffer, format='png', dpi=100, bbox_inches='tight')
    encoded_png = base64.b64encode(png_buffer.getvalue()).decode()

    # The style attribute is only emitted when a width was requested.
    style_attr = f' style="width: {width}; max-width: 100%;"' if width else ""

    return f'<img{style_attr} src="data:image/png;base64,{encoded_png}" alt="Plot">'
21
+
22
def df_to_html_table(df):
    """Render a pandas DataFrame as a Bootstrap-styled HTML table.

    The index column is omitted, the table gets the id ``data-table``, and
    cell contents are emitted without HTML escaping (``escape=False``), so any
    markup already present in the cells is passed through as-is.
    """
    render_options = {
        "index": False,
        "classes": 'table table-striped table-hover',
        "escape": False,
        "table_id": 'data-table',
    }
    return df.to_html(**render_options)
25
+
26
def text_statistics(text):
    """Calculate basic text statistics.

    Args:
        text: The text to measure. ``None`` or an empty string yields all zeros.

    Returns:
        A dict with the keys ``"chars"`` (length of the string), ``"words"``
        (whitespace-separated tokens) and ``"sentences"``.

    Sentences are counted with ``nltk.sent_tokenize``. If the tokenizer is not
    usable (for example its data has not been downloaded), a simple
    punctuation-based split is used instead of reporting zero sentences for
    non-empty text.
    """
    if not text:
        return {"chars": 0, "words": 0, "sentences": 0}

    word_count = len(text.split())
    char_count = len(text)

    try:
        sentence_count = len(nltk.sent_tokenize(text))
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit. Fall back to splitting after ., ! or ? followed by
        # whitespace; approximate, but better than 0.
        import re

        fragments = re.split(r'(?<=[.!?])\s+', text)
        sentence_count = sum(1 for fragment in fragments if fragment.strip())

    return {"chars": char_count, "words": word_count, "sentences": sentence_count}
40
+
41
def create_text_length_chart(text):
    """Create a Plotly bar chart of character, word and sentence counts.

    The counts come from ``text_statistics`` so the chart and the textual
    statistics always agree, and so that empty/``None`` input or an
    unavailable sentence tokenizer no longer raises here.

    Args:
        text: The text to measure.

    Returns:
        A ``plotly.graph_objects.Figure`` with one bar per metric.
    """
    stats = text_statistics(text)

    fig = go.Figure()

    fig.add_trace(go.Bar(
        x=['Characters', 'Words', 'Sentences'],
        y=[stats["chars"], stats["words"], stats["sentences"]],
        marker_color=['#90CAF9', '#1E88E5', '#0D47A1'],
    ))

    fig.update_layout(
        title="Text Length Metrics",
        xaxis_title="Metric",
        yaxis_title="Count",
        template="plotly_white",
        height=400,
    )

    return fig
64
+
65
def get_image_download_link(fig, filename, text):
    """Return the inline HTML for *fig*.

    Compatibility placeholder: ``filename`` and ``text`` are accepted but
    unused; the figure is simply rendered through ``fig_to_html``.
    """
    inline_image_html = fig_to_html(fig)
    return inline_image_html
68
+
69
def get_table_download_link(df, filename, text):
    """Return the HTML table for *df*.

    Compatibility placeholder: ``filename`` and ``text`` are accepted but
    unused; the frame is simply rendered through ``df_to_html_table``.
    """
    table_html = df_to_html_table(df)
    return table_html
72
+
73
def format_pos_token(token, pos, explanation=""):
    """Format a token with its part-of-speech tag as an HTML badge.

    Args:
        token: Token text (taken from user-supplied input).
        pos: Part-of-speech tag; selects the background colour, with
            unknown tags falling back to light grey.
        explanation: Optional tooltip text placed in the ``title`` attribute.

    Returns:
        HTML ``<span>`` string. Token, tag and tooltip are HTML-escaped so
        characters such as ``<``, ``&`` or ``"`` in the analysed text cannot
        break or inject markup.
    """
    import html

    # Background colour per POS tag
    pos_colors = {
        'NOUN': '#E3F2FD',   # Light blue
        'PROPN': '#E3F2FD',  # Light blue (same as NOUN)
        'VERB': '#E8F5E9',   # Light green
        'ADJ': '#FFF8E1',    # Light yellow
        'ADV': '#F3E5F5',    # Light purple
        'ADP': '#EFEBE9',    # Light brown
        'PRON': '#E8EAF6',   # Light indigo
        'DET': '#E0F7FA',    # Light cyan
        'CONJ': '#FBE9E7',   # Light deep orange
        'CCONJ': '#FBE9E7',  # Light deep orange (for compatibility)
        'SCONJ': '#FBE9E7',  # Light deep orange (for compatibility)
        'NUM': '#FFEBEE',    # Light red
        'PART': '#F1F8E9',   # Light light green
        'INTJ': '#FFF3E0',   # Light orange
        'PUNCT': '#FAFAFA',  # Light grey
        'SYM': '#FAFAFA',    # Light grey (same as PUNCT)
        'X': '#FAFAFA',      # Light grey (for other)
    }
    bg_color = pos_colors.get(pos, '#FAFAFA')

    safe_token = html.escape(str(token))
    safe_pos = html.escape(str(pos))
    # The tooltip attribute is only emitted when an explanation is given.
    title_attr = f' title="{html.escape(str(explanation))}"' if explanation else ''

    return (
        f'<span class="pos-token" style="background-color: {bg_color}; border: 1px solid #ccc; '
        f'padding: 3px 6px; margin: 2px; display: inline-block; border-radius: 3px;"{title_attr}>'
        f'{safe_token} <small style="color: #666; font-size: 0.8em;">({safe_pos})</small></span>'
    )
104
+
105
def create_entity_span(text, entity_type, explanation=""):
    """Format a named entity with its type as an HTML badge.

    Args:
        text: Entity text (taken from user-supplied input).
        entity_type: Entity label; selects the background colour, with
            unknown labels falling back to light grey.
        explanation: Optional tooltip text placed in the ``title`` attribute.

    Returns:
        HTML ``<span>`` string. Entity text, label and tooltip are
        HTML-escaped so characters such as ``<``, ``&`` or ``"`` in the
        analysed text cannot break or inject markup.
    """
    import html

    # Background colour per entity type
    entity_colors = {
        'PERSON': '#E3F2FD',       # Light blue
        'ORG': '#E8F5E9',          # Light green
        'GPE': '#FFF8E1',          # Light yellow
        'LOC': '#F3E5F5',          # Light purple
        'PRODUCT': '#EFEBE9',      # Light brown
        'EVENT': '#E8EAF6',        # Light indigo
        'WORK_OF_ART': '#E0F7FA',  # Light cyan
        'LAW': '#FBE9E7',          # Light deep orange
        'LANGUAGE': '#FFEBEE',     # Light red
        'DATE': '#F1F8E9',         # Light light green
        'TIME': '#FFF3E0',         # Light orange
        'PERCENT': '#FAFAFA',      # Light grey
        'MONEY': '#FAFAFA',        # Light grey
        'QUANTITY': '#FAFAFA',     # Light grey
        'ORDINAL': '#FAFAFA',      # Light grey
        'CARDINAL': '#FAFAFA',     # Light grey
    }
    bg_color = entity_colors.get(entity_type, '#FAFAFA')

    safe_text = html.escape(str(text))
    safe_type = html.escape(str(entity_type))
    # The tooltip attribute is only emitted when an explanation is given.
    title_attr = f' title="{html.escape(str(explanation))}"' if explanation else ''

    return (
        f'<span class="entity-token" style="background-color: {bg_color}; border: 1px solid #ccc; '
        f'padding: 3px 6px; margin: 2px; display: inline-block; border-radius: 3px;"{title_attr}>'
        f'{safe_text} <small style="color: #666; font-size: 0.8em;">({safe_type})</small></span>'
    )
135
+
136
def create_sentiment_color(score):
    """Map a sentiment score to a hex colour.

    Scores above 0.1 are green (positive), scores below -0.1 are red
    (negative), and everything in between is orange (neutral).
    """
    positive_green = '#4CAF50'
    negative_red = '#F44336'
    neutral_orange = '#FF9800'

    if score > 0.1:
        return positive_green
    if score < -0.1:
        return negative_red
    return neutral_orange
144
+
145
def format_sentiment_score(score, label):
    """Return a bold HTML span showing *label* and *score* (3 decimals).

    The text colour is chosen by ``create_sentiment_color(score)``.
    """
    text_color = create_sentiment_color(score)
    inline_style = f'color: {text_color}; font-weight: bold;'
    return f'<span style="{inline_style}">{label} ({score:.3f})</span>'
149
+
150
def create_progress_bar(value, max_value=1.0, color='#1976D2'):
    """Create an HTML (Bootstrap) progress bar.

    Args:
        value: Current value; shown inside the bar with three decimals.
        max_value: Value that corresponds to a full bar.
        color: CSS background colour of the bar.

    Returns:
        HTML string for the progress bar. The bar width is clamped to
        0-100%, and a zero ``max_value`` yields an empty bar instead of
        raising ``ZeroDivisionError``.
    """
    if max_value:
        percentage = max(0.0, min(100.0, (value / max_value) * 100))
    else:
        percentage = 0.0

    return f'''
    <div class="progress mb-2" style="height: 20px;">
        <div class="progress-bar" role="progressbar" style="width: {percentage}%; background-color: {color};"
             aria-valuenow="{value}" aria-valuemin="0" aria-valuemax="{max_value}">
            {value:.3f}
        </div>
    </div>
    '''
161
+
162
def create_confidence_gauge(score, label):
    """Return an HTML snippet showing *score* as a coloured percentage.

    Colour thresholds: above 0.7 green, above 0.4 orange, otherwise red.
    *label* is rendered underneath in muted small text.
    """
    if score > 0.7:
        color = '#4CAF50'
    elif score > 0.4:
        color = '#FF9800'
    else:
        color = '#F44336'

    # CSS class suffix is the hex colour without the leading '#'.
    class_suffix = color.replace('#', '')

    return f'''
    <div class="text-center">
        <div class="display-6 text-{class_suffix}" style="color: {color};">
            {score:.1%}
        </div>
        <div class="small text-muted">{label}</div>
    </div>
    '''
utils/model_loader.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import nltk
2
+ import spacy
3
+ from transformers import pipeline
4
+
5
+ # Global models dictionary for persistent access
6
+ models = {
7
+ "nlp": None,
8
+ "sentiment_analyzer": None,
9
+ "emotion_classifier": None,
10
+ "summarizer": None,
11
+ "qa_pipeline": None,
12
+ "translation_pipeline": None,
13
+ "text_generator": None,
14
+ "zero_shot": None,
15
+ "embedding_model": None
16
+ }
17
+
18
def download_nltk_resources():
    """Ensure the NLTK resources used by the app are present.

    Each resource is looked up under its NLTK data category; when the
    lookup raises ``LookupError`` the resource is downloaded.
    """
    # resource name -> path probed with nltk.data.find
    resource_paths = {
        'punkt': 'tokenizers/punkt',
        'stopwords': 'corpora/stopwords',
        'vader_lexicon': 'sentiment/vader_lexicon',
        'wordnet': 'corpora/wordnet',
        'averaged_perceptron_tagger': 'taggers/averaged_perceptron_tagger',
        'sentiwordnet': 'corpora/sentiwordnet',
    }
    for resource, data_path in resource_paths.items():
        try:
            nltk.data.find(data_path)
        except LookupError:
            print(f"Downloading required NLTK resource: {resource}")
            nltk.download(resource)
36
+
37
def load_spacy():
    """Load the spaCy ``en_core_web_sm`` model once and cache it in ``models``.

    Returns:
        The cached spaCy pipeline, or ``None`` if loading failed. A missing
        model (``OSError``) prints the install hint; any other failure prints
        the actual error instead of being mislabelled as "model not found".
    """
    if models["nlp"] is None:
        try:
            models["nlp"] = spacy.load("en_core_web_sm")
        except OSError:
            print("SpaCy model not found. Please run: python -m spacy download en_core_web_sm")
        except Exception as e:
            print(f"Failed to load spaCy model: {e}")
    return models["nlp"]
45
+
46
def load_sentiment_analyzer():
    """Return the cached sentiment-analysis pipeline, creating it on first use.

    On failure the error is printed and ``None`` is returned.
    """
    cached = models["sentiment_analyzer"]
    if cached is not None:
        return cached

    try:
        models["sentiment_analyzer"] = pipeline(
            "sentiment-analysis",
            model="distilbert-base-uncased-finetuned-sst-2-english",
        )
    except Exception as err:
        print(f"Failed to load sentiment analyzer: {err}")
    return models["sentiment_analyzer"]
54
+
55
def load_emotion_classifier():
    """Return the cached emotion classifier, creating it on first use.

    The pipeline is configured to return scores for every label. On failure
    the error is printed and ``None`` is returned.
    """
    cached = models["emotion_classifier"]
    if cached is not None:
        return cached

    pipeline_kwargs = {
        "model": "cardiffnlp/twitter-roberta-base-emotion",
        "return_all_scores": True,
    }
    try:
        models["emotion_classifier"] = pipeline("text-classification", **pipeline_kwargs)
    except Exception as err:
        print(f"Failed to load emotion classifier: {err}")
    return models["emotion_classifier"]
67
+
68
def load_summarizer():
    """Return the cached summarization pipeline, creating it on first use.

    On failure the error is printed and ``None`` is returned.
    """
    cached = models["summarizer"]
    if cached is not None:
        return cached

    try:
        models["summarizer"] = pipeline(
            "summarization",
            model="facebook/bart-large-cnn",
        )
    except Exception as err:
        print(f"Failed to load summarizer: {err}")
    return models["summarizer"]
76
+
77
def load_qa_pipeline():
    """Return the cached question-answering pipeline, creating it on first use.

    Unlike the other loaders in this module, a load failure is printed and
    then re-raised to the caller.
    """
    if models["qa_pipeline"] is not None:
        return models["qa_pipeline"]

    qa_checkpoint = "deepset/roberta-base-squad2"  # swap for a different model if needed
    try:
        from transformers import pipeline

        models["qa_pipeline"] = pipeline(
            "question-answering",
            model=qa_checkpoint,
            tokenizer=qa_checkpoint,
        )
    except Exception as e:
        print(f"Error loading QA pipeline: {e}")
        models["qa_pipeline"] = None
        raise
    return models["qa_pipeline"]
94
+
95
def load_translation_pipeline():
    """Return the cached English-to-French translation pipeline.

    The pipeline is created on first use; on failure the error is printed
    and ``None`` is returned.
    """
    cached = models["translation_pipeline"]
    if cached is not None:
        return cached

    try:
        models["translation_pipeline"] = pipeline(
            "translation_en_to_fr",
            model="Helsinki-NLP/opus-mt-en-fr",
        )
    except Exception as err:
        print(f"Failed to load translation model: {err}")
    return models["translation_pipeline"]
103
+
104
def load_translator(source_lang="auto", target_lang="en"):
    """
    Load a machine translation model for the given language pair.

    Args:
        source_lang (str): Source language code, or 'auto' to use the
            multilingual-to-English model.
        target_lang (str): Target language code. NOTE: ignored when
            ``source_lang`` is 'auto', because the multilingual model only
            translates into English.

    Returns:
        A translation pipeline. If the pair-specific model cannot be loaded,
        the multilingual-to-English model is tried; if that fails too, a
        callable is returned that yields a single
        ``{"translation_text": <error message>}`` entry.
    """
    fallback_model = "Helsinki-NLP/opus-mt-mul-en"  # Multilingual to English

    try:
        # Imported inside the try so a missing/broken transformers install
        # degrades to the error translator instead of raising.
        from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer

        if source_lang == "auto":
            return pipeline("translation", model=fallback_model)

        # Specific language pair
        model_name = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        return pipeline("translation", model=model, tokenizer=tokenizer)
    except Exception as primary_error:
        # Capture the text now: the exception name is unbound as soon as the
        # except clause exits, so it must not be referenced lazily later.
        primary_message = str(primary_error)

    try:
        from transformers import pipeline

        return pipeline("translation", model=fallback_model)
    except Exception as fallback_error:
        error_text = (
            f"Error loading translation model: {primary_message}. "
            f"Fallback also failed: {fallback_error}"
        )

    class ErrorTranslator:
        """Stand-in translator that reports why no model could be loaded."""

        def __call__(self, text, **kwargs):
            return [{"translation_text": error_text}]

    return ErrorTranslator()
148
+
149
def load_text_generator():
    """Return the cached GPT-2 text-generation pipeline, creating it on first use.

    On failure the error is printed and ``None`` is returned.
    """
    cached = models["text_generator"]
    if cached is not None:
        return cached

    try:
        models["text_generator"] = pipeline("text-generation", model="gpt2")
    except Exception as err:
        print(f"Failed to load text generator: {err}")
    return models["text_generator"]
157
+
158
def load_zero_shot():
    """Return the cached zero-shot classification pipeline, creating it on first use.

    On failure the error is printed and ``None`` is returned.
    """
    cached = models["zero_shot"]
    if cached is not None:
        return cached

    try:
        models["zero_shot"] = pipeline(
            "zero-shot-classification",
            model="facebook/bart-large-mnli",
        )
    except Exception as err:
        print(f"Failed to load zero-shot classifier: {err}")
    return models["zero_shot"]
166
+
167
def load_embedding_model():
    """Return the cached sentence-embedding model used for semantic search.

    ``sentence_transformers`` is imported lazily on first use; on failure
    the error is printed and the (unset) cache entry is returned.
    """
    cached = models.get("embedding_model")
    if cached is not None:
        return cached

    try:
        from sentence_transformers import SentenceTransformer

        models["embedding_model"] = SentenceTransformer('all-MiniLM-L6-v2')
    except Exception as err:
        print(f"Failed to load embedding model: {err}")
    return models["embedding_model"]
176
+
177
def initialize_all_models():
    """Eagerly load every model so later requests do not pay the load cost.

    Downloads NLTK resources, then calls each loader in turn and prints a
    per-model status line. Most loaders swallow their own errors and return
    ``None``, so success is only reported when a model object actually came
    back; a ``None`` result is reported as a failure.
    """
    print("Initializing NLP models...")

    # Download NLTK resources first
    download_nltk_resources()

    def _load_and_report(loader_func, loaded_msg, failed_msg):
        """Run one loader and print whether it really produced a model."""
        try:
            model = loader_func()
        except Exception as e:
            print(f"{failed_msg}: {e}")
            return
        if model is None:
            print(f"{failed_msg}: loader returned no model")
        else:
            print(loaded_msg)

    # Load spaCy model
    _load_and_report(load_spacy, "✓ spaCy model loaded", "✗ Failed to load spaCy")

    # Load transformer models (these might take time)
    models_to_load = [
        ("Sentiment Analyzer", load_sentiment_analyzer),
        ("Emotion Classifier", load_emotion_classifier),
        ("Summarizer", load_summarizer),
        ("QA Pipeline", load_qa_pipeline),
        ("Text Generator", load_text_generator),
        ("Zero-shot Classifier", load_zero_shot),
        ("Embedding Model", load_embedding_model),
    ]

    for name, loader_func in models_to_load:
        _load_and_report(loader_func, f"✓ {name} loaded", f"✗ Failed to load {name}")

    print("Model initialization complete!")
210
+
211
def get_model_status():
    """Return a dict mapping each model slot name to whether it is loaded."""
    return {slot: loaded is not None for slot, loaded in models.items()}
217
+
218
def clear_models():
    """Reset every slot in ``models`` to ``None`` so the models can be freed."""
    # dict.fromkeys builds {slot: None} for every existing slot.
    models.update(dict.fromkeys(models))
    print("All models cleared from memory")
utils/model_loader_hf.py ADDED
@@ -0,0 +1,267 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Optimized model loader for Hugging Face Spaces with memory management
3
+ """
4
+ import os
5
+ import gc
6
+ import psutil
7
+ import nltk
8
+ import spacy
9
+ from transformers import pipeline, AutoTokenizer, AutoModel
10
+ import torch
11
+ from functools import lru_cache
12
+ import warnings
13
+ warnings.filterwarnings("ignore")
14
+
15
+ # Set device to CPU for HF Spaces (unless GPU is available)
16
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
17
+ print(f"Using device: {DEVICE}")
18
+
19
+ # Model cache with memory-conscious loading
20
class ModelCache:
    """Bounded in-process model cache.

    Holds at most ``max_models`` entries. When the cache is full, the entry
    with the lowest access count (least frequently used) is evicted before a
    new model is loaded.
    """

    def __init__(self, max_models=3):
        self.models = {}          # model name -> loaded model object
        self.max_models = max_models
        self.access_count = {}    # model name -> number of load_model() hits

    def get_memory_usage(self):
        """Get current process memory usage (RSS) in MB"""
        process = psutil.Process(os.getpid())
        return process.memory_info().rss / 1024 / 1024

    def cleanup_least_used(self):
        """Evict the least frequently used model if the cache is full"""
        # Nothing to evict from an empty cache (also covers max_models <= 0).
        if not self.models or len(self.models) < self.max_models:
            return

        # Lowest access count wins; ties go to the oldest entry.
        model_name = min(self.access_count, key=self.access_count.get)

        print(f"Removing {model_name} from cache to free memory")
        del self.models[model_name]
        del self.access_count[model_name]

        # Force garbage collection so the evicted model is released promptly
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    def load_model(self, model_name, loader_func):
        """Load model with caching and memory management.

        Args:
            model_name: Cache key for the model.
            loader_func: Zero-argument callable that builds the model.

        Returns:
            The cached or freshly loaded model, or ``None`` when the loader
            raised or produced no model. Failed loads are never cached, so a
            later call retries instead of occupying a cache slot with ``None``.
        """
        if model_name in self.models:
            self.access_count[model_name] += 1
            return self.models[model_name]

        # Check memory before loading
        memory_before = self.get_memory_usage()
        print(f"Memory before loading {model_name}: {memory_before:.1f}MB")

        # Make room first so the old and new model are not resident together
        self.cleanup_least_used()

        try:
            model = loader_func()
        except Exception as e:
            print(f"Failed to load {model_name}: {str(e)}")
            return None

        if model is None:
            print(f"Failed to load {model_name}: loader returned no model")
            return None

        self.models[model_name] = model
        self.access_count[model_name] = 1

        memory_after = self.get_memory_usage()
        print(f"Memory after loading {model_name}: {memory_after:.1f}MB")

        return model
73
+
74
+ # Global model cache
75
+ model_cache = ModelCache(max_models=3)
76
+
77
@lru_cache(maxsize=1)
def download_nltk_resources():
    """Ensure the required NLTK resources are present (runs once per process).

    Each resource is probed under its NLTK data category and downloaded
    quietly when the lookup raises ``LookupError``.
    """
    # resource name -> path probed with nltk.data.find
    resource_paths = {
        'punkt': 'tokenizers/punkt',
        'stopwords': 'corpora/stopwords',
        'vader_lexicon': 'sentiment/vader_lexicon',
        'wordnet': 'corpora/wordnet',
        'averaged_perceptron_tagger': 'taggers/averaged_perceptron_tagger',
    }

    for resource, data_path in resource_paths.items():
        try:
            nltk.data.find(data_path)
        except LookupError:
            print(f"Downloading NLTK resource: {resource}")
            nltk.download(resource, quiet=True)
95
+
96
def load_spacy():
    """Load the spaCy ``en_core_web_sm`` model through ``model_cache``.

    Caching is left entirely to ``model_cache``. An additional
    ``functools.lru_cache`` on this function would keep returning (and keep
    alive) the first result, including a ``None`` failure, even after the
    model cache evicted or cleared the entry.

    Returns:
        The spaCy pipeline, or ``None`` if the model is not installed.
    """
    def _load_spacy():
        try:
            return spacy.load("en_core_web_sm")
        except OSError:
            print("SpaCy model not found. Please install: python -m spacy download en_core_web_sm")
            return None

    return model_cache.load_model("spacy", _load_spacy)
107
+
108
def load_sentiment_analyzer():
    """Return the sentiment-analysis pipeline via ``model_cache``."""
    def _build_pipeline():
        # transformers convention: 0 selects the first GPU, -1 the CPU
        device_index = 0 if DEVICE == "cuda" else -1
        return pipeline(
            "sentiment-analysis",
            model="cardiffnlp/twitter-roberta-base-sentiment-latest",
            device=device_index,
            max_length=512,
            truncation=True,
        )

    return model_cache.load_model("sentiment", _build_pipeline)
120
+
121
def load_summarizer():
    """Return the summarization pipeline via ``model_cache``."""
    def _build_pipeline():
        # transformers convention: 0 selects the first GPU, -1 the CPU
        device_index = 0 if DEVICE == "cuda" else -1
        return pipeline(
            "summarization",
            model="facebook/bart-large-cnn",
            device=device_index,
            max_length=512,
            truncation=True,
        )

    return model_cache.load_model("summarizer", _build_pipeline)
133
+
134
def load_qa_pipeline():
    """Return the question-answering pipeline via ``model_cache``."""
    def _build_pipeline():
        # transformers convention: 0 selects the first GPU, -1 the CPU
        device_index = 0 if DEVICE == "cuda" else -1
        return pipeline(
            "question-answering",
            model="deepset/roberta-base-squad2",
            device=device_index,
            max_length=512,
            truncation=True,
        )

    return model_cache.load_model("qa", _build_pipeline)
146
+
147
def load_text_generator():
    """Return the GPT-2 text-generation pipeline via ``model_cache``."""
    def _build_pipeline():
        # transformers convention: 0 selects the first GPU, -1 the CPU
        device_index = 0 if DEVICE == "cuda" else -1
        return pipeline(
            "text-generation",
            model="gpt2",
            device=device_index,
            max_length=256,
            truncation=True,
            pad_token_id=50256,
        )

    return model_cache.load_model("generator", _build_pipeline)
160
+
161
def load_zero_shot():
    """Return the zero-shot classification pipeline via ``model_cache``."""
    def _build_pipeline():
        # transformers convention: 0 selects the first GPU, -1 the CPU
        device_index = 0 if DEVICE == "cuda" else -1
        return pipeline(
            "zero-shot-classification",
            model="facebook/bart-large-mnli",
            device=device_index,
            max_length=512,
            truncation=True,
        )

    return model_cache.load_model("zero_shot", _build_pipeline)
173
+
174
def load_embedding_model():
    """Return the sentence-embedding model via ``model_cache``.

    ``sentence_transformers`` is imported lazily, only when the model
    actually has to be built.
    """
    def _build_encoder():
        from sentence_transformers import SentenceTransformer

        encoder = SentenceTransformer('all-MiniLM-L6-v2', device=DEVICE)
        return encoder

    return model_cache.load_model("embedding", _build_encoder)
181
+
182
def load_translation_pipeline(source_lang="auto", target_lang="en"):
    """Return a translation pipeline for the language pair via ``model_cache``.

    The multilingual-to-English model is used when the source is ``"auto"``
    or the target is ``"en"``; otherwise the pair-specific Helsinki-NLP
    model is requested. Build errors are printed and yield ``None``.
    """
    cache_key = f"translation_{source_lang}_{target_lang}"

    def _build_translator():
        try:
            wants_multilingual = source_lang == "auto" or target_lang == "en"
            model_name = (
                "Helsinki-NLP/opus-mt-mul-en"
                if wants_multilingual
                else f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
            )
            device_index = 0 if DEVICE == "cuda" else -1
            return pipeline(
                "translation",
                model=model_name,
                device=device_index,
                max_length=512,
                truncation=True,
            )
        except Exception as e:
            print(f"Translation model error: {e}")
            return None

    return model_cache.load_model(cache_key, _build_translator)
203
+
204
def get_memory_status():
    """Return process memory figures plus the current model-cache contents.

    Keys: ``rss_mb``, ``vms_mb`` (bytes converted to MB), ``percent``,
    ``loaded_models`` (cache keys) and ``cache_size``.
    """
    current_process = psutil.Process(os.getpid())
    usage = current_process.memory_info()
    cached_names = list(model_cache.models.keys())

    status = {}
    status["rss_mb"] = usage.rss / 1024 / 1024
    status["vms_mb"] = usage.vms / 1024 / 1024
    status["percent"] = current_process.memory_percent()
    status["loaded_models"] = cached_names
    status["cache_size"] = len(model_cache.models)
    return status
216
+
217
def clear_model_cache():
    """Drop every cached model and its access counter, then reclaim memory."""
    for registry in (model_cache.models, model_cache.access_count):
        registry.clear()

    gc.collect()
    if torch.cuda.is_available():
        # Also release cached CUDA blocks held by the allocator.
        torch.cuda.empty_cache()
    print("Model cache cleared")
225
+
226
def initialize_essential_models():
    """Initialize only the most essential models for startup.

    Downloads NLTK resources, then loads spaCy and the sentiment analyzer.
    The loaders return ``None`` on failure rather than raising, so success
    is only reported when a model object actually came back.
    """
    print("Initializing essential models for Hugging Face Spaces...")

    # Download NLTK resources
    download_nltk_resources()
    print("✓ NLTK resources downloaded")

    # spaCy has a small footprint; the sentiment analyzer is the most used model
    startup_loaders = (
        (load_spacy, "✓ spaCy model loaded", "✗ spaCy failed"),
        (load_sentiment_analyzer, "✓ Sentiment analyzer loaded", "✗ Sentiment analyzer failed"),
    )
    for loader_func, loaded_msg, failed_msg in startup_loaders:
        try:
            model = loader_func()
        except Exception as e:
            print(f"{failed_msg}: {e}")
            continue
        if model is None:
            print(f"{failed_msg}: loader returned no model")
        else:
            print(loaded_msg)

    print(f"Memory status: {get_memory_status()}")
    print("Essential models initialized!")
250
+
251
+ # Lazy loading functions for other models
252
def ensure_model_loaded(model_name, loader_func):
    """Return the cached model for *model_name*, loading it first if absent.

    ``loader_func`` is called with no arguments when the key is missing; the
    result is then read back from ``model_cache.models`` (``None`` if the
    loader did not store anything under that key).
    """
    already_cached = model_name in model_cache.models
    if not already_cached:
        print(f"Loading {model_name} on demand...")
        loader_func()
    return model_cache.models.get(model_name)
258
+
259
+ # Model status for debugging
260
def get_model_status():
    """Return a debugging snapshot of the model cache.

    Keys: ``loaded_models``, ``access_counts`` (a copy), ``memory_usage``
    (from ``get_memory_status``) and ``device``.
    """
    loaded_names = list(model_cache.models.keys())
    counts_snapshot = model_cache.access_count.copy()
    memory_snapshot = get_memory_status()
    return {
        "loaded_models": loaded_names,
        "access_counts": counts_snapshot,
        "memory_usage": memory_snapshot,
        "device": DEVICE,
    }
utils/visualization.py ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib.pyplot as plt
2
+ import seaborn as sns
3
+
4
def apply_custom_css():
    """Load custom CSS for the Flask interface.

    Reads ``static/css/style.css`` relative to the current working
    directory, decoding it explicitly as UTF-8 so the result does not depend
    on the platform's default encoding.

    Returns:
        The stylesheet text, or ``""`` (after printing a warning) when the
        file cannot be read.
    """
    css_file_path = "static/css/style.css"
    try:
        with open(css_file_path, "r", encoding="utf-8") as f:
            return f.read()
    except Exception as e:
        print(f"Warning: Could not load custom CSS: {e}")
        return ""
13
+
14
def setup_mpl_style():
    """Setup matplotlib style for consistent visualizations.

    Applies the seaborn whitegrid style when available (falling back to
    matplotlib's default style otherwise) and then sets shared rcParams for
    figure size, font sizes and hidden top/right spines.
    """
    try:
        plt.style.use('seaborn-v0_8-whitegrid')
        sns.set_style("whitegrid")
    except Exception:
        # Fallback if seaborn style is not available. Catching Exception
        # rather than everything keeps Ctrl-C / SystemExit working.
        plt.style.use('default')

    # Configure matplotlib for better visuals
    plt.rcParams.update({
        'figure.figsize': (10, 6),
        'axes.labelsize': 12,
        'axes.titlesize': 14,
        'xtick.labelsize': 10,
        'ytick.labelsize': 10,
        'legend.fontsize': 10,
        'axes.spines.top': False,
        'axes.spines.right': False,
    })
32
+
33
def create_bar_chart(labels, values, title, xlabel, ylabel, color='#1976D2'):
    """Draw a vertical bar chart and return the matplotlib figure.

    X tick labels are rotated 45 degrees and right-aligned.
    """
    setup_mpl_style()
    figure, axes = plt.subplots(figsize=(10, 6))
    axes.bar(labels, values, color=color)

    axes.set_title(title)
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()

    return figure
46
+
47
def create_horizontal_bar_chart(labels, values, title, xlabel, ylabel, color='#1976D2'):
    """Draw a horizontal bar chart and return the matplotlib figure.

    The y axis is inverted so the first label appears at the top.
    """
    setup_mpl_style()
    figure, axes = plt.subplots(figsize=(10, 6))
    axes.barh(labels, values, color=color)

    axes.set_title(title)
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)
    axes.invert_yaxis()
    plt.tight_layout()

    return figure
60
+
61
def create_pie_chart(labels, values, title, colors=None):
    """Draw a pie chart with percentage labels and return the figure.

    When *colors* is ``None`` a built-in eight-colour palette is used.
    Percentage labels are drawn bold and white.
    """
    setup_mpl_style()
    figure, axes = plt.subplots(figsize=(8, 8))

    default_palette = ['#1976D2', '#4CAF50', '#FF9800', '#F44336', '#9C27B0', '#00BCD4', '#FFC107', '#795548']
    if colors is None:
        colors = default_palette

    _wedges, _label_texts, percent_texts = axes.pie(
        values, labels=labels, autopct='%1.1f%%', colors=colors
    )
    axes.set_title(title)

    # White bold percentages stay readable on the coloured wedges
    for percent_text in percent_texts:
        percent_text.set_color('white')
        percent_text.set_fontweight('bold')

    plt.tight_layout()
    return figure
79
+
80
def create_line_chart(x_values, y_values, title, xlabel, ylabel, color='#1976D2'):
    """Draw a line chart with circle markers and return the figure."""
    setup_mpl_style()
    figure, axes = plt.subplots(figsize=(10, 6))
    axes.plot(x_values, y_values, color=color, linewidth=2, marker='o')

    axes.set_title(title)
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)
    axes.grid(True, alpha=0.3)
    plt.tight_layout()

    return figure
93
+
94
def create_scatter_plot(x_values, y_values, title, xlabel, ylabel, color='#1976D2'):
    """Draw a semi-transparent scatter plot and return the figure."""
    setup_mpl_style()
    figure, axes = plt.subplots(figsize=(10, 6))
    axes.scatter(x_values, y_values, color=color, alpha=0.6, s=50)

    axes.set_title(title)
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)
    axes.grid(True, alpha=0.3)
    plt.tight_layout()

    return figure
107
+
108
def create_heatmap(data, title, xlabel, ylabel, cmap='YlGnBu'):
    """Draw *data* as a heatmap with a colour bar and return the figure."""
    setup_mpl_style()
    figure, axes = plt.subplots(figsize=(10, 8))

    image = axes.imshow(data, cmap=cmap, aspect='auto')

    # Colour bar labelled "Value"
    colorbar = axes.figure.colorbar(image, ax=axes)
    colorbar.ax.set_ylabel('Value', rotation=-90, va="bottom")

    axes.set_title(title)
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)

    plt.tight_layout()
    return figure
125
+
126
def create_word_cloud_placeholder(text, title="Word Cloud"):
    """Return a placeholder figure standing in for a word cloud.

    The figure shows the title and the first 100 characters of *text* in a
    rounded light-blue box; no actual word cloud is computed.
    """
    setup_mpl_style()
    figure, axes = plt.subplots(figsize=(10, 6))

    preview = f"Word Cloud: {title}\n\n{text[:100]}..."
    box_style = dict(boxstyle="round,pad=0.3", facecolor="lightblue", alpha=0.7)
    axes.text(0.5, 0.5, preview, ha='center', va='center', fontsize=12, bbox=box_style)

    axes.set_xlim(0, 1)
    axes.set_ylim(0, 1)
    axes.axis('off')
    axes.set_title(title)

    plt.tight_layout()
    return figure
143
+
144
def create_network_graph(edges, nodes, title="Network Graph"):
    """Create a network graph visualization.

    Nodes are placed evenly on a circle of radius 0.3 centred at (0.5, 0.5);
    each edge is drawn as a straight line between its first two elements.

    Args:
        edges: Iterable of sequences whose first two items are node ids.
            Endpoints not present in *nodes* are drawn from/to (0, 0).
        nodes: Sequence of hashable node ids.
        title: Figure title.

    Returns:
        The matplotlib figure. Nothing is drawn unless both *edges* and
        *nodes* are non-empty.
    """
    # The module never imports numpy, so the layout uses the stdlib math module.
    import math

    setup_mpl_style()
    fig, ax = plt.subplots(figsize=(12, 8))

    # Simple network visualization
    if edges and nodes:
        # Circular layout
        pos = {}
        for i, node in enumerate(nodes):
            angle = 2 * math.pi * i / len(nodes)
            pos[node] = (0.5 + 0.3 * math.cos(angle), 0.5 + 0.3 * math.sin(angle))

        # Draw edges
        for edge in edges:
            if len(edge) >= 2:
                x1, y1 = pos.get(edge[0], (0, 0))
                x2, y2 = pos.get(edge[1], (0, 0))
                ax.plot([x1, x2], [y1, y2], 'k-', alpha=0.5, linewidth=1)

        # Draw nodes
        for node, (x, y) in pos.items():
            ax.scatter(x, y, s=200, c='lightblue', edgecolors='black', linewidth=2)
            ax.text(x, y, str(node), ha='center', va='center', fontsize=8)

    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.axis('off')
    ax.set_title(title)

    plt.tight_layout()
    return fig
176
+
177
def create_gauge_chart(value, max_value=1.0, title="Gauge Chart"):
    """Create a gauge chart visualization.

    Draws a half-circle band on polar axes, coloured by ``value / max_value``
    (above 0.7 green, above 0.4 orange, otherwise red), with a black
    indicator line and the value printed with two decimals.

    Args:
        value: Current reading.
        max_value: Reading that corresponds to a full gauge; a zero value is
            treated as an empty gauge instead of raising ``ZeroDivisionError``.
        title: Figure title.

    Returns:
        The matplotlib figure.
    """
    # The module never imports numpy at file level, so import it here.
    import numpy as np

    setup_mpl_style()
    fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(projection='polar'))

    # Create gauge
    theta = np.linspace(0, np.pi, 100)
    r = np.ones_like(theta)

    fraction = value / max_value if max_value else 0.0

    # Color based on value
    if fraction > 0.7:
        color = '#4CAF50'  # Green
    elif fraction > 0.4:
        color = '#FF9800'  # Orange
    else:
        color = '#F44336'  # Red

    ax.fill_between(theta, 0, r, alpha=0.3, color=color)
    ax.plot(theta, r, color=color, linewidth=3)

    # Add value indicator: angle pi at an empty gauge, 0 at a full one
    indicator_theta = np.pi * (1 - fraction)
    ax.plot([indicator_theta, indicator_theta], [0, 1], color='black', linewidth=4)

    ax.set_ylim(0, 1)
    ax.set_title(title, pad=20)
    ax.set_xticks([])
    ax.set_yticks([])

    # Add value text
    ax.text(0, 0, f'{value:.2f}', ha='center', va='center', fontsize=20, fontweight='bold')

    plt.tight_layout()
    return fig
211
+
212
def create_comparison_chart(categories, values1, values2, title, xlabel, ylabel,
                            label1="Series 1", label2="Series 2", color1='#1976D2', color2='#4CAF50'):
    """Create a comparison bar chart.

    Draws two series side by side for each category, with a legend and the
    numeric value (one decimal) annotated above every bar.

    Args:
        categories: Category labels for the x axis.
        values1, values2: Bar heights of the two series, one per category.
        title, xlabel, ylabel: Chart title and axis labels.
        label1, label2: Legend labels of the two series.
        color1, color2: Bar colours of the two series.

    Returns:
        The matplotlib figure.
    """
    # The module never imports numpy at file level, so import it here.
    import numpy as np

    setup_mpl_style()
    fig, ax = plt.subplots(figsize=(12, 6))

    x = np.arange(len(categories))
    width = 0.35  # width of one bar; the pair is centred on each tick

    bars1 = ax.bar(x - width/2, values1, width, label=label1, color=color1)
    bars2 = ax.bar(x + width/2, values2, width, label=label2, color=color2)

    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_xticks(x)
    ax.set_xticklabels(categories, rotation=45, ha='right')
    ax.legend()

    # Add value labels on bars
    for bars in [bars1, bars2]:
        for bar in bars:
            height = bar.get_height()
            ax.annotate(f'{height:.1f}',
                        xy=(bar.get_x() + bar.get_width() / 2, height),
                        xytext=(0, 3),  # 3 points vertical offset
                        textcoords="offset points",
                        ha='center', va='bottom')

    plt.tight_layout()
    return fig