Spaces:
Running
Running
nlp ultimate tutor
Browse files- Dockerfile +55 -0
- app.py +360 -0
- components/classification.py +188 -0
- components/named_entity.py +342 -0
- components/pos_tagging.py +540 -0
- components/preprocessing.py +888 -0
- components/question_answering.py +498 -0
- components/sentiment.py +549 -0
- components/summarization.py +541 -0
- components/text_generation.py +259 -0
- components/tokenization.py +460 -0
- components/topic_analysis.py +766 -0
- components/translation.py +337 -0
- components/vector_embeddings.py +241 -0
- requirements.txt +37 -0
- static/css/components.css +1756 -0
- static/css/style.css +762 -0
- static/js/api.js +335 -0
- static/js/components.js +380 -0
- static/js/main.js +370 -0
- templates/_analysis_nav.html +72 -0
- templates/base.html +125 -0
- templates/classification.html +449 -0
- templates/index.html +322 -0
- templates/named_entity.html +362 -0
- templates/pos_tagging.html +397 -0
- templates/preprocessing.html +269 -0
- templates/question_answering.html +442 -0
- templates/sentiment.html +430 -0
- templates/summarization.html +396 -0
- templates/text_generation.html +469 -0
- templates/tokenization.html +323 -0
- templates/topic_analysis.html +399 -0
- templates/translation.html +468 -0
- templates/vector_embeddings.html +499 -0
- utils/__init__.py +86 -0
- utils/helpers.py +172 -0
- utils/model_loader.py +222 -0
- utils/model_loader_hf.py +267 -0
- utils/visualization.py +242 -0
Dockerfile
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use Python 3.11 slim image for better performance
|
| 2 |
+
FROM python:3.11-slim
|
| 3 |
+
|
| 4 |
+
# Set environment variables
|
| 5 |
+
ENV PYTHONUNBUFFERED=1
|
| 6 |
+
ENV PYTHONDONTWRITEBYTECODE=1
|
| 7 |
+
ENV HF_HOME=/tmp/huggingface
|
| 8 |
+
ENV TRANSFORMERS_CACHE=/tmp/huggingface/transformers
|
| 9 |
+
ENV TORCH_HOME=/tmp/torch
|
| 10 |
+
|
| 11 |
+
# Create a non-root user (required for HF Spaces)
|
| 12 |
+
RUN useradd -m -u 1000 user
|
| 13 |
+
USER user
|
| 14 |
+
ENV HOME=/home/user
|
| 15 |
+
ENV PATH=/home/user/.local/bin:$PATH
|
| 16 |
+
|
| 17 |
+
# Set working directory
|
| 18 |
+
WORKDIR $HOME/app
|
| 19 |
+
|
| 20 |
+
# Install system dependencies (as root)
|
| 21 |
+
USER root
|
| 22 |
+
RUN apt-get update && apt-get install -y \
|
| 23 |
+
build-essential \
|
| 24 |
+
curl \
|
| 25 |
+
software-properties-common \
|
| 26 |
+
git \
|
| 27 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 28 |
+
|
| 29 |
+
# Switch back to user
|
| 30 |
+
USER user
|
| 31 |
+
|
| 32 |
+
# Copy requirements and install Python dependencies
|
| 33 |
+
COPY --chown=user requirements.txt .
|
| 34 |
+
RUN pip install --no-cache-dir --upgrade pip
|
| 35 |
+
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
| 36 |
+
|
| 37 |
+
# Download spaCy model
|
| 38 |
+
RUN python -m spacy download en_core_web_sm
|
| 39 |
+
|
| 40 |
+
# Copy application code
|
| 41 |
+
COPY --chown=user . .
|
| 42 |
+
|
| 43 |
+
# Create cache directories
|
| 44 |
+
RUN mkdir -p /tmp/huggingface/transformers
|
| 45 |
+
RUN mkdir -p /tmp/torch
|
| 46 |
+
|
| 47 |
+
# Expose port
|
| 48 |
+
EXPOSE 7860
|
| 49 |
+
|
| 50 |
+
# Health check
|
| 51 |
+
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
|
| 52 |
+
CMD curl -f http://localhost:7860/ || exit 1
|
| 53 |
+
|
| 54 |
+
# Run the Flask application
|
| 55 |
+
CMD ["python", "app.py"]
|
app.py
ADDED
|
@@ -0,0 +1,360 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from flask import Flask, render_template, request, jsonify, session
|
| 2 |
+
import os
|
| 3 |
+
import json
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
|
| 6 |
+
# Import components
|
| 7 |
+
from components.preprocessing import preprocessing_handler
|
| 8 |
+
from components.tokenization import tokenization_handler
|
| 9 |
+
from components.pos_tagging import pos_tagging_handler
|
| 10 |
+
from components.named_entity import named_entity_handler
|
| 11 |
+
from components.sentiment import sentiment_handler
|
| 12 |
+
from components.summarization import summarization_handler
|
| 13 |
+
from components.topic_analysis import topic_analysis_handler
|
| 14 |
+
from components.question_answering import question_answering_handler
|
| 15 |
+
from components.text_generation import text_generation_handler
|
| 16 |
+
from components.translation import translation_handler
|
| 17 |
+
from components.classification import classification_handler
|
| 18 |
+
from components.vector_embeddings import vector_embeddings_handler
|
| 19 |
+
|
| 20 |
+
# Import utilities
|
| 21 |
+
from utils.model_loader_hf import download_nltk_resources, load_spacy, initialize_essential_models
|
| 22 |
+
from utils.helpers import text_statistics
|
| 23 |
+
|
| 24 |
+
app = Flask(__name__)
|
| 25 |
+
# Session-signing key: taken from the SECRET_KEY environment variable so the
# placeholder literal is never shipped. Without it a random per-process key is
# used, which means signed sessions do not survive a restart.
app.secret_key = os.environ.get('SECRET_KEY') or os.urandom(32)
|
| 26 |
+
|
| 27 |
+
# Sample texts
|
| 28 |
+
SAMPLE_TEXTS = {
|
| 29 |
+
"News Article": "The European Commission has fined Google €1.49 billion for abusive practices in online advertising. Google abused its market dominance by imposing restrictive clauses in contracts with third-party websites, preventing competitors from placing their search adverts on these websites.",
|
| 30 |
+
"Product Review": "I absolutely love this smartphone! The camera quality is outstanding and the battery life is impressive. The user interface is intuitive and the performance is smooth even when running multiple apps. However, I find the price a bit high compared to similar models on the market.",
|
| 31 |
+
"Scientific Text": "Climate change is the long-term alteration of temperature and typical weather patterns in a place. The cause of current climate change is largely human activity, like burning fossil fuels, which adds heat-trapping gases to Earth's atmosphere. The consequences of changing climate are already being felt worldwide.",
|
| 32 |
+
"Literary Text": "The old man was thin and gaunt with deep wrinkles in the back of his neck. The brown blotches of the benevolent skin cancer the sun brings from its reflection on the tropical sea were on his cheeks. The blotches ran well down the sides of his face and his hands had the deep-creased scars from handling heavy fish on the cords."
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
# Initialize essential models for HF Spaces
|
| 36 |
+
initialize_essential_models()
|
| 37 |
+
|
| 38 |
+
@app.route('/')
def index():
    """Serve the landing page, passing the sample texts to the template."""
    template_context = {'sample_texts': SAMPLE_TEXTS}
    return render_template('index.html', **template_context)
|
| 42 |
+
|
| 43 |
+
@app.route('/api/text-stats', methods=['POST'])
def get_text_stats():
    """Return statistics for the posted text as JSON.

    Expects a JSON object with a ``text`` field. Responds with the output of
    ``text_statistics`` or with a 400 error when no text is supplied.
    """
    # silent=True plus the dict check turns a missing, malformed or non-object
    # JSON body into an empty payload, so the request ends in the 400 below
    # instead of an AttributeError on ``None.get``.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}
    text = payload.get('text', '')

    if not text:
        return jsonify({'error': 'No text provided'}), 400

    return jsonify(text_statistics(text))
|
| 54 |
+
|
| 55 |
+
@app.route('/api/sample-text', methods=['POST'])
def get_sample_text():
    """Return one of the predefined sample texts as ``{'text': ...}``.

    Expects a JSON object with a ``sample_type`` field. "Custom", an unknown
    name, or a missing/invalid body all yield an empty text.
    """
    # Tolerate a missing, malformed or non-object JSON body.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}
    sample_type = payload.get('sample_type', 'Custom')

    # Only strings can be sample names; this also avoids a TypeError from
    # looking up an unhashable value (e.g. a list) in the dict.
    if not isinstance(sample_type, str) or sample_type == "Custom":
        return jsonify({'text': ''})
    return jsonify({'text': SAMPLE_TEXTS.get(sample_type, '')})
|
| 65 |
+
|
| 66 |
+
# Text Processing Routes
|
| 67 |
+
@app.route('/preprocessing')
def preprocessing():
    """Serve the text preprocessing page."""
    template_name = 'preprocessing.html'
    return render_template(template_name)
|
| 71 |
+
|
| 72 |
+
@app.route('/api/preprocessing', methods=['POST'])
def api_preprocessing():
    """Run text preprocessing on the posted text.

    Expects a JSON object with a ``text`` field. Responds with
    ``{'success': True, 'result': ...}``, a 400 error when no text is
    supplied, or a 500 error carrying the exception message on failure.
    """
    # Tolerate a missing, malformed or non-object JSON body: it becomes an
    # empty payload and ends in the 400 below instead of ``None.get`` failing.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}
    text = payload.get('text', '')

    if not text:
        return jsonify({'error': 'No text provided'}), 400

    try:
        result = preprocessing_handler(text)
        return jsonify({'success': True, 'result': result})
    except Exception as e:  # route boundary: report handler failures as JSON
        return jsonify({'error': str(e)}), 500
|
| 86 |
+
|
| 87 |
+
@app.route('/tokenization')
def tokenization():
    """Serve the tokenization page."""
    template_name = 'tokenization.html'
    return render_template(template_name)
|
| 91 |
+
|
| 92 |
+
@app.route('/api/tokenization', methods=['POST'])
def api_tokenization():
    """Run tokenization on the posted text.

    Expects a JSON object with a ``text`` field. Responds with
    ``{'success': True, 'result': ...}``, a 400 error when no text is
    supplied, or a 500 error carrying the exception message on failure.
    """
    # Tolerate a missing, malformed or non-object JSON body: it becomes an
    # empty payload and ends in the 400 below instead of ``None.get`` failing.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}
    text = payload.get('text', '')

    if not text:
        return jsonify({'error': 'No text provided'}), 400

    try:
        result = tokenization_handler(text)
        return jsonify({'success': True, 'result': result})
    except Exception as e:  # route boundary: report handler failures as JSON
        return jsonify({'error': str(e)}), 500
|
| 106 |
+
|
| 107 |
+
@app.route('/pos-tagging')
def pos_tagging():
    """Serve the part-of-speech tagging page."""
    template_name = 'pos_tagging.html'
    return render_template(template_name)
|
| 111 |
+
|
| 112 |
+
@app.route('/api/pos-tagging', methods=['POST'])
def api_pos_tagging():
    """Run part-of-speech tagging on the posted text.

    Expects a JSON object with a ``text`` field. Responds with
    ``{'success': True, 'result': ...}``, a 400 error when no text is
    supplied, or a 500 error carrying the exception message on failure.
    """
    # Tolerate a missing, malformed or non-object JSON body: it becomes an
    # empty payload and ends in the 400 below instead of ``None.get`` failing.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}
    text = payload.get('text', '')

    if not text:
        return jsonify({'error': 'No text provided'}), 400

    try:
        result = pos_tagging_handler(text)
        return jsonify({'success': True, 'result': result})
    except Exception as e:  # route boundary: report handler failures as JSON
        return jsonify({'error': str(e)}), 500
|
| 126 |
+
|
| 127 |
+
@app.route('/named-entity')
def named_entity():
    """Serve the named entity recognition page."""
    template_name = 'named_entity.html'
    return render_template(template_name)
|
| 131 |
+
|
| 132 |
+
@app.route('/api/named-entity', methods=['POST'])
def api_named_entity():
    """Run named entity recognition on the posted text.

    Expects a JSON object with a ``text`` field. Responds with
    ``{'success': True, 'result': ...}``, a 400 error when no text is
    supplied, or a 500 error carrying the exception message on failure.
    """
    # Tolerate a missing, malformed or non-object JSON body: it becomes an
    # empty payload and ends in the 400 below instead of ``None.get`` failing.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}
    text = payload.get('text', '')

    if not text:
        return jsonify({'error': 'No text provided'}), 400

    try:
        result = named_entity_handler(text)
        return jsonify({'success': True, 'result': result})
    except Exception as e:  # route boundary: report handler failures as JSON
        return jsonify({'error': str(e)}), 500
|
| 146 |
+
|
| 147 |
+
# Analysis Routes
|
| 148 |
+
@app.route('/sentiment')
def sentiment():
    """Serve the sentiment analysis page."""
    template_name = 'sentiment.html'
    return render_template(template_name)
|
| 152 |
+
|
| 153 |
+
@app.route('/api/sentiment', methods=['POST'])
def api_sentiment():
    """Run sentiment analysis on the posted text.

    Expects a JSON object with a ``text`` field. Responds with
    ``{'success': True, 'result': ...}``, a 400 error when no text is
    supplied, or a 500 error carrying the exception message on failure.
    """
    # Tolerate a missing, malformed or non-object JSON body: it becomes an
    # empty payload and ends in the 400 below instead of ``None.get`` failing.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}
    text = payload.get('text', '')

    if not text:
        return jsonify({'error': 'No text provided'}), 400

    try:
        result = sentiment_handler(text)
        return jsonify({'success': True, 'result': result})
    except Exception as e:  # route boundary: report handler failures as JSON
        return jsonify({'error': str(e)}), 500
|
| 167 |
+
|
| 168 |
+
@app.route('/summarization')
def summarization():
    """Serve the text summarization page."""
    template_name = 'summarization.html'
    return render_template(template_name)
|
| 172 |
+
|
| 173 |
+
@app.route('/api/summarization', methods=['POST'])
def api_summarization():
    """Run text summarization on the posted text.

    Expects a JSON object with a ``text`` field. Responds with
    ``{'success': True, 'result': ...}``, a 400 error when no text is
    supplied, or a 500 error carrying the exception message on failure.
    """
    # Tolerate a missing, malformed or non-object JSON body: it becomes an
    # empty payload and ends in the 400 below instead of ``None.get`` failing.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}
    text = payload.get('text', '')

    if not text:
        return jsonify({'error': 'No text provided'}), 400

    try:
        result = summarization_handler(text)
        return jsonify({'success': True, 'result': result})
    except Exception as e:  # route boundary: report handler failures as JSON
        return jsonify({'error': str(e)}), 500
|
| 187 |
+
|
| 188 |
+
@app.route('/topic-analysis')
def topic_analysis():
    """Serve the topic analysis page."""
    template_name = 'topic_analysis.html'
    return render_template(template_name)
|
| 192 |
+
|
| 193 |
+
@app.route('/api/topic-analysis', methods=['POST'])
def api_topic_analysis():
    """Run topic analysis on the posted text.

    Expects a JSON object with a ``text`` field. Responds with
    ``{'success': True, 'result': ...}``, a 400 error when no text is
    supplied, or a 500 error carrying the exception message on failure.
    """
    # Tolerate a missing, malformed or non-object JSON body: it becomes an
    # empty payload and ends in the 400 below instead of ``None.get`` failing.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}
    text = payload.get('text', '')

    if not text:
        return jsonify({'error': 'No text provided'}), 400

    try:
        result = topic_analysis_handler(text)
        return jsonify({'success': True, 'result': result})
    except Exception as e:  # route boundary: report handler failures as JSON
        return jsonify({'error': str(e)}), 500
|
| 207 |
+
|
| 208 |
+
# Advanced NLP Routes
|
| 209 |
+
@app.route('/question-answering')
def question_answering():
    """Serve the question answering page."""
    template_name = 'question_answering.html'
    return render_template(template_name)
|
| 213 |
+
|
| 214 |
+
@app.route('/api/question-answering', methods=['POST'])
def api_question_answering():
    """Answer a question about the posted context.

    Parameters are read from the JSON body first, then form data, then the
    query string:

    * ``context`` (or ``text``): the passage to search; required.
    * ``question``: the question to answer; defaults to ``''``.
    * ``confidence_threshold``: number forwarded to the handler; defaults
      to 0.5 when absent or not convertible to float.

    Responds with ``{'success': True, 'result': ...}``, a 400 error when no
    context is supplied, or a 500 error carrying the exception message.
    """
    # A JSON body that is missing, malformed or not an object (e.g. a list)
    # is treated as empty so the ``.get`` lookups below cannot fail.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}
    sources = (payload, request.form, request.args)

    def first_truthy(*keys):
        """Return the first non-empty value for ``keys``, source by source."""
        for source in sources:
            for key in keys:
                value = source.get(key)
                if value:
                    return value
        return ''

    text = first_truthy('context', 'text')
    question = first_truthy('question')

    # The threshold is matched on presence, not truthiness: an explicit 0
    # is a valid value and must not be replaced by the default.
    raw_threshold = 0.5
    for source in sources:
        candidate = source.get('confidence_threshold')
        if candidate is not None and candidate != '':
            raw_threshold = candidate
            break
    try:
        confidence_threshold = float(raw_threshold)
    except (TypeError, ValueError, OverflowError):
        confidence_threshold = 0.5

    if not text:
        return jsonify({'error': 'No text provided'}), 400

    try:
        result = question_answering_handler(text, question, confidence_threshold=confidence_threshold)
        return jsonify({'success': True, 'result': result})
    except Exception as e:  # route boundary: report handler failures as JSON
        return jsonify({'error': str(e)}), 500
|
| 253 |
+
|
| 254 |
+
@app.route('/text-generation')
def text_generation():
    """Serve the text generation page."""
    template_name = 'text_generation.html'
    return render_template(template_name)
|
| 258 |
+
|
| 259 |
+
@app.route('/api/text-generation', methods=['POST'])
def api_text_generation():
    """Run text generation from the posted prompt text.

    Expects a JSON object with a ``text`` field. Responds with
    ``{'success': True, 'result': ...}``, a 400 error when no text is
    supplied, or a 500 error carrying the exception message on failure.
    """
    # Tolerate a missing, malformed or non-object JSON body: it becomes an
    # empty payload and ends in the 400 below instead of ``None.get`` failing.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}
    text = payload.get('text', '')

    if not text:
        return jsonify({'error': 'No text provided'}), 400

    try:
        result = text_generation_handler(text)
        return jsonify({'success': True, 'result': result})
    except Exception as e:  # route boundary: report handler failures as JSON
        return jsonify({'error': str(e)}), 500
|
| 273 |
+
|
| 274 |
+
@app.route('/translation')
def translation():
    """Serve the translation page."""
    template_name = 'translation.html'
    return render_template(template_name)
|
| 278 |
+
|
| 279 |
+
@app.route('/api/translation', methods=['POST'])
def api_translation():
    """Translate the posted text.

    Expects a JSON object with ``text`` and an optional ``target_language``
    (default ``'en'``). Responds with ``{'success': True, 'result': ...}``,
    a 400 error when no text is supplied, or a 500 error carrying the
    exception message on failure.
    """
    # Tolerate a missing, malformed or non-object JSON body: it becomes an
    # empty payload and ends in the 400 below instead of ``None.get`` failing.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}
    text = payload.get('text', '')
    target_language = payload.get('target_language', 'en')

    if not text:
        return jsonify({'error': 'No text provided'}), 400

    try:
        result = translation_handler(text, target_language)
        return jsonify({'success': True, 'result': result})
    except Exception as e:  # route boundary: report handler failures as JSON
        return jsonify({'error': str(e)}), 500
|
| 294 |
+
|
| 295 |
+
@app.route('/classification')
def classification():
    """Serve the classification page."""
    template_name = 'classification.html'
    return render_template(template_name)
|
| 299 |
+
|
| 300 |
+
@app.route('/api/classification', methods=['POST'])
def api_classification():
    """Classify the posted text with the zero-shot classification handler.

    Expects a JSON object with ``text`` and the optional fields ``scenario``
    (default ``'Sentiment'``), ``multi_label`` (default ``False``) and
    ``custom_labels`` (default ``''``). Responds with
    ``{'success': True, 'result': ...}``, a 400 error when no text is
    supplied, or a 500 error carrying the exception message on failure.
    """
    # Tolerate a missing, malformed or non-object JSON body: it becomes an
    # empty payload and ends in the 400 below instead of ``None.get`` failing.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}
    text = payload.get('text', '')
    scenario = payload.get('scenario', 'Sentiment')
    multi_label = payload.get('multi_label', False)
    custom_labels = payload.get('custom_labels', '')

    if not text:
        return jsonify({'error': 'No text provided'}), 400

    try:
        result = classification_handler(text, scenario, multi_label, custom_labels)
        return jsonify({'success': True, 'result': result})
    except Exception as e:  # route boundary: report handler failures as JSON
        return jsonify({'error': str(e)}), 500
|
| 317 |
+
|
| 318 |
+
@app.route('/vector-embeddings')
def vector_embeddings():
    """Serve the vector embeddings page."""
    template_name = 'vector_embeddings.html'
    return render_template(template_name)
|
| 322 |
+
|
| 323 |
+
@app.route('/api/vector-embeddings', methods=['POST'])
def api_vector_embeddings():
    """Run the vector embeddings handler on the posted text.

    Expects a JSON object with ``text`` and an optional ``query`` (default
    ``''``). Responds with ``{'success': True, 'result': ...}``, a 400 error
    when no text is supplied, or a 500 error carrying the exception message
    on failure.
    """
    # Tolerate a missing, malformed or non-object JSON body: it becomes an
    # empty payload and ends in the 400 below instead of ``None.get`` failing.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}
    text = payload.get('text', '')
    query = payload.get('query', '')

    if not text:
        return jsonify({'error': 'No text provided'}), 400

    try:
        result = vector_embeddings_handler(text, query)
        return jsonify({'success': True, 'result': result})
    except Exception as e:  # route boundary: report handler failures as JSON
        return jsonify({'error': str(e)}), 500
|
| 338 |
+
|
| 339 |
+
@app.route('/api/semantic-search', methods=['POST'])
def api_semantic_search():
    """Run a semantic search of ``query`` against ``context``.

    Expects a JSON object with non-empty ``context`` and ``query`` fields.
    Responds with the search result serialized directly as JSON (no
    ``success`` wrapper), a 400 error when either field is missing, or a 500
    error carrying the exception message on failure.
    """
    # Imported inside the function, as in the original code.
    from components.vector_embeddings import perform_semantic_search

    # Tolerate a missing, malformed or non-object JSON body: it becomes an
    # empty payload and ends in the 400 below instead of ``None.get`` failing.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}
    context = payload.get('context', '')
    query = payload.get('query', '')

    if not context or not query:
        return jsonify({'error': 'Both context and query are required'}), 400

    try:
        result = perform_semantic_search(context, query)
        return jsonify(result)
    except Exception as e:  # route boundary: report search failures as JSON
        return jsonify({'error': str(e)}), 500
|
| 356 |
+
|
| 357 |
+
if __name__ == '__main__':
    # HF Spaces serves on port 7860; the PORT environment variable overrides it.
    listen_port = int(os.environ.get('PORT', 7860))
    app.run(host='0.0.0.0', port=listen_port, debug=False)
|
components/classification.py
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import matplotlib.pyplot as plt
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from utils.model_loader import load_zero_shot
|
| 4 |
+
from utils.helpers import fig_to_html, df_to_html_table
|
| 5 |
+
|
| 6 |
+
def classification_handler(text_input, scenario="Sentiment", multi_label=False, custom_labels=""):
    """Build the HTML report for a zero-shot classification of ``text_input``.

    Args:
        text_input: Text to classify.
        scenario: Name of a predefined label set ("Sentiment", "Emotion",
            "Writing Style", "Intent", "Content Type", "Audience Level"), or
            "Custom" to take the labels from ``custom_labels``.
        multi_label: Forwarded to the classifier as ``multi_label``; forced
            on for the "Emotion", "Intent" and "Content Type" scenarios.
        custom_labels: Newline-separated label names (an iterable of names
            or ``None`` is also accepted); used only for "Custom".

    Returns:
        A single HTML string. Failures during classification are rendered as
        an error alert inside that HTML rather than raised.
    """
    # Local import: labels and error text can originate from user input and
    # are escaped before being interpolated into the page.
    from html import escape

    output_html = []

    # Add result area container
    output_html.append('<div class="result-area">')
    output_html.append('<h2 class="task-header">Zero-shot Classification</h2>')

    output_html.append("""
    <div class="alert alert-info">
        <i class="fas fa-tags"></i>
        Zero-shot classification can categorize text into arbitrary classes without having been specifically trained on those categories.
    </div>
    """)

    # Model info
    output_html.append("""
    <div class="alert alert-info">
        <h4><i class="fas fa-tools"></i> Model Used:</h4>
        <ul>
            <li><b>facebook/bart-large-mnli</b> - BART model fine-tuned on MultiNLI dataset</li>
            <li><b>Capabilities</b> - Can classify text into any user-defined categories</li>
            <li><b>Performance</b> - Best performance on distinct, well-defined categories</li>
        </ul>
    </div>
    """)

    # Classification scenarios
    scenarios = {
        "Sentiment": ["positive", "negative", "neutral"],
        "Emotion": ["joy", "sadness", "anger", "fear", "surprise"],
        "Writing Style": ["formal", "informal", "technical", "creative", "persuasive"],
        "Intent": ["inform", "persuade", "entertain", "instruct"],
        "Content Type": ["news", "opinion", "review", "instruction", "narrative"],
        "Audience Level": ["beginner", "intermediate", "advanced", "expert"],
        "Custom": []
    }

    try:
        # Get labels based on scenario
        if scenario == "Custom":
            if isinstance(custom_labels, str):
                raw_labels = custom_labels.split("\n")
            else:
                # None or an iterable of names instead of a newline-separated string
                raw_labels = [str(item) for item in (custom_labels or [])]
            labels = [label.strip() for label in raw_labels if label.strip()]
            if not labels:
                output_html.append("""
                <div class="alert alert-warning">
                    <h3>No Custom Categories</h3>
                    <p>Please enter at least one custom category.</p>
                </div>
                """)
                output_html.append('</div>')  # Close result-area div
                return '\n'.join(output_html)
        else:
            labels = scenarios.get(scenario)
            if labels is None:
                # Reported through the error alert below, with a readable message
                raise ValueError(f"Unknown scenario: {scenario}")

        # These scenarios are always run in multi-label mode
        multi_label = multi_label or scenario in ("Emotion", "Intent", "Content Type")

        # Load model
        classifier = load_zero_shot()

        # Classification process
        result = classifier(text_input, labels, multi_label=multi_label)

        # Display results
        output_html.append('<h3 class="task-subheader">Classification Results</h3>')

        # Create DataFrame
        class_df = pd.DataFrame({
            'Category': result['labels'],
            'Confidence': result['scores']
        })

        # Visualization; the figure is always closed so repeated requests do
        # not accumulate open pyplot figures.
        fig = plt.figure(figsize=(10, 6))
        try:
            bars = plt.barh(class_df['Category'], class_df['Confidence'], color='#1976D2')

            # Add percentage labels
            for bar in bars:
                plt.text(bar.get_width() + 0.01, bar.get_y() + bar.get_height() / 2,
                         f"{bar.get_width():.1%}", va='center')

            plt.xlim(0, 1.1)
            plt.xlabel('Confidence Score')
            plt.title(f'{scenario} Classification')
            plt.tight_layout()

            chart_html = fig_to_html(fig)
        finally:
            plt.close(fig)

        # Layout with vertical stacking - Chart first
        output_html.append('<div class="row mb-4">')
        output_html.append('<div class="col-12">')
        output_html.append('<h4>Classification Confidence Chart</h4>')
        output_html.append(chart_html)
        output_html.append('</div>')
        output_html.append('</div>')  # Close chart row

        # Data table and result in next row
        output_html.append('<div class="row">')
        output_html.append('<div class="col-md-6">')
        output_html.append('<h4>Detailed Results</h4>')
        output_html.append(df_to_html_table(class_df))
        output_html.append('</div>')

        # Top result
        output_html.append('<div class="col-md-6">')
        top_class = class_df.iloc[0]['Category']
        top_score = class_df.iloc[0]['Confidence']

        output_html.append(f"""
        <div class="alert alert-primary">
            <h3>Primary Classification</h3>
            <p class="h4">{escape(str(top_class))}</p>
            <p>Confidence: {top_score:.1%}</p>
        </div>
        """)

        output_html.append('</div>')  # Close result column
        output_html.append('</div>')  # Close row

        # Multiple categories (if multi-label)
        if multi_label:
            # Get all categories with significant confidence
            significant_classes = class_df[class_df['Confidence'] > 0.5]

            if len(significant_classes) > 1:
                output_html.append("""
                <div class="alert alert-info">
                    <h3>Multiple Categories Detected</h3>
                    <p>This text appears to belong to multiple categories:</p>
                </div>
                """)

                category_items = ''.join(
                    f"<li><b>{escape(str(row['Category']))}</b> ({row['Confidence']:.1%})</li>"
                    for _, row in significant_classes.iterrows()
                )
                output_html.append(f"<ul>{category_items}</ul>")

    except Exception as e:  # any failure is shown in the page instead of raised
        output_html.append(f"""
        <div class="alert alert-danger">
            <h3>Error</h3>
            <p>Failed to classify text: {escape(str(e))}</p>
        </div>
        """)

    # About zero-shot classification
    output_html.append("""
    <div class="card mt-4">
        <div class="card-header">
            <h4 class="mb-0">
                <i class="fas fa-info-circle"></i>
                About Zero-shot Classification
            </h4>
        </div>
        <div class="card-body">
            <h5>What is Zero-shot Classification?</h5>

            <p>Unlike traditional classifiers that need to be trained on examples from each category,
            zero-shot classification can categorize text into arbitrary classes it has never seen
            during training.</p>

            <h5>How it works:</h5>

            <ol>
                <li>The model converts your text and each potential category into embeddings</li>
                <li>It calculates how likely the text entails or belongs to each category</li>
                <li>The model ranks categories by confidence scores</li>
            </ol>

            <h5>Benefits:</h5>

            <ul>
                <li>Flexibility to classify into any categories without retraining</li>
                <li>Can work with domain-specific or custom categories</li>
                <li>Useful for exploratory analysis or when training data is limited</li>
            </ul>
        </div>
    </div>
    """)

    output_html.append('</div>')  # Close result-area div

    return '\n'.join(output_html)
|
components/named_entity.py
ADDED
|
@@ -0,0 +1,342 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import matplotlib
|
| 2 |
+
matplotlib.use('Agg') # Use non-interactive backend
|
| 3 |
+
import matplotlib.pyplot as plt
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import numpy as np
|
| 6 |
+
import spacy
|
| 7 |
+
from collections import Counter
|
| 8 |
+
import networkx as nx
|
| 9 |
+
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
|
| 10 |
+
|
| 11 |
+
from utils.model_loader import load_spacy
|
| 12 |
+
from utils.helpers import fig_to_html, df_to_html_table
|
| 13 |
+
|
| 14 |
+
# Badge / bar colour per entity label. BERT (PER, ORG, LOC, MISC) and spaCy
# share one table, so a label used by both models ('ORG', 'LOC') can only
# have a single colour; each key appears exactly once.
_ENTITY_COLORS = {
    'PER': '#e6194B',          # Person (BERT) - Red
    'ORG': '#3cb44b',          # Organization - Green
    'LOC': '#42d4f4',          # Location - Cyan
    'MISC': '#f58231',         # Miscellaneous (BERT) - Orange
    'PERSON': '#e6194B',       # Red
    'GPE': '#4363d8',          # Blue (locations/geopolitical)
    'FACILITY': '#f58231',     # Orange
    'PRODUCT': '#911eb4',      # Purple
    'EVENT': '#f032e6',        # Magenta
    'WORK_OF_ART': '#fabebe',  # Pink
    'LAW': '#008080',          # Teal
    'DATE': '#9A6324',         # Brown
    'TIME': '#800000',         # Maroon
    'PERCENT': '#808000',      # Olive
    'MONEY': '#000075',        # Navy
    'QUANTITY': '#000000',     # Black
    'CARDINAL': '#a9a9a9',     # Dark Gray
    'ORDINAL': '#808080',      # Gray
    'NORP': '#469990',         # Nationality/Religious/Political
}

# Colour used for any label missing from _ENTITY_COLORS.
_DEFAULT_ENTITY_COLOR = '#a9a9a9'

# Short human-readable description per label, shown in the legend.
_ENTITY_DESCRIPTIONS = {
    'PER': 'People',
    'MISC': 'Miscellaneous entities',
    'PERSON': 'People, including fictional',
    'ORG': 'Organizations, companies, institutions',
    'GPE': 'Geopolitical entities (countries, cities, states)',
    'LOC': 'Non-GPE locations (mountain ranges, water bodies)',
    'FACILITY': 'Buildings, airports, highways, bridges',
    'PRODUCT': 'Products, objects, vehicles, foods',
    'EVENT': 'Hurricanes, battles, wars, sports events',
    'WORK_OF_ART': 'Titles of books, songs, etc.',
    'LAW': 'Named documents made into laws',
    'DATE': 'Absolute or relative dates',
    'TIME': 'Times smaller than a day',
    'PERCENT': 'Percentage',
    'MONEY': 'Monetary values',
    'QUANTITY': 'Measurements',
    'CARDINAL': 'Numerals not falling under another type',
    'ORDINAL': 'Ordinal numbers',
    'NORP': 'Nationalities, religious or political groups',
}


def _collect_entities(bert_entities, doc):
    """Merge BERT pipeline results and spaCy ``doc.ents`` into one list of dicts."""
    collected = [
        {
            # Strip WordPiece continuation markers left in the aggregated word.
            'text': item['word'].replace('##', ''),
            'label': item['entity_group'],
            'confidence': item['score'],
            'start': item['start'],
            'end': item['end'],
            'source': 'BERT',
        }
        for item in bert_entities
    ]
    collected.extend(
        {
            'text': ent.text,
            'label': ent.label_,
            'confidence': 1.0,  # spaCy doesn't provide confidence scores
            'start': ent.start_char,
            'end': ent.end_char,
            'source': 'spaCy',
        }
        for ent in doc.ents
    )
    return collected


def _dedupe_by_span(entities):
    """Keep the first entity seen for each exact ``(start, end)`` span."""
    seen_spans = set()
    unique = []
    for entity in entities:
        span = (entity['start'], entity['end'])
        if span not in seen_spans:
            seen_spans.add(span)
            unique.append(entity)
    return unique


def _render_highlighted_text(text, entities):
    """Return ``text`` as HTML with each entity wrapped in a coloured badge.

    The output is assembled left to right from slices of the original text,
    so every highlight lands on its own character span regardless of how the
    model detokenised the entity. Entities that overlap an already
    highlighted span are skipped (they still appear in the entity table).
    All text taken from the input or the models is HTML-escaped.
    """
    from html import escape

    pieces = []
    cursor = 0  # index in ``text`` up to which output has been emitted
    for entity in sorted(entities, key=lambda item: item['start']):
        start, end = entity['start'], entity['end']
        if start < cursor:
            # Overlaps the previous badge; nesting would corrupt the markup.
            continue

        color = _ENTITY_COLORS.get(entity['label'], _DEFAULT_ENTITY_COLOR)
        label = escape(str(entity['label']))
        source = escape(str(entity['source']))
        confidence_text = (
            f" (Confidence: {entity['confidence']:.2f})"
            if entity['confidence'] < 1.0 else ""
        )
        tooltip = f"{label} - {source}{confidence_text}"

        pieces.append(escape(text[cursor:start]))
        pieces.append(
            f'<span class="entity-badge" style="background-color: {color}; color: white; '
            f'border: 2px solid #fff; box-shadow: 0 2px 4px rgba(0,0,0,0.3);" title="{tooltip}">'
            f'<strong>{escape(text[start:end])}</strong> '
            f'<span style="font-size: 0.8em;">({label}) ({source})</span></span>'
        )
        cursor = end

    pieces.append(escape(text[cursor:]))
    return ''.join(pieces)


def _distribution_chart_html(entity_counts):
    """Render the per-label entity counts as a bar chart and return its HTML."""
    labels = list(entity_counts.keys())
    counts = list(entity_counts.values())

    fig = plt.figure(figsize=(12, 8))
    try:
        bars = plt.bar(
            labels, counts,
            color=[_ENTITY_COLORS.get(name, _DEFAULT_ENTITY_COLOR) for name in labels],
        )
        plt.xlabel('Entity Type')
        plt.ylabel('Count')
        plt.title('Entity Type Distribution (BERT + spaCy)')
        plt.xticks(rotation=45, ha='right')

        # Add count labels on top of bars
        for bar in bars:
            height = bar.get_height()
            plt.text(bar.get_x() + bar.get_width() / 2., height + 0.1,
                     f'{int(height)}', ha='center', va='bottom')

        plt.tight_layout()
        return fig_to_html(fig)
    finally:
        # Release the figure even if rendering fails; closing a figure that
        # was already closed is a no-op.
        plt.close(fig)


def _cooccurrence_html(doc):
    """Return HTML fragments for the spaCy entity co-occurrence network.

    Two entities are linked when they appear in the same sentence; the edge
    weight counts how many sentences they share.
    """
    fragments = ['<h3 class="task-subheader">Entity Relationships</h3>']

    graph = nx.Graph()
    for ent in doc.ents:
        graph.add_node(ent.text, type=ent.label_)

    for sent in doc.sents:
        sent_ents = [ent for ent in doc.ents if sent.start <= ent.start < sent.end]
        for index, first in enumerate(sent_ents):
            for second in sent_ents[index + 1:]:
                if first.text == second.text:
                    # Same entity mentioned twice: a self-loop is not a relationship.
                    continue
                if graph.has_edge(first.text, second.text):
                    graph[first.text][second.text]['weight'] += 1
                else:
                    graph.add_edge(first.text, second.text, weight=1)

    # Only show relationship visualization if there are edges
    if graph.number_of_edges() == 0:
        fragments.append('<p>No entity relationships detected in the text.</p>')
        return fragments

    fig = plt.figure(figsize=(10, 8))
    try:
        node_colors = [
            _ENTITY_COLORS.get(graph.nodes[node]['type'], _DEFAULT_ENTITY_COLOR)
            for node in graph.nodes()
        ]
        pos = nx.spring_layout(graph)

        nx.draw_networkx_nodes(graph, pos, node_size=300, node_color=node_colors, alpha=0.8)
        nx.draw_networkx_edges(graph, pos, width=1.5, alpha=0.7, edge_color='#888888')
        nx.draw_networkx_labels(graph, pos, font_size=10, font_weight='bold')

        plt.title('Entity Co-occurrence Network')
        plt.axis('off')
        plt.tight_layout()

        fragments.append('<div class="alert alert-light"><p class="mb-0">This visualization shows entities that appear in the same sentences:</p></div>')
        fragments.append(fig_to_html(fig))
    finally:
        plt.close(fig)
    return fragments


def _legend_html(entity_counts):
    """Return HTML fragments for a legend of the known labels that were found."""
    fragments = ['<h3 class="task-subheader">Entity Type Legend</h3>', '<div class="row">']
    for label, color in _ENTITY_COLORS.items():
        if label in entity_counts:
            fragments.append(f"""
            <div class="col-md-6 mb-2">
                <div class="card">
                    <div class="card-body p-2">
                        <span class="badge me-2" style="background-color: {color}; color: white;">{label}</span>
                        <small>{_ENTITY_DESCRIPTIONS.get(label, '')}</small>
                    </div>
                </div>
            </div>
            """)
    fragments.append('</div>')  # Close row
    return fragments


def named_entity_handler(text_input):
    """Show named entity recognition capabilities.

    Runs the ``dslim/bert-base-NER`` pipeline and the spaCy model returned by
    ``load_spacy()`` over ``text_input`` and builds an HTML report: the text
    with highlighted entities, a bar chart of entity types, a table of the
    entities, a sentence-level co-occurrence network and a legend.

    If the BERT model cannot be loaded, a warning is shown and only spaCy
    entities are used. Any other failure is reported in an error box rather
    than raised.

    Args:
        text_input: The text to analyse. It is treated as untrusted and is
            HTML-escaped wherever it is echoed into the report.

    Returns:
        The report as a single HTML string.
    """
    from html import escape

    output_html = []

    # Add result area container
    output_html.append('<div class="result-area">')
    output_html.append('<h2 class="task-header">Named Entity Recognition</h2>')

    output_html.append("""
    <div class="alert alert-info">
        <i class="fas fa-info-circle"></i>
        Named Entity Recognition identifies and classifies key information in text into pre-defined categories such as person names, organizations, locations, etc.
    </div>
    """)

    # Model info
    output_html.append("""
    <div class="alert alert-info">
        <h4><i class="fas fa-tools"></i> Models Used:</h4>
        <ul>
            <li><b>dslim/bert-base-NER</b> - BERT-based Named Entity Recognition model</li>
            <li><b>spaCy en_core_web_sm</b> - Statistical NLP model for additional analysis</li>
            <li><b>Entity Types</b> - Identifies people, organizations, locations, and miscellaneous entities</li>
        </ul>
    </div>
    """)

    try:
        # Load BERT NER model; on failure continue with spaCy only.
        bert_entities = []
        try:
            tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
            model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
            ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")
        except Exception as model_err:
            output_html.append(f"""
            <div class="alert alert-warning">
                <h4>Model Loading Issue</h4>
                <p>Could not load BERT NER model: {escape(str(model_err))}</p>
                <p>Falling back to spaCy model...</p>
            </div>
            """)
        else:
            # Process with BERT NER
            bert_entities = ner_pipeline(text_input)

        # spaCy is used in both paths: as the fallback and for additional analysis.
        nlp = load_spacy()
        doc = nlp(text_input)

        # Combine entities from both models
        all_entities = _collect_entities(bert_entities, doc)

        if not all_entities:
            output_html.append("""
            <div class="alert alert-warning">
                <h3>No Named Entities Found</h3>
                <p>The model couldn't identify any named entities in the provided text. Try a different text that contains names, places, organizations, dates, etc.</p>
            </div>
            """)
        else:
            # BERT entities come first, so on an exact-span tie the BERT one is kept.
            unique_entities = _dedupe_by_span(all_entities)

            # Display identified entities in text
            output_html.append('<h3 class="task-subheader">Identified Entities</h3>')
            highlighted = _render_highlighted_text(text_input, unique_entities)
            output_html.append(f'<div class="card"><div class="card-body"><div class="entity-text-container">{highlighted}</div></div></div>')

            # Entity count and distribution
            output_html.append('<h3 class="task-subheader">Entity Distribution</h3>')

            entity_df = pd.DataFrame([
                {
                    'Entity': entity['text'],
                    'Type': entity['label'],
                    'Source': entity['source'],
                    'Confidence': f"{entity['confidence']:.2f}",
                }
                for entity in unique_entities
            ])
            entity_counts = Counter(entity['label'] for entity in unique_entities)

            # Chart section
            output_html.append('<section class="entity-chart-section">')
            output_html.append('<div class="chart-container">')
            output_html.append(_distribution_chart_html(entity_counts))
            output_html.append('</div>')
            output_html.append('</section>')

            # Table section
            output_html.append('<section class="entity-table-container">')
            output_html.append('<h4>Entities Found</h4>')
            output_html.append(df_to_html_table(entity_df))
            output_html.append('</section>')

            # Entity relationship visualization (for texts with multiple entities)
            if len(doc.ents) > 1:
                output_html.extend(_cooccurrence_html(doc))

            # Legend for entity types
            output_html.extend(_legend_html(entity_counts))

    except Exception as e:
        # Top-level boundary: report the failure in the page instead of raising.
        output_html.append(f"""
        <div class="alert alert-danger">
            <h3>Error</h3>
            <p>Failed to process named entities: {escape(str(e))}</p>
        </div>
        """)

    # About NER section
    output_html.append("""
    <div class="card mt-4">
        <div class="card-header">
            <h4 class="mb-0">
                <i class="fas fa-info-circle"></i>
                About Named Entity Recognition
            </h4>
        </div>
        <div class="card-body">
            <h5>What is Named Entity Recognition?</h5>

            <p>Named Entity Recognition (NER) is an NLP technique that automatically identifies and classifies named entities
            in text into predefined categories. These entities are typically proper nouns such as people, organizations,
            locations, expressions of times, quantities, monetary values, and percentages.</p>

            <h5>Applications of NER:</h5>

            <ul>
                <li><b>Information Extraction</b> - Identifying key information from large volumes of text</li>
                <li><b>Question Answering</b> - Helping systems understand what entities questions are referring to</li>
                <li><b>Document Classification</b> - Using entity types and frequencies to categorize documents</li>
                <li><b>Customer Service</b> - Identifying product names, issue types, and user information in support tickets</li>
                <li><b>Content Recommendation</b> - Using entities to find related content</li>
            </ul>
        </div>
    </div>
    """)

    output_html.append('</div>')  # Close result-area div

    return '\n'.join(output_html)
|
components/pos_tagging.py
ADDED
|
@@ -0,0 +1,540 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import matplotlib.pyplot as plt
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import nltk
|
| 4 |
+
from collections import Counter
|
| 5 |
+
import random
|
| 6 |
+
import numpy as np
|
| 7 |
+
import io
|
| 8 |
+
import base64
|
| 9 |
+
from PIL import Image
|
| 10 |
+
|
| 11 |
+
from utils.model_loader import load_spacy
|
| 12 |
+
from utils.helpers import fig_to_html, df_to_html_table, format_pos_token
|
| 13 |
+
|
| 14 |
+
def pos_tagging_handler(text_input):
|
| 15 |
+
"""Show part-of-speech tagging capabilities."""
|
| 16 |
+
output_html = []
|
| 17 |
+
|
| 18 |
+
# Add result area container
|
| 19 |
+
output_html.append('<div class="result-area">')
|
| 20 |
+
output_html.append('<h2 class="task-header">Part-of-Speech Tagging</h2>')
|
| 21 |
+
|
| 22 |
+
output_html.append("""
|
| 23 |
+
<div class="alert alert-info">
|
| 24 |
+
<i class="fas fa-info-circle"></i>
|
| 25 |
+
Part-of-Speech (POS) tagging is the process of marking up words in text according to their grammatical categories
|
| 26 |
+
such as noun, verb, adjective, etc.
|
| 27 |
+
</div>
|
| 28 |
+
""")
|
| 29 |
+
|
| 30 |
+
# Model info
|
| 31 |
+
output_html.append("""
|
| 32 |
+
<div class="alert alert-info">
|
| 33 |
+
<h4><i class="fas fa-tools"></i> Models Used:</h4>
|
| 34 |
+
<ul>
|
| 35 |
+
<li><b>NLTK</b> - Using the Perceptron tagger trained on the Penn Treebank corpus</li>
|
| 36 |
+
<li><b>spaCy</b> - Using the en_core_web_sm model's POS tagging capabilities</li>
|
| 37 |
+
</ul>
|
| 38 |
+
</div>
|
| 39 |
+
""")
|
| 40 |
+
|
| 41 |
+
try:
|
| 42 |
+
# Process with NLTK
|
| 43 |
+
words = nltk.word_tokenize(text_input)
|
| 44 |
+
nltk_pos = nltk.pos_tag(words)
|
| 45 |
+
|
| 46 |
+
# Process with spaCy
|
| 47 |
+
nlp = load_spacy()
|
| 48 |
+
doc = nlp(text_input)
|
| 49 |
+
spacy_pos = [(token.text, token.pos_) for token in doc]
|
| 50 |
+
|
| 51 |
+
# Display tagged text
|
| 52 |
+
output_html.append('<h3 class="task-subheader">Tagged Text</h3>')
|
| 53 |
+
|
| 54 |
+
# Color scheme for different POS tags
|
| 55 |
+
# Using a visually distinct color palette
|
| 56 |
+
colors = {
|
| 57 |
+
# NLTK Penn Treebank Tags
|
| 58 |
+
'NN': '#e6194B', # Noun - Red
|
| 59 |
+
'NNS': '#e6194B', # Plural noun - Red
|
| 60 |
+
'NNP': '#3cb44b', # Proper noun - Green
|
| 61 |
+
'NNPS': '#3cb44b', # Plural proper noun - Green
|
| 62 |
+
'VB': '#4363d8', # Verb - Blue
|
| 63 |
+
'VBD': '#4363d8', # Verb, past tense - Blue
|
| 64 |
+
'VBG': '#4363d8', # Verb, gerund - Blue
|
| 65 |
+
'VBN': '#4363d8', # Verb, past participle - Blue
|
| 66 |
+
'VBP': '#4363d8', # Verb, non-3rd singular present - Blue
|
| 67 |
+
'VBZ': '#4363d8', # Verb, 3rd singular present - Blue
|
| 68 |
+
'JJ': '#f58231', # Adjective - Orange
|
| 69 |
+
'JJR': '#f58231', # Comparative adjective - Orange
|
| 70 |
+
'JJS': '#f58231', # Superlative adjective - Orange
|
| 71 |
+
'RB': '#911eb4', # Adverb - Purple
|
| 72 |
+
'RBR': '#911eb4', # Comparative adverb - Purple
|
| 73 |
+
'RBS': '#911eb4', # Superlative adverb - Purple
|
| 74 |
+
'IN': '#f032e6', # Preposition - Magenta
|
| 75 |
+
'DT': '#fabebe', # Determiner - Pink
|
| 76 |
+
'PRP': '#008080', # Personal pronoun - Teal
|
| 77 |
+
'PRP$': '#008080', # Possessive pronoun - Teal
|
| 78 |
+
'CC': '#9A6324', # Coordinating conjunction - Brown
|
| 79 |
+
'CD': '#800000', # Cardinal number - Maroon
|
| 80 |
+
'EX': '#808000', # Existential there - Olive
|
| 81 |
+
'FW': '#000075', # Foreign word - Navy
|
| 82 |
+
'MD': '#a9a9a9', # Modal - Dark Gray
|
| 83 |
+
'PDT': '#469990', # Predeterminer - Greenish
|
| 84 |
+
'POS': '#000000', # Possessive ending - Black
|
| 85 |
+
'RP': '#aaffc3', # Particle - Mint
|
| 86 |
+
'SYM': '#ffd8b1', # Symbol - Light Orange
|
| 87 |
+
'TO': '#fffac8', # to - Light Yellow
|
| 88 |
+
'UH': '#dcbeff', # Interjection - Lavender
|
| 89 |
+
'WDT': '#808080', # Wh-determiner - Gray
|
| 90 |
+
'WP': '#808080', # Wh-pronoun - Gray
|
| 91 |
+
'WP$': '#808080', # Possessive wh-pronoun - Gray
|
| 92 |
+
'WRB': '#808080', # Wh-adverb - Gray
|
| 93 |
+
|
| 94 |
+
# spaCy Universal POS Tags
|
| 95 |
+
'NOUN': '#e6194B', # Noun - Red
|
| 96 |
+
'PROPN': '#3cb44b', # Proper noun - Green
|
| 97 |
+
'VERB': '#4363d8', # Verb - Blue
|
| 98 |
+
'ADJ': '#f58231', # Adjective - Orange
|
| 99 |
+
'ADV': '#911eb4', # Adverb - Purple
|
| 100 |
+
'ADP': '#f032e6', # Adposition (preposition) - Magenta
|
| 101 |
+
'DET': '#fabebe', # Determiner - Pink
|
| 102 |
+
'PRON': '#008080', # Pronoun - Teal
|
| 103 |
+
'CCONJ': '#9A6324', # Coordinating conjunction - Brown
|
| 104 |
+
'NUM': '#800000', # Numeral - Maroon
|
| 105 |
+
'PART': '#aaffc3', # Particle - Mint
|
| 106 |
+
'INTJ': '#dcbeff', # Interjection - Lavender
|
| 107 |
+
'PUNCT': '#000000', # Punctuation - Black
|
| 108 |
+
'SYM': '#ffd8b1', # Symbol - Light Orange
|
| 109 |
+
'X': '#808080', # Other - Gray
|
| 110 |
+
'SPACE': '#ffffff' # Space - White
|
| 111 |
+
}
|
| 112 |
+
|
| 113 |
+
# Function to generate HTML for POS tagged text
|
| 114 |
+
def generate_tagged_html(pos_tags, tagset_name):
    """Render (word, tag) pairs as a block of colour-coded HTML badges.

    Args:
        pos_tags: Iterable of ``(word, tag)`` string pairs.
        tagset_name: Name of the tagset being rendered. It is not used in
            the markup; it is kept so existing callers keep working.

    Returns:
        An HTML string: one wrapping ``<div>`` containing a ``<span>`` per
        non-blank token, with the tag exposed through the ``title``
        attribute (shown as a tooltip by browsers).
    """
    # Imported as a bare function so no local name shadows the ``html`` module.
    from html import escape

    parts = ['<div style="line-height: 2.5; padding: 15px; background-color: #f5f5f5; border-radius: 5px; margin-bottom: 20px; overflow-wrap: break-word; word-wrap: break-word;">']

    for word, tag in pos_tags:
        # Whitespace-only tokens become a single space instead of an empty badge.
        if word.strip() == '':
            parts.append(' ')
            continue

        # Tags missing from the enclosing ``colors`` mapping fall back to dark gray.
        color = colors.get(tag, '#a9a9a9')

        # Token text and tag are escaped: tokens such as "<", "&" or '"'
        # would otherwise break the markup or inject HTML into the page.
        parts.append(f'<span style="background-color: {color}; color: white; padding: 2px 4px; margin: 2px; border-radius: 4px; display: inline-block;" title="{escape(tag)}">{escape(word)}</span>')

    parts.append('</div>')
    # Joining once avoids repeated string concatenation inside the loop.
    return ''.join(parts)
|
| 131 |
+
|
| 132 |
+
# Display NLTK and spaCy in a row, one after another
|
| 133 |
+
output_html.append('<div class="row">')
|
| 134 |
+
|
| 135 |
+
# NLTK Section
|
| 136 |
+
output_html.append('<div class="col-md-6">')
|
| 137 |
+
output_html.append('<div class="card">')
|
| 138 |
+
output_html.append('<div class="card-header">')
|
| 139 |
+
output_html.append('<h4 class="mb-0 text-primary">NLTK (Penn Treebank)</h4>')
|
| 140 |
+
output_html.append('</div>')
|
| 141 |
+
output_html.append('<div class="card-body">')
|
| 142 |
+
output_html.append(generate_tagged_html(nltk_pos, "Penn Treebank"))
|
| 143 |
+
output_html.append('</div>')
|
| 144 |
+
output_html.append('</div>')
|
| 145 |
+
output_html.append('</div>')
|
| 146 |
+
|
| 147 |
+
# spaCy Section
|
| 148 |
+
output_html.append('<div class="col-md-6">')
|
| 149 |
+
output_html.append('<div class="card">')
|
| 150 |
+
output_html.append('<div class="card-header">')
|
| 151 |
+
output_html.append('<h4 class="mb-0 text-primary">spaCy (Universal)</h4>')
|
| 152 |
+
output_html.append('</div>')
|
| 153 |
+
output_html.append('<div class="card-body">')
|
| 154 |
+
output_html.append(generate_tagged_html(spacy_pos, "Universal"))
|
| 155 |
+
output_html.append('</div>')
|
| 156 |
+
output_html.append('</div>')
|
| 157 |
+
output_html.append('</div>')
|
| 158 |
+
|
| 159 |
+
output_html.append('</div>') # Close the row
|
| 160 |
+
|
| 161 |
+
# Syntactic Tree Visualization (Dependency Parse)
|
| 162 |
+
output_html.append('<h3 class="task-subheader">Sentence Structure Visualization</h3>')
|
| 163 |
+
|
| 164 |
+
# Split visualizations for each sentence to avoid overcrowding
|
| 165 |
+
sentences = list(doc.sents)
|
| 166 |
+
|
| 167 |
+
if not sentences:
|
| 168 |
+
output_html.append('<p>No complete sentences found for visualization.</p>')
|
| 169 |
+
else:
|
| 170 |
+
# Add description for dependency parsing
|
| 171 |
+
output_html.append("""
|
| 172 |
+
<div class="alert alert-light">
|
| 173 |
+
<p class="mb-0">
|
| 174 |
+
These diagrams show the grammatical structure of each sentence.
|
| 175 |
+
Words are connected with arrows that represent the syntactic relationships between them.
|
| 176 |
+
</p>
|
| 177 |
+
</div>
|
| 178 |
+
""")
|
| 179 |
+
|
| 180 |
+
# For each sentence, create a dependency visualization
|
| 181 |
+
for i, sent in enumerate(sentences):
|
| 182 |
+
if len(sent) > 50: # Skip very long sentences that might break the visualization
|
| 183 |
+
output_html.append(f'<div class="alert alert-warning"><strong>Note:</strong> Sentence {i+1} is too long ({len(sent)} tokens) for visualization.</div>')
|
| 184 |
+
continue
|
| 185 |
+
|
| 186 |
+
# Create the sentence dependency visualization using matplotlib
|
| 187 |
+
try:
|
| 188 |
+
# Try to generate the dependency visualization
|
| 189 |
+
fig, ax = plt.subplots(figsize=(10, 3), constrained_layout=True)
|
| 190 |
+
# Clear the axes before drawing
|
| 191 |
+
ax.clear()
|
| 192 |
+
|
| 193 |
+
# Draw connecting arcs between words
|
| 194 |
+
words = [token.text for token in sent]
|
| 195 |
+
positions = list(range(len(words)))
|
| 196 |
+
|
| 197 |
+
# Draw words
|
| 198 |
+
for i, word in enumerate(words):
|
| 199 |
+
ax.text(i, 0, word, ha='center')
|
| 200 |
+
|
| 201 |
+
# Draw arcs for dependencies
|
| 202 |
+
max_height = 1
|
| 203 |
+
for token in sent:
|
| 204 |
+
if token.dep_ and token.head.i != token.i: # Skip root dependency
|
| 205 |
+
# Determine start and end positions
|
| 206 |
+
start = token.i - sent.start
|
| 207 |
+
end = token.head.i - sent.start
|
| 208 |
+
|
| 209 |
+
# Make sure start is before end
|
| 210 |
+
if start > end:
|
| 211 |
+
start, end = end, start
|
| 212 |
+
|
| 213 |
+
# Determine the height of the arc (based on distance)
|
| 214 |
+
height = 0.2 + (end - start) * 0.1
|
| 215 |
+
max_height = max(max_height, height + 0.3)
|
| 216 |
+
|
| 217 |
+
# Draw the dependency arc
|
| 218 |
+
arc_xs = np.linspace(start, end, 50)
|
| 219 |
+
arc_ys = [height * np.sin((x - start) / (end - start) * np.pi) for x in arc_xs]
|
| 220 |
+
ax.plot(arc_xs, arc_ys, color=colors.get(token.pos_, 'gray'), lw=1.5)
|
| 221 |
+
|
| 222 |
+
# Add dependency label at the peak of the arc
|
| 223 |
+
mid_point = (start + end) / 2
|
| 224 |
+
label_height = height * 0.95 # Just below the peak
|
| 225 |
+
ax.text(mid_point, label_height, token.dep_, ha='center', fontsize=8,
|
| 226 |
+
bbox=dict(facecolor='white', alpha=0.8, edgecolor='none', pad=0.2))
|
| 227 |
+
|
| 228 |
+
# Set axis limits
|
| 229 |
+
ax.set_xlim([-0.5, len(words) - 0.5])
|
| 230 |
+
ax.set_ylim([0, max_height + 0.2])
|
| 231 |
+
|
| 232 |
+
# Remove axes and set title
|
| 233 |
+
ax.axis('off')
|
| 234 |
+
plt.tight_layout()
|
| 235 |
+
|
| 236 |
+
# Render the plot to HTML
|
| 237 |
+
output_html.append(fig_to_html(fig))
|
| 238 |
+
plt.close(fig)
|
| 239 |
+
|
| 240 |
+
except Exception as viz_err:
|
| 241 |
+
output_html.append(f'<div class="alert alert-danger"><strong>Error:</strong> Failed to visualize sentence {i+1}: {str(viz_err)}</div>')
|
| 242 |
+
|
| 243 |
+
# POS Distribution Analysis
|
| 244 |
+
output_html.append('<h3 class="task-subheader">POS Distribution Analysis</h3>')
|
| 245 |
+
|
| 246 |
+
# Calculate POS distribution using spaCy tags (more consistent)
|
| 247 |
+
pos_counts = Counter([token.pos_ for token in doc])
|
| 248 |
+
|
| 249 |
+
# Create bar chart for POS distribution
|
| 250 |
+
fig = plt.figure(figsize=(10, 6))
|
| 251 |
+
bars = plt.bar(pos_counts.keys(), pos_counts.values(), color=[colors.get(k, '#a9a9a9') for k in pos_counts.keys()])
|
| 252 |
+
plt.xlabel('Part of Speech')
|
| 253 |
+
plt.ylabel('Count')
|
| 254 |
+
plt.title('Part-of-Speech Distribution')
|
| 255 |
+
plt.xticks(rotation=45, ha='right')
|
| 256 |
+
|
| 257 |
+
# Add count labels on top of bars
|
| 258 |
+
for bar in bars:
|
| 259 |
+
height = bar.get_height()
|
| 260 |
+
plt.text(bar.get_x() + bar.get_width()/2., height + 0.1,
|
| 261 |
+
f'{int(height)}',
|
| 262 |
+
ha='center', va='bottom')
|
| 263 |
+
|
| 264 |
+
plt.tight_layout()
|
| 265 |
+
|
| 266 |
+
# Chart section
|
| 267 |
+
output_html.append('<section class="pos-chart-section">')
|
| 268 |
+
output_html.append('<div class="chart-container">')
|
| 269 |
+
output_html.append(fig_to_html(fig))
|
| 270 |
+
output_html.append('</div>')
|
| 271 |
+
output_html.append('</section>')
|
| 272 |
+
|
| 273 |
+
# Table section
|
| 274 |
+
output_html.append('<section class="pos-table-container">')
|
| 275 |
+
output_html.append('<div class="row">')
|
| 276 |
+
output_html.append('<div class="col-md-6">')
|
| 277 |
+
|
| 278 |
+
# Create a DataFrame for the POS counts
|
| 279 |
+
pos_df = pd.DataFrame({
|
| 280 |
+
'POS Tag': list(pos_counts.keys()),
|
| 281 |
+
'Count': list(pos_counts.values()),
|
| 282 |
+
'Percentage': [count/sum(pos_counts.values())*100 for count in pos_counts.values()]
|
| 283 |
+
})
|
| 284 |
+
pos_df = pos_df.sort_values('Count', ascending=False).reset_index(drop=True)
|
| 285 |
+
|
| 286 |
+
# Add percentage column
|
| 287 |
+
pos_df['Percentage'] = pos_df['Percentage'].map('{:.1f}%'.format)
|
| 288 |
+
|
| 289 |
+
output_html.append(df_to_html_table(pos_df))
|
| 290 |
+
output_html.append('</div>')
|
| 291 |
+
|
| 292 |
+
# Most common words section
|
| 293 |
+
output_html.append('<div class="col-md-6">')
|
| 294 |
+
output_html.append('<h4 class="mt-0">Most Common Words by POS</h4>')
|
| 295 |
+
|
| 296 |
+
# Get common words for major POS categories
|
| 297 |
+
major_pos = ['NOUN', 'VERB', 'ADJ', 'ADV']
|
| 298 |
+
common_words = {}
|
| 299 |
+
|
| 300 |
+
for pos in major_pos:
|
| 301 |
+
words = [token.text.lower() for token in doc if token.pos_ == pos]
|
| 302 |
+
if words:
|
| 303 |
+
word_counts = Counter(words).most_common(5)
|
| 304 |
+
common_words[pos] = word_counts
|
| 305 |
+
|
| 306 |
+
# Create HTML for common words
|
| 307 |
+
for pos, words in common_words.items():
|
| 308 |
+
if words:
|
| 309 |
+
output_html.append(f'<h5>{pos}</h5>')
|
| 310 |
+
output_html.append('<div class="d-flex flex-wrap gap-1 mb-2">')
|
| 311 |
+
|
| 312 |
+
for word, count in words:
|
| 313 |
+
# Get appropriate color
|
| 314 |
+
color = colors.get(pos, '#a9a9a9')
|
| 315 |
+
output_html.append(f'<span class="badge" style="background-color: {color}; color: white;">{word} ({count})</span>')
|
| 316 |
+
|
| 317 |
+
output_html.append('</div>')
|
| 318 |
+
|
| 319 |
+
output_html.append('</div>') # Close column 2
|
| 320 |
+
output_html.append('</div>') # Close row
|
| 321 |
+
output_html.append('</section>') # Close table section
|
| 322 |
+
|
| 323 |
+
# Add Sentence Grammatical Analysis
|
| 324 |
+
output_html.append('<h3 class="task-subheader">Grammatical Analysis</h3>')
|
| 325 |
+
output_html.append('<p>Detailed analysis of the grammatical components in each sentence.</p>')
|
| 326 |
+
|
| 327 |
+
# Create Grammatical Role Table
|
| 328 |
+
grammatical_roles = []
|
| 329 |
+
for token in doc:
|
| 330 |
+
if token.dep_ not in ["punct", "space"]: # Skip punctuation and spaces
|
| 331 |
+
grammatical_roles.append({
|
| 332 |
+
"Word": token.text,
|
| 333 |
+
"POS": token.pos_,
|
| 334 |
+
"Dependency": token.dep_,
|
| 335 |
+
"Head": token.head.text,
|
| 336 |
+
"Description": get_dependency_description(token.dep_)
|
| 337 |
+
})
|
| 338 |
+
|
| 339 |
+
# Convert to DataFrame
|
| 340 |
+
if grammatical_roles:
|
| 341 |
+
roles_df = pd.DataFrame(grammatical_roles)
|
| 342 |
+
output_html.append('<div class="table-responsive" style="max-height: 400px;">')
|
| 343 |
+
output_html.append(df_to_html_table(roles_df))
|
| 344 |
+
output_html.append('</div>')
|
| 345 |
+
else:
|
| 346 |
+
output_html.append('<p>No grammatical roles found to analyze.</p>')
|
| 347 |
+
|
| 348 |
+
# POS Tag Legend
|
| 349 |
+
output_html.append('<h3 class="task-subheader">POS Tag Legend</h3>')
|
| 350 |
+
|
| 351 |
+
# Create button toggle for different tagsets
|
| 352 |
+
output_html.append('<div class="card">')
|
| 353 |
+
output_html.append('<div class="card-header text-center">')
|
| 354 |
+
output_html.append('<div class="btn-group pos-legend-buttons" role="group" aria-label="POS Tag Types">')
|
| 355 |
+
output_html.append('<button type="button" class="btn btn-primary btn-lg active" id="universal-btn" onclick="showPOSTags(\'universal\')">Universal Tags</button>')
|
| 356 |
+
output_html.append('<button type="button" class="btn btn-outline-primary btn-lg" id="penn-btn" onclick="showPOSTags(\'penn\')">Penn Treebank Tags</button>')
|
| 357 |
+
output_html.append('</div>')
|
| 358 |
+
output_html.append('</div>')
|
| 359 |
+
output_html.append('<div class="card-body">')
|
| 360 |
+
output_html.append('<div id="pos-content">')
|
| 361 |
+
|
| 362 |
+
# Universal Tags
|
| 363 |
+
output_html.append('<div class="pos-tags-section" id="universal-tags" style="display: block;">')
|
| 364 |
+
|
| 365 |
+
universal_tags = {
|
| 366 |
+
'NOUN': 'Nouns - people, places, things',
|
| 367 |
+
'PROPN': 'Proper nouns - specific named entities',
|
| 368 |
+
'VERB': 'Verbs - actions, occurrences',
|
| 369 |
+
'ADJ': 'Adjectives - describe nouns',
|
| 370 |
+
'ADV': 'Adverbs - modify verbs, adjectives, or other adverbs',
|
| 371 |
+
'ADP': 'Adpositions - prepositions, postpositions',
|
| 372 |
+
'DET': 'Determiners - articles and other noun modifiers',
|
| 373 |
+
'PRON': 'Pronouns - words that substitute for nouns',
|
| 374 |
+
'CCONJ': 'Coordinating conjunctions - connect words, phrases, clauses',
|
| 375 |
+
'SCONJ': 'Subordinating conjunctions - connect clauses',
|
| 376 |
+
'NUM': 'Numerals - numbers',
|
| 377 |
+
'PART': 'Particles - function words associated with another word',
|
| 378 |
+
'INTJ': 'Interjections - exclamatory words',
|
| 379 |
+
'PUNCT': 'Punctuation',
|
| 380 |
+
'SYM': 'Symbols',
|
| 381 |
+
'X': 'Other - foreign words, typos, abbreviations',
|
| 382 |
+
'SPACE': 'Space - white spaces'
|
| 383 |
+
}
|
| 384 |
+
|
| 385 |
+
output_html.append('<div class="row">')
|
| 386 |
+
|
| 387 |
+
for tag, description in universal_tags.items():
|
| 388 |
+
if tag in colors:
|
| 389 |
+
output_html.append(f"""
|
| 390 |
+
<div class="col-md-6 mb-2">
|
| 391 |
+
<div class="d-flex align-items-center p-2 border rounded">
|
| 392 |
+
<span class="badge me-2" style="background-color: {colors[tag]}; color: white; min-width: 60px;">{tag}</span>
|
| 393 |
+
<span class="small">{description}</span>
|
| 394 |
+
</div>
|
| 395 |
+
</div>
|
| 396 |
+
""")
|
| 397 |
+
|
| 398 |
+
output_html.append('</div>') # Close row
|
| 399 |
+
output_html.append('</div>') # Close universal tags tab
|
| 400 |
+
|
| 401 |
+
# Penn Treebank Tags
|
| 402 |
+
output_html.append('<div class="pos-tags-section" id="penn-tags" style="display: none;">')
|
| 403 |
+
|
| 404 |
+
penn_tags = {
|
| 405 |
+
'CC': 'Coordinating conjunction',
|
| 406 |
+
'CD': 'Cardinal number',
|
| 407 |
+
'DT': 'Determiner',
|
| 408 |
+
'EX': 'Existential there',
|
| 409 |
+
'FW': 'Foreign word',
|
| 410 |
+
'IN': 'Preposition or subordinating conjunction',
|
| 411 |
+
'JJ': 'Adjective',
|
| 412 |
+
'JJR': 'Adjective, comparative',
|
| 413 |
+
'JJS': 'Adjective, superlative',
|
| 414 |
+
'LS': 'List item marker',
|
| 415 |
+
'MD': 'Modal',
|
| 416 |
+
'NN': 'Noun, singular or mass',
|
| 417 |
+
'NNS': 'Noun, plural',
|
| 418 |
+
'NNP': 'Proper noun, singular',
|
| 419 |
+
'NNPS': 'Proper noun, plural',
|
| 420 |
+
'PDT': 'Predeterminer',
|
| 421 |
+
'POS': 'Possessive ending',
|
| 422 |
+
'PRP': 'Personal pronoun',
|
| 423 |
+
'PRP$': 'Possessive pronoun',
|
| 424 |
+
'RB': 'Adverb',
|
| 425 |
+
'RBR': 'Adverb, comparative',
|
| 426 |
+
'RBS': 'Adverb, superlative',
|
| 427 |
+
'RP': 'Particle',
|
| 428 |
+
'SYM': 'Symbol',
|
| 429 |
+
'TO': 'to',
|
| 430 |
+
'UH': 'Interjection',
|
| 431 |
+
'VB': 'Verb, base form',
|
| 432 |
+
'VBD': 'Verb, past tense',
|
| 433 |
+
'VBG': 'Verb, gerund or present participle',
|
| 434 |
+
'VBN': 'Verb, past participle',
|
| 435 |
+
'VBP': 'Verb, non-3rd person singular present',
|
| 436 |
+
'VBZ': 'Verb, 3rd person singular present',
|
| 437 |
+
'WDT': 'Wh-determiner',
|
| 438 |
+
'WP': 'Wh-pronoun',
|
| 439 |
+
'WP$': 'Possessive wh-pronoun',
|
| 440 |
+
'WRB': 'Wh-adverb'
|
| 441 |
+
}
|
| 442 |
+
|
| 443 |
+
output_html.append('<div class="row">')
|
| 444 |
+
|
| 445 |
+
for tag, description in penn_tags.items():
|
| 446 |
+
if tag in colors:
|
| 447 |
+
output_html.append(f"""
|
| 448 |
+
<div class="col-md-6 mb-2">
|
| 449 |
+
<div class="d-flex align-items-center p-2 border rounded">
|
| 450 |
+
<span class="badge me-2" style="background-color: {colors[tag]}; color: white; min-width: 60px;">{tag}</span>
|
| 451 |
+
<span class="small">{description}</span>
|
| 452 |
+
</div>
|
| 453 |
+
</div>
|
| 454 |
+
""")
|
| 455 |
+
|
| 456 |
+
output_html.append('</div>') # Close row
|
| 457 |
+
output_html.append('</div>') # Close penn tags section
|
| 458 |
+
output_html.append('</div>') # Close pos content
|
| 459 |
+
output_html.append('</div>') # Close card body
|
| 460 |
+
output_html.append('</div>') # Close card
|
| 461 |
+
|
| 462 |
+
except Exception as e:
|
| 463 |
+
output_html.append(f"""
|
| 464 |
+
<div class="alert alert-danger">
|
| 465 |
+
<h3>Error</h3>
|
| 466 |
+
<p>Failed to process part-of-speech tagging: {str(e)}</p>
|
| 467 |
+
</div>
|
| 468 |
+
""")
|
| 469 |
+
|
| 470 |
+
# About POS Tagging section
|
| 471 |
+
output_html.append("""
|
| 472 |
+
<div class="card mt-4">
|
| 473 |
+
<div class="card-header">
|
| 474 |
+
<h4 class="mb-0">
|
| 475 |
+
<i class="fas fa-info-circle"></i>
|
| 476 |
+
About Part-of-Speech Tagging
|
| 477 |
+
</h4>
|
| 478 |
+
</div>
|
| 479 |
+
<div class="card-body">
|
| 480 |
+
<h5>What is Part-of-Speech Tagging?</h5>
|
| 481 |
+
|
| 482 |
+
<p>Part-of-Speech (POS) tagging is the process of assigning grammatical categories (such as noun, verb, adjective, etc.)
|
| 483 |
+
to each word in a text. It's one of the fundamental steps in natural language processing.</p>
|
| 484 |
+
|
| 485 |
+
<h5>Why is POS Tagging Important?</h5>
|
| 486 |
+
|
| 487 |
+
<ol>
|
| 488 |
+
<li><b>Disambiguation</b> - Words can have multiple meanings depending on their usage. POS tags help disambiguate words.</li>
|
| 489 |
+
<li><b>Syntactic Parsing</b> - POS tags form the basis for higher-level syntactic analysis.</li>
|
| 490 |
+
<li><b>Named Entity Recognition</b> - POS tags help in identifying entities.</li>
|
| 491 |
+
<li><b>Information Extraction</b> - They help in extracting specific information from text.</li>
|
| 492 |
+
<li><b>Text-to-Speech Systems</b> - For correct pronunciation based on word function.</li>
|
| 493 |
+
</ol>
|
| 494 |
+
|
| 495 |
+
<h5>Tagsets:</h5>
|
| 496 |
+
|
| 497 |
+
<ul>
|
| 498 |
+
<li><b>Universal Tagset</b> - A simpler, cross-linguistic set with about 17 tags.</li>
|
| 499 |
+
<li><b>Penn Treebank</b> - A more detailed English-specific tagset with about 36 tags.</li>
|
| 500 |
+
</ul>
|
| 501 |
+
</div>
|
| 502 |
+
</div>
|
| 503 |
+
""")
|
| 504 |
+
|
| 505 |
+
output_html.append('</div>') # Close result-area div
|
| 506 |
+
|
| 507 |
+
return '\n'.join(output_html)
|
| 508 |
+
|
| 509 |
+
def get_dependency_description(dep_tag):
    """Translate a dependency label into a short human-readable phrase.

    Args:
        dep_tag: Dependency label to look up (for example ``"nsubj"``).

    Returns:
        The matching description, or the generic ``"Dependency relation"``
        when the label is not in the table below.
    """
    label_pairs = (
        ("ROOT", "Root of the sentence"),
        ("nsubj", "Nominal subject"),
        ("obj", "Direct object"),
        ("dobj", "Direct object"),
        ("iobj", "Indirect object"),
        ("det", "Determiner"),
        ("amod", "Adjectival modifier"),
        ("advmod", "Adverbial modifier"),
        ("pobj", "Object of preposition"),
        ("prep", "Preposition"),
        ("aux", "Auxiliary verb"),
        ("cc", "Coordinating conjunction"),
        ("conj", "Conjunct"),
        ("mark", "Marker"),
        ("nmod", "Nominal modifier"),
        ("compound", "Compound word"),
        ("attr", "Attribute"),
        ("case", "Case marker"),
        ("neg", "Negation modifier"),
        ("punct", "Punctuation"),
        ("nsubjpass", "Passive nominal subject"),
        ("auxpass", "Passive auxiliary"),
        ("ccomp", "Clausal complement"),
        ("xcomp", "Open clausal complement"),
        ("acl", "Adjectival clause"),
        ("advcl", "Adverbial clause modifier"),
        ("relcl", "Relative clause modifier"),
    )
    known_labels = dict(label_pairs)

    # Lookup is case-sensitive; anything not listed gets the generic label.
    if dep_tag in known_labels:
        return known_labels[dep_tag]
    return "Dependency relation"
|
components/preprocessing.py
ADDED
|
@@ -0,0 +1,888 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import matplotlib
|
| 2 |
+
matplotlib.use('Agg') # Use non-interactive backend
|
| 3 |
+
import matplotlib.pyplot as plt
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import nltk
|
| 6 |
+
import re
|
| 7 |
+
import string
|
| 8 |
+
import base64
|
| 9 |
+
import io
|
| 10 |
+
from collections import Counter
|
| 11 |
+
from nltk.corpus import stopwords
|
| 12 |
+
from nltk.stem import WordNetLemmatizer, PorterStemmer
|
| 13 |
+
from wordcloud import WordCloud
|
| 14 |
+
from utils.model_loader import download_nltk_resources
|
| 15 |
+
from utils.helpers import fig_to_html, df_to_html_table
|
| 16 |
+
from nltk.util import ngrams
|
| 17 |
+
|
| 18 |
+
def preprocessing_handler(text_input):
|
| 19 |
+
"""Generate HTML for text preprocessing display"""
|
| 20 |
+
output_html = []
|
| 21 |
+
|
| 22 |
+
# Add result area container
|
| 23 |
+
output_html.append('<div class="result-area">')
|
| 24 |
+
output_html.append('<h2 class="task-header">Text Preprocessing</h2>')
|
| 25 |
+
|
| 26 |
+
output_html.append("""
|
| 27 |
+
<div class="alert alert-info">
|
| 28 |
+
<i class="fas fa-info-circle"></i>
|
| 29 |
+
Text preprocessing is the process of cleaning and transforming raw text into a format that can be easily analyzed by NLP models.
|
| 30 |
+
</div>
|
| 31 |
+
""")
|
| 32 |
+
|
| 33 |
+
# Model info
|
| 34 |
+
output_html.append("""
|
| 35 |
+
<div class="alert alert-info">
|
| 36 |
+
<h4><i class="fas fa-tools"></i> Tools & Libraries Used:</h4>
|
| 37 |
+
<ul>
|
| 38 |
+
<li><b>NLTK</b> - For stopwords, tokenization, stemming and lemmatization</li>
|
| 39 |
+
<li><b>Regular Expressions</b> - For pattern matching and text cleaning</li>
|
| 40 |
+
<li><b>WordCloud</b> - For visualizing word frequency</li>
|
| 41 |
+
</ul>
|
| 42 |
+
</div>
|
| 43 |
+
""")
|
| 44 |
+
|
| 45 |
+
# Ensure NLTK resources are downloaded
|
| 46 |
+
download_nltk_resources()
|
| 47 |
+
|
| 48 |
+
try:
|
| 49 |
+
# Original Text
|
| 50 |
+
output_html.append('<h3 class="task-subheader">Original Text</h3>')
|
| 51 |
+
output_html.append(f'<div class="card"><div class="card-body"><div class="text-content" style="word-wrap: break-word; word-break: break-word; overflow-wrap: break-word; max-height: 500px; overflow-y: auto; padding: 15px; background-color: #f8f9fa; border-radius: 5px; border: 1px solid #e9ecef; line-height: 1.6;">{text_input}</div></div></div>')
|
| 52 |
+
|
| 53 |
+
# Text statistics
|
| 54 |
+
word_count = len(text_input.split())
|
| 55 |
+
char_count = len(text_input)
|
| 56 |
+
sentence_count = len(nltk.sent_tokenize(text_input))
|
| 57 |
+
|
| 58 |
+
stats_html = f"""
|
| 59 |
+
<div class="stats-container">
|
| 60 |
+
<div class="row">
|
| 61 |
+
<div class="col-md-4">
|
| 62 |
+
<div class="card text-center stats-card">
|
| 63 |
+
<div class="card-body">
|
| 64 |
+
<h3 class="metric-blue">{word_count}</h3>
|
| 65 |
+
<p>Words</p>
|
| 66 |
+
</div>
|
| 67 |
+
</div>
|
| 68 |
+
</div>
|
| 69 |
+
<div class="col-md-4">
|
| 70 |
+
<div class="card text-center stats-card">
|
| 71 |
+
<div class="card-body">
|
| 72 |
+
<h3 class="metric-green">{char_count}</h3>
|
| 73 |
+
<p>Characters</p>
|
| 74 |
+
</div>
|
| 75 |
+
</div>
|
| 76 |
+
</div>
|
| 77 |
+
<div class="col-md-4">
|
| 78 |
+
<div class="card text-center stats-card">
|
| 79 |
+
<div class="card-body">
|
| 80 |
+
<h3 class="metric-orange">{sentence_count}</h3>
|
| 81 |
+
<p>Sentences</p>
|
| 82 |
+
</div>
|
| 83 |
+
</div>
|
| 84 |
+
</div>
|
| 85 |
+
</div>
|
| 86 |
+
</div>
|
| 87 |
+
"""
|
| 88 |
+
output_html.append(stats_html)
|
| 89 |
+
|
| 90 |
+
# NEW SECTION: Text Cleaning with Regular Expressions
|
| 91 |
+
output_html.append('<div class="section-divider"></div>')
|
| 92 |
+
output_html.append('<h3 class="task-subheader">Text Cleaning with Regular Expressions</h3>')
|
| 93 |
+
|
| 94 |
+
output_html.append("""
|
| 95 |
+
<div class="alert alert-light">
|
| 96 |
+
<p>Regular expressions (regex) provide powerful pattern matching capabilities for cleaning and processing text data.
|
| 97 |
+
Common text cleaning tasks include removing URLs, HTML tags, special characters, and normalizing text formats.</p>
|
| 98 |
+
</div>
|
| 99 |
+
""")
|
| 100 |
+
|
| 101 |
+
# Several regex cleaning examples
|
| 102 |
+
url_pattern = r'https?://\S+|www\.\S+'
|
| 103 |
+
html_pattern = r'<.*?>'
|
| 104 |
+
whitespace_pattern = r'\s+'
|
| 105 |
+
email_pattern = r'\S+@\S+'
|
| 106 |
+
|
| 107 |
+
# Original text for comparison
|
| 108 |
+
text_cleaned = text_input
|
| 109 |
+
|
| 110 |
+
# 1. Remove URLs
|
| 111 |
+
urls_cleaned = re.sub(url_pattern, '[URL]', text_cleaned)
|
| 112 |
+
|
| 113 |
+
# 2. Remove HTML tags
|
| 114 |
+
html_cleaned = re.sub(html_pattern, '', urls_cleaned)
|
| 115 |
+
|
| 116 |
+
# 3. Remove extra whitespace
|
| 117 |
+
whitespace_cleaned = re.sub(whitespace_pattern, ' ', html_cleaned).strip()
|
| 118 |
+
|
| 119 |
+
# 4. Remove email addresses
|
| 120 |
+
email_cleaned = re.sub(email_pattern, '[EMAIL]', whitespace_cleaned)
|
| 121 |
+
|
| 122 |
+
# 5. Fix common contractions
|
| 123 |
+
contractions = {
|
| 124 |
+
r"won't": "will not",
|
| 125 |
+
r"can't": "cannot",
|
| 126 |
+
r"n't": " not",
|
| 127 |
+
r"'re": " are",
|
| 128 |
+
r"'s": " is",
|
| 129 |
+
r"'d": " would",
|
| 130 |
+
r"'ll": " will",
|
| 131 |
+
r"'t": " not",
|
| 132 |
+
r"'ve": " have",
|
| 133 |
+
r"'m": " am"
|
| 134 |
+
}
|
| 135 |
+
|
| 136 |
+
contraction_cleaned = email_cleaned
|
| 137 |
+
for pattern, replacement in contractions.items():
|
| 138 |
+
contraction_cleaned = re.sub(pattern, replacement, contraction_cleaned)
|
| 139 |
+
|
| 140 |
+
# Display the regex cleaning examples in a table
|
| 141 |
+
output_html.append("""
|
| 142 |
+
<h4>Regex Text Cleaning Operations</h4>
|
| 143 |
+
<div class="table-responsive">
|
| 144 |
+
<table class="table table-striped">
|
| 145 |
+
<thead class="table-primary">
|
| 146 |
+
<tr>
|
| 147 |
+
<th>Operation</th>
|
| 148 |
+
<th>Regex Pattern</th>
|
| 149 |
+
<th>Description</th>
|
| 150 |
+
</tr>
|
| 151 |
+
</thead>
|
| 152 |
+
<tbody>
|
| 153 |
+
<tr>
|
| 154 |
+
<td>URL Removal</td>
|
| 155 |
+
<td><code>https?://\\S+|www\\.\\S+</code></td>
|
| 156 |
+
<td>Removes or replaces web URLs in text</td>
|
| 157 |
+
</tr>
|
| 158 |
+
<tr>
|
| 159 |
+
<td>HTML Tag Removal</td>
|
| 160 |
+
<td><code><.*?></code></td>
|
| 161 |
+
<td>Strips HTML/XML markup tags</td>
|
| 162 |
+
</tr>
|
| 163 |
+
<tr>
|
| 164 |
+
<td>Whitespace Normalization</td>
|
| 165 |
+
<td><code>\\s+</code></td>
|
| 166 |
+
<td>Replaces multiple spaces, tabs, and newlines with a single space</td>
|
| 167 |
+
</tr>
|
| 168 |
+
<tr>
|
| 169 |
+
<td>Email Anonymization</td>
|
| 170 |
+
<td><code>\\S+@\\S+</code></td>
|
| 171 |
+
<td>Redacts email addresses for privacy</td>
|
| 172 |
+
</tr>
|
| 173 |
+
<tr>
|
| 174 |
+
<td>Contraction Expansion</td>
|
| 175 |
+
<td><code>Multiple patterns</code></td>
|
| 176 |
+
<td>Expands contractions like "don't" to "do not"</td>
|
| 177 |
+
</tr>
|
| 178 |
+
</tbody>
|
| 179 |
+
</table>
|
| 180 |
+
</div>
|
| 181 |
+
""")
|
| 182 |
+
|
| 183 |
+
# Example of cleaned text
|
| 184 |
+
output_html.append("""
|
| 185 |
+
<h4>Example of Text After Regex Cleaning</h4>
|
| 186 |
+
<div class="row">
|
| 187 |
+
<div class="col-md-6">
|
| 188 |
+
<div class="card">
|
| 189 |
+
<div class="card-header">
|
| 190 |
+
<h5 class="mb-0">Before Cleaning</h5>
|
| 191 |
+
</div>
|
| 192 |
+
<div class="card-body">
|
| 193 |
+
<div class="text-content" style="word-wrap: break-word; word-break: break-word; overflow-wrap: break-word; max-height: 400px; overflow-y: auto; padding: 15px; background-color: #f8f9fa; border-radius: 5px; border: 1px solid #e9ecef; line-height: 1.6;">""")
|
| 194 |
+
output_html.append(f"{text_input}")
|
| 195 |
+
output_html.append("""</div>
|
| 196 |
+
</div>
|
| 197 |
+
</div>
|
| 198 |
+
</div>
|
| 199 |
+
<div class="col-md-6">
|
| 200 |
+
<div class="card">
|
| 201 |
+
<div class="card-header">
|
| 202 |
+
<h5 class="mb-0">After Regex Cleaning</h5>
|
| 203 |
+
</div>
|
| 204 |
+
<div class="card-body">
|
| 205 |
+
<div class="text-content" style="word-wrap: break-word; word-break: break-word; overflow-wrap: break-word; max-height: 400px; overflow-y: auto; padding: 15px; background-color: #f8f9fa; border-radius: 5px; border: 1px solid #e9ecef; line-height: 1.6;">""")
|
| 206 |
+
output_html.append(f"{contraction_cleaned}")
|
| 207 |
+
output_html.append("""</div>
|
| 208 |
+
</div>
|
| 209 |
+
</div>
|
| 210 |
+
</div>
|
| 211 |
+
</div>
|
| 212 |
+
""")
|
| 213 |
+
|
| 214 |
+
output_html.append("""
|
| 215 |
+
<div class="alert alert-success">
|
| 216 |
+
<h4><i class="fas fa-lightbulb"></i> Why Use Regex for Text Cleaning?</h4>
|
| 217 |
+
<ul>
|
| 218 |
+
<li><b>Precision:</b> Regular expressions allow for precise pattern matching</li>
|
| 219 |
+
<li><b>Flexibility:</b> Can be customized for domain-specific cleaning needs</li>
|
| 220 |
+
<li><b>Efficiency:</b> Processes text in a single pass for better performance</li>
|
| 221 |
+
<li><b>Standardization:</b> Creates consistent formatting across documents</li>
|
| 222 |
+
</ul>
|
| 223 |
+
</div>
|
| 224 |
+
""")
|
| 225 |
+
|
| 226 |
+
# Word length distribution
|
| 227 |
+
word_lengths = [len(word) for word in text_input.split()]
|
| 228 |
+
fig = plt.figure(figsize=(10, 4))
|
| 229 |
+
plt.hist(word_lengths, bins=range(1, max(word_lengths) + 2), alpha=0.7, color='#1976D2')
|
| 230 |
+
plt.xlabel('Word Length')
|
| 231 |
+
plt.ylabel('Frequency')
|
| 232 |
+
plt.title('Word Length Distribution')
|
| 233 |
+
plt.grid(alpha=0.3)
|
| 234 |
+
plt.tight_layout()
|
| 235 |
+
|
| 236 |
+
output_html.append('<div class="section-divider"></div>')
|
| 237 |
+
output_html.append('<h3 class="task-subheader">Word Length Distribution</h3>')
|
| 238 |
+
output_html.append(fig_to_html(fig))
|
| 239 |
+
|
| 240 |
+
# Case Normalization
|
| 241 |
+
output_html.append('<div class="section-divider"></div>')
|
| 242 |
+
output_html.append('<h3 class="task-subheader">Case Normalization</h3>')
|
| 243 |
+
|
| 244 |
+
lowercase_text = text_input.lower()
|
| 245 |
+
uppercase_text = text_input.upper()
|
| 246 |
+
|
| 247 |
+
case_html = f"""
|
| 248 |
+
<div class="row">
|
| 249 |
+
<div class="col-md-4">
|
| 250 |
+
<div class="card">
|
| 251 |
+
<div class="card-header">
|
| 252 |
+
<h5 class="mb-0">Original Text</h5>
|
| 253 |
+
</div>
|
| 254 |
+
<div class="card-body">
|
| 255 |
+
<div class="text-content" style="word-wrap: break-word; word-break: break-word; overflow-wrap: break-word; max-height: 400px; overflow-y: auto; padding: 15px; background-color: #f8f9fa; border-radius: 5px; border: 1px solid #e9ecef; line-height: 1.6;">{text_input}</div>
|
| 256 |
+
</div>
|
| 257 |
+
</div>
|
| 258 |
+
</div>
|
| 259 |
+
<div class="col-md-4">
|
| 260 |
+
<div class="card">
|
| 261 |
+
<div class="card-header">
|
| 262 |
+
<h5 class="mb-0">Lowercase Text</h5>
|
| 263 |
+
</div>
|
| 264 |
+
<div class="card-body">
|
| 265 |
+
<div class="text-content" style="word-wrap: break-word; word-break: break-word; overflow-wrap: break-word; max-height: 400px; overflow-y: auto; padding: 15px; background-color: #f8f9fa; border-radius: 5px; border: 1px solid #e9ecef; line-height: 1.6;">{lowercase_text}</div>
|
| 266 |
+
</div>
|
| 267 |
+
</div>
|
| 268 |
+
</div>
|
| 269 |
+
<div class="col-md-4">
|
| 270 |
+
<div class="card">
|
| 271 |
+
<div class="card-header">
|
| 272 |
+
<h5 class="mb-0">Uppercase Text</h5>
|
| 273 |
+
</div>
|
| 274 |
+
<div class="card-body">
|
| 275 |
+
<div class="text-content" style="word-wrap: break-word; word-break: break-word; overflow-wrap: break-word; max-height: 400px; overflow-y: auto; padding: 15px; background-color: #f8f9fa; border-radius: 5px; border: 1px solid #e9ecef; line-height: 1.6;">{uppercase_text}</div>
|
| 276 |
+
</div>
|
| 277 |
+
</div>
|
| 278 |
+
</div>
|
| 279 |
+
</div>
|
| 280 |
+
"""
|
| 281 |
+
output_html.append(case_html)
|
| 282 |
+
|
| 283 |
+
# Remove Punctuation & Special Characters
|
| 284 |
+
output_html.append('<div class="section-divider"></div>')
|
| 285 |
+
output_html.append('<h3 class="task-subheader">Punctuation & Special Characters Removal</h3>')
|
| 286 |
+
|
| 287 |
+
# Count original punctuation
|
| 288 |
+
punc_count = sum([1 for char in text_input if char in string.punctuation])
|
| 289 |
+
|
| 290 |
+
# Remove punctuation
|
| 291 |
+
no_punct_text = re.sub(r'[^\w\s]', '', text_input)
|
| 292 |
+
|
| 293 |
+
punct_html = f"""
|
| 294 |
+
<div class="row">
|
| 295 |
+
<div class="col-md-6">
|
| 296 |
+
<div class="card">
|
| 297 |
+
<div class="card-header">
|
| 298 |
+
<h5 class="mb-0">Original Text</h5>
|
| 299 |
+
</div>
|
| 300 |
+
<div class="card-body">
|
| 301 |
+
<div class="text-content" style="word-wrap: break-word; word-break: break-word; overflow-wrap: break-word; max-height: 400px; overflow-y: auto; padding: 15px; background-color: #f8f9fa; border-radius: 5px; border: 1px solid #e9ecef; line-height: 1.6;">{text_input}</div>
|
| 302 |
+
<small class="text-muted">Contains {punc_count} punctuation marks</small>
|
| 303 |
+
</div>
|
| 304 |
+
</div>
|
| 305 |
+
</div>
|
| 306 |
+
<div class="col-md-6">
|
| 307 |
+
<div class="card">
|
| 308 |
+
<div class="card-header">
|
| 309 |
+
<h5 class="mb-0">Without Punctuation</h5>
|
| 310 |
+
</div>
|
| 311 |
+
<div class="card-body">
|
| 312 |
+
<div class="text-content" style="word-wrap: break-word; word-break: break-word; overflow-wrap: break-word; max-height: 400px; overflow-y: auto; padding: 15px; background-color: #f8f9fa; border-radius: 5px; border: 1px solid #e9ecef; line-height: 1.6;">{no_punct_text}</div>
|
| 313 |
+
<small class="text-muted">Removed {punc_count} punctuation marks</small>
|
| 314 |
+
</div>
|
| 315 |
+
</div>
|
| 316 |
+
</div>
|
| 317 |
+
</div>
|
| 318 |
+
"""
|
| 319 |
+
output_html.append(punct_html)
|
| 320 |
+
|
| 321 |
+
# Show removed punctuation
|
| 322 |
+
punct_chars = [char for char in text_input if char in string.punctuation]
|
| 323 |
+
punct_freq = Counter(punct_chars)
|
| 324 |
+
|
| 325 |
+
if punct_freq:
|
| 326 |
+
output_html.append('<h4>Punctuation Distribution</h4>')
|
| 327 |
+
|
| 328 |
+
fig = plt.figure(figsize=(10, 4))
|
| 329 |
+
plt.bar(punct_freq.keys(), punct_freq.values(), color='#1976D2')
|
| 330 |
+
plt.xlabel('Punctuation')
|
| 331 |
+
plt.ylabel('Frequency')
|
| 332 |
+
plt.title('Punctuation Distribution')
|
| 333 |
+
plt.tight_layout()
|
| 334 |
+
|
| 335 |
+
output_html.append(fig_to_html(fig))
|
| 336 |
+
|
| 337 |
+
# Tokenization
|
| 338 |
+
output_html.append('<div class="section-divider"></div>')
|
| 339 |
+
output_html.append('<h3 class="task-subheader">Tokenization</h3>')
|
| 340 |
+
|
| 341 |
+
# Word tokenization
|
| 342 |
+
words = nltk.word_tokenize(text_input)
|
| 343 |
+
|
| 344 |
+
# Create a multi-column layout for word tokens
|
| 345 |
+
output_html.append('<h4>Word Tokens</h4>')
|
| 346 |
+
output_html.append(f'<p>Total tokens: {len(words)} (showing first 50)</p>')
|
| 347 |
+
|
| 348 |
+
# Create a multi-column table layout
|
| 349 |
+
tokens_html = """
|
| 350 |
+
<div class="table-responsive">
|
| 351 |
+
<table class="table table-striped table-hover" style="table-layout: fixed;">
|
| 352 |
+
<thead class="table-primary">
|
| 353 |
+
<tr>
|
| 354 |
+
<th style="width: 8%;">#</th>
|
| 355 |
+
<th style="width: 25%;">Token</th>
|
| 356 |
+
<th style="width: 12%;">Length</th>
|
| 357 |
+
<th style="width: 8%;">#</th>
|
| 358 |
+
<th style="width: 25%;">Token</th>
|
| 359 |
+
<th style="width: 12%;">Length</th>
|
| 360 |
+
<th style="width: 8%;">#</th>
|
| 361 |
+
<th style="width: 25%;">Token</th>
|
| 362 |
+
<th style="width: 12%;">Length</th>
|
| 363 |
+
</tr>
|
| 364 |
+
</thead>
|
| 365 |
+
<tbody>
|
| 366 |
+
"""
|
| 367 |
+
|
| 368 |
+
# Create rows with 3 tokens per row
|
| 369 |
+
for i in range(0, min(50, len(words)), 3):
|
| 370 |
+
tokens_html += "<tr>"
|
| 371 |
+
for j in range(3):
|
| 372 |
+
if i + j < min(50, len(words)):
|
| 373 |
+
token = words[i + j]
|
| 374 |
+
tokens_html += f'<td>{i + j + 1}</td><td><code>{token}</code></td><td><span class="badge bg-secondary">{len(token)}</span></td>'
|
| 375 |
+
else:
|
| 376 |
+
tokens_html += '<td></td><td></td><td></td>'
|
| 377 |
+
tokens_html += "</tr>"
|
| 378 |
+
|
| 379 |
+
tokens_html += """
|
| 380 |
+
</tbody>
|
| 381 |
+
</table>
|
| 382 |
+
</div>
|
| 383 |
+
"""
|
| 384 |
+
|
| 385 |
+
output_html.append(tokens_html)
|
| 386 |
+
|
| 387 |
+
# Sentence tokenization
|
| 388 |
+
sentences = nltk.sent_tokenize(text_input)
|
| 389 |
+
|
| 390 |
+
output_html.append('<h4>Sentence Tokens</h4>')
|
| 391 |
+
output_html.append(f'<p>Total sentences: {len(sentences)}</p>')
|
| 392 |
+
|
| 393 |
+
for i, sentence in enumerate(sentences[:5]):
|
| 394 |
+
output_html.append(f'<div class="card mb-2"><div class="card-body"><strong>{i+1}.</strong> {sentence}</div></div>')
|
| 395 |
+
|
| 396 |
+
if len(sentences) > 5:
|
| 397 |
+
output_html.append(f'<p class="text-muted">... and {len(sentences) - 5} more sentences.</p>')
|
| 398 |
+
|
| 399 |
+
# Stopwords Removal
|
| 400 |
+
output_html.append('<div class="section-divider"></div>')
|
| 401 |
+
output_html.append('<h3 class="task-subheader">Stopwords Removal</h3>')
|
| 402 |
+
|
| 403 |
+
stop_words = set(stopwords.words('english'))
|
| 404 |
+
filtered_words = [word for word in words if word.lower() not in stop_words]
|
| 405 |
+
|
| 406 |
+
# Count stopwords
|
| 407 |
+
stopword_count = len(words) - len(filtered_words)
|
| 408 |
+
stopword_percentage = (stopword_count / len(words)) * 100 if words else 0
|
| 409 |
+
|
| 410 |
+
output_html.append(f"""
|
| 411 |
+
<div class="row mb-3">
|
| 412 |
+
<div class="col-md-4">
|
| 413 |
+
<div class="card text-center">
|
| 414 |
+
<div class="card-body">
|
| 415 |
+
<h5>Original Words</h5>
|
| 416 |
+
<h3 class="text-primary">{len(words)}</h3>
|
| 417 |
+
</div>
|
| 418 |
+
</div>
|
| 419 |
+
</div>
|
| 420 |
+
<div class="col-md-4">
|
| 421 |
+
<div class="card text-center">
|
| 422 |
+
<div class="card-body">
|
| 423 |
+
<h5>After Stopword Removal</h5>
|
| 424 |
+
<h3 class="text-success">{len(filtered_words)}</h3>
|
| 425 |
+
</div>
|
| 426 |
+
</div>
|
| 427 |
+
</div>
|
| 428 |
+
<div class="col-md-4">
|
| 429 |
+
<div class="card text-center">
|
| 430 |
+
<div class="card-body">
|
| 431 |
+
<h5>Stopwords Removed</h5>
|
| 432 |
+
<h3 class="text-warning">{stopword_count} ({stopword_percentage:.1f}%)</h3>
|
| 433 |
+
</div>
|
| 434 |
+
</div>
|
| 435 |
+
</div>
|
| 436 |
+
</div>
|
| 437 |
+
""")
|
| 438 |
+
|
| 439 |
+
# Display common stopwords in the text
|
| 440 |
+
text_stopwords = [word for word in words if word.lower() in stop_words]
|
| 441 |
+
stop_freq = Counter(text_stopwords).most_common(10)
|
| 442 |
+
|
| 443 |
+
if stop_freq:
|
| 444 |
+
output_html.append('<h4>Most Common Stopwords in Text</h4>')
|
| 445 |
+
|
| 446 |
+
# Create a multi-column layout for stopwords
|
| 447 |
+
stopwords_html = """
|
| 448 |
+
<div class="table-responsive">
|
| 449 |
+
<table class="table table-striped table-hover" style="table-layout: fixed;">
|
| 450 |
+
<thead class="table-primary">
|
| 451 |
+
<tr>
|
| 452 |
+
<th style="width: 10%;">#</th>
|
| 453 |
+
<th style="width: 35%;">Stopword</th>
|
| 454 |
+
<th style="width: 15%;">Frequency</th>
|
| 455 |
+
<th style="width: 10%;">#</th>
|
| 456 |
+
<th style="width: 35%;">Stopword</th>
|
| 457 |
+
<th style="width: 15%;">Frequency</th>
|
| 458 |
+
</tr>
|
| 459 |
+
</thead>
|
| 460 |
+
<tbody>
|
| 461 |
+
"""
|
| 462 |
+
|
| 463 |
+
# Create rows with 2 stopwords per row
|
| 464 |
+
for i in range(0, len(stop_freq), 2):
|
| 465 |
+
stopwords_html += "<tr>"
|
| 466 |
+
for j in range(2):
|
| 467 |
+
if i + j < len(stop_freq):
|
| 468 |
+
stopword, freq = stop_freq[i + j]
|
| 469 |
+
stopwords_html += f'<td>{i + j + 1}</td><td><code>{stopword}</code></td><td><span class="badge bg-warning">{freq}</span></td>'
|
| 470 |
+
else:
|
| 471 |
+
stopwords_html += '<td></td><td></td><td></td>'
|
| 472 |
+
stopwords_html += "</tr>"
|
| 473 |
+
|
| 474 |
+
stopwords_html += """
|
| 475 |
+
</tbody>
|
| 476 |
+
</table>
|
| 477 |
+
</div>
|
| 478 |
+
"""
|
| 479 |
+
|
| 480 |
+
output_html.append(stopwords_html)
|
| 481 |
+
|
| 482 |
+
# Visualization of before and after
|
| 483 |
+
fig, ax = plt.subplots(1, 2, figsize=(12, 5))
|
| 484 |
+
|
| 485 |
+
# Before
|
| 486 |
+
ax[0].hist([len(word) for word in words], bins=range(1, 15), alpha=0.7, color='#1976D2')
|
| 487 |
+
ax[0].set_title('Word Length Before Stopword Removal')
|
| 488 |
+
ax[0].set_xlabel('Word Length')
|
| 489 |
+
ax[0].set_ylabel('Frequency')
|
| 490 |
+
|
| 491 |
+
# After
|
| 492 |
+
ax[1].hist([len(word) for word in filtered_words], bins=range(1, 15), alpha=0.7, color='#4CAF50')
|
| 493 |
+
ax[1].set_title('Word Length After Stopword Removal')
|
| 494 |
+
ax[1].set_xlabel('Word Length')
|
| 495 |
+
ax[1].set_ylabel('Frequency')
|
| 496 |
+
|
| 497 |
+
plt.tight_layout()
|
| 498 |
+
output_html.append(fig_to_html(fig))
|
| 499 |
+
|
| 500 |
+
# Stemming and Lemmatization
|
| 501 |
+
output_html.append('<div class="section-divider"></div>')
|
| 502 |
+
output_html.append('<h3 class="task-subheader">Stemming & Lemmatization</h3>')
|
| 503 |
+
|
| 504 |
+
# Apply stemming (Porter Stemmer)
|
| 505 |
+
stemmer = PorterStemmer()
|
| 506 |
+
stemmed_words = [stemmer.stem(word) for word in filtered_words[:100]] # Limit to first 100 words for performance
|
| 507 |
+
|
| 508 |
+
# Apply lemmatization
|
| 509 |
+
lemmatizer = WordNetLemmatizer()
|
| 510 |
+
lemmatized_words = [lemmatizer.lemmatize(word) for word in filtered_words[:100]] # Limit to first 100 words
|
| 511 |
+
|
| 512 |
+
# Create comparison DataFrame
|
| 513 |
+
comparison_data = []
|
| 514 |
+
for i in range(min(20, len(filtered_words))): # Show first 20 examples
|
| 515 |
+
if i < len(filtered_words) and filtered_words[i].isalpha(): # Only include alphabetic words
|
| 516 |
+
comparison_data.append({
|
| 517 |
+
'Original': filtered_words[i],
|
| 518 |
+
'Stemmed': stemmer.stem(filtered_words[i]),
|
| 519 |
+
'Lemmatized': lemmatizer.lemmatize(filtered_words[i])
|
| 520 |
+
})
|
| 521 |
+
|
| 522 |
+
comparison_df = pd.DataFrame(comparison_data)
|
| 523 |
+
|
| 524 |
+
output_html.append('<h4>Stemming vs. Lemmatization Comparison</h4>')
|
| 525 |
+
|
| 526 |
+
# Create a custom table for stemming vs lemmatization comparison
|
| 527 |
+
comparison_html = """
|
| 528 |
+
<div class="table-responsive">
|
| 529 |
+
<table class="table table-striped table-hover" style="table-layout: fixed;">
|
| 530 |
+
<thead class="table-primary">
|
| 531 |
+
<tr>
|
| 532 |
+
<th style="width: 30%;">Original</th>
|
| 533 |
+
<th style="width: 35%;">Stemmed</th>
|
| 534 |
+
<th style="width: 35%;">Lemmatized</th>
|
| 535 |
+
</tr>
|
| 536 |
+
</thead>
|
| 537 |
+
<tbody>
|
| 538 |
+
"""
|
| 539 |
+
|
| 540 |
+
# Add comparison data rows
|
| 541 |
+
for _, row in comparison_df.iterrows():
|
| 542 |
+
comparison_html += f"""
|
| 543 |
+
<tr>
|
| 544 |
+
<td><code>{row['Original']}</code></td>
|
| 545 |
+
<td><code>{row['Stemmed']}</code></td>
|
| 546 |
+
<td><code>{row['Lemmatized']}</code></td>
|
| 547 |
+
</tr>
|
| 548 |
+
"""
|
| 549 |
+
|
| 550 |
+
comparison_html += """
|
| 551 |
+
</tbody>
|
| 552 |
+
</table>
|
| 553 |
+
</div>
|
| 554 |
+
"""
|
| 555 |
+
|
| 556 |
+
output_html.append(comparison_html)
|
| 557 |
+
|
| 558 |
+
output_html.append("""
|
| 559 |
+
<div class="alert alert-success">
|
| 560 |
+
<h4><i class="fas fa-lightbulb"></i> Stemming vs. Lemmatization</h4>
|
| 561 |
+
<ul>
|
| 562 |
+
<li><b>Stemming</b> - Cuts off word endings based on common patterns, faster but less accurate</li>
|
| 563 |
+
<li><b>Lemmatization</b> - Uses vocabulary and morphological analysis, slower but produces actual words</li>
|
| 564 |
+
</ul>
|
| 565 |
+
</div>
|
| 566 |
+
""")
|
| 567 |
+
|
| 568 |
+
# NEW SECTION: N-gram Analysis
|
| 569 |
+
output_html.append('<div class="section-divider"></div>')
|
| 570 |
+
output_html.append('<h3 class="task-subheader">N-gram Analysis</h3>')
|
| 571 |
+
|
| 572 |
+
output_html.append("""
|
| 573 |
+
<div class="alert alert-light">
|
| 574 |
+
<p>N-grams are contiguous sequences of n items from text. In NLP, they are used to capture word patterns and relationships,
|
| 575 |
+
and are helpful for language modeling, prediction, and feature extraction.</p>
|
| 576 |
+
</div>
|
| 577 |
+
""")
|
| 578 |
+
|
| 579 |
+
# Process text for n-grams (use filtered_words to avoid stopwords)
|
| 580 |
+
# Convert to lowercase for consistency
|
| 581 |
+
clean_words = [word.lower() for word in filtered_words if word.isalnum()]
|
| 582 |
+
|
| 583 |
+
# Generate n-grams
|
| 584 |
+
bigrams_list = list(ngrams(clean_words, 2))
|
| 585 |
+
trigrams_list = list(ngrams(clean_words, 3))
|
| 586 |
+
|
| 587 |
+
# Count frequencies
|
| 588 |
+
bigram_freq = Counter(bigrams_list)
|
| 589 |
+
trigram_freq = Counter(trigrams_list)
|
| 590 |
+
|
| 591 |
+
# Get most common
|
| 592 |
+
common_bigrams = bigram_freq.most_common(15)
|
| 593 |
+
common_trigrams = trigram_freq.most_common(15)
|
| 594 |
+
|
| 595 |
+
# Format for display
|
| 596 |
+
bigram_labels = [' '.join(bg) for bg, _ in common_bigrams]
|
| 597 |
+
bigram_values = [count for _, count in common_bigrams]
|
| 598 |
+
|
| 599 |
+
trigram_labels = [' '.join(tg) for tg, _ in common_trigrams]
|
| 600 |
+
trigram_values = [count for _, count in common_trigrams]
|
| 601 |
+
|
| 602 |
+
# Create DataFrames for display
|
| 603 |
+
bigram_df = pd.DataFrame({
|
| 604 |
+
'Bigram': [' '.join(bg) for bg, _ in common_bigrams],
|
| 605 |
+
'Frequency': [count for _, count in common_bigrams]
|
| 606 |
+
})
|
| 607 |
+
|
| 608 |
+
trigram_df = pd.DataFrame({
|
| 609 |
+
'Trigram': [' '.join(tg) for tg, _ in common_trigrams],
|
| 610 |
+
'Frequency': [count for _, count in common_trigrams]
|
| 611 |
+
})
|
| 612 |
+
|
| 613 |
+
# Explanation of n-grams
|
| 614 |
+
output_html.append("""
|
| 615 |
+
<div class="alert alert-info">
|
| 616 |
+
<h4>What are N-grams?</h4>
|
| 617 |
+
<ul>
|
| 618 |
+
<li><b>Unigrams</b> - Single words (e.g., "climate")</li>
|
| 619 |
+
<li><b>Bigrams</b> - Two consecutive words (e.g., "climate change")</li>
|
| 620 |
+
<li><b>Trigrams</b> - Three consecutive words (e.g., "global climate change")</li>
|
| 621 |
+
</ul>
|
| 622 |
+
<p>N-grams capture contextual relationships between words and are valuable for many NLP tasks including language modeling,
|
| 623 |
+
machine translation, speech recognition, and text classification.</p>
|
| 624 |
+
</div>
|
| 625 |
+
""")
|
| 626 |
+
|
| 627 |
+
# Create visualizations for bigrams and trigrams
|
| 628 |
+
if bigram_labels and len(bigram_values) > 0:
|
| 629 |
+
# Bigram visualization
|
| 630 |
+
output_html.append('<h4>Most Common Bigrams</h4>')
|
| 631 |
+
|
| 632 |
+
fig = plt.figure(figsize=(10, 6))
|
| 633 |
+
plt.barh(range(len(bigram_labels)), bigram_values, align='center', color='#1976D2')
|
| 634 |
+
plt.yticks(range(len(bigram_labels)), bigram_labels)
|
| 635 |
+
plt.xlabel('Frequency')
|
| 636 |
+
plt.title('Most Common Bigrams')
|
| 637 |
+
plt.tight_layout()
|
| 638 |
+
|
| 639 |
+
output_html.append(fig_to_html(fig))
|
| 640 |
+
|
| 641 |
+
# Create a multi-column layout for bigrams
|
| 642 |
+
bigram_html = """
|
| 643 |
+
<div class="table-responsive">
|
| 644 |
+
<table class="table table-striped table-hover" style="table-layout: fixed;">
|
| 645 |
+
<thead class="table-primary">
|
| 646 |
+
<tr>
|
| 647 |
+
<th style="width: 10%;">#</th>
|
| 648 |
+
<th style="width: 35%;">Bigram</th>
|
| 649 |
+
<th style="width: 15%;">Freq</th>
|
| 650 |
+
<th style="width: 10%;">#</th>
|
| 651 |
+
<th style="width: 35%;">Bigram</th>
|
| 652 |
+
<th style="width: 15%;">Freq</th>
|
| 653 |
+
</tr>
|
| 654 |
+
</thead>
|
| 655 |
+
<tbody>
|
| 656 |
+
"""
|
| 657 |
+
|
| 658 |
+
# Create rows with 2 bigrams per row
|
| 659 |
+
for i in range(0, len(common_bigrams), 2):
|
| 660 |
+
bigram_html += "<tr>"
|
| 661 |
+
for j in range(2):
|
| 662 |
+
if i + j < len(common_bigrams):
|
| 663 |
+
bigram, freq = common_bigrams[i + j]
|
| 664 |
+
bigram_text = ' '.join(bigram)
|
| 665 |
+
bigram_html += f'<td>{i + j + 1}</td><td><code>{bigram_text}</code></td><td><span class="badge bg-info">{freq}</span></td>'
|
| 666 |
+
else:
|
| 667 |
+
bigram_html += '<td></td><td></td><td></td>'
|
| 668 |
+
bigram_html += "</tr>"
|
| 669 |
+
|
| 670 |
+
bigram_html += """
|
| 671 |
+
</tbody>
|
| 672 |
+
</table>
|
| 673 |
+
</div>
|
| 674 |
+
"""
|
| 675 |
+
|
| 676 |
+
output_html.append(bigram_html)
|
| 677 |
+
else:
|
| 678 |
+
output_html.append('<p class="text-muted">Not enough text to generate meaningful bigrams.</p>')
|
| 679 |
+
|
| 680 |
+
if trigram_labels and len(trigram_values) > 0:
|
| 681 |
+
# Trigram visualization
|
| 682 |
+
output_html.append('<h4>Most Common Trigrams</h4>')
|
| 683 |
+
|
| 684 |
+
fig = plt.figure(figsize=(10, 6))
|
| 685 |
+
plt.barh(range(len(trigram_labels)), trigram_values, align='center', color='#4CAF50')
|
| 686 |
+
plt.yticks(range(len(trigram_labels)), trigram_labels)
|
| 687 |
+
plt.xlabel('Frequency')
|
| 688 |
+
plt.title('Most Common Trigrams')
|
| 689 |
+
plt.tight_layout()
|
| 690 |
+
|
| 691 |
+
output_html.append(fig_to_html(fig))
|
| 692 |
+
|
| 693 |
+
# Create a multi-column layout for trigrams
|
| 694 |
+
trigram_html = """
|
| 695 |
+
<div class="table-responsive">
|
| 696 |
+
<table class="table table-striped table-hover" style="table-layout: fixed;">
|
| 697 |
+
<thead class="table-primary">
|
| 698 |
+
<tr>
|
| 699 |
+
<th style="width: 10%;">#</th>
|
| 700 |
+
<th style="width: 35%;">Trigram</th>
|
| 701 |
+
<th style="width: 15%;">Freq</th>
|
| 702 |
+
<th style="width: 10%;">#</th>
|
| 703 |
+
<th style="width: 35%;">Trigram</th>
|
| 704 |
+
<th style="width: 15%;">Freq</th>
|
| 705 |
+
</tr>
|
| 706 |
+
</thead>
|
| 707 |
+
<tbody>
|
| 708 |
+
"""
|
| 709 |
+
|
| 710 |
+
# Create rows with 2 trigrams per row
|
| 711 |
+
for i in range(0, len(common_trigrams), 2):
|
| 712 |
+
trigram_html += "<tr>"
|
| 713 |
+
for j in range(2):
|
| 714 |
+
if i + j < len(common_trigrams):
|
| 715 |
+
trigram, freq = common_trigrams[i + j]
|
| 716 |
+
trigram_text = ' '.join(trigram)
|
| 717 |
+
trigram_html += f'<td>{i + j + 1}</td><td><code>{trigram_text}</code></td><td><span class="badge bg-success">{freq}</span></td>'
|
| 718 |
+
else:
|
| 719 |
+
trigram_html += '<td></td><td></td><td></td>'
|
| 720 |
+
trigram_html += "</tr>"
|
| 721 |
+
|
| 722 |
+
trigram_html += """
|
| 723 |
+
</tbody>
|
| 724 |
+
</table>
|
| 725 |
+
</div>
|
| 726 |
+
"""
|
| 727 |
+
|
| 728 |
+
output_html.append(trigram_html)
|
| 729 |
+
else:
|
| 730 |
+
output_html.append('<p class="text-muted">Not enough text to generate meaningful trigrams.</p>')
|
| 731 |
+
|
| 732 |
+
# Applications of N-grams
|
| 733 |
+
output_html.append("""
|
| 734 |
+
<div class="alert alert-info">
|
| 735 |
+
<h4><i class="fas fa-lightbulb"></i> Applications of N-gram Analysis</h4>
|
| 736 |
+
<ul>
|
| 737 |
+
<li><b>Language Modeling</b> - Predicting the next word in a sequence</li>
|
| 738 |
+
<li><b>Machine Translation</b> - Improving translation quality</li>
|
| 739 |
+
<li><b>Text Classification</b> - Using n-grams as features</li>
|
| 740 |
+
<li><b>Spelling Correction</b> - Suggesting correct spellings</li>
|
| 741 |
+
<li><b>Information Retrieval</b> - Enhancing search results</li>
|
| 742 |
+
<li><b>Sentiment Analysis</b> - Capturing phrase-level sentiments</li>
|
| 743 |
+
</ul>
|
| 744 |
+
</div>
|
| 745 |
+
""")
|
| 746 |
+
|
| 747 |
+
# Word Cloud
|
| 748 |
+
output_html.append('<div class="section-divider"></div>')
|
| 749 |
+
output_html.append('<h3 class="task-subheader">Word Cloud</h3>')
|
| 750 |
+
|
| 751 |
+
try:
|
| 752 |
+
# Create word cloud from filtered words
|
| 753 |
+
wordcloud_text = ' '.join(filtered_words)
|
| 754 |
+
wordcloud = WordCloud(
|
| 755 |
+
width=800,
|
| 756 |
+
height=400,
|
| 757 |
+
background_color='white',
|
| 758 |
+
colormap='viridis',
|
| 759 |
+
max_words=100,
|
| 760 |
+
contour_width=1,
|
| 761 |
+
contour_color='#1976D2'
|
| 762 |
+
).generate(wordcloud_text)
|
| 763 |
+
|
| 764 |
+
# Display word cloud
|
| 765 |
+
fig = plt.figure(figsize=(12, 8))
|
| 766 |
+
plt.imshow(wordcloud, interpolation='bilinear')
|
| 767 |
+
plt.axis('off')
|
| 768 |
+
plt.tight_layout()
|
| 769 |
+
|
| 770 |
+
output_html.append(fig_to_html(fig))
|
| 771 |
+
|
| 772 |
+
except Exception as e:
|
| 773 |
+
output_html.append(f"<div class='alert alert-warning'>Failed to generate word cloud: {str(e)}</div>")
|
| 774 |
+
|
| 775 |
+
# Word Frequency
|
| 776 |
+
output_html.append('<div class="section-divider"></div>')
|
| 777 |
+
output_html.append('<h3 class="task-subheader">Word Frequency Analysis</h3>')
|
| 778 |
+
|
| 779 |
+
# Calculate word frequencies
|
| 780 |
+
word_freq = Counter(filtered_words)
|
| 781 |
+
most_common = word_freq.most_common(20)
|
| 782 |
+
|
| 783 |
+
# Create DataFrame
|
| 784 |
+
freq_df = pd.DataFrame(most_common, columns=['Word', 'Frequency'])
|
| 785 |
+
|
| 786 |
+
# Create horizontal bar chart
|
| 787 |
+
fig = plt.figure(figsize=(12, 16))
|
| 788 |
+
plt.barh(range(len(most_common)), [val[1] for val in most_common], align='center', color='#1976D2')
|
| 789 |
+
plt.yticks(range(len(most_common)), [val[0] for val in most_common])
|
| 790 |
+
plt.xlabel('Frequency')
|
| 791 |
+
plt.title('Top 20 Words')
|
| 792 |
+
plt.subplots_adjust(left=0.15, right=0.95, top=0.95, bottom=0.1)
|
| 793 |
+
plt.tight_layout(pad=3.0)
|
| 794 |
+
|
| 795 |
+
# Render chart
|
| 796 |
+
output_html.append('<section class="wf-chart-section">')
|
| 797 |
+
output_html.append('<div class="chart-container">')
|
| 798 |
+
output_html.append(fig_to_html(fig))
|
| 799 |
+
output_html.append('</div>')
|
| 800 |
+
output_html.append('</section>')
|
| 801 |
+
|
| 802 |
+
# Create a multi-column layout for word frequency
|
| 803 |
+
freq_html = """
|
| 804 |
+
<section class="wf-table-container">
|
| 805 |
+
<div class="table-responsive">
|
| 806 |
+
<table class="table table-striped table-hover" style="table-layout: fixed;">
|
| 807 |
+
<thead class="table-primary">
|
| 808 |
+
<tr>
|
| 809 |
+
<th style="width: 10%;">#</th>
|
| 810 |
+
<th style="width: 35%;">Word</th>
|
| 811 |
+
<th style="width: 15%;">Freq</th>
|
| 812 |
+
<th style="width: 10%;">#</th>
|
| 813 |
+
<th style="width: 35%;">Word</th>
|
| 814 |
+
<th style="width: 15%;">Freq</th>
|
| 815 |
+
</tr>
|
| 816 |
+
</thead>
|
| 817 |
+
<tbody>
|
| 818 |
+
"""
|
| 819 |
+
|
| 820 |
+
# Create rows with 2 words per row
|
| 821 |
+
for i in range(0, len(most_common), 2):
|
| 822 |
+
freq_html += "<tr>"
|
| 823 |
+
for j in range(2):
|
| 824 |
+
if i + j < len(most_common):
|
| 825 |
+
word, freq = most_common[i + j]
|
| 826 |
+
freq_html += f'<td>{i + j + 1}</td><td><code>{word}</code></td><td><span class="badge bg-primary">{freq}</span></td>'
|
| 827 |
+
else:
|
| 828 |
+
freq_html += '<td></td><td></td><td></td>'
|
| 829 |
+
freq_html += "</tr>"
|
| 830 |
+
|
| 831 |
+
freq_html += """
|
| 832 |
+
</tbody>
|
| 833 |
+
</table>
|
| 834 |
+
</div>
|
| 835 |
+
</section>
|
| 836 |
+
"""
|
| 837 |
+
|
| 838 |
+
output_html.append(freq_html)
|
| 839 |
+
|
| 840 |
+
except Exception as e:
|
| 841 |
+
output_html.append(f"""
|
| 842 |
+
<div class="alert alert-danger">
|
| 843 |
+
<h3>Error</h3>
|
| 844 |
+
<p>Failed to process text: {str(e)}</p>
|
| 845 |
+
</div>
|
| 846 |
+
""")
|
| 847 |
+
|
| 848 |
+
# About text preprocessing
|
| 849 |
+
output_html.append("""
|
| 850 |
+
<div class="card mt-4">
|
| 851 |
+
<div class="card-header">
|
| 852 |
+
<h4 class="mb-0">
|
| 853 |
+
<i class="fas fa-info-circle"></i>
|
| 854 |
+
About Text Preprocessing
|
| 855 |
+
</h4>
|
| 856 |
+
</div>
|
| 857 |
+
<div class="card-body">
|
| 858 |
+
<h5>What is Text Preprocessing?</h5>
|
| 859 |
+
|
| 860 |
+
<p>Text preprocessing is the first step in NLP pipelines that transforms raw text into a clean, structured format
|
| 861 |
+
suitable for analysis. It includes various techniques to standardize text and reduce noise.</p>
|
| 862 |
+
|
| 863 |
+
<h5>Common Preprocessing Steps:</h5>
|
| 864 |
+
|
| 865 |
+
<ul>
|
| 866 |
+
<li><b>Tokenization</b> - Splitting text into individual words or sentences</li>
|
| 867 |
+
<li><b>Normalization</b> - Converting text to lowercase, removing accents, etc.</li>
|
| 868 |
+
<li><b>Noise Removal</b> - Removing punctuation, special characters, HTML tags, etc.</li>
|
| 869 |
+
<li><b>Stopword Removal</b> - Filtering out common words that add little meaning</li>
|
| 870 |
+
<li><b>Stemming/Lemmatization</b> - Reducing words to their root forms</li>
|
| 871 |
+
<li><b>Spelling Correction</b> - Fixing typos and errors</li>
|
| 872 |
+
</ul>
|
| 873 |
+
|
| 874 |
+
<h5>Why Preprocess Text?</h5>
|
| 875 |
+
|
| 876 |
+
<ul>
|
| 877 |
+
<li>Reduces dimensionality and noise in the data</li>
|
| 878 |
+
<li>Standardizes text for consistent analysis</li>
|
| 879 |
+
<li>Improves performance of downstream NLP tasks</li>
|
| 880 |
+
<li>Makes text more suitable for machine learning models</li>
|
| 881 |
+
</ul>
|
| 882 |
+
</div>
|
| 883 |
+
</div>
|
| 884 |
+
""")
|
| 885 |
+
|
| 886 |
+
output_html.append('</div>') # Close result-area div
|
| 887 |
+
|
| 888 |
+
return '\n'.join(output_html)
|
components/question_answering.py
ADDED
|
@@ -0,0 +1,498 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import matplotlib.pyplot as plt
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
from transformers import pipeline
|
| 5 |
+
import nltk
|
| 6 |
+
from collections import Counter
|
| 7 |
+
import re
|
| 8 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 9 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
| 10 |
+
|
| 11 |
+
from utils.model_loader import load_qa_pipeline
|
| 12 |
+
from utils.helpers import fig_to_html, df_to_html_table
|
| 13 |
+
|
| 14 |
+
def question_answering_handler(context_text, question, answer_type="extractive", confidence_threshold=0.5):
    """Render an HTML report that answers ``question`` from ``context_text``.

    The report is built in stages: input statistics, a heuristic question
    classification, the transformer-extracted answer with a confidence chart,
    a TF-IDF sentence-retrieval answer, an answer-quality assessment and a
    list of suggested follow-up questions. The transformer, TF-IDF and
    follow-up stages each have their own ``try``/``except`` so a failure in
    one is reported inline and the remaining stages still run.

    Args:
        context_text: Passage to search for the answer.
        question: Question to answer.
        answer_type: Accepted for interface compatibility; not read here.
        confidence_threshold: x-position of the dashed reference line drawn
            on the confidence chart (the axis spans 0 to 1).

    Returns:
        str: HTML fragment wrapped in a ``result-area`` div.
    """
    # Local import: user-supplied text is interpolated into HTML below and
    # must be escaped so it cannot inject markup.
    from html import escape

    output_html = []

    # Add result area container
    output_html.append('<div class="result-area">')
    output_html.append('<h2 class="task-header">Question Answering System</h2>')

    output_html.append("""
    <div class="alert alert-info">
        <i class="fas fa-info-circle"></i>
        Question Answering (QA) systems extract or generate answers to questions based on a given context or knowledge base.
        This system can handle both extractive (finding answers in text) and abstractive (generating new answers) approaches.
    </div>
    """)

    # Model info
    output_html.append("""
    <div class="alert alert-info">
        <h4><i class="fas fa-tools"></i> Models & Techniques Used:</h4>
        <ul>
            <li><b>RoBERTa-SQuAD2</b> - Fine-tuned transformer model for extractive QA (F1: ~83.7 on SQuAD 2.0)</li>
            <li><b>BERT-based QA</b> - Bidirectional encoder representations for understanding context</li>
            <li><b>TF-IDF Similarity</b> - Traditional approach for finding relevant text spans</li>
            <li><b>Confidence Scoring</b> - Model uncertainty estimation for answer reliability</li>
        </ul>
    </div>
    """)

    # Initialised up front so later stages can tell whether the transformer
    # stage produced a result (instead of probing locals()).
    answer = None
    confidence = None

    try:
        # Validate inputs
        if not context_text or not context_text.strip():
            output_html.append('<div class="alert alert-warning">⚠️ Please provide a context text for question answering.</div>')
            output_html.append('</div>')
            return "\n".join(output_html)

        if not question or not question.strip():
            output_html.append('<div class="alert alert-warning">⚠️ Please provide a question to answer.</div>')
            output_html.append('</div>')
            return "\n".join(output_html)

        # Display input information
        output_html.append('<h3 class="task-subheader">Input Analysis</h3>')

        context_stats = {
            "Context Length": len(context_text),
            "Word Count": len(context_text.split()),
            "Sentence Count": len(nltk.sent_tokenize(context_text)),
            "Question Length": len(question),
            "Question Words": len(question.split())
        }

        stats_df = pd.DataFrame(list(context_stats.items()), columns=['Metric', 'Value'])
        output_html.append('<h4>Input Statistics</h4>')
        output_html.append(df_to_html_table(stats_df))

        # Question Analysis
        output_html.append('<h3 class="task-subheader">Question Analysis</h3>')

        # Classify question type
        question_lower = question.lower().strip()
        question_type = classify_question_type(question_lower)
        keywords_html = escape(', '.join(question_type['keywords']))

        output_html.append(f"""
        <div class="card">
            <div class="card-header">
                <h4 class="mb-0">Question Classification</h4>
            </div>
            <div class="card-body">
                <p><strong>Question:</strong> {escape(question)}</p>
                <p><strong>Type:</strong> {question_type['type']}</p>
                <p><strong>Expected Answer:</strong> {question_type['expected']}</p>
                <p><strong>Keywords:</strong> {keywords_html}</p>
            </div>
        </div>
        """)

        # Extractive Question Answering using Transformer
        output_html.append('<h3 class="task-subheader">Transformer-based Answer Extraction</h3>')

        try:
            qa_pipeline = load_qa_pipeline()

            # Get answer from the model
            result = qa_pipeline(question=question, context=context_text)

            answer = result['answer']
            confidence = result['score']
            start_pos = result['start']
            end_pos = result['end']

            # Create confidence visualization
            fig, ax = plt.subplots(1, 1, figsize=(8, 4))
            try:
                # Confidence bar, coloured by band
                bar_color = 'red' if confidence < 0.3 else 'orange' if confidence < 0.7 else 'green'
                bars = ax.barh(['Confidence'], [confidence], color=bar_color)
                ax.set_xlim(0, 1)
                ax.set_xlabel('Confidence Score')
                ax.set_title('Answer Confidence')

                # Add confidence threshold line
                ax.axvline(x=confidence_threshold, color='red', linestyle='--', label=f'Threshold ({confidence_threshold})')
                ax.legend()

                # Add value labels
                for bar in bars:
                    width = bar.get_width()
                    ax.text(width/2, bar.get_y() + bar.get_height()/2,
                            f'{width:.3f}', ha='center', va='center', fontweight='bold')

                plt.tight_layout()
                output_html.append(fig_to_html(fig))
            finally:
                # Always release this specific figure, even if rendering failed.
                plt.close(fig)

            # Display answer with context highlighting
            confidence_status = "High" if confidence >= 0.7 else "Medium" if confidence >= 0.3 else "Low"
            confidence_color = "#4CAF50" if confidence >= 0.7 else "#FF9800" if confidence >= 0.3 else "#F44336"

            output_html.append(f"""
            <div class="card" style="border-color: {confidence_color};">
                <div class="card-header" style="background-color: {confidence_color}22;">
                    <h4 class="mb-0">📝 Extracted Answer</h4>
                </div>
                <div class="card-body">
                    <div class="alert alert-light">
                        <strong>Answer:</strong> <span class="badge bg-warning text-dark fs-6">{escape(answer)}</span>
                    </div>
                    <p><strong>Confidence:</strong> {confidence:.3f} ({confidence_status})</p>
                    <p><strong>Position in Text:</strong> Characters {start_pos}-{end_pos}</p>
                </div>
            </div>
            """)

            # Show context with answer highlighted
            highlighted_context = highlight_answer_in_context(context_text, start_pos, end_pos)
            output_html.append(f"""
            <div class="card">
                <div class="card-header">
                    <h4 class="mb-0">📄 Context with Highlighted Answer</h4>
                </div>
                <div class="card-body">
                    <div style="line-height: 1.6; border: 1px solid #ddd; padding: 1rem; border-radius: 5px;">
                        {highlighted_context}
                    </div>
                </div>
            </div>
            """)

        except Exception as e:
            output_html.append(f'<div class="alert alert-danger">❌ Error in transformer QA: {escape(str(e))}</div>')

        # Alternative: TF-IDF based answer extraction
        output_html.append('<h3 class="task-subheader">TF-IDF Based Answer Extraction</h3>')

        try:
            tfidf_answer = extract_answer_tfidf(context_text, question)

            output_html.append(f"""
            <div class="alert alert-success">
                <h4>🔍 TF-IDF Based Answer</h4>
                <div class="alert alert-light">
                    <strong>Most Relevant Sentence:</strong> {escape(tfidf_answer['sentence'])}
                </div>
                <p><strong>Similarity Score:</strong> {tfidf_answer['score']:.3f}</p>
                <p><strong>Method:</strong> Cosine similarity between question and context sentences using TF-IDF vectors</p>
            </div>
            """)

        except Exception as e:
            output_html.append(f'<div class="alert alert-danger">❌ Error in TF-IDF QA: {escape(str(e))}</div>')

        # Answer Quality Assessment
        output_html.append('<h3 class="task-subheader">Answer Quality Assessment</h3>')

        # Only possible when the transformer stage got as far as a score.
        if confidence is not None:
            quality_metrics = assess_answer_quality(question, answer, confidence, context_text)

            # Create quality assessment visualization
            fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
            try:
                # Quality metrics bar chart
                categories = list(quality_metrics.keys())
                values = list(quality_metrics.values())

                ax1.bar(categories, values, color=['#4CAF50', '#2196F3', '#FF9800', '#9C27B0'])
                ax1.set_ylim(0, 1)
                ax1.set_title('Answer Quality Metrics')
                ax1.set_ylabel('Score')
                plt.setp(ax1.get_xticklabels(), rotation=45, ha='right')

                # Overall quality score (unweighted mean of the metrics)
                overall_score = sum(values) / len(values)
                quality_label = "Excellent" if overall_score >= 0.8 else "Good" if overall_score >= 0.6 else "Fair" if overall_score >= 0.4 else "Poor"

                ax2.pie([overall_score, 1-overall_score], labels=[f'{quality_label}\n({overall_score:.2f})', 'Room for Improvement'],
                        colors=['#4CAF50', '#E0E0E0'], startangle=90)
                ax2.set_title('Overall Answer Quality')

                plt.tight_layout()
                output_html.append(fig_to_html(fig))
            finally:
                plt.close(fig)

            # Quality metrics table
            quality_df = pd.DataFrame([
                {'Metric': 'Confidence', 'Score': f"{quality_metrics['Confidence']:.3f}", 'Description': 'Model confidence in the answer'},
                {'Metric': 'Relevance', 'Score': f"{quality_metrics['Relevance']:.3f}", 'Description': 'Semantic similarity to question'},
                {'Metric': 'Completeness', 'Score': f"{quality_metrics['Completeness']:.3f}", 'Description': 'Answer length appropriateness'},
                {'Metric': 'Context Match', 'Score': f"{quality_metrics['Context_Match']:.3f}", 'Description': 'How well answer fits context'}
            ])

            output_html.append('<h4>Quality Assessment Details</h4>')
            output_html.append(df_to_html_table(quality_df))

        # Question-Answer Pairs Suggestions
        output_html.append('<h3 class="task-subheader">Suggested Follow-up Questions</h3>')

        try:
            suggested_questions = generate_followup_questions(
                context_text, question, answer if answer is not None else ""
            )

            output_html.append('<div class="alert alert-warning">')
            output_html.append('<h4>💡 Follow-up Questions:</h4>')
            output_html.append('<ul>')
            for i, q in enumerate(suggested_questions, 1):
                # Suggestions may embed the extracted answer, so escape them.
                output_html.append(f'<li><strong>Q{i}:</strong> {escape(q)}</li>')
            output_html.append('</ul>')
            output_html.append('</div>')

        except Exception as e:
            output_html.append(f'<div class="alert alert-danger">❌ Error generating suggestions: {escape(str(e))}</div>')

    except Exception as e:
        output_html.append(f'<div class="alert alert-danger">❌ Unexpected error: {escape(str(e))}</div>')

    output_html.append('</div>')
    return "\n".join(output_html)
|
| 250 |
+
|
| 251 |
+
def classify_question_type(question):
    """Classify a question by its leading question word(s).

    Args:
        question: Question text; it is lower-cased and stripped here.

    Returns:
        dict with keys ``'type'`` (category label), ``'expected'``
        (description of the expected answer form) and ``'keywords'`` (up to
        five words from the question longer than two characters, excluding a
        few common function words). Questions that start with none of the
        known patterns are classified as ``'General'``.
    """
    question = question.lower().strip()

    # Question word patterns
    patterns = {
        'what': {'type': 'Definition/Fact', 'expected': 'Entity, concept, or description'},
        'who': {'type': 'Person', 'expected': 'Person name or group'},
        'when': {'type': 'Time', 'expected': 'Date, time, or temporal expression'},
        'where': {'type': 'Location', 'expected': 'Place, location, or spatial reference'},
        'why': {'type': 'Reason/Cause', 'expected': 'Explanation or causal relationship'},
        'how': {'type': 'Method/Process', 'expected': 'Process, method, or manner'},
        'which': {'type': 'Selection', 'expected': 'Specific choice from options'},
        'how much': {'type': 'Quantity', 'expected': 'Numerical amount or quantity'},
        'how many': {'type': 'Count', 'expected': 'Numerical count'},
        'is': {'type': 'Yes/No', 'expected': 'Boolean answer'},
        'are': {'type': 'Yes/No', 'expected': 'Boolean answer'},
        'can': {'type': 'Ability/Possibility', 'expected': 'Yes/No with explanation'},
        'will': {'type': 'Future/Prediction', 'expected': 'Future state or prediction'},
        'did': {'type': 'Past Action', 'expected': 'Yes/No about past events'}
    }

    # Extract keywords from question
    words = question.split()
    keywords = [word for word in words if len(word) > 2 and word not in ('the', 'and', 'but', 'for')]

    # Collapse runs of whitespace so two-word patterns match "how   many".
    normalized = ' '.join(words)

    # Try the longest patterns first so 'how much' / 'how many' are not
    # shadowed by 'how', and require a word boundary after the pattern so
    # 'is' does not match "island" nor 'can' match "canada".
    for pattern in sorted(patterns, key=len, reverse=True):
        if re.match(rf"{re.escape(pattern)}\b", normalized):
            info = patterns[pattern]
            return {
                'type': info['type'],
                'expected': info['expected'],
                'keywords': keywords[:5]  # Top 5 keywords
            }

    # Default classification
    return {
        'type': 'General',
        'expected': 'Text span or explanation',
        'keywords': keywords[:5]
    }
|
| 292 |
+
|
| 293 |
+
def extract_answer_tfidf(context, question):
    """Pick the context sentence most similar to the question under TF-IDF.

    The question and every sentence of ``context`` are vectorised together
    (English stop words removed, lower-cased) and the sentence whose vector
    has the highest cosine similarity to the question vector is returned.

    Args:
        context: Passage to search.
        question: Question text.

    Returns:
        dict with ``'sentence'`` (the best-matching sentence) and ``'score'``
        (its cosine similarity to the question). When the context yields no
        sentences, a placeholder sentence with score 0.0 is returned.
    """
    candidate_sentences = nltk.sent_tokenize(context)
    if not candidate_sentences:
        return {'sentence': 'No sentences found', 'score': 0.0}

    # Row 0 of the matrix is the question; rows 1.. are the sentences.
    tfidf = TfidfVectorizer(stop_words='english', lowercase=True)
    matrix = tfidf.fit_transform([question, *candidate_sentences])

    # One similarity value per sentence, in sentence order.
    scores = cosine_similarity(matrix[0:1], matrix[1:]).flatten()

    winner = np.argmax(scores)
    return {
        'sentence': candidate_sentences[winner],
        'score': scores[winner]
    }
|
| 323 |
+
|
| 324 |
+
def highlight_answer_in_context(context, start_pos, end_pos):
    """Return ``context`` as HTML with ``context[start_pos:end_pos]`` highlighted.

    The three pieces (before, answer, after) are HTML-escaped individually so
    that markup characters in the source text are shown literally rather than
    interpreted by the browser; only the ``<mark>`` wrapper added here is
    real markup.

    Args:
        context: Plain-text passage.
        start_pos: Start index of the answer span (inclusive).
        end_pos: End index of the answer span (exclusive).

    Returns:
        str: HTML-safe text with the answer span wrapped in a styled ``<mark>``.
    """
    # Local import keeps this block self-contained.
    from html import escape

    before = escape(context[:start_pos])
    answer = escape(context[start_pos:end_pos])
    after = escape(context[end_pos:])

    highlighted = f'{before}<mark style="background-color: #FFEB3B; padding: 2px 4px; border-radius: 3px; font-weight: bold;">{answer}</mark>{after}'

    return highlighted
|
| 333 |
+
|
| 334 |
+
def assess_answer_quality(question, answer, confidence, context):
    """Score an extracted answer with four simple heuristics.

    Args:
        question: Question text.
        answer: Extracted answer text (may be empty).
        confidence: Model score for the answer; passed through unchanged.
        context: Passage the answer was extracted from.

    Returns:
        dict, in this key order:
            ``'Confidence'``: the ``confidence`` argument.
            ``'Relevance'``: fraction of distinct question words (split on
            whitespace, lower-cased) that also occur in the answer.
            ``'Completeness'``: 0.0 for an empty answer, 0.6 for 1-2 words,
            1.0 for 3-20 words, 0.8 for longer answers.
            ``'Context_Match'``: 1.0 if the answer occurs in the context
            (case-insensitive), 0.5 if it does not, 0.0 for an empty answer.
    """
    metrics = {}

    # Confidence score (from model)
    metrics['Confidence'] = confidence

    # Relevance (simple keyword overlap)
    question_words = set(question.lower().split())
    answer_words = set(answer.lower().split())
    overlap = len(question_words & answer_words)
    metrics['Relevance'] = min(overlap / max(len(question_words), 1), 1.0)

    # Completeness (answer length appropriateness)
    answer_length = len(answer.split())
    if answer_length == 0:
        metrics['Completeness'] = 0.0
    elif answer_length < 3:
        metrics['Completeness'] = 0.6
    elif answer_length <= 20:
        metrics['Completeness'] = 1.0
    else:
        metrics['Completeness'] = 0.8  # Very long answers might be too verbose

    # Context match (how well the answer fits in context).
    # An empty string is a substring of everything, so a blank answer must be
    # handled explicitly or it would score a perfect match.
    if not answer.strip():
        metrics['Context_Match'] = 0.0
    elif answer.lower() in context.lower():
        metrics['Context_Match'] = 1.0
    else:
        metrics['Context_Match'] = 0.5

    return metrics
|
| 363 |
+
|
| 364 |
+
def generate_followup_questions(context, original_question, answer):
    """Suggest up to five follow-up questions.

    Candidates are ranked as follows and the first five are returned:

    1. a question about ``answer`` itself (skipped when the answer is blank,
       which would otherwise produce a dangling "about ?" prompt);
    2. when/where/why/how prompts for each of those words that does not
       appear in ``original_question``;
    3. generic prompts, used as filler.

    Placing the question-specific prompts ahead of the generic ones ensures
    they can actually be selected; appended after five fixed templates they
    would always be cut off by the five-item limit.

    Args:
        context: Accepted for interface compatibility; not read here.
        original_question: The question that was just asked.
        answer: The extracted answer, or ``""`` when none is available.

    Returns:
        list[str]: At most five suggested questions.
    """
    question_lower = original_question.lower()

    candidates = []

    # Answer-specific prompt, only when there is an answer to talk about.
    if answer and answer.strip():
        candidates.append(f"What else can you tell me about {answer}?")

    # Prompts for angles the original question did not already cover.
    angle_prompts = (
        ("when", "When did this happen?"),
        ("where", "Where did this take place?"),
        ("why", "Why is this significant?"),
        ("how", "How does this work?"),
    )
    candidates.extend(prompt for word, prompt in angle_prompts if word not in question_lower)

    # Generic prompts fill any remaining slots.
    candidates.extend([
        "Can you provide more details about this topic?",
        "What are the implications of this information?",
        "How does this relate to other concepts mentioned?",
        "What evidence supports this answer?",
    ])

    return candidates[:5]  # Return top 5 suggestions
|
| 396 |
+
|
| 397 |
+
def qa_api_handler(context, question):
    """Run the QA pipeline and return the outcome as a plain dict.

    Args:
        context: Passage to search for the answer.
        question: Question to answer.

    Returns:
        dict with keys ``answer``, ``confidence``, ``start_position``,
        ``end_position``, ``success`` and ``error``. On any exception the
        answer is empty, the numeric fields are zero, ``success`` is False
        and ``error`` holds the exception message.
    """
    try:
        prediction = load_qa_pipeline()(question=question, context=context)
        response = {
            "answer": prediction['answer'],
            "confidence": prediction['score'],
            "start_position": prediction['start'],
            "end_position": prediction['end'],
            "success": True,
            "error": None
        }
    except Exception as exc:
        # Never raise to the API layer; report the failure in the payload.
        response = {
            "answer": "",
            "confidence": 0.0,
            "start_position": 0,
            "end_position": 0,
            "success": False,
            "error": str(exc)
        }
    return response
|
| 420 |
+
|
| 421 |
+
def process_question_with_context(context_text, question):
    """Answer ``question`` from ``context_text`` and return data plus an HTML card.

    Args:
        context_text: Passage to search for the answer.
        question: Question to answer.

    Returns:
        dict. On success: ``success`` (True), ``answer``, ``confidence`` and
        ``html`` (a rendered result card). On failure, including blank
        inputs: ``success`` (False), ``error`` (message) and ``html`` (an
        alert box). The ``answer`` and ``error`` fields hold raw text; only
        the ``html`` field has user-derived text escaped.
    """
    # Local import: question/answer text is interpolated into HTML below and
    # must be escaped so it cannot inject markup.
    from html import escape

    if not context_text or not context_text.strip():
        return {
            "success": False,
            "error": "No context text provided",
            "html": '<div class="alert alert-warning">⚠️ No context text provided.</div>'
        }

    if not question or not question.strip():
        return {
            "success": False,
            "error": "No question provided",
            "html": '<div class="alert alert-warning">⚠️ Please enter a question.</div>'
        }

    try:
        qa_pipeline = load_qa_pipeline()
        result = qa_pipeline(question=question, context=context_text)

        answer = result['answer']
        confidence = result['score']
        start_pos = result['start']
        end_pos = result['end']

        # Determine confidence level
        confidence_status = "High" if confidence >= 0.7 else "Medium" if confidence >= 0.3 else "Low"
        confidence_color = "#4CAF50" if confidence >= 0.7 else "#FF9800" if confidence >= 0.3 else "#F44336"

        # Highlight answer in context
        highlighted_context = highlight_answer_in_context(context_text, start_pos, end_pos)

        # Create formatted HTML result
        html_result = f"""
        <div class="card">
            <div class="card-header">
                <h5 class="mb-0">📝 Answer Found!</h5>
            </div>
            <div class="card-body">
                <div class="alert alert-light">
                    <p><strong>Question:</strong> {escape(question)}</p>
                    <p><strong>Answer:</strong> <span class="badge bg-warning text-dark fs-6">{escape(answer)}</span></p>
                    <p><strong>Confidence:</strong> {confidence:.3f} ({confidence_status})</p>
                </div>

                <div class="alert alert-light">
                    <h6>📄 Context with Highlighted Answer:</h6>
                    <div style="line-height: 1.6; font-size: 0.9rem; max-height: 200px; overflow-y: auto;">
                        {highlighted_context}
                    </div>
                </div>

                <div class="alert alert-info">
                    <strong>Quality Assessment:</strong>
                    <ul class="mb-0">
                        <li>Confidence: {confidence_status} ({confidence:.1%})</li>
                        <li>Answer found at position: {start_pos}-{end_pos}</li>
                        <li>Answer length: {len(answer.split())} words</li>
                    </ul>
                </div>
            </div>
        </div>
        """

        return {
            "success": True,
            "answer": answer,
            "confidence": confidence,
            "html": html_result
        }

    except Exception as e:
        error_html = f'<div class="alert alert-danger">❌ Error processing question: {escape(str(e))}</div>'
        return {
            "success": False,
            "error": str(e),
            "html": error_html
        }
|
components/sentiment.py
ADDED
|
@@ -0,0 +1,549 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import matplotlib
|
| 2 |
+
matplotlib.use('Agg') # Use non-interactive backend
|
| 3 |
+
import matplotlib.pyplot as plt
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import numpy as np
|
| 6 |
+
from nltk.sentiment.vader import SentimentIntensityAnalyzer
|
| 7 |
+
import nltk
|
| 8 |
+
from collections import Counter
|
| 9 |
+
|
| 10 |
+
from utils.model_loader import load_sentiment_analyzer, load_emotion_classifier
|
| 11 |
+
from utils.helpers import fig_to_html, df_to_html_table
|
| 12 |
+
|
| 13 |
+
# Character budget applied to text before it is handed to the transformer
# models: a 512-token limit at a rough estimate of 4 characters per token.
# Defined once here so every section can use it, even when an earlier
# section failed before reaching its own setup code.
_MAX_MODEL_TOKENS = 512
_CHARS_PER_TOKEN = 4
_MAX_MODEL_CHARS = _MAX_MODEL_TOKENS * _CHARS_PER_TOKEN

_GREEN = "#4CAF50"
_AMBER = "#FFC107"
_RED = "#F44336"
_GREY = "#9E9E9E"

_INTRO_HTML = """
<div class="alert alert-info">
    <i class="fas fa-info-circle"></i>
    Sentiment analysis determines the emotional tone behind text to identify if it expresses positive, negative, or neutral sentiment.
</div>
"""

_MODELS_HTML = """
<div class="alert alert-info">
    <h4><i class="fas fa-tools"></i> Models Used:</h4>
    <ul>
        <li><b>NLTK VADER</b> - Rule-based sentiment analyzer specifically tuned for social media text</li>
        <li><b>DistilBERT</b> - Transformer model fine-tuned on SST-2 dataset, achieving ~91% accuracy</li>
        <li><b>RoBERTa Emotion</b> - Transformer model for multi-label emotion detection</li>
    </ul>
</div>
"""

_ABOUT_HTML = """
<div class="card mt-4">
    <div class="card-header">
        <h4 class="mb-0">
            <i class="fas fa-info-circle"></i>
            About Sentiment Analysis
        </h4>
    </div>
    <div class="card-body">
        <h5>What is Sentiment Analysis?</h5>

        <p>Sentiment Analysis (also known as opinion mining) is a natural language processing technique that identifies
        and extracts subjective information from text. It determines whether a piece of text expresses positive, negative,
        or neutral sentiment.</p>

        <h5>Common Approaches:</h5>

        <ol>
            <li><b>Lexicon-based</b> (like VADER) - Uses dictionaries of words with pre-assigned sentiment scores</li>
            <li><b>Machine learning</b> - Supervised techniques that learn from labeled data</li>
            <li><b>Deep learning</b> (like our Transformer models) - Neural networks that can capture complex patterns and contexts</li>
        </ol>

        <h5>Applications:</h5>

        <ul>
            <li><b>Brand monitoring</b> - Track public perception of a brand</li>
            <li><b>Customer feedback analysis</b> - Understand customer satisfaction</li>
            <li><b>Market research</b> - Analyze product reviews and consumer opinions</li>
            <li><b>Social media monitoring</b> - Track public sentiment on topics or events</li>
            <li><b>Stock market prediction</b> - Analyze news sentiment to predict stock movements</li>
        </ul>
    </div>
</div>
"""

_TRUNCATION_WARNING_HTML = """
<div class="alert alert-warning">
    <p class="mb-0"><b>⚠️ Note:</b> Text was truncated for analysis as it exceeds the model's length limit.</p>
</div>
"""

_TOO_FEW_SENTENCES_HTML = """
<div class="alert alert-warning">
    <p class="mb-0">Sentence-level analysis requires at least two sentences. The provided text doesn't have enough sentences for this analysis.</p>
</div>
"""

_EMOTION_COLORS = {
    'joy': '#FFD54F',
    'anger': '#EF5350',
    'sadness': '#42A5F5',
    'fear': '#9C27B0',
    'surprise': '#26C6DA',
    'love': '#EC407A',
    'disgust': '#66BB6A',
    'optimism': '#FF9800',
    'pessimism': '#795548',
    'trust': '#4CAF50',
    'anticipation': '#FF7043',
    'neutral': '#9E9E9E',
}

_EMOTION_EMOJIS = {
    'joy': '😃',
    'anger': '😠',
    'sadness': '😢',
    'fear': '😨',
    'surprise': '😲',
    'love': '❤️',
    'disgust': '🤢',
    'optimism': '🤩',
    'pessimism': '😒',
    'trust': '🤝',
    'anticipation': '🤔',
    'neutral': '😐',
}

# Per-sentiment presentation for the sentence table:
# (row class, emoji, text class).
_SENTENCE_ROW_STYLE = {
    'Positive': ('table-success', '😊', 'text-success'),
    'Negative': ('table-danger', '😞', 'text-danger'),
    'Neutral': ('table-warning', '😐', 'text-warning'),
}


def _esc(value) -> str:
    """Return ``str(value)`` escaped for safe interpolation into HTML."""
    import html  # function-scope import keeps this block self-contained

    return html.escape(str(value))


def _sentiment_label(compound: float) -> str:
    """Map a VADER compound score to 'Positive', 'Negative' or 'Neutral' (cut-offs at ±0.05)."""
    if compound >= 0.05:
        return 'Positive'
    if compound <= -0.05:
        return 'Negative'
    return 'Neutral'


def _interpret_compound(compound: float) -> str:
    """Return the human-readable intensity description for a VADER compound score."""
    if compound >= 0.75:
        return "Extremely positive sentiment"
    if compound >= 0.5:
        return "Moderately positive sentiment"
    if compound >= 0.05:
        return "Slightly positive sentiment"
    if compound > -0.05:
        return "Neutral sentiment"
    if compound > -0.5:
        return "Slightly negative sentiment"
    if compound > -0.75:
        return "Moderately negative sentiment"
    return "Extremely negative sentiment"


def _error_alert(title: str, message: str, exc, extra: str = "") -> str:
    """Build a red alert box; the exception text is escaped before being embedded."""
    return f"""
<div class="alert alert-danger">
    <h4>{title}</h4>
    <p>{message}: {_esc(exc)}</p>
    {extra}
</div>
"""


def _append_vader_section(out: list, text_input: str):
    """Append the whole-text VADER gauge, pie chart and score table to *out*.

    Returns the ``SentimentIntensityAnalyzer`` instance so the sentence-level
    section can reuse it. Any exception propagates to the caller.
    """
    out.append('<h3 class="task-subheader">VADER Sentiment Analysis</h3>')
    out.append('<p>VADER (Valence Aware Dictionary and sEntiment Reasoner) is a lexicon and rule-based sentiment analysis tool specifically attuned to sentiments expressed in social media.</p>')

    vader_analyzer = SentimentIntensityAnalyzer()
    scores = vader_analyzer.polarity_scores(text_input)
    compound_score = scores['compound']
    pos_score = scores['pos']
    neg_score = scores['neg']
    neu_score = scores['neu']

    category = _sentiment_label(compound_score)
    color = {'Positive': _GREEN, 'Negative': _RED, 'Neutral': _AMBER}[category]
    emoji = {'Positive': "😊", 'Negative': "😞", 'Neutral': "😐"}[category]

    # Gauge: the compound score in [-1, 1] is mapped linearly onto 0-100 %.
    marker_left = (compound_score + 1) / 2 * 100
    out.append(f"""
<div class="card">
    <div class="card-body">
        <div class="text-center mb-3">
            <span style="font-size: 3rem; margin-right: 15px;">{emoji}</span>
            <div>
                <h3 class="mb-0" style="color: {color};">{category}</h3>
                <p class="mb-0 fs-5">Compound Score: {compound_score:.2f}</p>
            </div>
        </div>

        <div style="height: 30px; background-color: #e0e0e0; border-radius: 15px; position: relative; overflow: hidden; margin: 10px 0;">
            <div style="position: absolute; top: 0; bottom: 0; left: 50%; width: 2px; background-color: #000; z-index: 2;"></div>
            <div style="position: absolute; top: 0; bottom: 0; left: {marker_left}%; width: 10px; background-color: {color}; border-radius: 5px; transform: translateX(-50%); z-index: 3;"></div>
            <div style="position: absolute; top: 0; bottom: 0; left: 0; width: 50%; background: linear-gradient(90deg, #F44336 0%, #FFC107 100%);"></div>
            <div style="position: absolute; top: 0; bottom: 0; right: 0; width: 50%; background: linear-gradient(90deg, #FFC107 0%, #4CAF50 100%);"></div>
        </div>
        <div class="d-flex justify-content-between mt-2">
            <span>Negative (-1.0)</span>
            <span>Neutral (0.0)</span>
            <span>Positive (1.0)</span>
        </div>
    </div>
</div>
""")

    out.append('<h4>VADER Score Breakdown</h4>')

    # Explicit figure/axes objects instead of pyplot's implicit "current
    # figure", so the chart does not depend on global pyplot state.
    fig, ax = plt.subplots(figsize=(8, 8))
    sizes = [pos_score, neu_score, neg_score]
    # Pull out the strictly largest slice; ties fall through to the last one.
    if pos_score > neg_score and pos_score > neu_score:
        explode = (0.1, 0, 0)
    elif neu_score > pos_score and neu_score > neg_score:
        explode = (0, 0.1, 0)
    else:
        explode = (0, 0, 0.1)

    if sum(sizes) > 0:
        ax.pie(sizes, explode=explode, labels=['Positive', 'Neutral', 'Negative'],
               colors=[_GREEN, _AMBER, _RED], autopct='%1.1f%%',
               shadow=True, startangle=90)
        ax.axis('equal')
    else:
        # An all-zero series cannot be normalised into pie fractions, so
        # show a placeholder instead of failing the whole analysis.
        ax.text(0.5, 0.5, 'No VADER scores to plot', ha='center', va='center',
                transform=ax.transAxes)
        ax.axis('off')
    ax.set_title('VADER Sentiment Distribution')

    detail_df = pd.DataFrame({
        'Metric': ['Positive Score', 'Neutral Score', 'Negative Score', 'Compound Score'],
        'Value': [pos_score, neu_score, neg_score, compound_score],
    })

    # Two-column layout: chart on the left, table + interpretation on the right.
    out.append('<div class="row">')
    out.append('<div class="col-md-6">')
    out.append(fig_to_html(fig))
    out.append('</div>')
    out.append('<div class="col-md-6">')
    out.append(df_to_html_table(detail_df))
    out.append(f"""
<div class="alert alert-success mt-3">
    <h4>Interpretation</h4>
    <p class="mb-0">{_interpret_compound(compound_score)}</p>
</div>
""")
    out.append('</div>')  # Close column 2
    out.append('</div>')  # Close row

    return vader_analyzer


def _append_transformer_section(out: list, text_input: str) -> None:
    """Append the transformer sentiment card; failures become an inline alert."""
    out.append('<h3 class="task-subheader">Transformer-based Sentiment Analysis</h3>')
    out.append('<p>This analysis uses a DistilBERT model fine-tuned on the Stanford Sentiment Treebank dataset.</p>')

    try:
        sentiment_model = load_sentiment_analyzer()

        # Character-based pre-truncation (rough estimate of the token limit).
        result = sentiment_model(text_input[:_MAX_MODEL_CHARS])
        if len(text_input) > _MAX_MODEL_CHARS:
            out.append(_TRUNCATION_WARNING_HTML)

        label = result[0]['label']
        score = result[0]['score']
        is_positive = label == "POSITIVE"
        color = _GREEN if is_positive else _RED
        emoji = "😊" if is_positive else "😞"

        out.append(f"""
<div class="card" style="border-color: {color};">
    <div class="card-body" style="background-color: {color}22;">
        <div class="d-flex align-items-center">
            <span style="font-size: 3rem; margin-right: 15px;">{emoji}</span>
            <div>
                <h3 class="mb-0" style="color: {color};">{_esc(str(label).capitalize())}</h3>
                <p class="mb-0 fs-5">Confidence: {score:.2%}</p>
            </div>
        </div>
    </div>
</div>
""")

        # Confidence bar
        out.append(f"""
<div style="height: 30px; background-color: #e0e0e0; border-radius: 15px; position: relative; overflow: hidden; margin: 10px 0;">
    <div style="position: absolute; top: 0; bottom: 0; left: 0; width: {score * 100}%; background-color: {color}; border-radius: 5px;"></div>
    <div style="position: absolute; top: 0; bottom: 0; width: 100%; text-align: center; line-height: 30px; color: #000; font-weight: bold;">
        {score:.1%} Confidence
    </div>
</div>
""")
    except Exception as e:
        # Deliberate best-effort: a model failure must not hide the other sections.
        out.append(_error_alert(
            'Transformer Model Error',
            'Failed to load or run transformer sentiment model',
            e,
            '<p>Falling back to VADER results only.</p>',
        ))


def _append_emotion_section(out: list, text_input: str) -> None:
    """Append the emotion bar chart, primary-emotion card and top-5 table.

    The classifier output is read as ``emotion_result[0]``: an iterable of
    dicts carrying ``'label'`` and ``'score'`` keys. Failures become an
    inline alert.
    """
    out.append('<h3 class="task-subheader">Emotion Analysis</h3>')
    out.append('<p>Identifying specific emotions in text using a RoBERTa model fine-tuned on the emotion dataset.</p>')

    try:
        emotion_classifier = load_emotion_classifier()
        emotion_result = emotion_classifier(text_input[:_MAX_MODEL_CHARS])

        emotion_scores = {item['label']: item['score'] for item in emotion_result[0]}
        emotion_df = pd.DataFrame({
            'Emotion': list(emotion_scores.keys()),
            'Score': list(emotion_scores.values()),
        }).sort_values('Score', ascending=False)

        primary_emotion = emotion_df.iloc[0]['Emotion']
        primary_score = emotion_df.iloc[0]['Score']

        fig, ax = plt.subplots(figsize=(10, 6))
        bars = ax.barh(
            emotion_df['Emotion'],
            emotion_df['Score'],
            color=[_EMOTION_COLORS.get(name, _GREY) for name in emotion_df['Emotion']],
        )
        ax.set_xlabel('Score')
        ax.set_title('Emotion Scores')
        # Print each score just past the end of its bar.
        for bar in bars:
            ax.text(bar.get_width() + 0.01, bar.get_y() + bar.get_height() / 2,
                    f"{bar.get_width():.2f}", va='center')
        ax.set_xlim(0, 1)
        fig.tight_layout()

        out.append('<section class="emotion-chart-section">')
        out.append('<div class="chart-container">')
        out.append(fig_to_html(fig))
        out.append('</div>')
        out.append('</section>')

        primary_color = _EMOTION_COLORS.get(primary_emotion, _GREY)
        primary_emoji = _EMOTION_EMOJIS.get(primary_emotion, '😐')

        out.append('<section class="emotion-result-container">')
        out.append(f"""
<div class="card" style="border-color: {primary_color};">
    <div class="card-body" style="background-color: {primary_color}22;">
        <div class="d-flex align-items-center">
            <span style="font-size: 3rem; margin-right: 15px;">{primary_emoji}</span>
            <div>
                <h3 class="mb-0" style="color: {primary_color};">{_esc(str(primary_emotion).capitalize())}</h3>
                <p class="mb-0 fs-5">Score: {primary_score:.2f}</p>
            </div>
        </div>
    </div>
</div>
""")
        out.append('<h4>Top Emotions</h4>')
        out.append(df_to_html_table(emotion_df.head(5)))
        out.append('</section>')  # Close emotion result container
    except Exception as e:
        # Deliberate best-effort: keep rendering the remaining sections.
        out.append(_error_alert(
            'Emotion Analysis Error',
            'Failed to load or run emotion classifier',
            e,
        ))


def _append_sentence_section(out: list, text_input: str, vader_analyzer) -> None:
    """Append the per-sentence VADER flow chart, counters and table to *out*.

    Needs at least two sentences from ``nltk.sent_tokenize``; otherwise a
    warning box is appended instead. Exceptions propagate to the caller.
    """
    out.append('<h3 class="task-subheader">Sentence-level Analysis</h3>')
    out.append('<p>Breaking down sentiment by individual sentences to identify sentiment variations throughout the text.</p>')

    sentences = nltk.sent_tokenize(text_input)
    if len(sentences) < 2:
        out.append(_TOO_FEW_SENTENCES_HTML)
        return

    records = []
    for number, sentence in enumerate(sentences, start=1):
        compound = vader_analyzer.polarity_scores(sentence)['compound']
        records.append({
            'Index': number,
            'Sentence': sentence,
            'Compound': compound,
            'Sentiment': _sentiment_label(compound),
        })

    indices = [rec['Index'] for rec in records]
    compounds = [rec['Compound'] for rec in records]
    labels = [rec['Sentiment'] for rec in records]

    # Line graph of sentiment flow, with the ±0.05 thresholds as guide lines.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(indices, compounds, 'o-', color='#1976D2', linewidth=2, markersize=8)
    ax.axhline(y=0, color=_GREY, linestyle='-', alpha=0.3)
    ax.axhline(y=0.05, color=_GREEN, linestyle='--', alpha=0.3)
    ax.axhline(y=-0.05, color=_RED, linestyle='--', alpha=0.3)
    point_colors = {'Positive': _GREEN, 'Negative': _RED, 'Neutral': _GREY}
    ax.scatter(indices, compounds, color=[point_colors[label] for label in labels],
               s=100, zorder=5)
    ax.grid(alpha=0.3)
    ax.set_xlabel('Sentence Number')
    ax.set_ylabel('Compound Sentiment Score')
    ax.set_title('Sentiment Flow Through Text')
    ax.set_ylim(-1.05, 1.05)
    fig.tight_layout()

    positive_count = labels.count('Positive')
    negative_count = labels.count('Negative')
    neutral_count = len(labels) - positive_count - negative_count

    out.append('<section class="sentence-chart-section">')
    out.append('<div class="chart-container">')
    out.append(fig_to_html(fig))
    out.append('</div>')
    out.append('</section>')

    out.append('<section class="sentence-analysis-container">')
    out.append(f"""
<div class="row mb-3">
    <div class="col-4">
        <div class="card text-center">
            <div class="card-body p-2">
                <h5 class="text-success">{positive_count}</h5>
                <small>Positive</small>
            </div>
        </div>
    </div>
    <div class="col-4">
        <div class="card text-center">
            <div class="card-body p-2">
                <h5 class="text-warning">{neutral_count}</h5>
                <small>Neutral</small>
            </div>
        </div>
    </div>
    <div class="col-4">
        <div class="card text-center">
            <div class="card-body p-2">
                <h5 class="text-danger">{negative_count}</h5>
                <small>Negative</small>
            </div>
        </div>
    </div>
</div>
""")

    # A "shift" is any pair of consecutive sentences with different labels.
    sentiment_changes = sum(1 for prev, cur in zip(labels, labels[1:]) if prev != cur)
    if sentiment_changes > 0:
        out.append(f"""
<div class="alert alert-success">
    <p class="mb-0"><b>Sentiment Shifts:</b> {sentiment_changes}</p>
    <p class="mb-0">The text shows {sentiment_changes} shifts in sentiment between sentences.</p>
</div>
""")

    out.append('<h4>Sentence-by-Sentence Analysis</h4>')
    out.append('<div class="table-responsive" style="max-height: 400px;">')
    out.append('<table class="table table-striped">')
    out.append('<thead><tr><th>#</th><th>Sentence</th><th>Sentiment</th></tr></thead>')
    out.append('<tbody>')

    for rec in records:
        label = rec['Sentiment']
        row_class, emoji, text_class = _SENTENCE_ROW_STYLE[label]
        sentiment_html = f"""
<div class="d-flex align-items-center">
    <span class="me-2">{emoji}</span>
    <span class="{text_class} fw-bold">{label}</span>
    <span class="ms-2 text-muted">({rec['Compound']:.2f})</span>
</div>
"""
        out.append(f'<tr class="{row_class}">')
        out.append(f'<td>{rec["Index"]}</td>')
        # The sentence is user-supplied text: escape it before embedding.
        out.append(f'<td>{_esc(rec["Sentence"])}</td>')
        out.append(f'<td>{sentiment_html}</td>')
        out.append('</tr>')

    out.append('</tbody></table>')
    out.append('</div>')
    out.append('</section>')  # Close sentence analysis container


def sentiment_handler(text_input):
    """Render the sentiment-analysis report for *text_input* as an HTML string.

    The report is built from four sections, in order: whole-text VADER
    scores, transformer sentiment, emotion classification, and per-sentence
    VADER scores. The transformer and emotion sections catch their own
    errors and render an inline alert; any other failure stops the analysis
    and renders a single error alert, keeping whatever was produced so far.
    An "About" card and the closing wrapper ``</div>`` are always appended.

    Args:
        text_input: The text to analyse.

    Returns:
        The HTML fragments joined with newlines.
    """
    output_html = [
        '<div class="result-area">',
        '<h2 class="task-header">Sentiment Analysis</h2>',
        _INTRO_HTML,
        _MODELS_HTML,
    ]

    try:
        vader_analyzer = _append_vader_section(output_html, text_input)
        _append_transformer_section(output_html, text_input)
        _append_emotion_section(output_html, text_input)
        _append_sentence_section(output_html, text_input, vader_analyzer)
    except Exception as e:
        # Top-level rendering boundary: report the failure in the page
        # instead of propagating it to the caller.
        output_html.append(f"""
<div class="alert alert-danger">
    <h3>Error</h3>
    <p>Failed to analyze sentiment: {_esc(e)}</p>
</div>
""")

    output_html.append(_ABOUT_HTML)
    output_html.append('</div>')  # Close result-area div

    return '\n'.join(output_html)
|
components/summarization.py
ADDED
|
@@ -0,0 +1,541 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import matplotlib.pyplot as plt
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
import nltk
|
| 5 |
+
from collections import Counter
|
| 6 |
+
import networkx as nx
|
| 7 |
+
from nltk.corpus import stopwords
|
| 8 |
+
from nltk.tokenize import sent_tokenize, word_tokenize
|
| 9 |
+
from nltk.stem import WordNetLemmatizer
|
| 10 |
+
import re
|
| 11 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 12 |
+
from matplotlib_venn import venn2
|
| 13 |
+
|
| 14 |
+
from utils.model_loader import load_summarizer
|
| 15 |
+
from utils.helpers import fig_to_html, df_to_html_table
|
| 16 |
+
|
| 17 |
+
def _append_key_terms_section(output_html, text_input):
    """Append the TF-IDF key-terms table and bar chart to ``output_html``."""
    output_html.append('<h3 class="task-subheader">Key Topics & Terms</h3>')

    # Extract key terms with TF-IDF
    key_terms = extract_key_terms(text_input, n=10)

    # Row 1: key terms table (full width)
    output_html.append('<div class="row">')
    output_html.append('<div class="col-12">')
    output_html.append('<h4>Key Terms</h4>')

    terms_df = pd.DataFrame({
        '#': range(1, len(key_terms) + 1),
        'Keyword': [term for term, _ in key_terms],
        'TF-IDF Score': [f"{score:.4f}" for _, score in key_terms],
    })
    output_html.append(df_to_html_table(terms_df))
    output_html.append('</div>')  # Close row 1 column
    output_html.append('</div>')  # Close row 1

    # Row 2: term importance chart (full width)
    output_html.append('<div class="row mt-3">')
    output_html.append('<div class="col-12">')
    output_html.append('<h4>Term Importance</h4>')

    fig = plt.figure(figsize=(10, 8))

    # Ascending order so the highest-scoring term ends up at the top of the
    # horizontal bar chart.
    ordered = sorted(key_terms, key=lambda pair: pair[1])
    plt.barh([term for term, _ in ordered], [score for _, score in ordered], color='#1976D2')
    plt.xlabel('TF-IDF Score')
    plt.ylabel('Keyword')
    plt.title('Key Terms by TF-IDF Score')
    plt.tight_layout()

    output_html.append(fig_to_html(fig))
    # Release the figure; harmless if the helper has already closed it.
    plt.close(fig)

    output_html.append('</div>')  # Close row 2 column
    output_html.append('</div>')  # Close row 2


def _append_abstractive_section(output_html, text_input, word_count,
                                min_length, max_length, use_sampling):
    """Append the neural summary section; return the summary text or ``None``.

    ``None`` is returned when the model could not be loaded or generation
    raised, so callers can skip anything that needs the abstractive summary.
    """
    from html import escape

    output_html.append('<h3 class="task-subheader">Neural Abstractive Summarization</h3>')
    output_html.append('<p>Using BART model to generate a human-like summary</p>')

    # Parameter summary
    output_html.append(f"""
    <div class="alert alert-light">
        <span><strong>Parameters:</strong> Min Length: {min_length} | Max Length: {max_length} | Sampling: {'Enabled' if use_sampling else 'Disabled'}</span>
    </div>
    """)

    abstractive_summary = None
    try:
        summarizer = load_summarizer()

        if summarizer is None:
            output_html.append("""
            <div class="alert alert-danger">
                <p>Failed to load the abstractive summarization model. This may be due to memory constraints or missing dependencies.</p>
            </div>
            """)
        else:
            max_token_limit = 1024  # BART typically has 1024 token limit

            # Word count is only a rough stand-in for the model's token count.
            # NOTE(review): this block only warns; any actual truncation is
            # left to the summarizer object - confirm it truncates.
            if word_count > max_token_limit:
                output_html.append(f"""
                <div class="alert alert-warning">
                    <p><b>⚠️ Note:</b> Text exceeds model's length limit. Only the first ~{max_token_limit} tokens will be used for summarization.</p>
                </div>
                """)

            abstractive_results = summarizer(
                text_input,
                max_length=max_length,
                min_length=min_length,
                do_sample=use_sampling,
                temperature=0.7 if use_sampling else 1.0,
                top_p=0.9 if use_sampling else 1.0,
                length_penalty=2.0
            )
            abstractive_summary = abstractive_results[0]['summary_text']

            # Reduction statistics
            abstractive_word_count = len(abstractive_summary.split())
            abstractive_reduction = (1 - abstractive_word_count / word_count) * 100

            # The summary is model output derived from user text: escape it.
            output_html.append(f"""
            <div class="card">
                <div class="card-header">
                    <h4 class="mb-0">Neural Summary</h4>
                </div>
                <div class="card-body">
                    <div style="line-height: 1.6;">
                        {escape(abstractive_summary)}
                    </div>
                </div>
            </div>

            <div class="row mt-3">
                <div class="col-md-4">
                    <div class="card text-center">
                        <div class="card-body">
                            <h5 class="text-muted">Original Length</h5>
                            <h3 class="text-primary">{word_count} words</h3>
                        </div>
                    </div>
                </div>
                <div class="col-md-4">
                    <div class="card text-center">
                        <div class="card-body">
                            <h5 class="text-muted">Summary Length</h5>
                            <h3 class="text-success">{abstractive_word_count} words</h3>
                        </div>
                    </div>
                </div>
                <div class="col-md-4">
                    <div class="card text-center">
                        <div class="card-body">
                            <h5 class="text-muted">Compression</h5>
                            <h3 class="text-info">{abstractive_reduction:.1f}%</h3>
                        </div>
                    </div>
                </div>
            </div>
            """)

            _append_key_terms_section(output_html, text_input)

    except Exception as e:
        output_html.append(f"""
        <div class="alert alert-danger">
            <h4>Abstractive Summarization Error</h4>
            <p>Failed to perform abstractive summarization: {escape(str(e))}</p>
        </div>
        """)

    return abstractive_summary


def _append_extractive_section(output_html, text_input, sentences, word_count):
    """Append the TextRank summary and sentence-importance chart.

    Returns the extractive summary as plain text (separator removed).
    """
    from html import escape

    separator = "SENTBREAKOS.OS"  # placeholder emitted by textrank_summarize

    output_html.append('<h3 class="task-subheader">Extractive Summarization</h3>')
    output_html.append("""
    <div class="alert alert-light">
        <p class="mb-0">
            Extractive summarization works by identifying important sentences in the text and extracting them to form a summary.
            This implementation uses a variant of the TextRank algorithm, which is based on Google's PageRank.
        </p>
    </div>
    """)

    raw_summary = textrank_summarize(
        text_input, num_sentences=min(3, max(1, len(sentences) // 3))
    )

    # Recover which sentences were picked BEFORE discarding the separator;
    # afterwards the individual sentences can no longer be told apart.
    selected_positions = set()
    cursor = 0
    for chosen in raw_summary.split(separator):
        for position in range(cursor, len(sentences)):
            if sentences[position] == chosen:
                selected_positions.add(position)
                cursor = position + 1
                break

    extractive_summary = raw_summary.replace(separator, " ")

    # Reduction statistics
    extractive_word_count = len(extractive_summary.split())
    extractive_reduction = (1 - extractive_word_count / word_count) * 100

    output_html.append(f"""
    <div class="alert alert-success">
        <h4>Extractive Summary ({extractive_reduction:.1f}% reduction)</h4>
        <div style="line-height: 1.6;">
            {escape(extractive_summary)}
        </div>
    </div>
    """)

    # Sentence importance visualization
    output_html.append('<h4>Sentence Importance</h4>')
    output_html.append('<p>The graph below shows the relative importance of each sentence based on the TextRank algorithm:</p>')

    sentence_scores = textrank_sentence_scores(text_input)

    # Keys look like "sentence_<n>" (1-based); order bars by that number.
    sentence_items = sorted(sentence_scores.items(), key=lambda item: int(item[0].split('_')[1]))

    fig = plt.figure(figsize=(10, 6))
    bars = plt.bar(
        [f"Sent {item[0].split('_')[1]}" for item in sentence_items],
        [item[1] for item in sentence_items],
        color='#1976D2'
    )

    # Bar i corresponds to sentences[i]; highlight the ones in the summary.
    for i, bar in enumerate(bars):
        if i in selected_positions:
            bar.set_color('#4CAF50')
            plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.02,
                     'Selected', ha='center', va='bottom', fontsize=8, rotation=90)

    plt.xlabel('Sentence')
    plt.ylabel('Importance Score')
    plt.title('Sentence Importance Based on TextRank')
    plt.xticks(rotation=45)
    plt.tight_layout()

    output_html.append(fig_to_html(fig))
    plt.close(fig)

    return extractive_summary


def _append_summary_comparison(output_html, extractive_summary, abstractive_summary, word_count):
    """Append the statistics table and word-overlap Venn diagram."""
    output_html.append('<h3 class="task-subheader">Summary Comparison</h3>')

    if abstractive_summary is None:
        # No neural summary was produced; there is nothing to compare against.
        output_html.append("""
        <div class="alert alert-warning">
            <p class="mb-0">Summary comparison is unavailable because the abstractive summary could not be generated.</p>
        </div>
        """)
        return

    extractive_word_count = len(extractive_summary.split())
    extractive_reduction = (1 - extractive_word_count / word_count) * 100
    abstractive_word_count = len(abstractive_summary.split())
    abstractive_reduction = (1 - abstractive_word_count / word_count) * 100

    # Word overlap between the two summaries
    extractive_words = set(re.findall(r'\b\w+\b', extractive_summary.lower()))
    abstractive_words = set(re.findall(r'\b\w+\b', abstractive_summary.lower()))
    common_words = extractive_words & abstractive_words

    if extractive_words and abstractive_words:
        # Shared words relative to the mean vocabulary size of both summaries.
        overlap_percentage = len(common_words) / ((len(extractive_words) + len(abstractive_words)) / 2) * 100
    else:
        overlap_percentage = 0

    extractive_sentence_count = len(nltk.sent_tokenize(extractive_summary))
    abstractive_sentence_count = len(nltk.sent_tokenize(abstractive_summary))

    comparison_df = pd.DataFrame({
        'Metric': ['Word Count', 'Reduction %', 'Sentences', 'Words per Sentence', 'Unique Words'],
        'Extractive': [
            extractive_word_count,
            f"{extractive_reduction:.1f}%",
            extractive_sentence_count,
            f"{extractive_word_count / max(1, extractive_sentence_count):.1f}",
            len(extractive_words)
        ],
        'Abstractive': [
            abstractive_word_count,
            f"{abstractive_reduction:.1f}%",
            abstractive_sentence_count,
            f"{abstractive_word_count / max(1, abstractive_sentence_count):.1f}",
            len(abstractive_words)
        ]
    })

    output_html.append('<div class="row">')

    # Column 1: comparison table
    output_html.append('<div class="col-md-6">')
    output_html.append('<h4>Summary Statistics</h4>')
    output_html.append(df_to_html_table(comparison_df))
    output_html.append('</div>')

    # Column 2: Venn diagram of word overlap
    output_html.append('<div class="col-md-6">')
    output_html.append('<h4>Word Overlap Visualization</h4>')

    extractive_only = list(extractive_words - abstractive_words)
    abstractive_only = list(abstractive_words - extractive_words)
    shared_words_list = list(common_words)

    fig = plt.figure(figsize=(8, 6))
    venn = venn2(
        subsets=(len(extractive_only), len(abstractive_only), len(common_words)),
        set_labels=('Extractive', 'Abstractive')
    )

    # A region with zero members has no patch, so guard before colouring.
    for region_id, color in (('10', '#4CAF50'), ('01', '#03A9F4'), ('11', '#9C27B0')):
        patch = venn.get_patch_by_id(region_id)
        if patch is not None:
            patch.set_color(color)

    plt.title('Word Overlap Between Summaries')
    plt.text(0, -0.25, f"Overlap: {overlap_percentage:.1f}%", ha='center')

    output_html.append(fig_to_html(fig))
    plt.close(fig)

    # Limit the number of words shown. The words come from a \w+ regex, so
    # they cannot contain HTML metacharacters.
    max_words = 10

    output_html.append(f"""
    <div class="mt-3">
        <h5>Key Shared Words ({min(max_words, len(shared_words_list))} of {len(shared_words_list)})</h5>
        <div class="d-flex flex-wrap gap-1 mb-2">
            {' '.join([f'<span class="badge bg-primary">{word}</span>' for word in shared_words_list[:max_words]])}
        </div>

        <h5>Unique to Extractive ({min(max_words, len(extractive_only))} of {len(extractive_only)})</h5>
        <div class="d-flex flex-wrap gap-1 mb-2">
            {' '.join([f'<span class="badge bg-success">{word}</span>' for word in extractive_only[:max_words]])}
        </div>

        <h5>Unique to Abstractive ({min(max_words, len(abstractive_only))} of {len(abstractive_only)})</h5>
        <div class="d-flex flex-wrap gap-1 mb-2">
            {' '.join([f'<span class="badge bg-info">{word}</span>' for word in abstractive_only[:max_words]])}
        </div>
    </div>
    """)

    output_html.append('</div>')  # Close column 2
    output_html.append('</div>')  # Close row


def summarization_handler(text_input, min_length=30, max_length=300, use_sampling=False):
    """Build the HTML report for the text summarization demo.

    The report shows the original text, a neural (abstractive) summary with
    key terms, a TextRank (extractive) summary with a sentence-importance
    chart, and a comparison of the two summaries. Texts with fewer than
    3 sentences or 40 words only get a "too short" notice.

    Args:
        text_input: Raw text to summarize. It is HTML-escaped wherever it is
            echoed into the report.
        min_length: Forwarded to the summarizer as ``min_length``.
        max_length: Forwarded to the summarizer as ``max_length``.
        use_sampling: Forwarded as ``do_sample``; also switches temperature
            and top-p from neutral values (1.0) to 0.7 / 0.9.

    Returns:
        The report as one HTML string (fragments joined by newlines). Errors
        are rendered as alert boxes inside the report rather than raised.
    """
    from html import escape  # local import keeps this block self-contained

    output_html = []

    # Add result area container
    output_html.append('<div class="result-area">')
    output_html.append('<h2 class="task-header">Text Summarization</h2>')

    output_html.append("""
    <div class="alert alert-info">
        <i class="fas fa-info-circle"></i>
        Text summarization condenses text to capture its main points, enabling quicker comprehension of large volumes of information.
    </div>
    """)

    # Model info
    output_html.append("""
    <div class="alert alert-info">
        <h4><i class="fas fa-tools"></i> Models & Techniques Used:</h4>
        <ul>
            <li><b>Extractive Summarization</b> - Selects important sentences from the original text</li>
            <li><b>Abstractive Summarization</b> - BART model fine-tuned on CNN/DM dataset to generate new summary text</li>
            <li><b>Performance</b> - ROUGE scores of approximately 40-45 on CNN/DM benchmark</li>
        </ul>
    </div>
    """)

    try:
        # Check if text is long enough for summarization
        sentences = nltk.sent_tokenize(text_input)
        word_count = len(text_input.split())

        if len(sentences) < 3 or word_count < 40:
            output_html.append(f"""
            <div class="alert alert-warning">
                <h3>Text Too Short for Summarization</h3>
                <p>The provided text contains only {len(sentences)} sentences and {word_count} words.
                For effective summarization, please provide a longer text (at least 3 sentences and 40 words).</p>
            </div>
            """)
        else:
            # Original Text Section (user text is untrusted: escape it)
            output_html.append('<h3 class="task-subheader">Original Text</h3>')
            output_html.append(f"""
            <div class="card">
                <div class="card-body">
                    <div class="text-content" style="word-wrap: break-word; word-break: break-word; overflow-wrap: break-word; max-height: 500px; overflow-y: auto; padding: 15px; background-color: #f8f9fa; border-radius: 5px; border: 1px solid #e9ecef; line-height: 1.6;">{escape(text_input)}</div>
                </div>
            </div>
            <p>Length: {word_count} words.</p>
            """)

            # Returns None when the model is unavailable or generation failed.
            abstractive_summary = _append_abstractive_section(
                output_html, text_input, word_count, min_length, max_length, use_sampling
            )

            extractive_summary = _append_extractive_section(
                output_html, text_input, sentences, word_count
            )

            _append_summary_comparison(
                output_html, extractive_summary, abstractive_summary, word_count
            )

    except Exception as e:
        output_html.append(f"""
        <div class="alert alert-danger">
            <h3>Error</h3>
            <p>Failed to summarize text: {escape(str(e))}</p>
        </div>
        """)

    # About Text Summarization section
    output_html.append("""
    <div class="card mt-4">
        <div class="card-header">
            <h4 class="mb-0">
                <i class="fas fa-info-circle"></i>
                About Text Summarization
            </h4>
        </div>
        <div class="card-body">
            <h5>What is Text Summarization?</h5>

            <p>Text summarization is the process of creating a shorter version of a text while preserving its key information
            and meaning. It helps users quickly grasp the main points without reading the entire document.</p>

            <h5>Two Main Approaches:</h5>

            <ul>
                <li><b>Extractive Summarization:</b> Selects and extracts existing sentences from the source text based on their importance</li>
                <li><b>Abstractive Summarization:</b> Generates new sentences that capture the meaning of the source text (similar to how humans write summaries)</li>
            </ul>

            <h5>Applications:</h5>

            <ul>
                <li><b>News digests</b> - Quick summaries of news articles</li>
                <li><b>Research papers</b> - Condensing long academic papers</li>
                <li><b>Legal documents</b> - Summarizing complex legal text</li>
                <li><b>Meeting notes</b> - Extracting key points from discussions</li>
                <li><b>Content curation</b> - Creating snippets for content recommendations</li>
            </ul>
        </div>
    </div>
    """)

    output_html.append('</div>')  # Close result-area div

    return '\n'.join(output_html)
|
| 434 |
+
|
| 435 |
+
def extract_key_terms(text, n=10):
    """Return up to ``n`` ``(term, score)`` pairs ranked by TF-IDF weight.

    The text is lower-cased and tokenized; tokens that are not alphanumeric,
    are English stop words, or have two characters or fewer are dropped, and
    the rest are lemmatized. The cleaned tokens are vectorized as a single
    document (at most 100 features) and the highest-weighted terms returned.

    If anything raises, the error is printed and ``n`` placeholder
    ``("term", 0.0)`` pairs are returned instead.
    """
    try:
        english_stops = set(stopwords.words('english'))
        lemmatize = WordNetLemmatizer().lemmatize

        # Filter first, lemmatize only what survives.
        cleaned = []
        for token in word_tokenize(text.lower()):
            if not token.isalnum() or token in english_stops or len(token) <= 2:
                continue
            cleaned.append(lemmatize(token))

        # Single-document corpus for the vectorizer
        vectorizer = TfidfVectorizer(max_features=100)
        weights = vectorizer.fit_transform([' '.join(cleaned)]).toarray()[0]
        vocabulary = vectorizer.get_feature_names_out()

        # Highest weight first; ties keep vocabulary order (stable sort).
        ranked = sorted(zip(vocabulary, weights), key=lambda pair: pair[1], reverse=True)
        return ranked[:n]
    except Exception as e:
        print(f"Error extracting key terms: {str(e)}")
        return [("term", 0.0)] * n  # placeholder rows
|
| 466 |
+
|
| 467 |
+
# TextRank extractive summarization algorithm
|
| 468 |
+
def textrank_summarize(text, num_sentences=3):
    """Pick the ``num_sentences`` highest-scoring sentences via TextRank.

    Returns ``text`` unchanged when it has no more than ``num_sentences``
    sentences. Otherwise the chosen sentences are returned in their original
    order, joined with the literal placeholder ``"SENTBREAKOS.OS"`` (callers
    are expected to replace it).
    """
    sentences = sent_tokenize(text)
    if len(sentences) <= num_sentences:
        return text

    scores = textrank_sentence_scores(text)

    # (score, position, sentence) triples; scores are paired with sentences
    # by iteration order of the score mapping.
    scored = []
    for position, (sentence, score) in enumerate(zip(sentences, scores.values())):
        scored.append((score, position, sentence))
    scored.sort(reverse=True)

    # Keep the best ones, then restore document order.
    top = scored[:num_sentences]
    top.sort(key=lambda entry: entry[1])

    return "SENTBREAKOS.OS".join(entry[2] for entry in top)
|
| 490 |
+
|
| 491 |
+
def textrank_sentence_scores(text):
    """Score every sentence of ``text`` with PageRank over a similarity graph.

    Each sentence becomes a node named ``"sentence_<n>"`` (1-based). Two nodes
    are linked when ``sentence_similarity`` of their stop-word-free,
    alphanumeric, lower-cased tokens is positive; that value is the edge
    weight. Returns the mapping produced by ``nx.pagerank``.
    """
    sentences = sent_tokenize(text)
    node_ids = [f"sentence_{number}" for number in range(1, len(sentences) + 1)]

    graph = nx.Graph()
    graph.add_nodes_from(node_ids)

    # One bag of content words per sentence
    english_stops = set(stopwords.words('english'))
    bags = [
        [
            token.lower()
            for token in word_tokenize(sentence)
            if token.lower() not in english_stops and token.isalnum()
        ]
        for sentence in sentences
    ]

    # Connect every unordered pair that shares at least one word.
    for left, left_id in enumerate(node_ids):
        for right in range(left + 1, len(node_ids)):
            weight = sentence_similarity(bags[left], bags[right])
            if weight > 0:
                graph.add_edge(left_id, node_ids[right], weight=weight)

    return nx.pagerank(graph)
|
| 525 |
+
|
| 526 |
+
def sentence_similarity(words1, words2):
    """Return the Jaccard similarity of two word collections.

    The result is ``|A ∩ B| / |A ∪ B|`` over the distinct words of each
    argument, or ``0`` when either argument is empty.
    """
    if not (words1 and words2):
        return 0

    first, second = set(words1), set(words2)
    combined = first | second

    # Defensive: mirrors the original zero-division guard.
    if not combined:
        return 0

    return len(first & second) / len(combined)
|
components/text_generation.py
ADDED
|
@@ -0,0 +1,259 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import matplotlib.pyplot as plt
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import nltk
|
| 4 |
+
import time
|
| 5 |
+
|
| 6 |
+
from utils.model_loader import load_text_generator
|
| 7 |
+
from utils.helpers import fig_to_html, df_to_html_table
|
| 8 |
+
|
| 9 |
+
def text_generation_handler(text_input, max_length=100, temperature=0.7, top_p=0.9, num_sequences=1):
    """Build the HTML report for the text generation demo.

    The prompt is cut to its first 100 whitespace-separated words if longer,
    passed to the text generator, and each returned sequence is rendered with
    generation statistics. The first sequence is additionally compared with
    the prompt (word-length histogram and a small statistics table).

    Args:
        text_input: Raw prompt text. It is HTML-escaped wherever it (or text
            generated from it) is echoed into the report.
        max_length: Generation budget added on top of the prompt's word count
            to form the generator's ``max_length``.
        temperature: Sampling temperature forwarded to the generator.
        top_p: Nucleus-sampling threshold forwarded to the generator.
        num_sequences: Forwarded as ``num_return_sequences``.

    Returns:
        The report as one HTML string (fragments joined by newlines). Errors
        are rendered as an alert box inside the report rather than raised.
    """
    from html import escape  # local import keeps this block self-contained

    output_html = []

    # Add result area container
    output_html.append('<div class="result-area">')
    output_html.append('<h2 class="task-header">Text Generation</h2>')

    output_html.append("""
    <div class="alert alert-info">
        <i class="fas fa-info-circle"></i>
        Text generation models can continue or expand on a given text prompt, creating new content that follows the style and context of the input.
    </div>
    """)

    # Model info
    output_html.append("""
    <div class="alert alert-info">
        <h4><i class="fas fa-tools"></i> Model Used:</h4>
        <ul>
            <li><b>GPT-2</b> - 124M parameter language model trained on a diverse corpus of internet text</li>
            <li><b>Capabilities</b> - Can generate coherent text continuations and completions</li>
            <li><b>Limitations</b> - May occasionally produce repetitive or nonsensical content</li>
        </ul>
    </div>
    """)

    try:
        MAX_PROMPT_LENGTH = 100  # whitespace-separated words, a rough token proxy

        prompt_words = text_input.split()

        # Truncate if necessary
        if len(prompt_words) > MAX_PROMPT_LENGTH:
            prompt_words = prompt_words[:MAX_PROMPT_LENGTH]
            prompt_text = " ".join(prompt_words)
            output_html.append("""
            <div class="alert alert-warning">
                <p class="mb-0">⚠️ Text truncated to approximately 100 tokens for better generation results.</p>
            </div>
            """)
        else:
            prompt_text = text_input

        prompt_tokens = len(prompt_words)
        safe_prompt = escape(prompt_text)  # user text is untrusted

        # Display prompt
        output_html.append('<h3 class="task-subheader">Prompt</h3>')
        output_html.append(f'<div class="card"><div class="card-body">{safe_prompt}</div></div>')

        # Load model
        text_generator = load_text_generator()
        # NOTE(review): assumes the loader may signal failure by returning
        # None - confirm. Raising here yields a readable message below
        # instead of "'NoneType' object is not callable".
        if text_generator is None:
            raise RuntimeError("text generation model could not be loaded")

        # Budget is based on the (possibly truncated) prompt actually sent to
        # the model, not the full input, so long inputs do not inflate it.
        generation_kwargs = {
            "max_length": prompt_tokens + max_length,
            "num_return_sequences": num_sequences,
            "temperature": temperature,
            "top_p": top_p,
            "do_sample": True,
            "no_repeat_ngram_size": 2,
            "pad_token_id": 50256  # GPT-2's pad token ID
        }

        # Generate text
        start_time = time.time()
        result = text_generator(prompt_text, **generation_kwargs)
        generation_time = time.time() - start_time

        # Display results
        output_html.append('<h3 class="task-subheader">Generated Text</h3>')

        for i, sequence in enumerate(result):
            generated_text = sequence['generated_text']
            # The output starts with the prompt; keep only the continuation.
            new_text = generated_text[len(prompt_text):]

            if num_sequences > 1:
                output_html.append(f'<h4>Version {i+1}</h4>')

            output_html.append(f"""
            <div class="card">
                <div class="card-body">
                    <span class="text-muted">{safe_prompt}</span>
                    <span class="text-primary fw-bold">{escape(new_text)}</span>
                </div>
            </div>
            """)

            # Generation stats for this sequence
            new_words = new_text.split()
            gen_tokens = len(new_words)

            # Average word length as a crude complexity metric
            avg_word_len = sum(len(word) for word in new_words) / max(1, gen_tokens)

            output_html.append(f"""
            <div class="alert alert-success">
                <h4 class="mb-3">Generation Statistics</h4>
                <div class="row">
                    <div class="col-md-6">
                        <p><b>Prompt length:</b> {prompt_tokens} tokens</p>
                        <p><b>Generated length:</b> {gen_tokens} tokens</p>
                        <p><b>Total length:</b> {prompt_tokens + gen_tokens} tokens</p>
                    </div>
                    <div class="col-md-6">
                        <p><b>Temperature:</b> {temperature}</p>
                        <p><b>Top-p:</b> {top_p}</p>
                        <p><b>Avg word length:</b> {avg_word_len:.2f} characters</p>
                    </div>
                </div>
                <p><b>Generation time:</b> {generation_time:.2f} seconds</p>
            </div>
            """)

            # Option to see full text
            output_html.append(f"""
            <div class="card">
                <div class="card-header">
                    <h5 class="mb-0">
                        <button class="btn btn-link" type="button" data-bs-toggle="collapse" data-bs-target="#fullText{i}" aria-expanded="false">
                            Show full text (copy-paste friendly)
                        </button>
                    </h5>
                </div>
                <div class="collapse" id="fullText{i}">
                    <div class="card-body">
                        <div class="text-content" style="word-wrap: break-word; word-break: break-word; overflow-wrap: break-word; max-height: 500px; overflow-y: auto; padding: 15px; background-color: #f8f9fa; border-radius: 5px; border: 1px solid #e9ecef; line-height: 1.6;">{escape(generated_text)}</div>
                    </div>
                </div>
            </div>
            """)

        # Text complexity analysis of the first generated sequence
        if len(result) > 0:
            output_html.append('<h3 class="task-subheader">Text Analysis</h3>')

            full_words = result[0]['generated_text'].split()
            generated_words = full_words[len(prompt_words):]

            # Word length distributions
            prompt_word_lengths = [len(word) for word in prompt_words]
            generated_word_lengths = [len(word) for word in generated_words]

            fig, ax = plt.subplots(figsize=(10, 5))

            bins = range(1, 16)  # Word lengths from 1 to 15
            ax.hist(prompt_word_lengths, bins=bins, alpha=0.7, label='Prompt', color='#1976D2')
            ax.hist(generated_word_lengths, bins=bins, alpha=0.7, label='Generated', color='#4CAF50')

            ax.set_xlabel('Word Length (characters)')
            ax.set_ylabel('Frequency')
            ax.set_title('Word Length Distribution: Prompt vs Generated')
            ax.legend()
            ax.grid(alpha=0.3)

            output_html.append(fig_to_html(fig))
            # Release the figure; harmless if the helper has already closed it.
            plt.close(fig)

            # Linguistic statistics (guarded against empty word lists)
            prompt_avg_word_len = sum(prompt_word_lengths) / len(prompt_word_lengths) if prompt_word_lengths else 0
            generated_avg_word_len = sum(generated_word_lengths) / len(generated_word_lengths) if generated_word_lengths else 0

            prompt_unique = len({word.lower() for word in prompt_words})
            generated_unique = len({word.lower() for word in generated_words})

            stats_df = pd.DataFrame({
                'Metric': ['Word count', 'Average word length', 'Unique words', 'Lexical diversity*'],
                'Prompt': [
                    len(prompt_words),
                    f"{prompt_avg_word_len:.2f}",
                    prompt_unique,
                    f"{prompt_unique / len(prompt_words):.2f}" if prompt_words else "0"
                ],
                'Generated': [
                    len(generated_words),
                    f"{generated_avg_word_len:.2f}",
                    generated_unique,
                    f"{generated_unique / len(generated_words):.2f}" if generated_words else "0"
                ]
            })

            output_html.append('<div class="mt-3">')
            output_html.append(df_to_html_table(stats_df))
            output_html.append('<p><small>*Lexical diversity = unique words / total words</small></p>')
            output_html.append('</div>')

        # Show tips for better results
        output_html.append("""
        <div class="alert alert-info">
            <h4>Tips for Better Generation Results</h4>
            <ul class="mb-0">
                <li><b>Be specific</b> - More detailed prompts give the model better context</li>
                <li><b>Format matters</b> - If you want a list, start with a list item; if you want dialogue, include dialogue format</li>
                <li><b>Play with temperature</b> - Lower values (0.3-0.5) for focused, consistent text; higher values (0.7-1.0) for creative, varied output</li>
                <li><b>Try multiple generations</b> - Generate several options to pick the best result</li>
            </ul>
        </div>
        """)

    except Exception as e:
        output_html.append(f"""
        <div class="alert alert-danger">
            <h3>Error</h3>
            <p>Failed to generate text: {escape(str(e))}</p>
        </div>
        """)

    # About Text Generation section
    output_html.append("""
    <div class="card mt-4">
        <div class="card-header">
            <h4 class="mb-0">
                <i class="fas fa-info-circle"></i>
                About Text Generation
            </h4>
        </div>
        <div class="card-body">
            <h5>What is Text Generation?</h5>

            <p>Text generation is the task of creating human-like text using machine learning models. Modern text generation
            systems use large neural networks trained on vast amounts of text data to predict the next tokens in a sequence.</p>

            <h5>How It Works:</h5>

            <ol>
                <li><b>Training</b> - Models learn patterns in language by predicting the next word in billions of text examples</li>
                <li><b>Prompting</b> - You provide a starting text that gives context and direction</li>
                <li><b>Generation</b> - The model repeatedly predicts the most likely next token based on previous context</li>
                <li><b>Sampling</b> - Various techniques (temperature, top-p) control the randomness and creativity of output</li>
            </ol>

            <h5>Applications:</h5>

            <ul>
                <li><b>Content creation</b> - Drafting articles, stories, and marketing copy</li>
                <li><b>Assistive writing</b> - Helping with email drafting, summarization, and editing</li>
                <li><b>Conversational AI</b> - Powering chatbots and digital assistants</li>
                <li><b>Code generation</b> - Assisting developers with coding tasks</li>
                <li><b>Creative writing</b> - Generating stories, poetry, and other creative content</li>
            </ul>
        </div>
    </div>
    """)

    output_html.append('</div>')  # Close result-area div

    return '\n'.join(output_html)
|
components/tokenization.py
ADDED
|
@@ -0,0 +1,460 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import matplotlib.pyplot as plt
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import nltk
|
| 4 |
+
import re
|
| 5 |
+
from collections import Counter
|
| 6 |
+
from nltk.tokenize import word_tokenize, sent_tokenize
|
| 7 |
+
import spacy
|
| 8 |
+
|
| 9 |
+
from utils.model_loader import load_spacy, download_nltk_resources
|
| 10 |
+
from utils.helpers import fig_to_html, df_to_html_table
|
| 11 |
+
|
| 12 |
+
def tokenization_handler(text_input):
    """Render an HTML report demonstrating several tokenization methods.

    The report contains, in order: the original text, NLTK word tokens with
    summary counts, NLTK sentences, a spaCy token table and part-of-speech
    chart, BERT WordPiece and GPT-2 BPE subword tokens with a token-count
    comparison chart, token-length statistics, and a static explanation of
    tokenization.

    Args:
        text_input: Raw user text to tokenize. The text and every token
            derived from it are HTML-escaped before being embedded.

    Returns:
        str: The HTML fragments joined with newlines. Processing failures are
        rendered as alert boxes inside the report instead of being raised.
    """
    # Function-scope import so the block does not depend on a file-level change.
    from html import escape

    output_html = []

    # Add result area container
    output_html.append('<div class="result-area">')
    output_html.append('<h2 class="task-header">Tokenization</h2>')

    output_html.append("""
    <div class="alert alert-info">
        <i class="fas fa-info-circle"></i>
        Tokenization is the process of breaking text into smaller units called tokens, which can be words, characters, or subwords.
    </div>
    """)

    # Model info
    output_html.append("""
    <div class="alert alert-info">
        <h4><i class="fas fa-tools"></i> Tools Used:</h4>
        <ul>
            <li><b>NLTK</b> - Natural Language Toolkit for basic word and sentence tokenization</li>
            <li><b>spaCy</b> - Advanced tokenization with linguistic features</li>
            <li><b>WordPiece</b> - Subword tokenization used by BERT and other transformers</li>
        </ul>
    </div>
    """)

    try:
        # Ensure NLTK resources are downloaded
        download_nltk_resources()

        # Original Text (escaped: it is untrusted user input)
        output_html.append('<h3 class="task-subheader">Original Text</h3>')
        output_html.append(f'<div class="card"><div class="card-body"><div class="text-content" style="word-wrap: break-word; word-break: break-word; overflow-wrap: break-word; max-height: 500px; overflow-y: auto; padding: 15px; background-color: #f8f9fa; border-radius: 5px; border: 1px solid #e9ecef; line-height: 1.6;">{escape(text_input)}</div></div></div>')

        # Word Tokenization
        output_html.append('<h3 class="task-subheader">Word Tokenization</h3>')
        output_html.append('<p>Breaking text into individual words and punctuation marks.</p>')

        nltk_tokens = word_tokenize(text_input)
        token_html = "".join(
            f'<span class="token">{escape(token)}</span>' for token in nltk_tokens
        )

        output_html.append(f"""
        <div class="card">
            <div class="card-body">
                <div style="background-color: #f5f5f5; padding: 15px; border-radius: 5px; line-height: 2.5;">
                    {token_html}
                </div>
            </div>
        </div>
        <style>
            .token {{
                background-color: #E3F2FD;
                border: 1px solid #1976D2;
                border-radius: 4px;
                padding: 3px 6px;
                margin: 3px;
                display: inline-block;
            }}
        </style>
        """)

        # Token statistics
        punctuation_chars = set('.,;:!?-"\'()[]{}')
        token_count = len(nltk_tokens)
        unique_tokens = len({t.lower() for t in nltk_tokens})
        alpha_only = sum(1 for t in nltk_tokens if t.isalpha())
        numeric = sum(1 for t in nltk_tokens if t.isnumeric())
        punct = sum(1 for t in nltk_tokens if all(c in punctuation_chars for c in t))

        stat_cards = [
            ("text-primary", token_count, "Total Tokens"),
            ("text-success", unique_tokens, "Unique Tokens"),
            ("text-info", alpha_only, "Alphabetic"),
            ("text-warning", numeric, "Numeric"),
            ("text-danger", punct, "Punctuation"),
        ]
        cards_html = "".join(
            f"""
            <div class="col-md-2">
                <div class="card text-center">
                    <div class="card-body">
                        <h5 class="{css_class}">{value}</h5>
                        <small>{label}</small>
                    </div>
                </div>
            </div>
            """
            for css_class, value, label in stat_cards
        )
        output_html.append(f'<div class="row mt-3">{cards_html}</div>')

        # Sentence Tokenization
        output_html.append('<h3 class="task-subheader">Sentence Tokenization</h3>')
        output_html.append('<p>Dividing text into individual sentences.</p>')

        nltk_sentences = sent_tokenize(text_input)
        sentence_html = "".join(
            f'<div class="sentence"><span class="sentence-num">{number}</span> {escape(sentence)}</div>'
            for number, sentence in enumerate(nltk_sentences, start=1)
        )

        output_html.append(f"""
        <div class="card">
            <div class="card-body">
                <div style="background-color: #f5f5f5; padding: 15px; border-radius: 5px;">
                    {sentence_html}
                </div>
            </div>
        </div>
        <style>
            .sentence {{
                background-color: #E1F5FE;
                border-left: 3px solid #03A9F4;
                padding: 10px;
                margin: 8px 0;
                border-radius: 0 5px 5px 0;
                position: relative;
            }}
            .sentence-num {{
                font-weight: bold;
                color: #0277BD;
                margin-right: 5px;
            }}
        </style>
        """)

        # Empty or whitespace-only input yields no sentences; avoid dividing by zero.
        tokens_per_sentence = token_count / len(nltk_sentences) if nltk_sentences else 0.0
        output_html.append(f'<p class="mt-3">Text contains {len(nltk_sentences)} sentences with an average of {tokens_per_sentence:.1f} tokens per sentence.</p>')

        # Advanced Tokenization with spaCy
        output_html.append('<h3 class="task-subheader">Linguistic Tokenization (spaCy)</h3>')
        output_html.append('<p>spaCy provides more linguistically-aware tokenization with additional token properties.</p>')

        nlp = load_spacy()
        doc = nlp(text_input)

        output_html.append("""
        <div class="table-responsive">
            <table class="table table-striped table-hover">
                <thead class="table-primary sticky-top">
                    <tr>
                        <th>Token</th>
                        <th>Lemma</th>
                        <th>POS</th>
                        <th>Tag</th>
                        <th>Dependency</th>
                        <th>Properties</th>
                    </tr>
                </thead>
                <tbody>
        """)

        # Row colour by token type; stopwords take precedence over the POS colour.
        pos_row_class = {
            "VERB": "table-success",    # Light green for verbs
            "NOUN": "table-primary",    # Light blue for nouns
            "PROPN": "table-primary",
            "ADJ": "table-warning",     # Light yellow for adjectives
        }
        for token in doc:
            if token.is_stop:
                row_class = "table-danger"  # Light red for stopwords
            else:
                row_class = pos_row_class.get(token.pos_, "")

            output_html.append(f"""
                    <tr class="{row_class}">
                        <td><strong>{escape(token.text)}</strong></td>
                        <td>{escape(token.lemma_)}</td>
                        <td>{escape(token.pos_)}</td>
                        <td>{escape(token.tag_)}</td>
                        <td>{escape(token.dep_)}</td>
                        <td>
                            <span class="badge {'bg-success' if token.is_alpha else 'bg-danger'}">
                                {'Alpha' if token.is_alpha else 'Non-alpha'}
                            </span>
                            <span class="badge {'bg-danger' if token.is_stop else 'bg-success'}">
                                {'Stopword' if token.is_stop else 'Content'}
                            </span>
                            <span class="badge bg-info">
                                Shape: {escape(token.shape_)}
                            </span>
                        </td>
                    </tr>
            """)

        output_html.append("""
                </tbody>
            </table>
        </div>
        """)

        # Bar chart for POS distribution
        pos_counts = Counter(token.pos_ for token in doc)

        fig = plt.figure(figsize=(10, 6))
        plt.bar(list(pos_counts.keys()), list(pos_counts.values()), color='#1976D2')
        plt.xlabel('Part of Speech')
        plt.ylabel('Count')
        plt.title('Part-of-Speech Distribution')
        plt.xticks(rotation=45)
        plt.tight_layout()

        output_html.append('<h4>Token Distribution by Part of Speech</h4>')
        output_html.append(fig_to_html(fig))

        # Subword Tokenization
        output_html.append('<h3 class="task-subheader">Subword Tokenization (WordPiece/BPE)</h3>')
        output_html.append("""
        <div class="alert alert-light">
            <p>
                Subword tokenization breaks words into smaller units to handle rare words and morphologically rich languages.
                This technique is widely used in modern transformer models like BERT, GPT, etc.
            </p>
        </div>
        """)

        continuation_style = ' style="background-color: #FFECB3; border-color: #FFA000;"'
        token_box_open = '<div style="background-color: #f5f5f5; padding: 15px; border-radius: 5px; line-height: 2.5;">'

        try:
            from transformers import BertTokenizer, GPT2Tokenizer

            bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
            gpt2_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

            bert_tokens = bert_tokenizer.tokenize(text_input)
            # Use the raw BPE tokens: decoding and stripping each id would drop
            # the "Ġ" space marker that the highlighting below relies on.
            gpt2_tokens = gpt2_tokenizer.tokenize(text_input)

            # BERT WordPiece Section
            output_html.append('<h4 class="bg-primary text-white p-3 rounded">BERT WordPiece</h4>')
            output_html.append('<p>BERT uses WordPiece tokenization which marks subword units with ##.</p>')

            output_html.append('<div class="card"><div class="card-body">')
            output_html.append(token_box_open)
            for token in bert_tokens:
                style = continuation_style if token.startswith("##") else ""
                output_html.append(f'<span class="token"{style}>{escape(token)}</span>')
            output_html.append('</div></div></div>')
            output_html.append(f'<p class="mt-2">Total BERT tokens: {len(bert_tokens)}</p>')

            # GPT-2 BPE Section
            output_html.append('<h4 class="bg-primary text-white p-3 rounded mt-4">GPT-2 BPE</h4>')
            output_html.append('<p>GPT-2 uses Byte-Pair Encoding (BPE) tokenization where Ġ represents a space before the token.</p>')

            output_html.append('<div class="card"><div class="card-body">')
            output_html.append(token_box_open)
            for index, token in enumerate(gpt2_tokens):
                # The first token starts a word even though no space precedes it.
                starts_word = index == 0 or token.startswith("Ġ")
                style = "" if starts_word else continuation_style
                output_html.append(f'<span class="token"{style}>{escape(token)}</span>')
            output_html.append('</div></div></div>')
            output_html.append(f'<p class="mt-2">Total GPT-2 tokens: {len(gpt2_tokens)}</p>')

            # Compare token counts
            output_html.append('<h4>Token Count Comparison</h4>')
            tokenizer_labels = ['Words (spaces)', 'NLTK', 'spaCy', 'BERT WordPiece', 'GPT-2 BPE']
            tokenizer_counts = [
                len(text_input.split()),
                len(nltk_tokens),
                len(doc),
                len(bert_tokens),
                len(gpt2_tokens),
            ]

            fig = plt.figure(figsize=(10, 6))
            bars = plt.bar(tokenizer_labels, tokenizer_counts,
                           color=['#BBDEFB', '#90CAF9', '#64B5F6', '#42A5F5', '#2196F3'])

            # Add value labels on top of bars
            for bar, count in zip(bars, tokenizer_counts):
                plt.text(bar.get_x() + bar.get_width() / 2., count + 0.5,
                         f'{count}',
                         ha='center', va='bottom')

            plt.ylabel('Token Count')
            plt.title('Tokenization Comparison by Method')
            # Headroom for labels; the floor of 1 keeps the axis valid when all counts are 0.
            plt.ylim(0, max(max(tokenizer_counts), 1) * 1.1)
            plt.tight_layout()

            output_html.append(fig_to_html(fig))

        except Exception as e:
            output_html.append(f"""
            <div class="alert alert-warning">
                <h4>Subword Tokenization Error</h4>
                <p>Failed to load transformer tokenizers: {escape(str(e))}</p>
                <p>The transformers library may not be installed or there might be network issues when downloading models.</p>
            </div>
            """)

        # The remaining statistics depend only on the NLTK tokens, so they are
        # shown even when the transformer tokenizers could not be loaded.
        token_lengths = [len(token) for token in nltk_tokens]

        if token_lengths:
            output_html.append('<h4>Token Length Distribution</h4>')
            fig = plt.figure(figsize=(10, 6))
            plt.hist(token_lengths, bins=range(1, max(token_lengths) + 2), color='#4CAF50', alpha=0.7)
            plt.xlabel('Token Length')
            plt.ylabel('Frequency')
            plt.title('Token Length Distribution')
            plt.grid(axis='y', alpha=0.3)
            plt.tight_layout()
            output_html.append(fig_to_html(fig))

        avg_token_length = sum(token_lengths) / len(token_lengths) if token_lengths else 0
        output_html.append(f"""
        <h4>Tokenization Statistics</h4>
        <div class="row mt-3">
            <div class="col-md-4">
                <div class="card text-center">
                    <div class="card-body">
                        <h3 class="text-success">{token_count}</h3>
                        <p class="mb-0">Total Tokens</p>
                    </div>
                </div>
            </div>
            <div class="col-md-4">
                <div class="card text-center">
                    <div class="card-body">
                        <h3 class="text-primary">{avg_token_length:.2f}</h3>
                        <p class="mb-0">Average Token Length</p>
                    </div>
                </div>
            </div>
            <div class="col-md-4">
                <div class="card text-center">
                    <div class="card-body">
                        <h3 class="text-warning">{tokens_per_sentence:.2f}</h3>
                        <p class="mb-0">Tokens per Sentence</p>
                    </div>
                </div>
            </div>
        </div>
        """)

    except Exception as e:
        output_html.append(f"""
        <div class="alert alert-danger">
            <h3>Error</h3>
            <p>Failed to process tokenization: {escape(str(e))}</p>
        </div>
        """)

    # About Tokenization section
    output_html.append("""
    <div class="card mt-4">
        <div class="card-header">
            <h4 class="mb-0">
                <i class="fas fa-info-circle"></i>
                About Tokenization
            </h4>
        </div>
        <div class="card-body">
            <h5>What is Tokenization?</h5>

            <p>Tokenization is the process of breaking down text into smaller units called tokens.
            These tokens can be words, subwords, characters, or symbols, depending on the approach.
            It's typically the first step in most NLP pipelines.</p>

            <h5>Types of Tokenization:</h5>

            <ul>
                <li><b>Word Tokenization</b> - Splits text on whitespace and punctuation (with various rules)</li>
                <li><b>Sentence Tokenization</b> - Divides text into sentences using punctuation and other rules</li>
                <li><b>Subword Tokenization</b> - Splits words into meaningful subunits (WordPiece, BPE, SentencePiece)</li>
                <li><b>Character Tokenization</b> - Treats each character as a separate token</li>
            </ul>

            <h5>Why Subword Tokenization?</h5>

            <p>Modern NLP models use subword tokenization because:</p>
            <ul>
                <li>It handles out-of-vocabulary words better</li>
                <li>It represents rare words by decomposing them</li>
                <li>It works well for morphologically rich languages</li>
                <li>It balances vocabulary size and token length</li>
            </ul>
        </div>
    </div>
    """)

    output_html.append('</div>')  # Close result-area div

    return '\n'.join(output_html)
|
components/topic_analysis.py
ADDED
|
@@ -0,0 +1,766 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import matplotlib.pyplot as plt
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
import nltk
|
| 5 |
+
from collections import Counter
|
| 6 |
+
import networkx as nx
|
| 7 |
+
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
|
| 8 |
+
from sklearn.decomposition import LatentDirichletAllocation, NMF
|
| 9 |
+
import wordcloud
|
| 10 |
+
from nltk.corpus import stopwords
|
| 11 |
+
from nltk.tokenize import word_tokenize
|
| 12 |
+
from nltk.stem import WordNetLemmatizer
|
| 13 |
+
import matplotlib.colors as mcolors
|
| 14 |
+
import io
|
| 15 |
+
import base64
|
| 16 |
+
|
| 17 |
+
from utils.model_loader import download_nltk_resources
|
| 18 |
+
from utils.helpers import fig_to_html, df_to_html_table
|
| 19 |
+
|
| 20 |
+
def classify_topic(text_input):
    """Classify text into one of ten predefined topics by keyword counting.

    Each topic has a fixed keyword list. Keywords are counted as
    case-insensitive substrings of the text, so "students" counts towards
    "student".

    Args:
        text_input: Text to classify.

    Returns:
        tuple: ``(main_topic, confidence, sorted_topics, topic_scores)`` where

        * ``main_topic`` is the name of the highest-scoring topic (the first
          topic in definition order when nothing matches),
        * ``confidence`` is the main topic's share of all keyword hits as a
          percentage rounded to one decimal (``0.0`` when nothing matches),
        * ``sorted_topics`` is a list of ``(topic, score)`` pairs in
          descending score order,
        * ``topic_scores`` maps each topic to its keyword hits divided by the
          number of whitespace-separated words in the text.
    """
    # Define topic keywords
    topic_keywords = {
        'environment': ['climate', 'environment', 'weather', 'earth', 'temperature', 'pollution', 'warming', 'planet', 'ecosystem', 'sustainable'],
        'science': ['science', 'scientific', 'research', 'study', 'experiment', 'discovery', 'theory', 'laboratory', 'data'],
        'business': ['business', 'company', 'market', 'economy', 'economic', 'finance', 'industry', 'corporate', 'trade'],
        'education': ['education', 'school', 'student', 'learn', 'teach', 'academic', 'university', 'college', 'knowledge'],
        'health': ['health', 'medical', 'doctor', 'patient', 'disease', 'treatment', 'hospital', 'medicine', 'healthcare'],
        'technology': ['technology', 'tech', 'computer', 'digital', 'software', 'hardware', 'internet', 'device', 'innovation'],
        'politics': ['politics', 'government', 'policy', 'election', 'political', 'law', 'president', 'party', 'vote'],
        'sports': ['sport', 'game', 'team', 'player', 'competition', 'athlete', 'championship', 'tournament', 'coach'],
        'entertainment': ['entertainment', 'movie', 'music', 'film', 'television', 'celebrity', 'actor', 'actress', 'show'],
        'travel': ['travel', 'trip', 'vacation', 'tourist', 'destination', 'journey', 'adventure', 'flight', 'hotel']
    }

    text = text_input.lower()

    # Raw keyword hits per topic
    keyword_hits = {
        topic: sum(text.count(keyword) for keyword in keywords)
        for topic, keywords in topic_keywords.items()
    }

    # Normalize by text length; the small constant avoids dividing by zero
    # when the text is empty.
    word_total = len(text.split()) + 0.001
    topic_scores = {topic: hits / word_total for topic, hits in keyword_hits.items()}

    main_topic = max(topic_scores, key=topic_scores.get)

    # Confidence is computed from raw hit counts. Adding a smoothing constant
    # to the sum of normalized scores (which are themselves tiny for long
    # texts) would make the confidence shrink as the text grows.
    total_hits = sum(keyword_hits.values())
    if total_hits:
        confidence = round(100 * keyword_hits[main_topic] / total_hits, 1)
    else:
        confidence = 0.0

    # Sort topics by score for visualization
    sorted_topics = sorted(topic_scores.items(), key=lambda item: item[1], reverse=True)

    return main_topic, confidence, sorted_topics, topic_scores
|
| 62 |
+
|
| 63 |
+
def extract_key_phrases(text_input, top_n=10):
    """Extract the most frequent key phrases from text.

    Candidates are bigrams and trigrams counted with ``CountVectorizer``
    (English stop words removed, at most 100 features each) plus the
    ``top_n`` most frequent nouns, verbs and adjectives that are longer than
    two characters and are not NLTK English stop words.

    Args:
        text_input: Text to analyse.
        top_n: Maximum number of phrases to return.

    Returns:
        list: ``(phrase, frequency)`` tuples sorted by descending frequency,
        truncated to ``top_n`` entries.
    """
    # Download required NLTK resources
    download_nltk_resources()

    stop_words = set(stopwords.words('english'))

    phrases = []

    # Bigrams, then trigrams
    for ngram_size in (2, 3):
        vectorizer = CountVectorizer(
            ngram_range=(ngram_size, ngram_size),
            stop_words='english',
            max_features=100,
        )
        try:
            matrix = vectorizer.fit_transform([text_input])
        except ValueError:
            # Raised when the text yields no n-grams of this size (empty
            # vocabulary); simply contribute no phrases.
            continue

        counts = matrix.toarray()[0]
        phrases.extend(
            (phrase, int(count))
            for phrase, count in zip(vectorizer.get_feature_names_out(), counts)
            if count >= 1  # Must appear at least once
        )

    # Single important words: nouns (NN*), verbs (VB*) and adjectives (JJ*)
    important_words = [
        word.lower()
        for word, tag in nltk.pos_tag(word_tokenize(text_input))
        if tag.startswith(('NN', 'VB', 'JJ'))
        and word.lower() not in stop_words
        and len(word) > 2
    ]
    phrases.extend(Counter(important_words).most_common(top_n))

    # Sort phrases by frequency (stable, so ties keep insertion order)
    sorted_phrases = sorted(phrases, key=lambda item: item[1], reverse=True)

    return sorted_phrases[:top_n]
|
| 126 |
+
|
| 127 |
+
def create_phrase_cloud(phrases):
    """Create a word cloud from phrases.

    Args:
        phrases: Iterable of ``(phrase, frequency)`` pairs.

    Returns:
        The HTML produced by ``fig_to_html`` for the rendered cloud, or a
        short ``<p>`` fallback message when the cloud cannot be generated
        (for example when there are no phrases to draw).
    """
    fallback = "<p>Could not generate phrase cloud due to insufficient data.</p>"

    # Convert phrases to a dictionary of {phrase: frequency}
    phrase_freq = dict(phrases)

    # WordCloud refuses an empty frequency table, so answer with the fallback
    # directly instead of building a cloud object only to have it fail.
    if not phrase_freq:
        return fallback

    wc = wordcloud.WordCloud(
        background_color='white',
        width=600,
        height=400,
        colormap='viridis',
        max_words=50,
        prefer_horizontal=0.9,
        random_state=42,
    )

    try:
        wc.generate_from_frequencies(phrase_freq)
    except Exception:
        # Best effort: the cloud is decorative, so degrade to a message
        # rather than failing the whole report.
        return fallback

    fig = plt.figure(figsize=(10, 6))
    try:
        plt.imshow(wc, interpolation='bilinear')
        plt.axis('off')
        plt.tight_layout()
        return fig_to_html(fig)
    except Exception:
        # Do not leave a half-drawn figure registered with pyplot.
        plt.close(fig)
        return fallback
|
| 156 |
+
|
| 157 |
+
# Minimum number of whitespace-separated words before LDA topic modeling is attempted.
_MIN_WORDS_FOR_TOPIC_MODELING = 50


def _preprocess_tokens(text):
    """Lowercase and tokenize *text*, drop stopwords/non-alphabetic tokens, lemmatize the rest."""
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    return [
        lemmatizer.lemmatize(token)
        for token in word_tokenize(text.lower())
        if token.isalpha() and token not in stop_words
    ]


def _append_topic_classification(output_html, text_input):
    """Append the ``classify_topic`` verdict, a relevance bar chart, and the score table."""
    output_html.append('<h3 class="task-subheader">Topic Classification</h3>')

    # The fourth return value (raw topic scores) is not needed for display.
    main_topic, confidence, sorted_topics, _ = classify_topic(text_input)

    output_html.append(f"""
    <div class="alert alert-success">
        <p class="mb-0 fs-5">This text is primarily about <strong>{main_topic}</strong> with {confidence}% confidence</p>
    </div>
    """)

    # Row 1: Topic Relevance Chart (full width)
    output_html.append('<div class="row">')
    output_html.append('<div class="col-12">')
    output_html.append('<h4>Topic Relevance</h4>')

    fig = plt.figure(figsize=(10, 6))
    topics = [topic for topic, _ in sorted_topics]
    scores = [score for _, score in sorted_topics]

    # Only show the top topics for clarity
    shown = min(10, len(topics))
    y_pos = np.arange(shown)
    colors = plt.cm.Blues(np.linspace(0.4, 0.8, shown))
    bars = plt.barh(y_pos, [s * 100 for s in scores[:shown]], color=colors)

    # Print the percentage next to each bar
    for bar in bars:
        width = bar.get_width()
        plt.text(width + 0.5, bar.get_y() + bar.get_height()/2,
                 f"{width:.1f}%",
                 va='center')

    plt.yticks(y_pos, topics[:shown])
    plt.xlabel('Relevance')
    plt.title('Topic Scores')
    plt.tight_layout()

    output_html.append(fig_to_html(fig))
    output_html.append('</div>')
    output_html.append('</div>')  # Close row 1

    # Row 2: Topic Scores Table (full width)
    output_html.append('<div class="row mt-3">')
    output_html.append('<div class="col-12">')
    output_html.append('<h4>Topic Scores</h4>')

    topic_scores_df = pd.DataFrame({
        'Rank': range(1, len(sorted_topics) + 1),
        'Topic': [topic.capitalize() for topic, _ in sorted_topics],
        'Confidence': [f"{score:.4f}" for _, score in sorted_topics]
    })

    output_html.append(df_to_html_table(topic_scores_df))
    output_html.append('</div>')
    output_html.append('</div>')  # Close row 2


def _append_key_phrases(output_html, text_input):
    """Append the key-phrase table and phrase cloud, or a notice when nothing was extracted."""
    output_html.append('<h3 class="task-subheader">Key Phrases</h3>')

    key_phrases = extract_key_phrases(text_input)
    if not key_phrases:
        output_html.append("<p>No key phrases could be extracted from the text.</p>")
        return

    phrase_df = pd.DataFrame({
        'Phrase': [phrase for phrase, _ in key_phrases],
        'Frequency': [freq for _, freq in key_phrases]
    })

    # Row 1: Key phrases table (full width)
    output_html.append('<div class="row">')
    output_html.append('<div class="col-12">')
    output_html.append(df_to_html_table(phrase_df))
    output_html.append('</div>')
    output_html.append('</div>')  # Close row 1

    # Row 2: Phrase cloud (full width)
    output_html.append('<div class="row mt-3">')
    output_html.append('<div class="col-12">')
    output_html.append(create_phrase_cloud(key_phrases))
    output_html.append('</div>')
    output_html.append('</div>')  # Close row 2


def _append_term_frequency(output_html, word_freq):
    """Append the term-frequency bar chart and top-terms table for *word_freq* (sorted, descending)."""
    output_html.append('<h3 class="task-subheader">Key Term Frequency Analysis</h3>')

    if not word_freq:
        # Nothing survived preprocessing; skip the empty chart and table.
        output_html.append("<p>No terms were left to count after removing stopwords and non-alphabetic tokens.</p>")
        return

    # Take at most 25 words for the visualization
    max_terms = 25
    top_words = list(word_freq)[:max_terms]
    top_freqs = list(word_freq.values())[:max_terms]

    fig = plt.figure(figsize=(10, 6))
    colors = plt.cm.viridis(np.linspace(0.3, 0.85, len(top_words)))
    bars = plt.bar(top_words, top_freqs, color=colors)
    plt.xlabel('Term')
    plt.ylabel('Frequency')
    # Report the number of terms actually plotted, which may be fewer than 25.
    plt.title(f'Top {len(top_words)} Term Frequencies')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()

    # Add value labels on top of bars
    for bar in bars:
        height = bar.get_height()
        plt.text(bar.get_x() + bar.get_width()/2., height + 0.1,
                 f'{height}',
                 ha='center', va='bottom',
                 fontsize=8)

    # Row 1: Chart (full width)
    output_html.append('<div class="row">')
    output_html.append('<div class="col-12">')
    output_html.append(fig_to_html(fig))
    output_html.append('</div>')
    output_html.append('</div>')  # Close row 1

    # Row 2: Top terms table (full width)
    output_html.append('<div class="row mt-3">')
    output_html.append('<div class="col-12">')
    output_html.append('<h4>Top Terms</h4>')

    top_terms_df = pd.DataFrame({
        'Term': list(word_freq)[:15],
        'Frequency': list(word_freq.values())[:15]
    })

    output_html.append(df_to_html_table(top_terms_df))
    output_html.append('</div>')
    output_html.append('</div>')  # Close row 2


def _append_word_cloud(output_html, word_freq):
    """Append a word cloud sized by the term frequencies in *word_freq*."""
    output_html.append('<h3 class="task-subheader">Word Cloud Visualization</h3>')

    if not word_freq:
        # generate_from_frequencies raises on an empty table, which used to
        # abort every later section of the report.
        output_html.append("<p>Not enough terms to draw a word cloud.</p>")
        return

    output_html.append('<p>The size of each word represents its frequency in the text.</p>')

    wc = wordcloud.WordCloud(
        background_color='white',
        max_words=100,
        width=800,
        height=400,
        colormap='viridis',
        contour_width=1,
        contour_color='steelblue'
    )
    wc.generate_from_frequencies(word_freq)

    fig = plt.figure(figsize=(12, 6))
    plt.imshow(wc, interpolation='bilinear')
    plt.axis('off')
    plt.tight_layout()

    output_html.append(fig_to_html(fig))


def _append_tfidf_analysis(output_html, sentences):
    """Append the per-sentence TF-IDF table and, for up to 10 sentences, a term/sentence heatmap."""
    output_html.append('<h3 class="task-subheader">TF-IDF Analysis</h3>')
    output_html.append("""
    <div class="alert alert-light">
        <p class="mb-0">
            Term Frequency-Inverse Document Frequency (TF-IDF) identifies terms that are distinctive to parts of the text.
            In this case, we treat each sentence as a separate "document" for the analysis.
        </p>
    </div>
    """)

    # Only perform TF-IDF if there are enough sentences
    if len(sentences) < 3:
        output_html.append("""
    <div class="alert alert-warning">
        <p class="mb-0">TF-IDF analysis requires at least 3 sentences. The provided text doesn't have enough sentences for this analysis.</p>
    </div>
    """)
        return

    tfidf_vectorizer = TfidfVectorizer(
        max_features=100,
        stop_words='english',
        min_df=1
    )
    tfidf_matrix = tfidf_vectorizer.fit_transform(sentences)
    feature_names = tfidf_vectorizer.get_feature_names_out()

    # Table of the top TF-IDF terms per sentence (max 5 sentences to avoid clutter)
    tfidf_data = []
    for i, sentence in enumerate(sentences[:5]):
        tfidf_scores = tfidf_matrix[i].toarray()[0]
        top_indices = np.argsort(tfidf_scores)[-5:][::-1]  # Top 5 terms

        formatted_terms = ', '.join(
            f"{feature_names[idx]} ({tfidf_scores[idx]:.3f})" for idx in top_indices
        )
        shortened_sentence = (sentence[:75] + '...') if len(sentence) > 75 else sentence

        tfidf_data.append({
            'Sentence': shortened_sentence,
            'Distinctive Terms (TF-IDF scores)': formatted_terms
        })

    tfidf_df = pd.DataFrame(tfidf_data)

    output_html.append('<div class="mt-3">')
    output_html.append(df_to_html_table(tfidf_df))
    output_html.append('</div>')

    # Only create the heatmap for a reasonable number of sentences
    if len(sentences) > 10:
        return

    # Top 10 terms by mean TF-IDF across all sentences
    mean_tfidf = np.mean(tfidf_matrix.toarray(), axis=0)
    top_indices = np.argsort(mean_tfidf)[-10:][::-1]
    top_terms = [feature_names[idx] for idx in top_indices]

    heatmap_data = tfidf_matrix[:, top_indices].toarray()

    fig = plt.figure(figsize=(10, 6))
    plt.imshow(heatmap_data, cmap='viridis', aspect='auto')

    plt.yticks(range(len(sentences)), [f"Sent {i+1}" for i in range(len(sentences))])
    plt.xticks(range(len(top_terms)), top_terms, rotation=45, ha='right')

    plt.colorbar(label='TF-IDF Score')
    plt.xlabel('Terms')
    plt.ylabel('Sentences')
    plt.title('TF-IDF Heatmap: Term Importance by Sentence')
    plt.tight_layout()

    output_html.append('<h4>Term Importance Heatmap</h4>')
    output_html.append('<p>This heatmap shows which terms are most distinctive in each sentence.</p>')
    output_html.append(fig_to_html(fig))


def _append_topic_network(output_html, lda_model, feature_names, n_topics):
    """Append a graph linking each LDA topic to its ten highest-weighted terms."""
    output_html.append('<h4>Topic-Term Network</h4>')
    output_html.append('<p>This visualization shows the relationships between topics and their most important terms.</p>')

    G = nx.Graph()

    # Add topic nodes
    for i in range(n_topics):
        G.add_node(f"Topic {i+1}", type='topic', size=1000)

    # Add term nodes and edges
    for topic_idx, topic in enumerate(lda_model.components_):
        topic_name = f"Topic {topic_idx+1}"

        for i in topic.argsort()[:-11:-1]:
            term = feature_names[i]
            weight = topic[i]

            # Only add terms with significant weight
            if weight > 0.01:
                if not G.has_node(term):
                    G.add_node(term, type='term', size=300)

                G.add_edge(topic_name, term, weight=weight)

    fig = plt.figure(figsize=(10, 8))

    # Position nodes using spring layout (seeded so the layout is repeatable)
    pos = nx.spring_layout(G, k=0.3, seed=42)

    topic_nodes = [node for node in G.nodes() if G.nodes[node]['type'] == 'topic']
    term_nodes = [node for node in G.nodes() if G.nodes[node]['type'] == 'term']

    # Draw topic nodes
    nx.draw_networkx_nodes(
        G, pos,
        nodelist=topic_nodes,
        node_color='#E53935',
        node_size=[G.nodes[node]['size'] for node in topic_nodes],
        alpha=0.8
    )

    # Draw term nodes
    nx.draw_networkx_nodes(
        G, pos,
        nodelist=term_nodes,
        node_color='#1976D2',
        node_size=[G.nodes[node]['size'] for node in term_nodes],
        alpha=0.6
    )

    # Draw edges with thickness proportional to the term weight
    edge_weights = [G[u][v]['weight'] * 5 for u, v in G.edges()]
    nx.draw_networkx_edges(
        G, pos,
        width=edge_weights,
        alpha=0.5,
        edge_color='gray'
    )

    # Draw labels
    nx.draw_networkx_labels(
        G, pos,
        font_size=10,
        font_weight='bold'
    )

    plt.axis('off')
    plt.tight_layout()

    output_html.append(fig_to_html(fig))


def _append_topic_modeling(output_html, sentences, word_count):
    """Append LDA results (table, per-topic clouds, distribution, network) when the text is long enough."""
    output_html.append('<h3 class="task-subheader">Topic Modeling</h3>')
    output_html.append("""
    <div class="alert alert-light">
        <p class="mb-0">
            Topic modeling uses statistical methods to discover abstract "topics" that occur in a collection of documents.
            Here, we use Latent Dirichlet Allocation (LDA) to identify potential topics.
        </p>
    </div>
    """)

    # Check if text is long enough for topic modeling
    if word_count < _MIN_WORDS_FOR_TOPIC_MODELING:
        output_html.append("""
    <div class="alert alert-warning">
        <p class="mb-0">Topic modeling works best with longer texts. The provided text is too short for reliable topic modeling.</p>
    </div>
    """)
        return

    # A single document is split into sentences to obtain a small "corpus".
    # Few sentences only trigger a warning; the model is still fitted.
    if len(sentences) < 4:
        output_html.append("""
    <div class="alert alert-warning">
        <p class="mb-0">Topic modeling works best with multiple documents or paragraphs. Since the provided text has few sentences,
        the topic modeling results may not be meaningful.</p>
    </div>
    """)

    # Create document-term matrix using CountVectorizer
    vectorizer = CountVectorizer(
        max_features=1000,
        stop_words='english',
        min_df=1
    )
    dtm = vectorizer.fit_transform(sentences)
    feature_names = vectorizer.get_feature_names_out()

    # Set number of topics based on text length (always 2 or 3)
    n_topics = min(3, max(2, len(sentences) // 3))

    lda_model = LatentDirichletAllocation(
        n_components=n_topics,
        max_iter=10,
        learning_method='online',
        random_state=42
    )
    lda_model.fit(dtm)

    # Get top terms for each topic
    n_top_words = 10
    topic_terms = []
    for topic_idx, topic in enumerate(lda_model.components_):
        top_indices = topic.argsort()[:-n_top_words - 1:-1]
        # Share of the topic's total weight carried by its top terms;
        # an approximation of topic "importance".
        topic_weight = topic[top_indices].sum() / topic.sum()
        topic_terms.append({
            "Topic": f"Topic {topic_idx + 1}",
            "Top Terms": ", ".join(feature_names[i] for i in top_indices),
            "Weight": f"{topic_weight:.2f}"
        })

    output_html.append('<h4>LDA Topic Model Results</h4>')
    output_html.append(df_to_html_table(pd.DataFrame(topic_terms)))

    # Create word cloud for each topic
    output_html.append('<h4>Topic Word Clouds</h4>')
    output_html.append('<div class="row">')

    for topic_idx, topic in enumerate(lda_model.components_):
        # Up to 50 highest-weighted words of this topic
        word_weights = {feature_names[i]: topic[i] for i in topic.argsort()[:-50-1:-1]}

        wc = wordcloud.WordCloud(
            background_color='white',
            max_words=30,
            width=400,
            height=300,
            colormap='plasma',
            contour_width=1,
            contour_color='steelblue'
        )
        wc.generate_from_frequencies(word_weights)

        fig = plt.figure(figsize=(6, 4))
        plt.imshow(wc, interpolation='bilinear')
        plt.axis('off')
        plt.title(f'Topic {topic_idx + 1}')
        plt.tight_layout()

        output_html.append('<div class="col-12 mb-3">')
        output_html.append(fig_to_html(fig))
        output_html.append('</div>')

    output_html.append('</div>')  # Close row for word clouds

    # Topic distribution: count sentences by their dominant topic
    topic_distribution = lda_model.transform(dtm)
    dominant_topics = np.argmax(topic_distribution, axis=1)
    topic_counts = Counter(dominant_topics)

    topics = [f"Topic {i+1}" for i in range(n_topics)]
    counts = [topic_counts.get(i, 0) for i in range(n_topics)]

    fig = plt.figure(figsize=(8, 5))
    bars = plt.bar(topics, counts, color=plt.cm.plasma(np.linspace(0.15, 0.85, n_topics)))

    # Add value labels
    for bar in bars:
        height = bar.get_height()
        plt.text(bar.get_x() + bar.get_width()/2., height + 0.1,
                 f'{height}',
                 ha='center', va='bottom')

    plt.xlabel('Topic')
    plt.ylabel('Number of Sentences')
    plt.title('Distribution of Dominant Topics Across Sentences')
    plt.tight_layout()

    output_html.append('<h4>Topic Distribution</h4>')
    output_html.append(fig_to_html(fig))

    _append_topic_network(output_html, lda_model, feature_names, n_topics)

    # NOTE(review): the indentation of this file was lost in extraction; this
    # note is assumed to belong to the modeling branch (shown only when a
    # model was fitted) - confirm against the original file.
    output_html.append("""
    <div class="alert alert-info">
        <h4>Interpreting Topic Models</h4>
        <p>Topic modeling is an unsupervised technique that works best with large collections of documents.
        For a single text, especially shorter ones, topics may be less distinct or meaningful.
        The "topics" shown here represent clusters of words that frequently appear together in the text.</p>
        <p>For better topic modeling results:</p>
        <ul>
            <li>Use longer texts with at least several paragraphs</li>
            <li>Provide multiple related documents for analysis</li>
            <li>Consider domain-specific preprocessing</li>
        </ul>
    </div>
    """)


def topic_analysis_handler(text_input):
    """Show topic analysis capabilities.

    Builds an HTML report for ``text_input`` made of these sections, in order:
    topic classification, key phrases, term frequencies, a word cloud,
    per-sentence TF-IDF, and LDA topic modeling (only attempted for texts of
    at least 50 words). Output is accumulated progressively, so when a section
    raises, everything rendered before it is kept and an error alert is
    appended in place of the remaining sections.

    Args:
        text_input: The text to analyze.

    Returns:
        The report as a single HTML string wrapped in a ``result-area`` div.
    """
    # Local import: only needed to sanitize the error message below.
    from html import escape

    output_html = []

    # Add result area container
    output_html.append('<div class="result-area">')
    output_html.append('<h2 class="task-header">Topic Analysis</h2>')

    output_html.append("""
    <div class="alert alert-info">
        <i class="fas fa-info-circle"></i>
        Topic analysis identifies the main themes and subjects in a text, helping to categorize content and understand what it's about.
    </div>
    """)

    # Model info
    output_html.append("""
    <div class="alert alert-info">
        <h4><i class="fas fa-tools"></i> Models & Techniques Used:</h4>
        <ul>
            <li><b>Zero-shot Classification</b> - BART model that can classify text without specific training</li>
            <li><b>TF-IDF Vectorizer</b> - Statistical method to identify important terms</li>
            <li><b>Word/Phrase Analysis</b> - Extraction of important n-grams</li>
        </ul>
    </div>
    """)

    try:
        # Ensure NLTK resources are downloaded
        download_nltk_resources()

        # Check if text is long enough for meaningful analysis
        word_count = len(text_input.split())
        if word_count < _MIN_WORDS_FOR_TOPIC_MODELING:
            output_html.append(f"""
    <div class="alert alert-warning">
        <h3>Text Too Short for Full Topic Analysis</h3>
        <p>The provided text contains only {word_count} words.
        For meaningful topic analysis, please provide a longer text (at least {_MIN_WORDS_FOR_TOPIC_MODELING} words).
        We'll still perform basic frequency analysis, but topic modeling results may not be reliable.</p>
    </div>
    """)

        processed_tokens = _preprocess_tokens(text_input)

        _append_topic_classification(output_html, text_input)
        _append_key_phrases(output_html, text_input)

        # Frequencies sorted from most to least common; ties keep first-seen order.
        sorted_word_freq = dict(Counter(processed_tokens).most_common())
        _append_term_frequency(output_html, sorted_word_freq)
        _append_word_cloud(output_html, sorted_word_freq)

        # Sentences act as the "documents" for both TF-IDF and LDA.
        sentences = nltk.sent_tokenize(text_input)
        _append_tfidf_analysis(output_html, sentences)
        _append_topic_modeling(output_html, sentences, word_count)

    except Exception as e:
        # Exception text can echo parts of the user's input, so escape it
        # before embedding it in the page.
        output_html.append(f"""
    <div class="alert alert-danger">
        <h3>Error</h3>
        <p>Failed to analyze topics: {escape(str(e))}</p>
    </div>
    """)

    # About Topic Analysis section
    output_html.append("""
    <div class="card mt-4">
        <div class="card-header">
            <h4 class="mb-0">
                <i class="fas fa-info-circle"></i>
                About Topic Analysis
            </h4>
        </div>
        <div class="card-body">
            <h5>What is Topic Analysis?</h5>

            <p>Topic analysis, also known as topic modeling or topic extraction, is the process of identifying the main themes
            or topics that occur in a collection of documents. It uses statistical models to discover abstract topics based
            on word distributions throughout the texts.</p>

            <h5>Common Approaches:</h5>

            <ul>
                <li><b>Term Frequency Analysis</b> - Simple counting of terms to find the most common topics</li>
                <li><b>TF-IDF (Term Frequency-Inverse Document Frequency)</b> - Identifies terms that are distinctive to particular documents or sections</li>
                <li><b>LDA (Latent Dirichlet Allocation)</b> - A probabilistic model that assigns topic distributions to documents</li>
                <li><b>NMF (Non-negative Matrix Factorization)</b> - A linear-algebraic approach to topic discovery</li>
                <li><b>BERTopic</b> - A modern approach that uses BERT embeddings and clustering for topic modeling</li>
            </ul>

            <h5>Applications:</h5>

            <ul>
                <li><b>Content organization</b> - Categorizing documents by topic</li>
                <li><b>Trend analysis</b> - Tracking how topics evolve over time</li>
                <li><b>Content recommendation</b> - Suggesting related content based on topic similarity</li>
                <li><b>Customer feedback analysis</b> - Understanding main themes in reviews or feedback</li>
                <li><b>Research insights</b> - Identifying research themes in academic papers</li>
            </ul>
        </div>
    </div>
    """)

    output_html.append('</div>')  # Close result-area div

    return '\n'.join(output_html)
|
components/translation.py
ADDED
|
@@ -0,0 +1,337 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import matplotlib.pyplot as plt
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
from collections import Counter
|
| 5 |
+
import time
|
| 6 |
+
|
| 7 |
+
from utils.model_loader import load_translator
|
| 8 |
+
from utils.helpers import fig_to_html, df_to_html_table
|
| 9 |
+
|
| 10 |
+
def translation_handler(text_input, source_lang="auto", target_lang="en"):
    """Render an HTML report demonstrating machine translation of ``text_input``.

    The report contains static explanatory panels, the source text, the
    translated text, simple length statistics, a bar chart and two reference
    tables. All user-controlled values (input text, language codes, model
    output, exception messages) are HTML-escaped before being embedded.

    Args:
        text_input: Text to translate. ``None`` or whitespace-only input
            yields a "No Text Provided" notice instead of a translation.
        source_lang: Source language code, or ``"auto"``. Passed unchanged to
            ``load_translator``.
        target_lang: Target language code. Passed unchanged to
            ``load_translator``.

    Returns:
        A single HTML string (fragments joined with newlines).

    Any exception raised while translating or rendering is caught and shown
    as a "Translation Error" alert inside the returned HTML; it is never
    propagated to the caller.
    """
    # Local import so this block does not depend on a file-level import.
    from html import escape

    output_html = []

    # Add result area container
    output_html.append('<div class="result-area">')
    output_html.append('<h2 class="task-header">Machine Translation</h2>')

    output_html.append("""
    <div class="alert alert-info">
        <i class="fas fa-language"></i>
        Machine translation converts text from one language to another while preserving meaning and context as accurately as possible.
    </div>
    """)

    # Model info
    output_html.append("""
    <div class="alert alert-info">
        <h4><i class="fas fa-tools"></i> Model Used:</h4>
        <ul>
            <li><b>Helsinki-NLP/opus-mt</b> - A collection of pre-trained neural machine translation models</li>
            <li><b>Capabilities</b> - Translates between various language pairs with good accuracy</li>
            <li><b>Architecture</b> - Transformer-based sequence-to-sequence model</li>
        </ul>
    </div>
    """)

    try:
        # Treat None the same as empty text rather than failing on .strip().
        if not text_input or not text_input.strip():
            output_html.append("""
            <div class="alert alert-warning">
                <h3>No Text Provided</h3>
                <p>Please enter some text to translate.</p>
            </div>
            """)
            output_html.append('</div>')  # Close result-area div
            return '\n'.join(output_html)

        # Display source text
        output_html.append('<h3 class="task-subheader">Source Text</h3>')

        # Language mapping for display
        language_names = {
            "auto": "Auto-detect",
            "en": "English",
            "es": "Spanish",
            "fr": "French",
            "de": "German",
            "ru": "Russian",
            "zh": "Chinese",
            "ar": "Arabic",
            "hi": "Hindi",
            "ja": "Japanese",
            "pt": "Portuguese",
            "it": "Italian",
        }

        # Unknown codes fall back to the caller-supplied value, so the
        # display names must be escaped wherever they are written into HTML.
        source_lang_display = language_names.get(source_lang, source_lang)
        target_lang_display = language_names.get(target_lang, target_lang)
        source_lang_html = escape(str(source_lang_display))
        target_lang_html = escape(str(target_lang_display))

        # Format source text info
        output_html.append(f"""
        <div class="mb-2">
            <span class="badge bg-primary">
                {source_lang_html}
            </span>
        </div>
        """)

        # Display source text (escaped: it is arbitrary user input)
        output_html.append(
            f'<div class="card"><div class="card-body">{escape(text_input)}</div></div>'
        )

        # Load translation model
        translator = load_translator(source_lang, target_lang)

        # Check text length and apply limit if needed
        MAX_TEXT_LENGTH = 500  # Characters
        truncated = len(text_input) > MAX_TEXT_LENGTH
        text_to_translate = text_input[:MAX_TEXT_LENGTH] if truncated else text_input

        # Perform translation; perf_counter is monotonic, so the measured
        # interval cannot be skewed by wall-clock adjustments.
        start_time = time.perf_counter()
        translation = translator(text_to_translate)
        translated_text = translation[0]['translation_text']
        translation_time = time.perf_counter() - start_time

        # Display translation results
        output_html.append('<h3 class="task-subheader">Translation</h3>')

        # Show target language
        output_html.append(f"""
        <div class="mb-2">
            <span class="badge bg-success">
                {target_lang_html}
            </span>
        </div>
        """)

        # Display translated text (escaped: model output may echo markup)
        output_html.append(
            f'<div class="card"><div class="card-body bg-light">{escape(translated_text)}</div></div>'
        )

        # Show truncation warning if needed
        if truncated:
            output_html.append(f"""
            <div class="alert alert-warning">
                <p class="mb-0"><b>⚠️ Note:</b> Your text was truncated to {MAX_TEXT_LENGTH} characters due to model limitations. Only the first part was translated.</p>
            </div>
            """)

        # Translation statistics
        output_html.append('<h3 class="task-subheader">Translation Analysis</h3>')

        # Statistics are taken from the text that was actually translated so
        # the target/source ratios compare like with like after truncation.
        source_chars = len(text_to_translate)
        source_words = len(text_to_translate.split())
        target_chars = len(translated_text)
        target_words = len(translated_text.split())

        # Display stats in a nice format
        output_html.append(f"""
        <div class="row text-center mb-4">
            <div class="col-md-4">
                <div class="card">
                    <div class="card-body">
                        <div class="display-4 text-primary">{source_words}</div>
                        <div>Source Words</div>
                    </div>
                </div>
            </div>
            <div class="col-md-4">
                <div class="card">
                    <div class="card-body">
                        <div class="display-4 text-success">{target_words}</div>
                        <div>Translated Words</div>
                    </div>
                </div>
            </div>
            <div class="col-md-4">
                <div class="card">
                    <div class="card-body">
                        <div class="display-4 text-warning">{translation_time:.2f}s</div>
                        <div>Processing Time</div>
                    </div>
                </div>
            </div>
        </div>
        """)

        # Length comparison
        output_html.append('<h4>Length Comparison</h4>')

        # Create grouped bar chart comparing text lengths
        fig, ax = plt.subplots(figsize=(10, 5))
        try:
            x = np.arange(2)
            width = 0.35
            source_counts = [source_words, source_chars]
            target_counts = [target_words, target_chars]

            ax.bar(x - width / 2, source_counts, width, label='Source Text', color='#1976D2')
            ax.bar(x + width / 2, target_counts, width, label='Translated Text', color='#4CAF50')

            ax.set_xticks(x)
            ax.set_xticklabels(['Word Count', 'Character Count'])
            ax.legend()

            # Add value labels on top of bars
            for i, v in enumerate(source_counts):
                ax.text(i - width / 2, v + 0.5, str(v), ha='center')
            for i, v in enumerate(target_counts):
                ax.text(i + width / 2, v + 0.5, str(v), ha='center')

            ax.set_title('Source vs. Translation Length Comparison')
            fig.tight_layout()

            output_html.append(fig_to_html(fig))
        finally:
            # Release the figure even if rendering fails; closing a figure
            # that the helper has already closed is a harmless no-op.
            plt.close(fig)

        # Expansion/contraction ratio
        word_ratio = target_words / source_words if source_words > 0 else 0
        char_ratio = target_chars / source_chars if source_chars > 0 else 0

        if word_ratio > 1.1:
            expansion_type = "expansion"
        elif word_ratio < 0.9:
            expansion_type = "contraction"
        else:
            expansion_type = "similar length"

        output_html.append(f"""
        <div class="alert alert-info">
            <h4>Translation Length Analysis</h4>
            <p>The translation shows <b>{expansion_type}</b> compared to the source text.</p>
            <ul>
                <li>Word ratio: {word_ratio:.2f} (target/source)</li>
                <li>Character ratio: {char_ratio:.2f} (target/source)</li>
            </ul>
            <p><small>Note: Different languages naturally have different word and character counts when expressing the same meaning.</small></p>
        </div>
        """)

        # Language characteristics comparison
        source_avg_word_len = source_chars / source_words if source_words > 0 else 0
        target_avg_word_len = target_chars / target_words if target_words > 0 else 0

        output_html.append('<h4>Language Characteristics</h4>')

        # Create comparison table.
        # NOTE(review): column labels use the raw display names; this assumes
        # df_to_html_table escapes cell/header text - confirm in utils.helpers.
        lang_data = {
            'Metric': ['Average Word Length', 'Words per Character', 'Characters per Word'],
            f'Source ({source_lang_display})': [
                f"{source_avg_word_len:.2f} chars",
                f"{source_words / source_chars:.3f}" if source_chars > 0 else "N/A",
                f"{source_chars / source_words:.2f}" if source_words > 0 else "N/A",
            ],
            f'Target ({target_lang_display})': [
                f"{target_avg_word_len:.2f} chars",
                f"{target_words / target_chars:.3f}" if target_chars > 0 else "N/A",
                f"{target_chars / target_words:.2f}" if target_words > 0 else "N/A",
            ],
        }

        lang_df = pd.DataFrame(lang_data)

        output_html.append(df_to_html_table(lang_df))

        # Alternative translations section
        output_html.append('<h3 class="task-subheader">Alternative Translation Options</h3>')
        output_html.append('<p>Machine translation models often have different ways of translating the same text. Here are some general tips for better translations:</p>')

        output_html.append("""
        <div class="alert alert-info">
            <h4>Tips for Better Machine Translation</h4>
            <ul class="mb-0">
                <li><b>Use clear, simple language</b> in your source text</li>
                <li><b>Avoid idioms and slang</b> that may not translate well across cultures</li>
                <li><b>Break up long, complex sentences</b> into simpler ones</li>
                <li><b>Provide context</b> when dealing with ambiguous terms</li>
                <li><b>Review and post-edit</b> machine translations for important documents</li>
            </ul>
        </div>
        """)

        # Common translation challenges
        output_html.append('<h4>Common Translation Challenges</h4>')

        challenge_data = {
            'Challenge': [
                'Ambiguity',
                'Idioms & Expressions',
                'Cultural References',
                'Technical Terminology',
                'Grammatical Differences',
            ],
            'Description': [
                'Words with multiple meanings may be incorrectly translated without proper context',
                'Expressions that are unique to a culture often lose meaning when translated literally',
                'References to culture-specific concepts may not have direct equivalents',
                'Specialized terminology may not translate accurately without domain-specific models',
                'Different languages have different grammatical structures that can affect translation',
            ],
            'Example': [
                '"Bank" could mean financial institution or river edge',
                '"It\'s raining cats and dogs" translated literally loses its meaning',
                'References to local holidays or customs may be confusing when translated',
                'Medical or legal terms often need specialized translation knowledge',
                'Languages differ in word order, gender agreement, verb tenses, etc.',
            ],
        }

        challenge_df = pd.DataFrame(challenge_data)

        output_html.append(df_to_html_table(challenge_df))

    except Exception as e:
        # Top-level boundary for this page fragment: report the failure in the
        # rendered HTML instead of propagating it. The message is escaped
        # because it can contain user input or markup-like text.
        output_html.append(f"""
        <div class="alert alert-danger">
            <h3>Translation Error</h3>
            <p>{escape(str(e))}</p>
            <p>This could be due to an unsupported language pair or an issue loading the translation model.</p>
        </div>
        """)

    # About Machine Translation section
    output_html.append("""
    <div class="card mt-4">
        <div class="card-header">
            <h4 class="mb-0">
                <i class="fas fa-info-circle"></i>
                About Machine Translation
            </h4>
        </div>
        <div class="card-body">
            <h5>What is Machine Translation?</h5>

            <p>Machine translation is the automated translation of text from one language to another using computer software.
            Modern machine translation systems use neural networks to understand and generate text, leading to significant
            improvements in fluency and accuracy compared to older rule-based or statistical systems.</p>

            <h5>Types of Machine Translation:</h5>

            <ul>
                <li><b>Rule-based MT</b> - Uses linguistic rules crafted by human experts</li>
                <li><b>Statistical MT</b> - Uses statistical models trained on parallel texts</li>
                <li><b>Neural MT</b> - Uses deep learning and neural networks (current state-of-the-art)</li>
                <li><b>Hybrid MT</b> - Combines multiple approaches for better results</li>
            </ul>

            <h5>Applications:</h5>

            <ul>
                <li><b>Website localization</b> - Translating web content for international audiences</li>
                <li><b>Document translation</b> - Quickly obtaining translations of documents</li>
                <li><b>Real-time communication</b> - Enabling conversations across language barriers</li>
                <li><b>E-commerce</b> - Making product listings available in multiple languages</li>
                <li><b>Content accessibility</b> - Making information available to speakers of different languages</li>
            </ul>
        </div>
    </div>
    """)

    output_html.append('</div>')  # Close result-area div

    return '\n'.join(output_html)
|
components/vector_embeddings.py
ADDED
|
@@ -0,0 +1,241 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import matplotlib
|
| 2 |
+
matplotlib.use('Agg') # Use non-GUI backend
|
| 3 |
+
import matplotlib.pyplot as plt
|
| 4 |
+
import numpy as np
|
| 5 |
+
import pandas as pd
|
| 6 |
+
import spacy
|
| 7 |
+
import time
|
| 8 |
+
import faiss
|
| 9 |
+
from sentence_transformers import SentenceTransformer, util
|
| 10 |
+
from sklearn.decomposition import PCA
|
| 11 |
+
import textwrap
|
| 12 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
| 13 |
+
|
| 14 |
+
from utils.model_loader import load_embedding_model
|
| 15 |
+
from utils.helpers import fig_to_html, df_to_html_table
|
| 16 |
+
|
| 17 |
+
def vector_embeddings_handler(text_input, search_query=""):
    """Render an HTML report on sentence embeddings for ``text_input``.

    The text is split into segments (spaCy sentences, or fixed-size word
    chunks when fewer than three sentences are found), each segment is
    embedded with the model returned by ``load_embedding_model``, and the
    report shows processing statistics, the first segments and a client-side
    semantic-search panel.

    Args:
        text_input: Text to analyse. ``None`` or whitespace-only input yields
            a "No Text Provided" notice.
        search_query: Accepted for interface compatibility; this function does
            not currently use it (the search panel is driven from JavaScript
            via ``performSemanticSearch()``).

    Returns:
        A single HTML string (fragments joined with newlines).

    Any exception raised while loading models or embedding is caught and
    reported as an error alert in the returned HTML.
    """
    # Local import so this block does not depend on a file-level import.
    from html import escape

    output_html = []

    # Add result area container
    output_html.append('<div class="result-area">')
    output_html.append('<h2 class="task-header">Vector Embeddings Analysis Results</h2>')

    # Nothing to embed: say so instead of failing deep inside the model call.
    if not text_input or not text_input.strip():
        output_html.append("""
        <div class="alert alert-warning">
            <h4><i class="fas fa-exclamation-triangle me-2"></i>No Text Provided</h4>
            <p class="mb-0">Please enter some text to generate embeddings.</p>
        </div>
        """)
        output_html.append('</div>')
        return '\n'.join(output_html)

    # Load model and create embeddings
    try:
        model = load_embedding_model()

        # Split the text into chunks (sentences)
        nlp = spacy.load("en_core_web_sm")
        doc = nlp(text_input)
        sentences = [sent.text.strip() for sent in doc.sents if len(sent.text.strip()) > 10]

        # If we have too few sentences, create artificial chunks. The final,
        # possibly shorter, chunk is kept so that no words are dropped and a
        # short text still produces at least one segment.
        if len(sentences) < 3:
            words = text_input.split()
            chunk_size = max(10, len(words) // 3)
            sentences = [
                ' '.join(words[i:i + chunk_size])
                for i in range(0, len(words), chunk_size)
            ]

        # Limit to 10 sentences to avoid overwhelming the visualization
        sentences = sentences[:10]

        # Create embeddings
        embeddings = model.encode(sentences)

        # Build the success report separately and attach it only once every
        # step has succeeded, so a failure never shows a success banner.
        sections = []

        sections.append("""
        <div class="alert alert-success">
            <h4><i class="fas fa-check-circle me-2"></i>Embeddings Generated Successfully!</h4>
            <p class="mb-0">Your text has been processed and converted into high-dimensional vector representations.</p>
        </div>
        """)

        # Text Statistics
        sections.append(f"""
        <div class="row mb-4">
            <div class="col-12">
                <div class="card">
                    <div class="card-header bg-primary text-white">
                        <h4 class="mb-0"><i class="fas fa-chart-bar me-2"></i>Processing Statistics</h4>
                    </div>
                    <div class="card-body">
                        <div class="row text-center">
                            <div class="col-md-3">
                                <div class="stat-item">
                                    <h3 class="text-primary">{len(text_input)}</h3>
                                    <p class="text-muted mb-0">Characters</p>
                                </div>
                            </div>
                            <div class="col-md-3">
                                <div class="stat-item">
                                    <h3 class="text-success">{len(sentences)}</h3>
                                    <p class="text-muted mb-0">Text Segments</p>
                                </div>
                            </div>
                            <div class="col-md-3">
                                <div class="stat-item">
                                    <h3 class="text-info">{embeddings.shape[1]}</h3>
                                    <p class="text-muted mb-0">Vector Dimensions</p>
                                </div>
                            </div>
                            <div class="col-md-3">
                                <div class="stat-item">
                                    <h3 class="text-warning">{embeddings.shape[0]}</h3>
                                    <p class="text-muted mb-0">Embedding Vectors</p>
                                </div>
                            </div>
                        </div>
                    </div>
                </div>
            </div>
        </div>
        """)

        # Text Segments Display
        sections.append("""
        <div class="row mb-4">
            <div class="col-12">
                <div class="card">
                    <div class="card-header bg-info text-white">
                        <h4 class="mb-0"><i class="fas fa-list me-2"></i>Text Segments</h4>
                    </div>
                    <div class="card-body">
                        <div class="row">
        """)

        for i, sentence in enumerate(sentences[:6], start=1):  # Show max 6 segments
            # Segment text is user input, so escape it before embedding.
            sections.append(f"""
                            <div class="col-md-6 mb-3">
                                <div class="p-3 border rounded bg-light">
                                    <h6 class="text-primary mb-2">Segment {i}</h6>
                                    <p class="mb-0 small">{escape(sentence)}</p>
                                </div>
                            </div>
            """)

        sections.append("""
                        </div>
                    </div>
                </div>
            </div>
        </div>
        """)

        # Semantic Search Interface
        sections.append("""
        <div class="row mb-4">
            <div class="col-12">
                <div class="card border-warning">
                    <div class="card-header bg-warning text-dark">
                        <h4 class="mb-0"><i class="fas fa-search me-2"></i>Semantic Search</h4>
                    </div>
                    <div class="card-body">
                        <p class="mb-3">Search for content by meaning, not just keywords. The system will find the most semantically similar text segments.</p>

                        <div class="row mb-3">
                            <div class="col-md-10">
                                <input type="text" id="search-input" class="form-control form-control-lg" placeholder="Enter a search query to find similar content...">
                            </div>
                            <div class="col-md-2">
                                <button onclick="performSemanticSearch()" class="btn btn-warning btn-lg w-100">
                                    <i class="fas fa-search me-1"></i>Search
                                </button>
                            </div>
                        </div>

                        <div class="mb-3">
                            <h6 class="mb-2"><i class="fas fa-lightbulb me-2"></i>Try these example searches:</h6>
                            <div class="d-flex flex-wrap gap-2">
                                <button onclick="document.getElementById('search-input').value = 'space research'; performSemanticSearch();"
                                        class="btn btn-outline-secondary btn-sm">
                                    <i class="fas fa-rocket me-1"></i>space research
                                </button>
                                <button onclick="document.getElementById('search-input').value = 'scientific collaboration'; performSemanticSearch();"
                                        class="btn btn-outline-secondary btn-sm">
                                    <i class="fas fa-users me-1"></i>scientific collaboration
                                </button>
                                <button onclick="document.getElementById('search-input').value = 'international project'; performSemanticSearch();"
                                        class="btn btn-outline-secondary btn-sm">
                                    <i class="fas fa-globe me-1"></i>international project
                                </button>
                                <button onclick="document.getElementById('search-input').value = 'laboratory experiments'; performSemanticSearch();"
                                        class="btn btn-outline-secondary btn-sm">
                                    <i class="fas fa-flask me-1"></i>laboratory experiments
                                </button>
                                <button onclick="document.getElementById('search-input').value = 'space agencies'; performSemanticSearch();"
                                        class="btn btn-outline-secondary btn-sm">
                                    <i class="fas fa-building me-1"></i>space agencies
                                </button>
                                <button onclick="document.getElementById('search-input').value = 'microgravity environment'; performSemanticSearch();"
                                        class="btn btn-outline-secondary btn-sm">
                                    <i class="fas fa-weight me-1"></i>microgravity environment
                                </button>
                            </div>
                        </div>

                        <div id="search-results" style="display: none;">
                            <hr>
                            <h5><i class="fas fa-list-ol me-2"></i>Search Results:</h5>
                            <div id="results-container" class="border rounded p-3 bg-light" style="max-height: 400px; overflow-y: auto;">
                            </div>
                        </div>
                    </div>
                </div>
            </div>
        </div>
        """)

        output_html.extend(sections)

    except Exception as e:
        # Top-level boundary for this page fragment: show the failure in the
        # rendered HTML. The message is escaped as it may contain markup.
        output_html.append(f"""
        <div class="alert alert-danger">
            <h4><i class="fas fa-exclamation-triangle me-2"></i>Error</h4>
            <p>Could not generate embeddings: {escape(str(e))}</p>
        </div>
        """)

    # Close result-area div
    output_html.append('</div>')
    return '\n'.join(output_html)
|
| 201 |
+
|
| 202 |
+
def perform_semantic_search(context, query):
    """Rank the sentences of ``context`` by semantic similarity to ``query``.

    Args:
        context: Text to search in. It is split into sentences with spaCy;
            sentences of five characters or fewer are ignored. If that leaves
            nothing, the whole stripped context is used as a single segment.
        query: Search phrase to compare against each sentence.

    Returns:
        On success ``{"success": True, "results": [...]}`` where ``results``
        holds at most five ``{"text": str, "score": float}`` dicts ordered by
        descending cosine similarity. On failure (including empty ``context``
        or ``query``) ``{"success": False, "error": str}``. Exceptions are
        never propagated to the caller.
    """
    # Validate up front so callers get a clear message instead of a library
    # error (empty corpus) or meaningless scores (empty query).
    context_text = (context or "").strip()
    query_text = (query or "").strip()
    if not context_text:
        return {"success": False, "error": "No text provided to search."}
    if not query_text:
        return {"success": False, "error": "Search query is empty."}

    try:
        # Load model
        model = load_embedding_model()

        # Split context into sentences
        nlp = spacy.load("en_core_web_sm")
        doc = nlp(context)
        sentences = [sent.text.strip() for sent in doc.sents if len(sent.text.strip()) > 5]

        # Very short contexts can be filtered out entirely; search the whole
        # text rather than encoding an empty list.
        if not sentences:
            sentences = [context_text]

        # Create embeddings
        sentence_embeddings = model.encode(sentences)
        query_embedding = model.encode([query])[0]

        # Cosine similarity between the query and every sentence
        similarities = util.pytorch_cos_sim(query_embedding, sentence_embeddings)[0].cpu().numpy()

        # Pair each sentence with its score, best match first
        ranked = sorted(
            zip(sentences, (float(score) for score in similarities)),
            key=lambda pair: pair[1],
            reverse=True,
        )

        # Return top 5 results
        return {
            "success": True,
            "results": [
                {"text": text, "score": score}
                for text, score in ranked[:5]
            ],
        }

    except Exception as e:
        # Boundary for the caller: report the failure as data.
        return {
            "success": False,
            "error": str(e),
        }
|
requirements.txt
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Core web framework
|
| 2 |
+
Flask==3.0.0
|
| 3 |
+
flask-cors==4.0.0
|
| 4 |
+
|
| 5 |
+
# NLP Core Libraries
|
| 6 |
+
transformers==4.36.2
|
| 7 |
+
torch==2.1.2
|
| 8 |
+
tokenizers==0.15.0
|
| 9 |
+
sentence-transformers==2.2.2
|
| 10 |
+
nltk==3.8.1
|
| 11 |
+
spacy==3.7.2
|
| 12 |
+
|
| 13 |
+
# Data Science Libraries
|
| 14 |
+
numpy==1.24.3
|
| 15 |
+
pandas==2.0.3
|
| 16 |
+
scikit-learn==1.3.2
|
| 17 |
+
scipy==1.11.4
|
| 18 |
+
|
| 19 |
+
# Text Processing
|
| 20 |
+
textblob==0.17.1
|
| 21 |
+
langdetect==1.0.9
|
| 22 |
+
|
| 23 |
+
# Visualization (lightweight versions)
|
| 24 |
+
matplotlib==3.7.5
|
| 25 |
+
plotly==5.17.0
|
| 26 |
+
seaborn==0.13.0
|
| 27 |
+
networkx==3.1.1
|
| 28 |
+
matplotlib-venn==0.11.9
|
| 29 |
+
|
| 30 |
+
# Utilities
|
| 31 |
+
requests==2.31.0
|
| 32 |
+
Pillow==10.1.0
|
| 33 |
+
faiss-cpu==1.7.4
|
| 34 |
+
wordcloud==1.9.3
|
| 35 |
+
|
| 36 |
+
# Memory optimization
|
| 37 |
+
psutil==5.9.6
|
static/css/components.css
ADDED
|
@@ -0,0 +1,1756 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* Enhanced CSS for NLP Ultimate Tutorial - Component Specific Styles */
|
| 2 |
+
|
| 3 |
+
/* Vector Embeddings Page Enhancements */
|
| 4 |
+
.vector-embeddings-page {
|
| 5 |
+
background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
|
| 6 |
+
min-height: 100vh;
|
| 7 |
+
padding: 20px 0;
|
| 8 |
+
}
|
| 9 |
+
|
| 10 |
+
.model-info-cards .card {
|
| 11 |
+
transition: all 0.3s ease;
|
| 12 |
+
border: none;
|
| 13 |
+
box-shadow: 0 4px 8px rgba(0,0,0,0.1);
|
| 14 |
+
overflow: hidden;
|
| 15 |
+
position: relative;
|
| 16 |
+
}
|
| 17 |
+
|
| 18 |
+
.model-info-cards .card:hover {
|
| 19 |
+
transform: translateY(-5px);
|
| 20 |
+
box-shadow: 0 8px 25px rgba(0,0,0,0.15);
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
+
.model-info-cards .card::before {
|
| 24 |
+
content: '';
|
| 25 |
+
position: absolute;
|
| 26 |
+
top: 0;
|
| 27 |
+
left: 0;
|
| 28 |
+
right: 0;
|
| 29 |
+
height: 4px;
|
| 30 |
+
background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
|
| 31 |
+
}
|
| 32 |
+
|
| 33 |
+
.model-info-cards .card-header {
|
| 34 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
|
| 35 |
+
border: none;
|
| 36 |
+
padding: 20px;
|
| 37 |
+
position: relative;
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
.model-info-cards .card-header.bg-primary {
|
| 41 |
+
background: linear-gradient(135deg, #4e73df 0%, #224abe 100%) !important;
|
| 42 |
+
}
|
| 43 |
+
|
| 44 |
+
.model-info-cards .card-header.bg-success {
|
| 45 |
+
background: linear-gradient(135deg, #1cc88a 0%, #13855c 100%) !important;
|
| 46 |
+
}
|
| 47 |
+
|
| 48 |
+
.model-info-cards .card-header.bg-info {
|
| 49 |
+
background: linear-gradient(135deg, #36b9cc 0%, #258391 100%) !important;
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
.model-info-cards .card-body {
|
| 53 |
+
padding: 25px;
|
| 54 |
+
background: rgba(255,255,255,0.95);
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
.model-info-cards .list-unstyled li {
|
| 58 |
+
padding: 8px 0;
|
| 59 |
+
border-bottom: 1px solid rgba(0,0,0,0.05);
|
| 60 |
+
transition: all 0.2s ease;
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
+
.model-info-cards .list-unstyled li:hover {
|
| 64 |
+
background: rgba(0,0,0,0.02);
|
| 65 |
+
padding-left: 10px;
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
+
.model-info-cards .list-unstyled li:last-child {
|
| 69 |
+
border-bottom: none;
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
/* Visualization Cards */
|
| 73 |
+
.visualization-card {
|
| 74 |
+
background: linear-gradient(135deg, #ffffff 0%, #f8f9fa 100%);
|
| 75 |
+
border: none;
|
| 76 |
+
box-shadow: 0 6px 20px rgba(0,0,0,0.1);
|
| 77 |
+
border-radius: 15px;
|
| 78 |
+
overflow: hidden;
|
| 79 |
+
transition: all 0.3s ease;
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
.visualization-card:hover {
|
| 83 |
+
transform: translateY(-3px);
|
| 84 |
+
box-shadow: 0 10px 30px rgba(0,0,0,0.15);
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
.visualization-card .card-header {
|
| 88 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
| 89 |
+
border: none;
|
| 90 |
+
padding: 20px;
|
| 91 |
+
color: white;
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
.visualization-card .card-body {
|
| 95 |
+
padding: 30px;
|
| 96 |
+
}
|
| 97 |
+
|
| 98 |
+
/* Semantic Search Enhancement */
|
| 99 |
+
.semantic-search-card {
|
| 100 |
+
background: linear-gradient(135deg, #ffeaa7 0%, #fab1a0 100%);
|
| 101 |
+
border: none;
|
| 102 |
+
box-shadow: 0 8px 25px rgba(255,193,7,0.3);
|
| 103 |
+
border-radius: 15px;
|
| 104 |
+
overflow: hidden;
|
| 105 |
+
transition: all 0.3s ease;
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
.semantic-search-card:hover {
|
| 109 |
+
transform: translateY(-2px);
|
| 110 |
+
box-shadow: 0 12px 35px rgba(255,193,7,0.4);
|
| 111 |
+
}
|
| 112 |
+
|
| 113 |
+
.semantic-search-card .card-header {
|
| 114 |
+
background: linear-gradient(135deg, #fdcb6e 0%, #e17055 100%) !important;
|
| 115 |
+
border: none;
|
| 116 |
+
padding: 25px;
|
| 117 |
+
color: white;
|
| 118 |
+
}
|
| 119 |
+
|
| 120 |
+
.semantic-search-card .card-body {
|
| 121 |
+
background: rgba(255,255,255,0.9);
|
| 122 |
+
padding: 30px;
|
| 123 |
+
}
|
| 124 |
+
|
| 125 |
+
.semantic-search-card .form-control-lg {
|
| 126 |
+
border: 2px solid #fdcb6e;
|
| 127 |
+
border-radius: 10px;
|
| 128 |
+
transition: all 0.3s ease;
|
| 129 |
+
box-shadow: 0 2px 10px rgba(253,203,110,0.2);
|
| 130 |
+
}
|
| 131 |
+
|
| 132 |
+
.semantic-search-card .form-control-lg:focus {
|
| 133 |
+
border-color: #e17055;
|
| 134 |
+
box-shadow: 0 4px 15px rgba(225,112,85,0.3);
|
| 135 |
+
transform: translateY(-2px);
|
| 136 |
+
}
|
| 137 |
+
|
| 138 |
+
.semantic-search-card .btn-warning {
|
| 139 |
+
background: linear-gradient(135deg, #fdcb6e 0%, #e17055 100%);
|
| 140 |
+
border: none;
|
| 141 |
+
border-radius: 10px;
|
| 142 |
+
font-weight: 600;
|
| 143 |
+
transition: all 0.3s ease;
|
| 144 |
+
box-shadow: 0 4px 15px rgba(253,203,110,0.4);
|
| 145 |
+
}
|
| 146 |
+
|
| 147 |
+
.semantic-search-card .btn-warning:hover {
|
| 148 |
+
transform: translateY(-2px);
|
| 149 |
+
box-shadow: 0 6px 20px rgba(225,112,85,0.5);
|
| 150 |
+
}
|
| 151 |
+
|
| 152 |
+
/* Example Search Buttons */
|
| 153 |
+
.example-search-card {
|
| 154 |
+
background: linear-gradient(135deg, #a8edea 0%, #fed6e3 100%);
|
| 155 |
+
border: none;
|
| 156 |
+
box-shadow: 0 6px 20px rgba(168,237,234,0.3);
|
| 157 |
+
border-radius: 15px;
|
| 158 |
+
overflow: hidden;
|
| 159 |
+
}
|
| 160 |
+
|
| 161 |
+
.example-search-card .card-header {
|
| 162 |
+
background: linear-gradient(135deg, #74b9ff 0%, #0984e3 100%) !important;
|
| 163 |
+
border: none;
|
| 164 |
+
padding: 20px;
|
| 165 |
+
color: white;
|
| 166 |
+
}
|
| 167 |
+
|
| 168 |
+
.example-search-card .card-body {
|
| 169 |
+
background: rgba(255,255,255,0.8);
|
| 170 |
+
padding: 25px;
|
| 171 |
+
}
|
| 172 |
+
|
| 173 |
+
.example-search-card .btn-outline-secondary {
|
| 174 |
+
border: 2px solid #74b9ff;
|
| 175 |
+
color: #0984e3;
|
| 176 |
+
background: rgba(255,255,255,0.9);
|
| 177 |
+
border-radius: 25px;
|
| 178 |
+
padding: 10px 20px;
|
| 179 |
+
margin: 5px;
|
| 180 |
+
font-weight: 500;
|
| 181 |
+
transition: all 0.3s ease;
|
| 182 |
+
box-shadow: 0 2px 10px rgba(116,185,255,0.2);
|
| 183 |
+
}
|
| 184 |
+
|
| 185 |
+
.example-search-card .btn-outline-secondary:hover {
|
| 186 |
+
background: linear-gradient(135deg, #74b9ff 0%, #0984e3 100%);
|
| 187 |
+
color: white;
|
| 188 |
+
transform: translateY(-2px);
|
| 189 |
+
box-shadow: 0 4px 15px rgba(116,185,255,0.4);
|
| 190 |
+
}
|
| 191 |
+
|
| 192 |
+
/* Process Flow Cards */
|
| 193 |
+
.process-flow-card {
|
| 194 |
+
background: linear-gradient(135deg, #e8f4fd 0%, #d1ecf1 100%);
|
| 195 |
+
border: none;
|
| 196 |
+
border-radius: 15px;
|
| 197 |
+
box-shadow: 0 4px 15px rgba(0,0,0,0.1);
|
| 198 |
+
transition: all 0.3s ease;
|
| 199 |
+
overflow: hidden;
|
| 200 |
+
}
|
| 201 |
+
|
| 202 |
+
.process-flow-card:hover {
|
| 203 |
+
transform: translateY(-3px);
|
| 204 |
+
box-shadow: 0 8px 25px rgba(0,0,0,0.15);
|
| 205 |
+
}
|
| 206 |
+
|
| 207 |
+
.process-flow-card h4 {
|
| 208 |
+
color: #0984e3;
|
| 209 |
+
font-weight: 600;
|
| 210 |
+
margin-bottom: 15px;
|
| 211 |
+
}
|
| 212 |
+
|
| 213 |
+
.process-flow-card p {
|
| 214 |
+
color: #636e72;
|
| 215 |
+
line-height: 1.6;
|
| 216 |
+
}
|
| 217 |
+
|
| 218 |
+
/* Info Alert Enhancements */
|
| 219 |
+
.vector-embeddings-page .alert-info {
|
| 220 |
+
background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%);
|
| 221 |
+
border: none;
|
| 222 |
+
border-left: 5px solid #2196f3;
|
| 223 |
+
border-radius: 10px;
|
| 224 |
+
box-shadow: 0 3px 10px rgba(33,150,243,0.2);
|
| 225 |
+
}
|
| 226 |
+
|
| 227 |
+
.vector-embeddings-page .alert-success {
|
| 228 |
+
background: linear-gradient(135deg, #e8f5e8 0%, #c8e6c9 100%);
|
| 229 |
+
border: none;
|
| 230 |
+
border-left: 5px solid #4caf50;
|
| 231 |
+
border-radius: 10px;
|
| 232 |
+
box-shadow: 0 3px 10px rgba(76,175,80,0.2);
|
| 233 |
+
}
|
| 234 |
+
|
| 235 |
+
/* Chart Container Enhancements */
|
| 236 |
+
.chart-container {
|
| 237 |
+
background: white;
|
| 238 |
+
border-radius: 15px;
|
| 239 |
+
padding: 20px;
|
| 240 |
+
box-shadow: 0 4px 15px rgba(0,0,0,0.1);
|
| 241 |
+
margin: 20px 0;
|
| 242 |
+
transition: all 0.3s ease;
|
| 243 |
+
}
|
| 244 |
+
|
| 245 |
+
.chart-container:hover {
|
| 246 |
+
box-shadow: 0 8px 25px rgba(0,0,0,0.15);
|
| 247 |
+
}
|
| 248 |
+
|
| 249 |
+
/* Animation for page load */
|
| 250 |
+
@keyframes fadeInUp {
|
| 251 |
+
from {
|
| 252 |
+
opacity: 0;
|
| 253 |
+
transform: translateY(30px);
|
| 254 |
+
}
|
| 255 |
+
to {
|
| 256 |
+
opacity: 1;
|
| 257 |
+
transform: translateY(0);
|
| 258 |
+
}
|
| 259 |
+
}
|
| 260 |
+
|
| 261 |
+
/* Entrance animation for cards. Fill mode is 'backwards' so the first keyframe (opacity 0, shifted down) is held during the staggered nth-child delays instead of the card flashing at full opacity first. Unlike 'forwards', the animation releases 'transform' once it ends, so transforms declared in :hover rules are not overridden by a held final keyframe. The end keyframe equals the cards' resting style, so nothing jumps when the animation finishes. */
.vector-embeddings-page .card {
|
| 262 |
+
animation: fadeInUp 0.6s ease backwards;
|
| 263 |
+
}
|
| 264 |
+
|
| 265 |
+
.vector-embeddings-page .card:nth-child(1) { animation-delay: 0.1s; }
|
| 266 |
+
.vector-embeddings-page .card:nth-child(2) { animation-delay: 0.2s; }
|
| 267 |
+
.vector-embeddings-page .card:nth-child(3) { animation-delay: 0.3s; }
|
| 268 |
+
.vector-embeddings-page .card:nth-child(4) { animation-delay: 0.4s; }
|
| 269 |
+
|
| 270 |
+
/* Enhanced Vector Embeddings Components */
|
| 271 |
+
.feature-list .d-flex {
|
| 272 |
+
transition: all 0.2s ease;
|
| 273 |
+
border-radius: 8px;
|
| 274 |
+
padding: 8px;
|
| 275 |
+
margin: 0 -8px 8px -8px;
|
| 276 |
+
}
|
| 277 |
+
|
| 278 |
+
.feature-list .d-flex:hover {
|
| 279 |
+
background: rgba(0,0,0,0.02);
|
| 280 |
+
transform: translateX(5px);
|
| 281 |
+
}
|
| 282 |
+
|
| 283 |
+
.process-step-icon {
|
| 284 |
+
transition: all 0.3s ease;
|
| 285 |
+
box-shadow: 0 4px 15px rgba(0,0,0,0.1);
|
| 286 |
+
}
|
| 287 |
+
|
| 288 |
+
.process-step-icon:hover {
|
| 289 |
+
transform: scale(1.1);
|
| 290 |
+
box-shadow: 0 8px 25px rgba(0,0,0,0.2);
|
| 291 |
+
}
|
| 292 |
+
|
| 293 |
+
.progress-bar {
|
| 294 |
+
transition: width 1s ease-in-out;
|
| 295 |
+
}
|
| 296 |
+
|
| 297 |
+
.bg-gradient-primary {
|
| 298 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
|
| 299 |
+
}
|
| 300 |
+
|
| 301 |
+
.bg-gradient-info {
|
| 302 |
+
background: linear-gradient(135deg, #36b9cc 0%, #258391 100%) !important;
|
| 303 |
+
}
|
| 304 |
+
|
| 305 |
+
/* Model Information Cards Enhancement */
|
| 306 |
+
.model-info-cards .card {
|
| 307 |
+
transition: all 0.4s cubic-bezier(0.25, 0.8, 0.25, 1);
|
| 308 |
+
border-radius: 12px;
|
| 309 |
+
}
|
| 310 |
+
|
| 311 |
+
.model-info-cards .card:hover {
|
| 312 |
+
transform: translateY(-8px) scale(1.02);
|
| 313 |
+
box-shadow: 0 15px 40px rgba(0,0,0,0.15);
|
| 314 |
+
}
|
| 315 |
+
|
| 316 |
+
.model-info-cards .card-header {
|
| 317 |
+
border-radius: 12px 12px 0 0;
|
| 318 |
+
}
|
| 319 |
+
|
| 320 |
+
/* Process Flow Enhancement */
|
| 321 |
+
.process-flow-card h5 {
|
| 322 |
+
position: relative;
|
| 323 |
+
}
|
| 324 |
+
|
| 325 |
+
.process-flow-card h5::after {
|
| 326 |
+
content: '';
|
| 327 |
+
position: absolute;
|
| 328 |
+
bottom: -8px;
|
| 329 |
+
left: 50%;
|
| 330 |
+
transform: translateX(-50%);
|
| 331 |
+
width: 40px;
|
| 332 |
+
height: 2px;
|
| 333 |
+
background: currentColor;
|
| 334 |
+
opacity: 0.3;
|
| 335 |
+
}
|
| 336 |
+
|
| 337 |
+
/* Interactive Elements */
|
| 338 |
+
.btn-outline-secondary:hover {
|
| 339 |
+
transform: translateY(-2px) scale(1.05);
|
| 340 |
+
box-shadow: 0 6px 20px rgba(0,0,0,0.15);
|
| 341 |
+
}
|
| 342 |
+
|
| 343 |
+
/* Chart Container Improvements */
|
| 344 |
+
.chart-container {
|
| 345 |
+
position: relative;
|
| 346 |
+
overflow: hidden;
|
| 347 |
+
}
|
| 348 |
+
|
| 349 |
+
/* Gradient layer sized 2px larger than the container on every side; it starts fully transparent and its opacity is transitioned over 0.3s. */
/* NOTE(review): .chart-container is also given `overflow: hidden` and a white background in this stylesheet, so the 2px overhang is clipped and, with z-index: -1, this layer is presumably painted behind the container's own background. Confirm the hover effect is actually visible. */
.chart-container::before {
|
| 350 |
+
content: '';
|
| 351 |
+
position: absolute;
|
| 352 |
+
top: -2px;
|
| 353 |
+
left: -2px;
|
| 354 |
+
right: -2px;
|
| 355 |
+
bottom: -2px;
|
| 356 |
+
background: linear-gradient(45deg, #667eea, #764ba2, #36b9cc, #1cc88a);
|
| 357 |
+
border-radius: 17px;
|
| 358 |
+
z-index: -1;
|
| 359 |
+
opacity: 0;
|
| 360 |
+
transition: opacity 0.3s ease;
|
| 361 |
+
}
|
| 362 |
+
|
| 363 |
+
.chart-container:hover::before {
|
| 364 |
+
opacity: 0.1;
|
| 365 |
+
}
|
| 366 |
+
|
| 367 |
+
/* Responsive Improvements */
|
| 368 |
+
@media (max-width: 768px) {
|
| 369 |
+
.model-info-cards .card {
|
| 370 |
+
margin-bottom: 1.5rem;
|
| 371 |
+
}
|
| 372 |
+
|
| 373 |
+
.process-step-icon {
|
| 374 |
+
width: 50px !important;
|
| 375 |
+
height: 50px !important;
|
| 376 |
+
}
|
| 377 |
+
|
| 378 |
+
.feature-list .d-flex:hover {
|
| 379 |
+
transform: none;
|
| 380 |
+
}
|
| 381 |
+
}
|
| 382 |
+
|
| 383 |
+
/* POS Tagging specific styles */
|
| 384 |
+
.pos-token {
|
| 385 |
+
display: inline-block;
|
| 386 |
+
margin: 2px;
|
| 387 |
+
padding: 4px 8px;
|
| 388 |
+
border-radius: 4px;
|
| 389 |
+
font-size: 0.9em;
|
| 390 |
+
transition: all 0.2s ease;
|
| 391 |
+
}
|
| 392 |
+
|
| 393 |
+
.pos-token:hover {
|
| 394 |
+
transform: scale(1.05);
|
| 395 |
+
box-shadow: 0 2px 4px rgba(0,0,0,0.2);
|
| 396 |
+
}
|
| 397 |
+
|
| 398 |
+
/* Named Entity Recognition specific styles */
|
| 399 |
+
.entity-token {
|
| 400 |
+
display: inline-block;
|
| 401 |
+
margin: 2px;
|
| 402 |
+
padding: 4px 8px;
|
| 403 |
+
border-radius: 4px;
|
| 404 |
+
font-size: 0.9em;
|
| 405 |
+
transition: all 0.2s ease;
|
| 406 |
+
cursor: pointer;
|
| 407 |
+
}
|
| 408 |
+
|
| 409 |
+
.entity-token:hover {
|
| 410 |
+
transform: scale(1.05);
|
| 411 |
+
box-shadow: 0 2px 4px rgba(0,0,0,0.2);
|
| 412 |
+
}
|
| 413 |
+
|
| 414 |
+
/* Sentiment Analysis specific styles */
|
| 415 |
+
.sentiment-gauge {
|
| 416 |
+
position: relative;
|
| 417 |
+
width: 200px;
|
| 418 |
+
height: 200px;
|
| 419 |
+
margin: 0 auto;
|
| 420 |
+
}
|
| 421 |
+
|
| 422 |
+
.sentiment-score {
|
| 423 |
+
position: absolute;
|
| 424 |
+
top: 50%;
|
| 425 |
+
left: 50%;
|
| 426 |
+
transform: translate(-50%, -50%);
|
| 427 |
+
font-size: 2rem;
|
| 428 |
+
font-weight: bold;
|
| 429 |
+
}
|
| 430 |
+
|
| 431 |
+
.sentiment-label {
|
| 432 |
+
position: absolute;
|
| 433 |
+
bottom: -30px;
|
| 434 |
+
left: 50%;
|
| 435 |
+
transform: translateX(-50%);
|
| 436 |
+
font-size: 1.2rem;
|
| 437 |
+
font-weight: 600;
|
| 438 |
+
}
|
| 439 |
+
|
| 440 |
+
/* Text Generation specific styles */
|
| 441 |
+
.generated-text {
|
| 442 |
+
background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
|
| 443 |
+
border-left: 4px solid var(--primary-color);
|
| 444 |
+
padding: 15px;
|
| 445 |
+
margin: 10px 0;
|
| 446 |
+
border-radius: 8px;
|
| 447 |
+
font-family: 'Georgia', serif;
|
| 448 |
+
line-height: 1.6;
|
| 449 |
+
}
|
| 450 |
+
|
| 451 |
+
.prompt-text {
|
| 452 |
+
color: #666;
|
| 453 |
+
font-style: italic;
|
| 454 |
+
}
|
| 455 |
+
|
| 456 |
+
.generated-content {
|
| 457 |
+
color: var(--primary-color);
|
| 458 |
+
font-weight: 500;
|
| 459 |
+
}
|
| 460 |
+
|
| 461 |
+
/* Translation specific styles */
|
| 462 |
+
.translation-pair {
|
| 463 |
+
display: flex;
|
| 464 |
+
gap: 20px;
|
| 465 |
+
margin: 20px 0;
|
| 466 |
+
}
|
| 467 |
+
|
| 468 |
+
.source-text, .target-text {
|
| 469 |
+
flex: 1;
|
| 470 |
+
padding: 15px;
|
| 471 |
+
border-radius: 8px;
|
| 472 |
+
border: 2px solid #e0e0e0;
|
| 473 |
+
}
|
| 474 |
+
|
| 475 |
+
.source-text {
|
| 476 |
+
background: linear-gradient(135deg, #E3F2FD 0%, #BBDEFB 100%);
|
| 477 |
+
border-color: var(--primary-color);
|
| 478 |
+
}
|
| 479 |
+
|
| 480 |
+
.target-text {
|
| 481 |
+
background: linear-gradient(135deg, #E8F5E9 0%, #C8E6C9 100%);
|
| 482 |
+
border-color: var(--success-color);
|
| 483 |
+
}
|
| 484 |
+
|
| 485 |
+
.language-badge {
|
| 486 |
+
display: inline-block;
|
| 487 |
+
padding: 4px 12px;
|
| 488 |
+
border-radius: 20px;
|
| 489 |
+
font-size: 0.8em;
|
| 490 |
+
font-weight: 600;
|
| 491 |
+
margin-bottom: 10px;
|
| 492 |
+
}
|
| 493 |
+
|
| 494 |
+
/* Classification specific styles */
|
| 495 |
+
.classification-result {
|
| 496 |
+
display: flex;
|
| 497 |
+
align-items: center;
|
| 498 |
+
padding: 10px;
|
| 499 |
+
margin: 5px 0;
|
| 500 |
+
border-radius: 8px;
|
| 501 |
+
background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
|
| 502 |
+
transition: all 0.2s ease;
|
| 503 |
+
}
|
| 504 |
+
|
| 505 |
+
.classification-result:hover {
|
| 506 |
+
transform: translateX(5px);
|
| 507 |
+
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
|
| 508 |
+
}
|
| 509 |
+
|
| 510 |
+
.classification-label {
|
| 511 |
+
flex: 1;
|
| 512 |
+
font-weight: 600;
|
| 513 |
+
}
|
| 514 |
+
|
| 515 |
+
.classification-score {
|
| 516 |
+
font-size: 1.2rem;
|
| 517 |
+
font-weight: bold;
|
| 518 |
+
margin-left: 10px;
|
| 519 |
+
}
|
| 520 |
+
|
| 521 |
+
/* Vector Embeddings specific styles */
|
| 522 |
+
.embedding-visualization {
|
| 523 |
+
text-align: center;
|
| 524 |
+
margin: 20px 0;
|
| 525 |
+
}
|
| 526 |
+
|
| 527 |
+
.similarity-matrix {
|
| 528 |
+
display: grid;
|
| 529 |
+
gap: 2px;
|
| 530 |
+
margin: 20px 0;
|
| 531 |
+
}
|
| 532 |
+
|
| 533 |
+
.similarity-cell {
|
| 534 |
+
padding: 10px;
|
| 535 |
+
text-align: center;
|
| 536 |
+
border-radius: 4px;
|
| 537 |
+
font-weight: 600;
|
| 538 |
+
transition: all 0.2s ease;
|
| 539 |
+
}
|
| 540 |
+
|
| 541 |
+
.similarity-cell:hover {
|
| 542 |
+
transform: scale(1.1);
|
| 543 |
+
z-index: 10;
|
| 544 |
+
position: relative;
|
| 545 |
+
}
|
| 546 |
+
|
| 547 |
+
/* Search results specific styles */
|
| 548 |
+
.search-result {
|
| 549 |
+
padding: 15px;
|
| 550 |
+
margin: 10px 0;
|
| 551 |
+
border-radius: 8px;
|
| 552 |
+
border-left: 4px solid var(--primary-color);
|
| 553 |
+
background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
|
| 554 |
+
transition: all 0.2s ease;
|
| 555 |
+
}
|
| 556 |
+
|
| 557 |
+
.search-result:hover {
|
| 558 |
+
transform: translateY(-2px);
|
| 559 |
+
box-shadow: 0 4px 12px rgba(0,0,0,0.1);
|
| 560 |
+
}
|
| 561 |
+
|
| 562 |
+
.search-score {
|
| 563 |
+
font-size: 0.9em;
|
| 564 |
+
color: var(--primary-color);
|
| 565 |
+
font-weight: 600;
|
| 566 |
+
margin-top: 5px;
|
| 567 |
+
}
|
| 568 |
+
|
| 569 |
+
/* Chart containers */
|
| 570 |
+
.chart-wrapper {
|
| 571 |
+
position: relative;
|
| 572 |
+
margin: 20px 0;
|
| 573 |
+
padding: 20px;
|
| 574 |
+
background: white;
|
| 575 |
+
border-radius: 10px;
|
| 576 |
+
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
|
| 577 |
+
}
|
| 578 |
+
|
| 579 |
+
[data-theme="dark"] .chart-wrapper {
|
| 580 |
+
background: #2d2d2d;
|
| 581 |
+
}
|
| 582 |
+
|
| 583 |
+
/* Loading animations */
|
| 584 |
+
.loading-dots {
|
| 585 |
+
display: inline-block;
|
| 586 |
+
}
|
| 587 |
+
|
| 588 |
+
.loading-dots::after {
|
| 589 |
+
content: '';
|
| 590 |
+
animation: dots 1.5s steps(4, end) infinite;
|
| 591 |
+
}
|
| 592 |
+
|
| 593 |
+
@keyframes dots {
|
| 594 |
+
0%, 20% { content: ''; }
|
| 595 |
+
40% { content: '.'; }
|
| 596 |
+
60% { content: '..'; }
|
| 597 |
+
80%, 100% { content: '...'; }
|
| 598 |
+
}
|
| 599 |
+
|
| 600 |
+
/* Pulse animation for important elements */
|
| 601 |
+
.pulse {
|
| 602 |
+
animation: pulse 2s infinite;
|
| 603 |
+
}
|
| 604 |
+
|
| 605 |
+
@keyframes pulse {
|
| 606 |
+
0% { transform: scale(1); }
|
| 607 |
+
50% { transform: scale(1.05); }
|
| 608 |
+
100% { transform: scale(1); }
|
| 609 |
+
}
|
| 610 |
+
|
| 611 |
+
/* Slide animations */
|
| 612 |
+
.slide-in-left {
|
| 613 |
+
animation: slideInLeft 0.5s ease-out;
|
| 614 |
+
}
|
| 615 |
+
|
| 616 |
+
.slide-in-right {
|
| 617 |
+
animation: slideInRight 0.5s ease-out;
|
| 618 |
+
}
|
| 619 |
+
|
| 620 |
+
@keyframes slideInLeft {
|
| 621 |
+
from { transform: translateX(-100%); opacity: 0; }
|
| 622 |
+
to { transform: translateX(0); opacity: 1; }
|
| 623 |
+
}
|
| 624 |
+
|
| 625 |
+
@keyframes slideInRight {
|
| 626 |
+
from { transform: translateX(100%); opacity: 0; }
|
| 627 |
+
to { transform: translateX(0); opacity: 1; }
|
| 628 |
+
}
|
| 629 |
+
|
| 630 |
+
/* Bounce animation for success states */
|
| 631 |
+
.bounce-in {
|
| 632 |
+
animation: bounceIn 0.6s ease-out;
|
| 633 |
+
}
|
| 634 |
+
|
| 635 |
+
@keyframes bounceIn {
|
| 636 |
+
0% { transform: scale(0.3); opacity: 0; }
|
| 637 |
+
50% { transform: scale(1.05); }
|
| 638 |
+
70% { transform: scale(0.9); }
|
| 639 |
+
100% { transform: scale(1); opacity: 1; }
|
| 640 |
+
}
|
| 641 |
+
|
| 642 |
+
/* Shake animation for error states */
|
| 643 |
+
.shake {
|
| 644 |
+
animation: shake 0.5s ease-in-out;
|
| 645 |
+
}
|
| 646 |
+
|
| 647 |
+
@keyframes shake {
|
| 648 |
+
0%, 100% { transform: translateX(0); }
|
| 649 |
+
10%, 30%, 50%, 70%, 90% { transform: translateX(-5px); }
|
| 650 |
+
20%, 40%, 60%, 80% { transform: translateX(5px); }
|
| 651 |
+
}
|
| 652 |
+
|
| 653 |
+
/* Gradient text effects */
|
| 654 |
+
.gradient-text {
|
| 655 |
+
background: linear-gradient(135deg, var(--primary-color) 0%, var(--success-color) 100%);
|
| 656 |
+
-webkit-background-clip: text;
|
| 657 |
+
-webkit-text-fill-color: transparent;
|
| 658 |
+
background-clip: text;
|
| 659 |
+
font-weight: 700;
|
| 660 |
+
}
|
| 661 |
+
|
| 662 |
+
/* Custom scrollbar for specific containers */
|
| 663 |
+
.custom-scrollbar {
|
| 664 |
+
scrollbar-width: thin;
|
| 665 |
+
scrollbar-color: var(--primary-color) #f1f1f1;
|
| 666 |
+
}
|
| 667 |
+
|
| 668 |
+
.custom-scrollbar::-webkit-scrollbar {
|
| 669 |
+
width: 6px;
|
| 670 |
+
}
|
| 671 |
+
|
| 672 |
+
.custom-scrollbar::-webkit-scrollbar-track {
|
| 673 |
+
background: #f1f1f1;
|
| 674 |
+
border-radius: 3px;
|
| 675 |
+
}
|
| 676 |
+
|
| 677 |
+
.custom-scrollbar::-webkit-scrollbar-thumb {
|
| 678 |
+
background: var(--primary-color);
|
| 679 |
+
border-radius: 3px;
|
| 680 |
+
}
|
| 681 |
+
|
| 682 |
+
.custom-scrollbar::-webkit-scrollbar-thumb:hover {
|
| 683 |
+
background: var(--primary-hover);
|
| 684 |
+
}
|
| 685 |
+
|
| 686 |
+
/* Responsive design enhancements */
|
| 687 |
+
@media (max-width: 576px) {
|
| 688 |
+
.translation-pair {
|
| 689 |
+
flex-direction: column;
|
| 690 |
+
}
|
| 691 |
+
|
| 692 |
+
.classification-result {
|
| 693 |
+
flex-direction: column;
|
| 694 |
+
text-align: center;
|
| 695 |
+
}
|
| 696 |
+
|
| 697 |
+
.classification-score {
|
| 698 |
+
margin-left: 0;
|
| 699 |
+
margin-top: 5px;
|
| 700 |
+
}
|
| 701 |
+
|
| 702 |
+
.sentiment-gauge {
|
| 703 |
+
width: 150px;
|
| 704 |
+
height: 150px;
|
| 705 |
+
}
|
| 706 |
+
|
| 707 |
+
.sentiment-score {
|
| 708 |
+
font-size: 1.5rem;
|
| 709 |
+
}
|
| 710 |
+
}
|
| 711 |
+
|
| 712 |
+
/* Print styles */
|
| 713 |
+
@media print {
|
| 714 |
+
.navbar, .btn, .alert {
|
| 715 |
+
display: none !important;
|
| 716 |
+
}
|
| 717 |
+
|
| 718 |
+
.card {
|
| 719 |
+
border: 1px solid #000 !important;
|
| 720 |
+
box-shadow: none !important;
|
| 721 |
+
}
|
| 722 |
+
|
| 723 |
+
body {
|
| 724 |
+
background: white !important;
|
| 725 |
+
color: black !important;
|
| 726 |
+
}
|
| 727 |
+
}
|
| 728 |
+
|
| 729 |
+
/* High contrast mode support */
|
| 730 |
+
@media (prefers-contrast: more) { /* 'more' is the standard keyword for a higher-contrast preference; 'high' is not a valid value and never matches */
|
| 731 |
+
:root {
|
| 732 |
+
--primary-color: #0000FF;
|
| 733 |
+
--success-color: #008000;
|
| 734 |
+
--warning-color: #FF8000;
|
| 735 |
+
--danger-color: #FF0000;
|
| 736 |
+
}
|
| 737 |
+
|
| 738 |
+
.card {
|
| 739 |
+
border: 2px solid #000;
|
| 740 |
+
}
|
| 741 |
+
|
| 742 |
+
.btn {
|
| 743 |
+
border: 2px solid #000;
|
| 744 |
+
}
|
| 745 |
+
}
|
| 746 |
+
|
| 747 |
+
/* Reduced motion support */
|
| 748 |
+
@media (prefers-reduced-motion: reduce) {
|
| 749 |
+
*, *::before, *::after { /* the universal selector does not match pseudo-elements, so they are listed explicitly to also cover ::before/::after animations and transitions */
|
| 750 |
+
animation-duration: 0.01ms !important;
|
| 751 |
+
animation-iteration-count: 1 !important;
|
| 752 |
+
transition-duration: 0.01ms !important;
|
| 753 |
+
}
|
| 754 |
+
}
|
| 755 |
+
|
| 756 |
+
/* Focus styles for accessibility */
|
| 757 |
+
.btn:focus,
|
| 758 |
+
.form-control:focus,
|
| 759 |
+
.form-select:focus {
|
| 760 |
+
outline: 3px solid var(--primary-color);
|
| 761 |
+
outline-offset: 2px;
|
| 762 |
+
}
|
| 763 |
+
|
| 764 |
+
/* Skip link for screen readers */
|
| 765 |
+
.skip-link {
|
| 766 |
+
position: absolute;
|
| 767 |
+
top: -40px;
|
| 768 |
+
left: 6px;
|
| 769 |
+
background: var(--primary-color);
|
| 770 |
+
color: white;
|
| 771 |
+
padding: 8px;
|
| 772 |
+
text-decoration: none;
|
| 773 |
+
border-radius: 4px;
|
| 774 |
+
z-index: 1000;
|
| 775 |
+
}
|
| 776 |
+
|
| 777 |
+
.skip-link:focus {
|
| 778 |
+
top: 6px;
|
| 779 |
+
}
|
| 780 |
+
|
| 781 |
+
/* ARIA live region for dynamic content */
|
| 782 |
+
.aria-live {
|
| 783 |
+
position: absolute;
|
| 784 |
+
left: -10000px;
|
| 785 |
+
width: 1px;
|
| 786 |
+
height: 1px;
|
| 787 |
+
overflow: hidden;
|
| 788 |
+
}
|
| 789 |
+
|
| 790 |
+
/* Prevent horizontal scrolling in text containers */
|
| 791 |
+
.card-body {
|
| 792 |
+
overflow-x: hidden;
|
| 793 |
+
max-width: 100%;
|
| 794 |
+
}
|
| 795 |
+
|
| 796 |
+
.text-content {
|
| 797 |
+
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
|
| 798 |
+
line-height: 1.6;
|
| 799 |
+
font-size: 14px;
|
| 800 |
+
color: #333;
|
| 801 |
+
word-wrap: break-word;
|
| 802 |
+
word-break: break-word;
|
| 803 |
+
overflow-wrap: break-word;
|
| 804 |
+
white-space: normal;
|
| 805 |
+
max-width: 100%;
|
| 806 |
+
overflow-x: hidden;
|
| 807 |
+
overflow-y: auto;
|
| 808 |
+
padding: 15px;
|
| 809 |
+
background-color: #f8f9fa;
|
| 810 |
+
border-radius: 5px;
|
| 811 |
+
border: 1px solid #e9ecef;
|
| 812 |
+
margin-bottom: 15px;
|
| 813 |
+
}
|
| 814 |
+
|
| 815 |
+
[data-theme="dark"] .text-content {
|
| 816 |
+
background-color: #2d2d2d !important;
|
| 817 |
+
color: #e0e0e0 !important;
|
| 818 |
+
border-color: #555 !important;
|
| 819 |
+
}
|
| 820 |
+
|
| 821 |
+
/* Responsive text containers */
|
| 822 |
+
.text-container {
|
| 823 |
+
max-width: 100%;
|
| 824 |
+
overflow-x: auto;
|
| 825 |
+
}
|
| 826 |
+
|
| 827 |
+
@media (max-width: 768px) {
|
| 828 |
+
.text-content {
|
| 829 |
+
font-size: 13px;
|
| 830 |
+
padding: 10px !important;
|
| 831 |
+
max-height: 150px !important;
|
| 832 |
+
}
|
| 833 |
+
}
|
| 834 |
+
|
| 835 |
+
/* Additional text wrapping rules */
|
| 836 |
+
* {
|
| 837 |
+
box-sizing: border-box;
|
| 838 |
+
}
|
| 839 |
+
|
| 840 |
+
.container, .container-fluid {
|
| 841 |
+
overflow-x: hidden;
|
| 842 |
+
max-width: 100%;
|
| 843 |
+
}
|
| 844 |
+
|
| 845 |
+
/* Ensure all text elements wrap properly */
|
| 846 |
+
p, div, span, pre, code {
|
| 847 |
+
word-wrap: break-word;
|
| 848 |
+
word-break: break-word;
|
| 849 |
+
overflow-wrap: break-word;
|
| 850 |
+
max-width: 100%;
|
| 851 |
+
}
|
| 852 |
+
|
| 853 |
+
/* Enhanced table styling for multi-column layouts */
|
| 854 |
+
.table-responsive {
|
| 855 |
+
border-radius: 8px;
|
| 856 |
+
overflow: hidden;
|
| 857 |
+
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
|
| 858 |
+
}
|
| 859 |
+
|
| 860 |
+
.table th {
|
| 861 |
+
font-weight: 600;
|
| 862 |
+
font-size: 0.9em;
|
| 863 |
+
padding: 12px 8px;
|
| 864 |
+
text-align: center !important;
|
| 865 |
+
vertical-align: middle !important;
|
| 866 |
+
background-color: var(--primary-color) !important;
|
| 867 |
+
color: white !important;
|
| 868 |
+
border: none !important;
|
| 869 |
+
}
|
| 870 |
+
|
| 871 |
+
.table td {
|
| 872 |
+
padding: 10px 8px;
|
| 873 |
+
text-align: center !important;
|
| 874 |
+
vertical-align: middle !important;
|
| 875 |
+
font-size: 0.9em;
|
| 876 |
+
border: 1px solid #dee2e6;
|
| 877 |
+
}
|
| 878 |
+
|
| 879 |
+
.table td code {
|
| 880 |
+
background-color: #f8f9fa;
|
| 881 |
+
padding: 2px 6px;
|
| 882 |
+
border-radius: 4px;
|
| 883 |
+
font-size: 0.85em;
|
| 884 |
+
color: #e83e8c;
|
| 885 |
+
display: inline-block;
|
| 886 |
+
}
|
| 887 |
+
|
| 888 |
+
[data-theme="dark"] .table td code {
|
| 889 |
+
background-color: #2d2d2d;
|
| 890 |
+
color: #ff6b9d;
|
| 891 |
+
}
|
| 892 |
+
|
| 893 |
+
.table td .badge {
|
| 894 |
+
font-size: 0.8em;
|
| 895 |
+
padding: 4px 8px;
|
| 896 |
+
display: inline-block;
|
| 897 |
+
}
|
| 898 |
+
|
| 899 |
+
/* Fix table header alignment */
|
| 900 |
+
.table thead th {
|
| 901 |
+
text-align: center !important;
|
| 902 |
+
vertical-align: middle !important;
|
| 903 |
+
white-space: nowrap;
|
| 904 |
+
}
|
| 905 |
+
|
| 906 |
+
/* Ensure proper table structure */
|
| 907 |
+
.table {
|
| 908 |
+
margin-bottom: 0;
|
| 909 |
+
width: 100%;
|
| 910 |
+
table-layout: fixed;
|
| 911 |
+
}
|
| 912 |
+
|
| 913 |
+
.table tbody tr:nth-of-type(odd) {
|
| 914 |
+
background-color: rgba(0,0,0,.02);
|
| 915 |
+
}
|
| 916 |
+
|
| 917 |
+
.table tbody tr:hover {
|
| 918 |
+
background-color: rgba(0,0,0,.075);
|
| 919 |
+
}
|
| 920 |
+
|
| 921 |
+
/* Responsive table adjustments */
|
| 922 |
+
@media (max-width: 768px) {
|
| 923 |
+
.table th, .table td {
|
| 924 |
+
padding: 8px 4px;
|
| 925 |
+
font-size: 0.8em;
|
| 926 |
+
}
|
| 927 |
+
|
| 928 |
+
.table td code {
|
| 929 |
+
font-size: 0.75em;
|
| 930 |
+
padding: 1px 4px;
|
| 931 |
+
}
|
| 932 |
+
|
| 933 |
+
.table td .badge {
|
| 934 |
+
font-size: 0.7em;
|
| 935 |
+
padding: 2px 6px;
|
| 936 |
+
}
|
| 937 |
+
}
|
| 938 |
+
|
| 939 |
+
/* Additional table header fixes */
|
| 940 |
+
.table-primary th {
|
| 941 |
+
background-color: var(--primary-color) !important;
|
| 942 |
+
color: white !important;
|
| 943 |
+
text-align: center !important;
|
| 944 |
+
vertical-align: middle !important;
|
| 945 |
+
font-weight: 600 !important;
|
| 946 |
+
border: none !important;
|
| 947 |
+
padding: 12px 8px !important;
|
| 948 |
+
}
|
| 949 |
+
|
| 950 |
+
/* Override Bootstrap table styles */
|
| 951 |
+
.table thead th {
|
| 952 |
+
border-bottom: 2px solid #dee2e6 !important;
|
| 953 |
+
text-align: center !important;
|
| 954 |
+
vertical-align: middle !important;
|
| 955 |
+
}
|
| 956 |
+
|
| 957 |
+
/* Ensure consistent column widths */
|
| 958 |
+
.table th[style*="width"] {
|
| 959 |
+
text-align: center !important;
|
| 960 |
+
}
|
| 961 |
+
|
| 962 |
+
/* Fix any alignment issues */
|
| 963 |
+
.table td, .table th {
|
| 964 |
+
text-align: center !important;
|
| 965 |
+
vertical-align: middle !important;
|
| 966 |
+
}
|
| 967 |
+
|
| 968 |
+
/* ===== VERTICAL SPACING AND TITLE HIERARCHY ===== */
|
| 969 |
+
|
| 970 |
+
/* Main section headers */
|
| 971 |
+
.task-subheader {
|
| 972 |
+
font-size: 1.5rem;
|
| 973 |
+
font-weight: 600;
|
| 974 |
+
color: var(--primary-color);
|
| 975 |
+
margin-top: 2rem;
|
| 976 |
+
margin-bottom: 1.5rem;
|
| 977 |
+
padding-bottom: 0.5rem;
|
| 978 |
+
border-bottom: 2px solid var(--primary-color);
|
| 979 |
+
position: relative;
|
| 980 |
+
}
|
| 981 |
+
|
| 982 |
+
.task-subheader:first-child {
|
| 983 |
+
margin-top: 1rem;
|
| 984 |
+
}
|
| 985 |
+
|
| 986 |
+
.task-subheader::after {
|
| 987 |
+
content: '';
|
| 988 |
+
position: absolute;
|
| 989 |
+
bottom: -2px;
|
| 990 |
+
left: 0;
|
| 991 |
+
width: 50px;
|
| 992 |
+
height: 2px;
|
| 993 |
+
background-color: var(--accent-color);
|
| 994 |
+
}
|
| 995 |
+
|
| 996 |
+
/* Subsection headers */
|
| 997 |
+
h4 {
|
| 998 |
+
font-size: 1.25rem;
|
| 999 |
+
font-weight: 600;
|
| 1000 |
+
color: #2c3e50;
|
| 1001 |
+
margin-top: 1.5rem;
|
| 1002 |
+
margin-bottom: 1rem;
|
| 1003 |
+
padding-left: 0.5rem;
|
| 1004 |
+
border-left: 4px solid var(--primary-color);
|
| 1005 |
+
background-color: #f8f9fa;
|
| 1006 |
+
padding: 0.75rem 1rem;
|
| 1007 |
+
border-radius: 0 5px 5px 0;
|
| 1008 |
+
}
|
| 1009 |
+
|
| 1010 |
+
[data-theme="dark"] h4 {
|
| 1011 |
+
color: #e0e0e0;
|
| 1012 |
+
background-color: #2d2d2d;
|
| 1013 |
+
border-left-color: var(--primary-color);
|
| 1014 |
+
}
|
| 1015 |
+
|
| 1016 |
+
/* Card spacing */
|
| 1017 |
+
.card {
|
| 1018 |
+
margin-bottom: 1.5rem;
|
| 1019 |
+
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
|
| 1020 |
+
border: none;
|
| 1021 |
+
border-radius: 8px;
|
| 1022 |
+
}
|
| 1023 |
+
|
| 1024 |
+
.card-header {
|
| 1025 |
+
background-color: var(--primary-color);
|
| 1026 |
+
color: white;
|
| 1027 |
+
border-radius: 8px 8px 0 0 !important;
|
| 1028 |
+
padding: 1rem 1.25rem;
|
| 1029 |
+
border-bottom: none;
|
| 1030 |
+
}
|
| 1031 |
+
|
| 1032 |
+
.card-header h5 {
|
| 1033 |
+
margin: 0;
|
| 1034 |
+
font-weight: 600;
|
| 1035 |
+
font-size: 1.1rem;
|
| 1036 |
+
}
|
| 1037 |
+
|
| 1038 |
+
.card-body {
|
| 1039 |
+
padding: 1.25rem;
|
| 1040 |
+
background-color: white;
|
| 1041 |
+
border-radius: 0 0 8px 8px;
|
| 1042 |
+
}
|
| 1043 |
+
|
| 1044 |
+
[data-theme="dark"] .card-body {
|
| 1045 |
+
background-color: #2d2d2d;
|
| 1046 |
+
}
|
| 1047 |
+
|
| 1048 |
+
/* Row spacing */
|
| 1049 |
+
.row {
|
| 1050 |
+
margin-bottom: 1.5rem;
|
| 1051 |
+
}
|
| 1052 |
+
|
| 1053 |
+
.row:last-child {
|
| 1054 |
+
margin-bottom: 0;
|
| 1055 |
+
}
|
| 1056 |
+
|
| 1057 |
+
/* Alert boxes spacing */
|
| 1058 |
+
.alert {
|
| 1059 |
+
margin: 1.5rem 0;
|
| 1060 |
+
padding: 1rem 1.25rem;
|
| 1061 |
+
border-radius: 8px;
|
| 1062 |
+
border: none;
|
| 1063 |
+
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
| 1064 |
+
}
|
| 1065 |
+
|
| 1066 |
+
.alert h4 {
|
| 1067 |
+
margin-top: 0;
|
| 1068 |
+
margin-bottom: 0.75rem;
|
| 1069 |
+
font-size: 1.1rem;
|
| 1070 |
+
}
|
| 1071 |
+
|
| 1072 |
+
.alert ul {
|
| 1073 |
+
margin-bottom: 0;
|
| 1074 |
+
}
|
| 1075 |
+
|
| 1076 |
+
.alert li {
|
| 1077 |
+
margin-bottom: 0.5rem;
|
| 1078 |
+
}
|
| 1079 |
+
|
| 1080 |
+
/* Statistics boxes */
|
| 1081 |
+
.stats-container {
|
| 1082 |
+
margin: 1.5rem 0;
|
| 1083 |
+
}
|
| 1084 |
+
|
| 1085 |
+
.stats-container .col-md-4 {
|
| 1086 |
+
margin-bottom: 1rem;
|
| 1087 |
+
}
|
| 1088 |
+
|
| 1089 |
+
/* Base stats card layout */
|
| 1090 |
+
.stats-container .card {
|
| 1091 |
+
text-align: center;
|
| 1092 |
+
}
|
| 1093 |
+
|
| 1094 |
+
/* Dark-mode stats styling (gradient + white text) */
|
| 1095 |
+
[data-theme="dark"] .stats-container .card {
|
| 1096 |
+
background: linear-gradient(135deg, var(--primary-color), var(--accent-color)) !important;
|
| 1097 |
+
color: #fff !important;
|
| 1098 |
+
border: none !important;
|
| 1099 |
+
}
|
| 1100 |
+
|
| 1101 |
+
.stats-container .card-body {
|
| 1102 |
+
background: transparent !important;
|
| 1103 |
+
padding: 1.5rem 1rem;
|
| 1104 |
+
}
|
| 1105 |
+
|
| 1106 |
+
/* Text colors for dark-mode stats */
|
| 1107 |
+
[data-theme="dark"] .stats-container .card-body h3,
|
| 1108 |
+
[data-theme="dark"] .stats-container .card-body p,
|
| 1109 |
+
[data-theme="dark"] .stats-container .card-body h2,
|
| 1110 |
+
[data-theme="dark"] .stats-container .card-title {
|
| 1111 |
+
color: #fff !important;
|
| 1112 |
+
}
|
| 1113 |
+
|
| 1114 |
+
.stats-container .card-body h3 {
|
| 1115 |
+
font-size: 2rem;
|
| 1116 |
+
font-weight: 700;
|
| 1117 |
+
margin-bottom: 0.5rem;
|
| 1118 |
+
}
|
| 1119 |
+
|
| 1120 |
+
.stats-container .card-body p {
|
| 1121 |
+
margin: 0;
|
| 1122 |
+
font-size: 0.9rem;
|
| 1123 |
+
opacity: 0.9;
|
| 1124 |
+
}
|
| 1125 |
+
|
| 1126 |
+
/* Table spacing */
|
| 1127 |
+
.table-responsive {
|
| 1128 |
+
margin: 1rem 0 1.5rem 0;
|
| 1129 |
+
}
|
| 1130 |
+
|
| 1131 |
+
/* Chart spacing */
|
| 1132 |
+
.chart-container {
|
| 1133 |
+
margin: 1.5rem 0;
|
| 1134 |
+
padding: 1rem;
|
| 1135 |
+
background-color: white;
|
| 1136 |
+
border-radius: 8px;
|
| 1137 |
+
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
|
| 1138 |
+
text-align: center;
|
| 1139 |
+
}
|
| 1140 |
+
|
| 1141 |
+
[data-theme="dark"] .chart-container {
|
| 1142 |
+
background-color: #2d2d2d;
|
| 1143 |
+
}
|
| 1144 |
+
|
| 1145 |
+
.chart-container img {
|
| 1146 |
+
max-width: 100%;
|
| 1147 |
+
height: auto;
|
| 1148 |
+
border-radius: 4px;
|
| 1149 |
+
}
|
| 1150 |
+
|
| 1151 |
+
/* Paragraph spacing */
|
| 1152 |
+
p {
|
| 1153 |
+
margin-bottom: 1rem;
|
| 1154 |
+
line-height: 1.6;
|
| 1155 |
+
}
|
| 1156 |
+
|
| 1157 |
+
p:last-child {
|
| 1158 |
+
margin-bottom: 0;
|
| 1159 |
+
}
|
| 1160 |
+
|
| 1161 |
+
/* Small text spacing */
|
| 1162 |
+
small {
|
| 1163 |
+
margin-top: 0.5rem;
|
| 1164 |
+
display: block;
|
| 1165 |
+
color: #6c757d;
|
| 1166 |
+
}
|
| 1167 |
+
|
| 1168 |
+
[data-theme="dark"] small {
|
| 1169 |
+
color: #adb5bd;
|
| 1170 |
+
}
|
| 1171 |
+
|
| 1172 |
+
/* Button spacing */
|
| 1173 |
+
.btn {
|
| 1174 |
+
margin: 0.25rem;
|
| 1175 |
+
border-radius: 6px;
|
| 1176 |
+
}
|
| 1177 |
+
|
| 1178 |
+
/* List spacing */
|
| 1179 |
+
ul, ol {
|
| 1180 |
+
margin-bottom: 1rem;
|
| 1181 |
+
padding-left: 1.5rem;
|
| 1182 |
+
}
|
| 1183 |
+
|
| 1184 |
+
ul li, ol li {
|
| 1185 |
+
margin-bottom: 0.5rem;
|
| 1186 |
+
line-height: 1.6;
|
| 1187 |
+
}
|
| 1188 |
+
|
| 1189 |
+
/* Collapsible content spacing */
|
| 1190 |
+
.collapse {
|
| 1191 |
+
margin-top: 0.5rem;
|
| 1192 |
+
}
|
| 1193 |
+
|
| 1194 |
+
/* Section dividers */
|
| 1195 |
+
.section-divider {
|
| 1196 |
+
height: 2px;
|
| 1197 |
+
background: linear-gradient(90deg, var(--primary-color), var(--accent-color), var(--primary-color));
|
| 1198 |
+
margin: 2rem 0;
|
| 1199 |
+
border-radius: 1px;
|
| 1200 |
+
}
|
| 1201 |
+
|
| 1202 |
+
/* Responsive spacing adjustments */
|
| 1203 |
+
@media (max-width: 768px) {
|
| 1204 |
+
.task-subheader {
|
| 1205 |
+
font-size: 1.3rem;
|
| 1206 |
+
margin-top: 1.5rem;
|
| 1207 |
+
margin-bottom: 1rem;
|
| 1208 |
+
}
|
| 1209 |
+
|
| 1210 |
+
h4 {
|
| 1211 |
+
font-size: 1.1rem;
|
| 1212 |
+
margin-top: 1.25rem;
|
| 1213 |
+
margin-bottom: 0.75rem;
|
| 1214 |
+
}
|
| 1215 |
+
|
| 1216 |
+
.card {
|
| 1217 |
+
margin-bottom: 1rem;
|
| 1218 |
+
}
|
| 1219 |
+
|
| 1220 |
+
.alert {
|
| 1221 |
+
margin: 1rem 0;
|
| 1222 |
+
}
|
| 1223 |
+
|
| 1224 |
+
.stats-container .card-body h3 {
|
| 1225 |
+
font-size: 1.5rem;
|
| 1226 |
+
}
|
| 1227 |
+
}
|
| 1228 |
+
|
| 1229 |
+
/* Quick Navigation styling (scoped) */
|
| 1230 |
+
.quick-nav.card {
|
| 1231 |
+
border: none;
|
| 1232 |
+
box-shadow: 0 6px 18px rgba(0,0,0,0.08);
|
| 1233 |
+
border-radius: 12px;
|
| 1234 |
+
overflow: hidden;
|
| 1235 |
+
}
|
| 1236 |
+
.quick-nav .card-header {
|
| 1237 |
+
background: linear-gradient(135deg, var(--primary-color), var(--accent-color)) !important;
|
| 1238 |
+
color: #fff !important;
|
| 1239 |
+
border-radius: 12px 12px 0 0 !important;
|
| 1240 |
+
padding: 0.875rem 1.25rem;
|
| 1241 |
+
}
|
| 1242 |
+
.quick-nav .card-header h5 { margin: 0; font-weight: 700; letter-spacing: .2px; }
|
| 1243 |
+
|
| 1244 |
+
.quick-nav h6 {
|
| 1245 |
+
color: #2c3e50;
|
| 1246 |
+
font-weight: 700;
|
| 1247 |
+
margin: .75rem 0 .5rem 0;
|
| 1248 |
+
display: flex; align-items: center; gap: .5rem;
|
| 1249 |
+
}
|
| 1250 |
+
|
| 1251 |
+
.quick-nav .btn {
|
| 1252 |
+
border-width: 2px;
|
| 1253 |
+
font-weight: 600;
|
| 1254 |
+
border-radius: 28px;
|
| 1255 |
+
padding: 10px 16px;
|
| 1256 |
+
display: inline-flex; align-items: center; gap: .35rem;
|
| 1257 |
+
transition: all .15s ease;
|
| 1258 |
+
}
|
| 1259 |
+
.quick-nav .btn i { margin: 0; font-size: .95rem; }
|
| 1260 |
+
.quick-nav .btn:hover { transform: translateY(-1px); box-shadow: 0 6px 12px rgba(0,0,0,.08); }
|
| 1261 |
+
|
| 1262 |
+
/* subtle background so outline buttons look like chips */
|
| 1263 |
+
.quick-nav .btn-outline-primary,
|
| 1264 |
+
.quick-nav .btn-outline-success,
|
| 1265 |
+
.quick-nav .btn-outline-info { background-color: #f8f9fa; }
|
| 1266 |
+
|
| 1267 |
+
/* layout tweaks */
|
| 1268 |
+
.quick-nav .row { row-gap: .75rem; }
|
| 1269 |
+
|
| 1270 |
+
@media (max-width: 768px) {
|
| 1271 |
+
.quick-nav .btn { width: 100%; justify-content: center; }
|
| 1272 |
+
}
|
| 1273 |
+
|
| 1274 |
+
/* WORD FREQUENCY OVERLAP FIX - Force vertical stacking */
|
| 1275 |
+
.wf-chart-section {
|
| 1276 |
+
display: block !important;
|
| 1277 |
+
width: 100% !important;
|
| 1278 |
+
float: none !important;
|
| 1279 |
+
clear: both !important;
|
| 1280 |
+
position: static !important;
|
| 1281 |
+
margin-bottom: 4rem !important;
|
| 1282 |
+
overflow: hidden !important;
|
| 1283 |
+
page-break-inside: avoid !important;
|
| 1284 |
+
}
|
| 1285 |
+
|
| 1286 |
+
.wf-table-container {
|
| 1287 |
+
display: block !important;
|
| 1288 |
+
width: 100% !important;
|
| 1289 |
+
float: none !important;
|
| 1290 |
+
clear: both !important;
|
| 1291 |
+
position: static !important;
|
| 1292 |
+
margin-top: 4rem !important;
|
| 1293 |
+
overflow: visible !important;
|
| 1294 |
+
page-break-inside: avoid !important;
|
| 1295 |
+
}
|
| 1296 |
+
|
| 1297 |
+
/* Force chart container to be completely separate */
|
| 1298 |
+
.wf-chart-section .chart-container {
|
| 1299 |
+
display: block !important;
|
| 1300 |
+
width: 100% !important;
|
| 1301 |
+
float: none !important;
|
| 1302 |
+
clear: both !important;
|
| 1303 |
+
position: static !important;
|
| 1304 |
+
margin: 0 !important;
|
| 1305 |
+
}
|
| 1306 |
+
|
| 1307 |
+
.chart-container {
|
| 1308 |
+
display: block !important;
|
| 1309 |
+
width: 100% !important;
|
| 1310 |
+
height: auto !important;
|
| 1311 |
+
min-height: auto !important;
|
| 1312 |
+
max-height: none !important;
|
| 1313 |
+
margin: 1.5rem 0 !important;
|
| 1314 |
+
position: static !important;
|
| 1315 |
+
z-index: auto !important;
|
| 1316 |
+
clear: both !important;
|
| 1317 |
+
overflow: visible !important;
|
| 1318 |
+
}
|
| 1319 |
+
|
| 1320 |
+
.chart-container img, .chart-container canvas, .chart-container svg {
|
| 1321 |
+
display: block !important;
|
| 1322 |
+
width: 100% !important;
|
| 1323 |
+
max-width: 100% !important;
|
| 1324 |
+
height: auto !important;
|
| 1325 |
+
margin: 0 auto !important;
|
| 1326 |
+
}
|
| 1327 |
+
|
| 1328 |
+
/* Kill all floats in word frequency section */
|
| 1329 |
+
.wf-table-container .table-responsive {
|
| 1330 |
+
float: none !important;
|
| 1331 |
+
clear: both !important;
|
| 1332 |
+
display: block !important;
|
| 1333 |
+
width: 100% !important;
|
| 1334 |
+
position: static !important;
|
| 1335 |
+
}
|
| 1336 |
+
|
| 1337 |
+
/* POS DISTRIBUTION OVERLAP FIX - Force vertical stacking */
|
| 1338 |
+
.pos-chart-section {
|
| 1339 |
+
display: block !important;
|
| 1340 |
+
width: 100% !important;
|
| 1341 |
+
float: none !important;
|
| 1342 |
+
clear: both !important;
|
| 1343 |
+
position: static !important;
|
| 1344 |
+
margin-bottom: 3rem !important;
|
| 1345 |
+
overflow: hidden !important;
|
| 1346 |
+
page-break-inside: avoid !important;
|
| 1347 |
+
}
|
| 1348 |
+
|
| 1349 |
+
.pos-table-container {
|
| 1350 |
+
display: block !important;
|
| 1351 |
+
width: 100% !important;
|
| 1352 |
+
float: none !important;
|
| 1353 |
+
clear: both !important;
|
| 1354 |
+
position: static !important;
|
| 1355 |
+
margin-top: 3rem !important;
|
| 1356 |
+
overflow: visible !important;
|
| 1357 |
+
page-break-inside: avoid !important;
|
| 1358 |
+
}
|
| 1359 |
+
|
| 1360 |
+
/* Force POS chart container to be completely separate */
|
| 1361 |
+
.pos-chart-section .chart-container {
|
| 1362 |
+
display: block !important;
|
| 1363 |
+
width: 100% !important;
|
| 1364 |
+
float: none !important;
|
| 1365 |
+
clear: both !important;
|
| 1366 |
+
position: static !important;
|
| 1367 |
+
margin: 0 !important;
|
| 1368 |
+
}
|
| 1369 |
+
|
| 1370 |
+
/* ENTITY DISTRIBUTION OVERLAP FIX - Force vertical stacking */
|
| 1371 |
+
.entity-chart-section {
|
| 1372 |
+
display: block !important;
|
| 1373 |
+
width: 100% !important;
|
| 1374 |
+
float: none !important;
|
| 1375 |
+
clear: both !important;
|
| 1376 |
+
position: static !important;
|
| 1377 |
+
margin-bottom: 3rem !important;
|
| 1378 |
+
overflow: hidden !important;
|
| 1379 |
+
page-break-inside: avoid !important;
|
| 1380 |
+
}
|
| 1381 |
+
|
| 1382 |
+
.entity-table-container {
|
| 1383 |
+
display: block !important;
|
| 1384 |
+
width: 100% !important;
|
| 1385 |
+
float: none !important;
|
| 1386 |
+
clear: both !important;
|
| 1387 |
+
position: static !important;
|
| 1388 |
+
margin-top: 3rem !important;
|
| 1389 |
+
overflow: visible !important;
|
| 1390 |
+
page-break-inside: avoid !important;
|
| 1391 |
+
}
|
| 1392 |
+
|
| 1393 |
+
/* Force entity chart container to be completely separate */
|
| 1394 |
+
.entity-chart-section .chart-container {
|
| 1395 |
+
display: block !important;
|
| 1396 |
+
width: 100% !important;
|
| 1397 |
+
float: none !important;
|
| 1398 |
+
clear: both !important;
|
| 1399 |
+
position: static !important;
|
| 1400 |
+
margin: 0 !important;
|
| 1401 |
+
}
|
| 1402 |
+
|
| 1403 |
+
/* Entity Badge Styling for Better Visibility */
|
| 1404 |
+
.entity-badge {
|
| 1405 |
+
display: inline-block !important;
|
| 1406 |
+
padding: 0.5rem 0.75rem !important;
|
| 1407 |
+
margin: 0.2rem !important;
|
| 1408 |
+
border-radius: 8px !important;
|
| 1409 |
+
font-size: 0.9rem !important;
|
| 1410 |
+
font-weight: 600 !important;
|
| 1411 |
+
text-shadow: 1px 1px 2px rgba(0,0,0,0.5) !important;
|
| 1412 |
+
border: 2px solid #ffffff !important;
|
| 1413 |
+
box-shadow: 0 3px 6px rgba(0,0,0,0.3) !important;
|
| 1414 |
+
transition: all 0.2s ease !important;
|
| 1415 |
+
cursor: help !important;
|
| 1416 |
+
}
|
| 1417 |
+
|
| 1418 |
+
.entity-badge:hover {
|
| 1419 |
+
transform: translateY(-1px) !important;
|
| 1420 |
+
box-shadow: 0 4px 8px rgba(0,0,0,0.4) !important;
|
| 1421 |
+
}
|
| 1422 |
+
|
| 1423 |
+
/* Ensure entity text container has clean white background */
|
| 1424 |
+
.entity-text-container {
|
| 1425 |
+
background-color: #ffffff !important;
|
| 1426 |
+
padding: 1.5rem !important;
|
| 1427 |
+
border-radius: 8px !important;
|
| 1428 |
+
border: 1px solid #dee2e6 !important;
|
| 1429 |
+
line-height: 2 !important;
|
| 1430 |
+
margin: 1rem 0 !important;
|
| 1431 |
+
box-shadow: 0 2px 4px rgba(0,0,0,0.05) !important;
|
| 1432 |
+
}
|
| 1433 |
+
|
| 1434 |
+
/* POS Legend Button Styling */
|
| 1435 |
+
.pos-legend-buttons {
|
| 1436 |
+
margin: 1rem 0;
|
| 1437 |
+
}
|
| 1438 |
+
|
| 1439 |
+
.pos-legend-buttons .btn {
|
| 1440 |
+
font-weight: 700 !important;
|
| 1441 |
+
border-radius: 8px !important;
|
| 1442 |
+
padding: 0.875rem 2.5rem !important;
|
| 1443 |
+
margin: 0 0.5rem;
|
| 1444 |
+
transition: all 0.3s ease;
|
| 1445 |
+
box-shadow: 0 4px 12px rgba(0,0,0,0.2) !important;
|
| 1446 |
+
font-size: 1.1rem !important;
|
| 1447 |
+
}
|
| 1448 |
+
|
| 1449 |
+
/* Active button - white background with dark text for high contrast */
|
| 1450 |
+
.pos-legend-buttons .btn.active {
|
| 1451 |
+
background-color: #ffffff !important;
|
| 1452 |
+
color: #1a365d !important;
|
| 1453 |
+
border: 3px solid #ffffff !important;
|
| 1454 |
+
transform: none;
|
| 1455 |
+
box-shadow: 0 4px 16px rgba(0,0,0,0.3) !important;
|
| 1456 |
+
}
|
| 1457 |
+
|
| 1458 |
+
/* Inactive button - dark background with white text */
|
| 1459 |
+
.pos-legend-buttons .btn:not(.active) {
|
| 1460 |
+
background-color: rgba(0,0,0,0.3) !important;
|
| 1461 |
+
color: #ffffff !important;
|
| 1462 |
+
border: 3px solid rgba(255,255,255,0.5) !important;
|
| 1463 |
+
}
|
| 1464 |
+
|
| 1465 |
+
.pos-legend-buttons .btn:hover {
|
| 1466 |
+
transform: translateY(-2px) !important;
|
| 1467 |
+
box-shadow: 0 6px 20px rgba(0,0,0,0.3) !important;
|
| 1468 |
+
}
|
| 1469 |
+
|
| 1470 |
+
/* Ensure text is always visible */
|
| 1471 |
+
.pos-legend-buttons .btn.active:hover {
|
| 1472 |
+
color: #1a365d !important;
|
| 1473 |
+
background-color: #f8f9fa !important;
|
| 1474 |
+
}
|
| 1475 |
+
|
| 1476 |
+
.pos-legend-buttons .btn:not(.active):hover {
|
| 1477 |
+
background-color: rgba(0,0,0,0.5) !important;
|
| 1478 |
+
color: #ffffff !important;
|
| 1479 |
+
border-color: #ffffff !important;
|
| 1480 |
+
}
|
| 1481 |
+
|
| 1482 |
+
.pos-tags-section {
|
| 1483 |
+
animation: fadeIn 0.3s ease-in;
|
| 1484 |
+
}
|
| 1485 |
+
|
| 1486 |
+
@keyframes fadeIn {
|
| 1487 |
+
from { opacity: 0; }
|
| 1488 |
+
to { opacity: 1; }
|
| 1489 |
+
}
|
| 1490 |
+
|
| 1491 |
+
/* SENTIMENT ANALYSIS METHOD CARDS - Modern Design */
|
| 1492 |
+
.sentiment-method-card {
|
| 1493 |
+
border: none !important;
|
| 1494 |
+
border-radius: 16px !important;
|
| 1495 |
+
box-shadow: 0 8px 32px rgba(0,0,0,0.1) !important;
|
| 1496 |
+
overflow: hidden !important;
|
| 1497 |
+
transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
|
| 1498 |
+
position: relative !important;
|
| 1499 |
+
}
|
| 1500 |
+
|
| 1501 |
+
.sentiment-method-card:hover {
|
| 1502 |
+
transform: translateY(-8px) !important;
|
| 1503 |
+
box-shadow: 0 16px 48px rgba(0,0,0,0.15) !important;
|
| 1504 |
+
}
|
| 1505 |
+
|
| 1506 |
+
/* Method Header with Gradients */
|
| 1507 |
+
.method-header {
|
| 1508 |
+
padding: 2rem 1.5rem 1rem !important;
|
| 1509 |
+
text-align: center !important;
|
| 1510 |
+
position: relative !important;
|
| 1511 |
+
color: white !important;
|
| 1512 |
+
}
|
| 1513 |
+
|
| 1514 |
+
.vader-card .method-header {
|
| 1515 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
|
| 1516 |
+
}
|
| 1517 |
+
|
| 1518 |
+
.distilbert-card .method-header {
|
| 1519 |
+
background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%) !important;
|
| 1520 |
+
}
|
| 1521 |
+
|
| 1522 |
+
.roberta-card .method-header {
|
| 1523 |
+
background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%) !important;
|
| 1524 |
+
}
|
| 1525 |
+
|
| 1526 |
+
.method-icon {
|
| 1527 |
+
width: 80px !important;
|
| 1528 |
+
height: 80px !important;
|
| 1529 |
+
border-radius: 50% !important;
|
| 1530 |
+
background: rgba(255,255,255,0.2) !important;
|
| 1531 |
+
display: flex !important;
|
| 1532 |
+
align-items: center !important;
|
| 1533 |
+
justify-content: center !important;
|
| 1534 |
+
margin: 0 auto 1rem !important;
|
| 1535 |
+
backdrop-filter: blur(10px) !important;
|
| 1536 |
+
}
|
| 1537 |
+
|
| 1538 |
+
.method-icon i {
|
| 1539 |
+
font-size: 2.5rem !important;
|
| 1540 |
+
color: white !important;
|
| 1541 |
+
}
|
| 1542 |
+
|
| 1543 |
+
.method-title {
|
| 1544 |
+
font-size: 1.5rem !important;
|
| 1545 |
+
font-weight: 700 !important;
|
| 1546 |
+
margin: 0 0 0.5rem 0 !important;
|
| 1547 |
+
color: white !important;
|
| 1548 |
+
text-shadow: 2px 2px 4px rgba(0,0,0,0.3) !important;
|
| 1549 |
+
}
|
| 1550 |
+
|
| 1551 |
+
.method-subtitle {
|
| 1552 |
+
font-size: 0.9rem !important;
|
| 1553 |
+
opacity: 1 !important;
|
| 1554 |
+
margin: 0 !important;
|
| 1555 |
+
font-weight: 500 !important;
|
| 1556 |
+
color: white !important;
|
| 1557 |
+
text-shadow: 1px 1px 2px rgba(0,0,0,0.3) !important;
|
| 1558 |
+
}
|
| 1559 |
+
|
| 1560 |
+
/* Method Body */
|
| 1561 |
+
.method-body {
|
| 1562 |
+
padding: 1.5rem !important;
|
| 1563 |
+
background: white !important;
|
| 1564 |
+
color: #333333 !important;
|
| 1565 |
+
}
|
| 1566 |
+
|
| 1567 |
+
.method-description {
|
| 1568 |
+
font-size: 0.95rem !important;
|
| 1569 |
+
color: #6c757d !important;
|
| 1570 |
+
margin-bottom: 1.5rem !important;
|
| 1571 |
+
line-height: 1.6 !important;
|
| 1572 |
+
}
|
| 1573 |
+
|
| 1574 |
+
.method-features {
|
| 1575 |
+
display: flex !important;
|
| 1576 |
+
flex-direction: column !important;
|
| 1577 |
+
gap: 0.75rem !important;
|
| 1578 |
+
}
|
| 1579 |
+
|
| 1580 |
+
.feature-item {
|
| 1581 |
+
display: flex !important;
|
| 1582 |
+
align-items: center !important;
|
| 1583 |
+
gap: 0.75rem !important;
|
| 1584 |
+
}
|
| 1585 |
+
|
| 1586 |
+
.feature-item i {
|
| 1587 |
+
color: #28a745 !important;
|
| 1588 |
+
font-size: 0.9rem !important;
|
| 1589 |
+
flex-shrink: 0 !important;
|
| 1590 |
+
}
|
| 1591 |
+
|
| 1592 |
+
.feature-item span {
|
| 1593 |
+
font-size: 0.9rem !important;
|
| 1594 |
+
color: #495057 !important;
|
| 1595 |
+
font-weight: 500 !important;
|
| 1596 |
+
}
|
| 1597 |
+
|
| 1598 |
+
/* Responsive adjustments */
|
| 1599 |
+
@media (max-width: 768px) {
|
| 1600 |
+
.method-header {
|
| 1601 |
+
padding: 1.5rem 1rem 0.75rem !important;
|
| 1602 |
+
}
|
| 1603 |
+
|
| 1604 |
+
.method-icon {
|
| 1605 |
+
width: 60px !important;
|
| 1606 |
+
height: 60px !important;
|
| 1607 |
+
}
|
| 1608 |
+
|
| 1609 |
+
.method-icon i {
|
| 1610 |
+
font-size: 2rem !important;
|
| 1611 |
+
}
|
| 1612 |
+
|
| 1613 |
+
.method-title {
|
| 1614 |
+
font-size: 1.25rem !important;
|
| 1615 |
+
}
|
| 1616 |
+
}
|
| 1617 |
+
|
| 1618 |
+
/* SENTIMENT ANALYSIS OVERLAP FIX - Force vertical stacking */
|
| 1619 |
+
.emotion-chart-section,
|
| 1620 |
+
.sentence-chart-section {
|
| 1621 |
+
display: block !important;
|
| 1622 |
+
width: 100% !important;
|
| 1623 |
+
float: none !important;
|
| 1624 |
+
clear: both !important;
|
| 1625 |
+
position: static !important;
|
| 1626 |
+
margin-bottom: 3rem !important;
|
| 1627 |
+
overflow: hidden !important;
|
| 1628 |
+
page-break-inside: avoid !important;
|
| 1629 |
+
}
|
| 1630 |
+
|
| 1631 |
+
.emotion-result-container,
|
| 1632 |
+
.sentence-analysis-container {
|
| 1633 |
+
display: block !important;
|
| 1634 |
+
width: 100% !important;
|
| 1635 |
+
float: none !important;
|
| 1636 |
+
clear: both !important;
|
| 1637 |
+
position: static !important;
|
| 1638 |
+
margin-top: 3rem !important;
|
| 1639 |
+
overflow: visible !important;
|
| 1640 |
+
page-break-inside: avoid !important;
|
| 1641 |
+
}
|
| 1642 |
+
|
| 1643 |
+
/* Force sentiment chart containers to be completely separate */
|
| 1644 |
+
.emotion-chart-section .chart-container,
|
| 1645 |
+
.sentence-chart-section .chart-container {
|
| 1646 |
+
display: block !important;
|
| 1647 |
+
width: 100% !important;
|
| 1648 |
+
float: none !important;
|
| 1649 |
+
clear: both !important;
|
| 1650 |
+
position: static !important;
|
| 1651 |
+
margin: 0 !important;
|
| 1652 |
+
}
|
| 1653 |
+
|
| 1654 |
+
/* Stats cards - high contrast in light mode */
|
| 1655 |
+
.stats-card {
|
| 1656 |
+
background: #eef5ff !important; /* light blue background for readability */
|
| 1657 |
+
border: 1px solid #d6e4ff !important;
|
| 1658 |
+
color: #0b3d91 !important;
|
| 1659 |
+
box-shadow: 0 4px 12px rgba(0,0,0,.06) !important;
|
| 1660 |
+
min-height: 110px;
|
| 1661 |
+
}
|
| 1662 |
+
/* Force dark text for all children in light mode */
|
| 1663 |
+
.stats-card *, .stats-card .card-body *, .stats-card .card-body {
|
| 1664 |
+
color: #0b3d91 !important;
|
| 1665 |
+
}
|
| 1666 |
+
.stats-card .card-body { background: transparent !important; display: flex; flex-direction: column; align-items: center; justify-content: center; }
|
| 1667 |
+
.stats-card h3 { font-size: 2rem !important; margin: 0 0 .25rem 0 !important; }
|
| 1668 |
+
.stats-card p, .stats-card .card-title { font-weight: 700 !important; margin: 0 !important; opacity: 1; }
|
| 1669 |
+
|
| 1670 |
+
/* Dark mode variant keeps white text on gradient */
|
| 1671 |
+
[data-theme="dark"] .stats-card {
|
| 1672 |
+
background: linear-gradient(135deg, var(--primary-color), var(--accent-color)) !important;
|
| 1673 |
+
border: none !important;
|
| 1674 |
+
color: #fff !important;
|
| 1675 |
+
}
|
| 1676 |
+
[data-theme="dark"] .stats-card *, [data-theme="dark"] .stats-card .card-body * { color: #fff !important; }
|
| 1677 |
+
|
| 1678 |
+
/* Quick Navigation button color fixes */
|
| 1679 |
+
.quick-nav .btn-outline-primary { color: #0d6efd; border-color: #0d6efd; }
|
| 1680 |
+
.quick-nav .btn-outline-primary:hover,
|
| 1681 |
+
.quick-nav .btn-outline-primary:focus,
|
| 1682 |
+
.quick-nav .btn-outline-primary:active { color: #fff; background-color: #0d6efd; border-color: #0d6efd; box-shadow: none; }
|
| 1683 |
+
|
| 1684 |
+
.quick-nav .btn-outline-success { color: #198754; border-color: #198754; }
|
| 1685 |
+
.quick-nav .btn-outline-success:hover,
|
| 1686 |
+
.quick-nav .btn-outline-success:focus,
|
| 1687 |
+
.quick-nav .btn-outline-success:active { color: #fff; background-color: #198754; border-color: #198754; box-shadow: none; }
|
| 1688 |
+
|
| 1689 |
+
.quick-nav .btn-outline-info { color: #0dcaf0; border-color: #0dcaf0; }
|
| 1690 |
+
.quick-nav .btn-outline-info:hover,
|
| 1691 |
+
.quick-nav .btn-outline-info:focus,
|
| 1692 |
+
.quick-nav .btn-outline-info:active { color: #0b2a2f; background-color: #0dcaf0; border-color: #0dcaf0; box-shadow: none; }
|
| 1693 |
+
|
| 1694 |
+
/* Ensure readable default text color on chip background */
|
| 1695 |
+
.quick-nav .btn { color: inherit; }
|
| 1696 |
+
|
| 1697 |
+
/* Quick Nav title color fixes */
|
| 1698 |
+
.quick-nav .card-header {
|
| 1699 |
+
background: transparent !important; /* keep card bg */
|
| 1700 |
+
color: #0d6efd !important; /* visible in light mode */
|
| 1701 |
+
border-bottom: 2px solid rgba(13,110,253,.15) !important;
|
| 1702 |
+
}
|
| 1703 |
+
|
| 1704 |
+
[data-theme="dark"] .quick-nav .card-header {
|
| 1705 |
+
color: #ffffff !important;
|
| 1706 |
+
border-bottom: 2px solid rgba(255,255,255,.2) !important;
|
| 1707 |
+
}
|
| 1708 |
+
|
| 1709 |
+
/* Dark mode quick navigation fixes */
|
| 1710 |
+
[data-theme="dark"] .quick-nav .card {
|
| 1711 |
+
background-color: rgba(255,255,255,0.1) !important;
|
| 1712 |
+
border: 1px solid rgba(255,255,255,0.2) !important;
|
| 1713 |
+
}
|
| 1714 |
+
|
| 1715 |
+
[data-theme="dark"] .quick-nav .btn-outline-primary {
|
| 1716 |
+
color: #6ea8fe !important;
|
| 1717 |
+
border-color: #6ea8fe !important;
|
| 1718 |
+
background-color: rgba(110,168,254,0.1) !important;
|
| 1719 |
+
}
|
| 1720 |
+
|
| 1721 |
+
[data-theme="dark"] .quick-nav .btn-outline-primary:hover,
|
| 1722 |
+
[data-theme="dark"] .quick-nav .btn-outline-primary:focus,
|
| 1723 |
+
[data-theme="dark"] .quick-nav .btn-outline-primary:active {
|
| 1724 |
+
color: #000 !important;
|
| 1725 |
+
background-color: #6ea8fe !important;
|
| 1726 |
+
border-color: #6ea8fe !important;
|
| 1727 |
+
}
|
| 1728 |
+
|
| 1729 |
+
[data-theme="dark"] .quick-nav .btn-outline-success {
|
| 1730 |
+
color: #75b798 !important;
|
| 1731 |
+
border-color: #75b798 !important;
|
| 1732 |
+
background-color: rgba(117,183,152,0.1) !important;
|
| 1733 |
+
}
|
| 1734 |
+
|
| 1735 |
+
[data-theme="dark"] .quick-nav .btn-outline-success:hover,
|
| 1736 |
+
[data-theme="dark"] .quick-nav .btn-outline-success:focus,
|
| 1737 |
+
[data-theme="dark"] .quick-nav .btn-outline-success:active {
|
| 1738 |
+
color: #000 !important;
|
| 1739 |
+
background-color: #75b798 !important;
|
| 1740 |
+
border-color: #75b798 !important;
|
| 1741 |
+
}
|
| 1742 |
+
|
| 1743 |
+
[data-theme="dark"] .quick-nav .btn-outline-info {
|
| 1744 |
+
color: #6edff6 !important;
|
| 1745 |
+
border-color: #6edff6 !important;
|
| 1746 |
+
background-color: rgba(110,223,246,0.1) !important;
|
| 1747 |
+
}
|
| 1748 |
+
|
| 1749 |
+
[data-theme="dark"] .quick-nav .btn-outline-info:hover,
|
| 1750 |
+
[data-theme="dark"] .quick-nav .btn-outline-info:focus,
|
| 1751 |
+
[data-theme="dark"] .quick-nav .btn-outline-info:active {
|
| 1752 |
+
color: #000 !important;
|
| 1753 |
+
background-color: #6edff6 !important;
|
| 1754 |
+
border-color: #6edff6 !important;
|
| 1755 |
+
}
|
| 1756 |
+
|
static/css/style.css
ADDED
|
@@ -0,0 +1,762 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* Custom CSS for NLP Ultimate Tutorial */
|
| 2 |
+
|
| 3 |
+
:root {
|
| 4 |
+
--primary-color: #1976D2;
|
| 5 |
+
--primary-hover: #1565C0;
|
| 6 |
+
--secondary-color: #424242;
|
| 7 |
+
--success-color: #4CAF50;
|
| 8 |
+
--info-color: #2196F3;
|
| 9 |
+
--warning-color: #FF9800;
|
| 10 |
+
--danger-color: #F44336;
|
| 11 |
+
--light-color: #f8f9fa;
|
| 12 |
+
--dark-color: #212529;
|
| 13 |
+
--gradient-subtle: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
|
| 14 |
+
}
|
| 15 |
+
|
| 16 |
+
/* Dark theme variables */
|
| 17 |
+
[data-theme="dark"] {
|
| 18 |
+
--primary-color: #64B5F6;
|
| 19 |
+
--primary-hover: #42A5F5;
|
| 20 |
+
--secondary-color: #e0e0e0;
|
| 21 |
+
--success-color: #81C784;
|
| 22 |
+
--info-color: #64B5F6;
|
| 23 |
+
--warning-color: #FFB74D;
|
| 24 |
+
--danger-color: #FF8A80;
|
| 25 |
+
--light-color: #2d2d2d;
|
| 26 |
+
--dark-color: #e0e0e0;
|
| 27 |
+
--gradient-subtle: linear-gradient(135deg, #2d2d2d 0%, #1a1a1a 100%);
|
| 28 |
+
}
|
| 29 |
+
|
| 30 |
+
/* Body and background */
|
| 31 |
+
body {
|
| 32 |
+
background: var(--gradient-subtle);
|
| 33 |
+
transition: all 0.3s ease;
|
| 34 |
+
overflow-x: hidden;
|
| 35 |
+
max-width: 100%;
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
+
/* Navbar styling */
|
| 39 |
+
.navbar {
|
| 40 |
+
background: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-hover) 100%) !important;
|
| 41 |
+
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
| 42 |
+
z-index: 1030 !important;
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
/* Ensure navbar container spans full width comfortably */
|
| 46 |
+
.navbar .container {
|
| 47 |
+
max-width: 100% !important;
|
| 48 |
+
padding-left: 1rem !important;
|
| 49 |
+
padding-right: 1rem !important;
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
/* Dropdown menus should overlay the page and scroll internally if too tall */
|
| 53 |
+
.navbar-nav .dropdown-menu {
|
| 54 |
+
z-index: 2000 !important; /* stay above navbar */
|
| 55 |
+
max-height: 70vh !important; /* avoid pushing page */
|
| 56 |
+
overflow-y: auto !important; /* scroll inside menu when needed */
|
| 57 |
+
margin-top: 0.5rem !important; /* small gap below trigger */
|
| 58 |
+
border-radius: 12px !important;
|
| 59 |
+
box-shadow: 0 12px 28px rgba(0,0,0,0.22) !important;
|
| 60 |
+
}
|
| 61 |
+
|
| 62 |
+
/* Navbar dropdown visual style */
|
| 63 |
+
.navbar-nav .dropdown-menu {
|
| 64 |
+
background: #ffffff !important;
|
| 65 |
+
border: 1px solid rgba(0,0,0,0.08) !important;
|
| 66 |
+
border-radius: 12px !important;
|
| 67 |
+
padding: 0.5rem 0 !important;
|
| 68 |
+
box-shadow: 0 12px 28px rgba(0,0,0,0.18) !important;
|
| 69 |
+
min-width: 220px !important;
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
.navbar-nav .dropdown-item {
|
| 73 |
+
padding: 0.6rem 1rem !important;
|
| 74 |
+
font-weight: 500 !important;
|
| 75 |
+
color: #2b2b2b !important;
|
| 76 |
+
border-radius: 8px !important;
|
| 77 |
+
margin: 0.1rem 0.5rem !important;
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
.navbar-nav .dropdown-item:hover,
|
| 81 |
+
.navbar-nav .dropdown-item:focus {
|
| 82 |
+
background: linear-gradient(135deg, var(--primary-color), var(--primary-hover)) !important;
|
| 83 |
+
color: #ffffff !important;
|
| 84 |
+
}
|
| 85 |
+
|
| 86 |
+
/* Dark mode dropdown contrast */
|
| 87 |
+
[data-theme="dark"] .navbar-nav .dropdown-menu {
|
| 88 |
+
background: #1f1f1f !important;
|
| 89 |
+
border-color: rgba(255,255,255,0.12) !important;
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
[data-theme="dark"] .navbar-nav .dropdown-item { color: #e0e0e0 !important; }
|
| 93 |
+
[data-theme="dark"] .navbar-nav .dropdown-item:hover, [data-theme="dark"] .navbar-nav .dropdown-item:focus { color: #ffffff !important; }
|
| 94 |
+
|
| 95 |
+
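/* Mobile: let dropdowns be part of the flow inside the collapse. NOTE(review): the unconditional .navbar-nav .dropdown-menu rules later in this file re-declare position, max-height and overflow with !important at the same specificity, so they override this block; move this media query after them if the mobile behaviour is meant to apply. */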
|
| 96 |
+
@media (max-width: 991.98px) {
|
| 97 |
+
.navbar-nav .dropdown-menu {
|
| 98 |
+
position: static !important;
|
| 99 |
+
max-height: none !important;
|
| 100 |
+
overflow: visible !important;
|
| 101 |
+
width: 100% !important;
|
| 102 |
+
margin: 0.25rem 0 !important;
|
| 103 |
+
box-shadow: 0 6px 18px rgba(0,0,0,0.18) !important;
|
| 104 |
+
}
|
| 105 |
+
.navbar-collapse {
|
| 106 |
+
max-height: 80vh !important;
|
| 107 |
+
overflow-y: auto !important;
|
| 108 |
+
overflow-x: hidden !important;
|
| 109 |
+
padding-bottom: 1rem !important;
|
| 110 |
+
}
|
| 111 |
+
}
|
| 112 |
+
|
| 113 |
+
/* Anchor dropdowns directly under their toggles (desktop) */
|
| 114 |
+
.navbar-nav .dropdown { position: relative !important; }
|
| 115 |
+
.navbar-nav .dropdown-menu {
|
| 116 |
+
position: absolute !important;
|
| 117 |
+
top: 100% !important;
|
| 118 |
+
left: 0 !important;
|
| 119 |
+
right: auto !important;
|
| 120 |
+
transform: none !important;
|
| 121 |
+
}
|
| 122 |
+
|
| 123 |
+
/* Respect Bootstrap end-aligned menus */
|
| 124 |
+
.navbar-nav .dropdown-menu-end { left: auto !important; right: 0 !important; }
|
| 125 |
+
|
| 126 |
+
/* Constrain dropdown height so page doesn't vertically scroll when open */
|
| 127 |
+
.navbar-nav .dropdown-menu { max-height: 70vh !important; overflow-y: auto !important; }
|
| 128 |
+
|
| 129 |
+
.navbar-nav .dropdown-menu { overflow-x: hidden !important; }
|
| 130 |
+
|
| 131 |
+
.navbar-brand {
|
| 132 |
+
font-weight: 600;
|
| 133 |
+
font-size: 1.5rem;
|
| 134 |
+
}
|
| 135 |
+
|
| 136 |
+
/* Modern Footer Styling */
|
| 137 |
+
.modern-footer {
|
| 138 |
+
background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%);
|
| 139 |
+
color: #ffffff;
|
| 140 |
+
padding: 3rem 0 2rem;
|
| 141 |
+
margin-top: 4rem;
|
| 142 |
+
position: relative;
|
| 143 |
+
overflow: hidden;
|
| 144 |
+
}
|
| 145 |
+
|
| 146 |
+
.modern-footer::before {
|
| 147 |
+
content: '';
|
| 148 |
+
position: absolute;
|
| 149 |
+
top: 0;
|
| 150 |
+
left: 0;
|
| 151 |
+
right: 0;
|
| 152 |
+
height: 1px;
|
| 153 |
+
background: linear-gradient(90deg, transparent, rgba(255,255,255,0.3), transparent);
|
| 154 |
+
}
|
| 155 |
+
|
| 156 |
+
.footer-brand h5 {
|
| 157 |
+
color: #64B5F6;
|
| 158 |
+
font-weight: 700;
|
| 159 |
+
margin-bottom: 1rem;
|
| 160 |
+
font-size: 1.4rem;
|
| 161 |
+
}
|
| 162 |
+
|
| 163 |
+
.footer-description {
|
| 164 |
+
color: rgba(255,255,255,0.8);
|
| 165 |
+
font-size: 0.95rem;
|
| 166 |
+
line-height: 1.6;
|
| 167 |
+
margin: 0;
|
| 168 |
+
}
|
| 169 |
+
|
| 170 |
+
.footer-credit {
|
| 171 |
+
display: flex;
|
| 172 |
+
justify-content: center;
|
| 173 |
+
align-items: center;
|
| 174 |
+
}
|
| 175 |
+
|
| 176 |
+
.credit-badge {
|
| 177 |
+
background: rgba(255,255,255,0.1);
|
| 178 |
+
backdrop-filter: blur(10px);
|
| 179 |
+
border: 1px solid rgba(255,255,255,0.2);
|
| 180 |
+
border-radius: 12px;
|
| 181 |
+
padding: 1rem 1.5rem;
|
| 182 |
+
text-align: center;
|
| 183 |
+
box-shadow: 0 4px 15px rgba(0,0,0,0.1);
|
| 184 |
+
transition: all 0.3s ease;
|
| 185 |
+
}
|
| 186 |
+
|
| 187 |
+
.credit-badge:hover {
|
| 188 |
+
background: rgba(255,255,255,0.15);
|
| 189 |
+
transform: translateY(-2px);
|
| 190 |
+
box-shadow: 0 6px 20px rgba(0,0,0,0.15);
|
| 191 |
+
}
|
| 192 |
+
|
| 193 |
+
.credit-text {
|
| 194 |
+
display: block;
|
| 195 |
+
font-size: 0.85rem;
|
| 196 |
+
color: rgba(255,255,255,0.8);
|
| 197 |
+
margin-bottom: 0.25rem;
|
| 198 |
+
}
|
| 199 |
+
|
| 200 |
+
.developer-name {
|
| 201 |
+
display: block;
|
| 202 |
+
font-size: 1.1rem;
|
| 203 |
+
color: #64B5F6;
|
| 204 |
+
font-weight: 700;
|
| 205 |
+
}
|
| 206 |
+
|
| 207 |
+
.social-links {
|
| 208 |
+
display: flex;
|
| 209 |
+
gap: 1rem;
|
| 210 |
+
justify-content: flex-end;
|
| 211 |
+
align-items: center;
|
| 212 |
+
}
|
| 213 |
+
|
| 214 |
+
.social-link {
|
| 215 |
+
display: flex;
|
| 216 |
+
align-items: center;
|
| 217 |
+
gap: 0.5rem;
|
| 218 |
+
padding: 0.75rem 1rem;
|
| 219 |
+
border-radius: 8px;
|
| 220 |
+
text-decoration: none;
|
| 221 |
+
transition: all 0.3s ease;
|
| 222 |
+
border: 1px solid rgba(255,255,255,0.2);
|
| 223 |
+
background: rgba(255,255,255,0.05);
|
| 224 |
+
color: #ffffff;
|
| 225 |
+
font-weight: 500;
|
| 226 |
+
}
|
| 227 |
+
|
| 228 |
+
.social-link:hover {
|
| 229 |
+
color: #ffffff;
|
| 230 |
+
text-decoration: none;
|
| 231 |
+
transform: translateY(-2px);
|
| 232 |
+
box-shadow: 0 4px 12px rgba(0,0,0,0.2);
|
| 233 |
+
}
|
| 234 |
+
|
| 235 |
+
.github-link:hover {
|
| 236 |
+
background: #333333;
|
| 237 |
+
border-color: #333333;
|
| 238 |
+
}
|
| 239 |
+
|
| 240 |
+
.linkedin-link:hover {
|
| 241 |
+
background: #0077B5;
|
| 242 |
+
border-color: #0077B5;
|
| 243 |
+
}
|
| 244 |
+
|
| 245 |
+
.social-link i {
|
| 246 |
+
font-size: 1.2rem;
|
| 247 |
+
}
|
| 248 |
+
|
| 249 |
+
/* Dark mode footer adjustments */
|
| 250 |
+
[data-theme="dark"] .modern-footer {
|
| 251 |
+
background: #0d1117 !important; /* solid dark, no glassy gradient */
|
| 252 |
+
}
|
| 253 |
+
|
| 254 |
+
[data-theme="dark"] .modern-footer::before {
|
| 255 |
+
background: transparent !important; /* remove glossy top line */
|
| 256 |
+
}
|
| 257 |
+
|
| 258 |
+
[data-theme="dark"] .credit-badge {
|
| 259 |
+
background: rgba(255,255,255,0.08);
|
| 260 |
+
border-color: rgba(255,255,255,0.15);
|
| 261 |
+
}
|
| 262 |
+
|
| 263 |
+
[data-theme="dark"] .social-link {
|
| 264 |
+
background: rgba(255,255,255,0.03);
|
| 265 |
+
border-color: rgba(255,255,255,0.15);
|
| 266 |
+
}
|
| 267 |
+
|
| 268 |
+
/* Responsive footer */
|
| 269 |
+
@media (max-width: 768px) {
|
| 270 |
+
.modern-footer {
|
| 271 |
+
padding: 2rem 0 1.5rem;
|
| 272 |
+
text-align: center;
|
| 273 |
+
}
|
| 274 |
+
|
| 275 |
+
.social-links {
|
| 276 |
+
justify-content: center;
|
| 277 |
+
margin-top: 1rem;
|
| 278 |
+
}
|
| 279 |
+
|
| 280 |
+
.footer-credit {
|
| 281 |
+
margin: 1.5rem 0;
|
| 282 |
+
}
|
| 283 |
+
}
|
| 284 |
+
|
| 285 |
+
/* Card styling */
|
| 286 |
+
.card {
|
| 287 |
+
border: none;
|
| 288 |
+
box-shadow: 0 4px 6px rgba(0,0,0,0.1);
|
| 289 |
+
border-radius: 10px;
|
| 290 |
+
transition: transform 0.2s ease, box-shadow 0.2s ease;
|
| 291 |
+
}
|
| 292 |
+
|
| 293 |
+
.card:hover {
|
| 294 |
+
transform: translateY(-2px);
|
| 295 |
+
box-shadow: 0 8px 15px rgba(0,0,0,0.15);
|
| 296 |
+
}
|
| 297 |
+
|
| 298 |
+
.card-header {
|
| 299 |
+
background: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-hover) 100%);
|
| 300 |
+
color: white;
|
| 301 |
+
border-radius: 10px 10px 0 0 !important;
|
| 302 |
+
border: none;
|
| 303 |
+
}
|
| 304 |
+
|
| 305 |
+
/* Button styling */
|
| 306 |
+
.btn {
|
| 307 |
+
border-radius: 8px;
|
| 308 |
+
font-weight: 500;
|
| 309 |
+
transition: all 0.3s ease;
|
| 310 |
+
border: none;
|
| 311 |
+
}
|
| 312 |
+
|
| 313 |
+
.btn:hover {
|
| 314 |
+
transform: translateY(-1px);
|
| 315 |
+
box-shadow: 0 4px 8px rgba(0,0,0,0.2);
|
| 316 |
+
}
|
| 317 |
+
|
| 318 |
+
.btn-primary {
|
| 319 |
+
background: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-hover) 100%);
|
| 320 |
+
}
|
| 321 |
+
|
| 322 |
+
.btn-success {
|
| 323 |
+
background: linear-gradient(135deg, var(--success-color) 0%, #388E3C 100%);
|
| 324 |
+
}
|
| 325 |
+
|
| 326 |
+
.btn-info {
|
| 327 |
+
background: linear-gradient(135deg, var(--info-color) 0%, #1976D2 100%);
|
| 328 |
+
}
|
| 329 |
+
|
| 330 |
+
/* Form styling */
|
| 331 |
+
.form-control, .form-select {
|
| 332 |
+
border-radius: 8px;
|
| 333 |
+
border: 2px solid #e0e0e0;
|
| 334 |
+
transition: border-color 0.3s ease, box-shadow 0.3s ease;
|
| 335 |
+
}
|
| 336 |
+
|
| 337 |
+
.form-control:focus, .form-select:focus {
|
| 338 |
+
border-color: var(--primary-color);
|
| 339 |
+
box-shadow: 0 0 0 0.2rem rgba(25, 118, 210, 0.25);
|
| 340 |
+
}
|
| 341 |
+
|
| 342 |
+
/* Alert styling */
|
| 343 |
+
.alert {
|
| 344 |
+
border-radius: 10px;
|
| 345 |
+
border: none;
|
| 346 |
+
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
| 347 |
+
}
|
| 348 |
+
|
| 349 |
+
.alert-info {
|
| 350 |
+
background: linear-gradient(135deg, #E3F2FD 0%, #BBDEFB 100%);
|
| 351 |
+
color: #1565C0;
|
| 352 |
+
border-left: 5px solid var(--primary-color);
|
| 353 |
+
}
|
| 354 |
+
|
| 355 |
+
.alert-warning {
|
| 356 |
+
background: linear-gradient(135deg, #FFF8E1 0%, #FFECB3 100%);
|
| 357 |
+
color: #E65100;
|
| 358 |
+
border-left: 5px solid var(--warning-color);
|
| 359 |
+
}
|
| 360 |
+
|
| 361 |
+
.alert-success {
|
| 362 |
+
background: linear-gradient(135deg, #E8F5E9 0%, #C8E6C9 100%);
|
| 363 |
+
color: #2E7D32;
|
| 364 |
+
border-left: 5px solid var(--success-color);
|
| 365 |
+
}
|
| 366 |
+
|
| 367 |
+
.alert-danger {
|
| 368 |
+
background: linear-gradient(135deg, #FFEBEE 0%, #FFCDD2 100%);
|
| 369 |
+
color: #C62828;
|
| 370 |
+
border-left: 5px solid var(--danger-color);
|
| 371 |
+
}
|
| 372 |
+
|
| 373 |
+
/* Tab styling */
|
| 374 |
+
.nav-tabs {
|
| 375 |
+
border-bottom: 2px solid #e0e0e0;
|
| 376 |
+
}
|
| 377 |
+
|
| 378 |
+
.nav-tabs .nav-link {
|
| 379 |
+
border: none;
|
| 380 |
+
border-radius: 8px 8px 0 0;
|
| 381 |
+
margin-right: 5px;
|
| 382 |
+
color: var(--secondary-color);
|
| 383 |
+
font-weight: 500;
|
| 384 |
+
transition: all 0.3s ease;
|
| 385 |
+
}
|
| 386 |
+
|
| 387 |
+
.nav-tabs .nav-link:hover {
|
| 388 |
+
border-color: transparent;
|
| 389 |
+
background-color: rgba(25, 118, 210, 0.1);
|
| 390 |
+
}
|
| 391 |
+
|
| 392 |
+
.nav-tabs .nav-link.active {
|
| 393 |
+
background: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-hover) 100%);
|
| 394 |
+
color: white;
|
| 395 |
+
border-color: transparent;
|
| 396 |
+
}
|
| 397 |
+
|
| 398 |
+
/* Statistics cards */
|
| 399 |
+
.card.text-center {
|
| 400 |
+
background: white;
|
| 401 |
+
border: 2px solid #e0e0e0;
|
| 402 |
+
}
|
| 403 |
+
|
| 404 |
+
.card.text-center:hover {
|
| 405 |
+
border-color: var(--primary-color);
|
| 406 |
+
transform: scale(1.02);
|
| 407 |
+
}
|
| 408 |
+
|
| 409 |
+
/* Footer styling */
|
| 410 |
+
footer {
|
| 411 |
+
background: linear-gradient(135deg, var(--dark-color) 0%, #1a1a1a 100%) !important;
|
| 412 |
+
margin-top: auto;
|
| 413 |
+
}
|
| 414 |
+
|
| 415 |
+
/* Loading spinner */
|
| 416 |
+
.spinner-border {
|
| 417 |
+
color: var(--primary-color);
|
| 418 |
+
}
|
| 419 |
+
|
| 420 |
+
/* Results container */
|
| 421 |
+
#resultsContainer {
|
| 422 |
+
min-height: 200px;
|
| 423 |
+
}
|
| 424 |
+
|
| 425 |
+
/* Dark theme styles */
|
| 426 |
+
[data-theme="dark"] {
|
| 427 |
+
background: var(--gradient-subtle);
|
| 428 |
+
color: var(--dark-color);
|
| 429 |
+
}
|
| 430 |
+
|
| 431 |
+
[data-theme="dark"] .card {
|
| 432 |
+
background-color: #2d2d2d;
|
| 433 |
+
color: var(--dark-color);
|
| 434 |
+
}
|
| 435 |
+
|
| 436 |
+
[data-theme="dark"] .card-header {
|
| 437 |
+
background: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-hover) 100%);
|
| 438 |
+
}
|
| 439 |
+
|
| 440 |
+
[data-theme="dark"] .form-control,
|
| 441 |
+
[data-theme="dark"] .form-select {
|
| 442 |
+
background-color: #2d2d2d;
|
| 443 |
+
color: var(--dark-color);
|
| 444 |
+
border-color: #555;
|
| 445 |
+
}
|
| 446 |
+
|
| 447 |
+
[data-theme="dark"] .form-control:focus,
|
| 448 |
+
[data-theme="dark"] .form-select:focus {
|
| 449 |
+
background-color: #2d2d2d;
|
| 450 |
+
color: var(--dark-color);
|
| 451 |
+
border-color: var(--primary-color);
|
| 452 |
+
}
|
| 453 |
+
|
| 454 |
+
[data-theme="dark"] .nav-tabs {
|
| 455 |
+
border-bottom-color: #555;
|
| 456 |
+
}
|
| 457 |
+
|
| 458 |
+
[data-theme="dark"] .nav-tabs .nav-link {
|
| 459 |
+
color: var(--dark-color);
|
| 460 |
+
}
|
| 461 |
+
|
| 462 |
+
[data-theme="dark"] .nav-tabs .nav-link:hover {
|
| 463 |
+
background-color: rgba(100, 181, 246, 0.1);
|
| 464 |
+
}
|
| 465 |
+
|
| 466 |
+
[data-theme="dark"] .card.text-center {
|
| 467 |
+
background-color: #2d2d2d;
|
| 468 |
+
border-color: #555;
|
| 469 |
+
color: var(--dark-color);
|
| 470 |
+
}
|
| 471 |
+
|
| 472 |
+
[data-theme="dark"] .alert-info {
|
| 473 |
+
background: linear-gradient(135deg, #1a3c5a 0%, #0d2a3f 100%);
|
| 474 |
+
color: var(--primary-color);
|
| 475 |
+
}
|
| 476 |
+
|
| 477 |
+
[data-theme="dark"] .alert-warning {
|
| 478 |
+
background: linear-gradient(135deg, #3e2e00 0%, #2a1f00 100%);
|
| 479 |
+
color: var(--warning-color);
|
| 480 |
+
}
|
| 481 |
+
|
| 482 |
+
[data-theme="dark"] .alert-success {
|
| 483 |
+
background: linear-gradient(135deg, #1b3525 0%, #0f1f15 100%);
|
| 484 |
+
color: var(--success-color);
|
| 485 |
+
}
|
| 486 |
+
|
| 487 |
+
[data-theme="dark"] .alert-danger {
|
| 488 |
+
background: linear-gradient(135deg, #4A1515 0%, #2a0a0a 100%);
|
| 489 |
+
color: var(--danger-color);
|
| 490 |
+
}
|
| 491 |
+
|
| 492 |
+
/* Mobile responsiveness */
|
| 493 |
+
@media (max-width: 768px) {
|
| 494 |
+
.container-fluid {
|
| 495 |
+
padding: 10px;
|
| 496 |
+
}
|
| 497 |
+
|
| 498 |
+
.card {
|
| 499 |
+
margin-bottom: 15px;
|
| 500 |
+
}
|
| 501 |
+
|
| 502 |
+
.btn {
|
| 503 |
+
margin-bottom: 10px;
|
| 504 |
+
}
|
| 505 |
+
|
| 506 |
+
.display-4 {
|
| 507 |
+
font-size: 2rem;
|
| 508 |
+
}
|
| 509 |
+
|
| 510 |
+
.lead {
|
| 511 |
+
font-size: 1rem;
|
| 512 |
+
}
|
| 513 |
+
}
|
| 514 |
+
|
| 515 |
+
/* Animation classes */
|
| 516 |
+
.fade-in {
|
| 517 |
+
animation: fadeIn 0.5s ease-in;
|
| 518 |
+
}
|
| 519 |
+
|
| 520 |
+
@keyframes fadeIn {
|
| 521 |
+
from { opacity: 0; transform: translateY(20px); }
|
| 522 |
+
to { opacity: 1; transform: translateY(0); }
|
| 523 |
+
}
|
| 524 |
+
|
| 525 |
+
.slide-in {
|
| 526 |
+
animation: slideIn 0.3s ease-out;
|
| 527 |
+
}
|
| 528 |
+
|
| 529 |
+
@keyframes slideIn {
|
| 530 |
+
from { transform: translateX(-100%); }
|
| 531 |
+
to { transform: translateX(0); }
|
| 532 |
+
}
|
| 533 |
+
|
| 534 |
+
/* Custom scrollbar */
|
| 535 |
+
::-webkit-scrollbar {
|
| 536 |
+
width: 8px;
|
| 537 |
+
}
|
| 538 |
+
|
| 539 |
+
::-webkit-scrollbar-track {
|
| 540 |
+
background: #f1f1f1;
|
| 541 |
+
border-radius: 4px;
|
| 542 |
+
}
|
| 543 |
+
|
| 544 |
+
::-webkit-scrollbar-thumb {
|
| 545 |
+
background: var(--primary-color);
|
| 546 |
+
border-radius: 4px;
|
| 547 |
+
}
|
| 548 |
+
|
| 549 |
+
::-webkit-scrollbar-thumb:hover {
|
| 550 |
+
background: var(--primary-hover);
|
| 551 |
+
}
|
| 552 |
+
|
| 553 |
+
[data-theme="dark"] ::-webkit-scrollbar-track {
|
| 554 |
+
background: #2d2d2d;
|
| 555 |
+
}
|
| 556 |
+
|
| 557 |
+
[data-theme="dark"] ::-webkit-scrollbar-thumb {
|
| 558 |
+
background: var(--primary-color);
|
| 559 |
+
}
|
| 560 |
+
|
| 561 |
+
/* Table styling */
|
| 562 |
+
.table {
|
| 563 |
+
border-radius: 8px;
|
| 564 |
+
overflow: hidden;
|
| 565 |
+
}
|
| 566 |
+
|
| 567 |
+
.table thead th {
|
| 568 |
+
background: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-hover) 100%);
|
| 569 |
+
color: white;
|
| 570 |
+
border: none;
|
| 571 |
+
font-weight: 600;
|
| 572 |
+
}
|
| 573 |
+
|
| 574 |
+
.table tbody tr:hover {
|
| 575 |
+
background-color: rgba(25, 118, 210, 0.1);
|
| 576 |
+
}
|
| 577 |
+
|
| 578 |
+
[data-theme="dark"] .table {
|
| 579 |
+
color: var(--dark-color);
|
| 580 |
+
}
|
| 581 |
+
|
| 582 |
+
[data-theme="dark"] .table tbody tr:hover {
|
| 583 |
+
background-color: rgba(100, 181, 246, 0.1);
|
| 584 |
+
}
|
| 585 |
+
|
| 586 |
+
/* Chart container */
|
| 587 |
+
.chart-container {
|
| 588 |
+
position: relative;
|
| 589 |
+
height: 400px;
|
| 590 |
+
margin: 20px 0;
|
| 591 |
+
}
|
| 592 |
+
|
| 593 |
+
/* Code blocks */
|
| 594 |
+
pre {
|
| 595 |
+
background-color: #f8f9fa;
|
| 596 |
+
border: 1px solid #e9ecef;
|
| 597 |
+
border-radius: 8px;
|
| 598 |
+
padding: 15px;
|
| 599 |
+
overflow-x: auto;
|
| 600 |
+
}
|
| 601 |
+
|
| 602 |
+
[data-theme="dark"] pre {
|
| 603 |
+
background-color: #2d2d2d;
|
| 604 |
+
border-color: #555;
|
| 605 |
+
color: var(--dark-color);
|
| 606 |
+
}
|
| 607 |
+
|
| 608 |
+
/* Badge styling */
|
| 609 |
+
.badge {
|
| 610 |
+
font-size: 0.8em;
|
| 611 |
+
padding: 0.5em 0.75em;
|
| 612 |
+
border-radius: 6px;
|
| 613 |
+
}
|
| 614 |
+
|
| 615 |
+
/* Progress bar */
|
| 616 |
+
.progress {
|
| 617 |
+
height: 8px;
|
| 618 |
+
border-radius: 4px;
|
| 619 |
+
background-color: #e9ecef;
|
| 620 |
+
}
|
| 621 |
+
|
| 622 |
+
.progress-bar {
|
| 623 |
+
background: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-hover) 100%);
|
| 624 |
+
border-radius: 4px;
|
| 625 |
+
}
|
| 626 |
+
|
| 627 |
+
[data-theme="dark"] .progress {
|
| 628 |
+
background-color: #555;
|
| 629 |
+
}
|
| 630 |
+
|
| 631 |
+
/* Tooltip styling */
|
| 632 |
+
.tooltip {
|
| 633 |
+
font-size: 0.875rem;
|
| 634 |
+
}
|
| 635 |
+
|
| 636 |
+
.tooltip-inner {
|
| 637 |
+
background-color: var(--dark-color);
|
| 638 |
+
border-radius: 6px;
|
| 639 |
+
}
|
| 640 |
+
|
| 641 |
+
/* Modal styling */
|
| 642 |
+
.modal-content {
|
| 643 |
+
border-radius: 10px;
|
| 644 |
+
border: none;
|
| 645 |
+
box-shadow: 0 10px 30px rgba(0,0,0,0.3);
|
| 646 |
+
}
|
| 647 |
+
|
| 648 |
+
.modal-header {
|
| 649 |
+
background: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-hover) 100%);
|
| 650 |
+
color: white;
|
| 651 |
+
border-radius: 10px 10px 0 0;
|
| 652 |
+
}
|
| 653 |
+
|
| 654 |
+
[data-theme="dark"] .modal-content {
|
| 655 |
+
background-color: #2d2d2d;
|
| 656 |
+
color: var(--dark-color);
|
| 657 |
+
}
|
| 658 |
+
|
| 659 |
+
/* Utility classes */
|
| 660 |
+
.text-gradient {
|
| 661 |
+
background: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-hover) 100%);
|
| 662 |
+
-webkit-background-clip: text;
|
| 663 |
+
-webkit-text-fill-color: transparent;
|
| 664 |
+
background-clip: text;
|
| 665 |
+
}
|
| 666 |
+
|
| 667 |
+
.shadow-custom {
|
| 668 |
+
box-shadow: 0 8px 25px rgba(0,0,0,0.15);
|
| 669 |
+
}
|
| 670 |
+
|
| 671 |
+
.border-gradient {
|
| 672 |
+
border: 2px solid;
|
| 673 |
+
border-image: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-hover) 100%) 1;
|
| 674 |
+
}
|
| 675 |
+
|
| 676 |
+
/***** Global horizontal scroll guards *****/
|
| 677 |
+
html, body { overflow-x: hidden !important; max-width: 100% !important; }
|
| 678 |
+
.container-fluid, .container { overflow-x: hidden !important; max-width: 100% !important; }
|
| 679 |
+
|
| 680 |
+
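/* Reset dropdown positioning: overrides the earlier "anchor under toggle" rules (wrapper becomes static, menu offsets become auto). NOTE(review): Bootstrap's own default for .dropdown is presumably position: relative; confirm against the Bootstrap version in use. */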
|
| 681 |
+
.navbar-nav .dropdown { position: static !important; }
|
| 682 |
+
.navbar-nav .dropdown-menu { position: absolute !important; top: auto !important; left: auto !important; right: auto !important; }
|
| 683 |
+
|
| 684 |
+
/* Cap dropdown width to viewport and allow wrapping */
|
| 685 |
+
.navbar-nav .dropdown-menu { max-width: calc(100vw - 2rem) !important; overflow-wrap: anywhere; }
|
| 686 |
+
|
| 687 |
+
/* Ensure quick-nav and badges shadows don't trigger scroll */
|
| 688 |
+
.quick-nav, .entity-text-container, .card { overflow: visible !important; }
|
| 689 |
+
|
| 690 |
+
/* --- Dark mode global text legibility --- */
|
| 691 |
+
[data-theme="dark"] body,
|
| 692 |
+
[data-theme="dark"] .container,
|
| 693 |
+
[data-theme="dark"] .container-fluid {
|
| 694 |
+
color: #e6e6e6 !important;
|
| 695 |
+
}
|
| 696 |
+
|
| 697 |
+
/* Links in dark mode */
|
| 698 |
+
[data-theme="dark"] a { color: #82b1ff !important; }
|
| 699 |
+
[data-theme="dark"] a:hover { color: #b3c8ff !important; }
|
| 700 |
+
|
| 701 |
+
/* Cards */
|
| 702 |
+
[data-theme="dark"] .card { background-color: #1f1f1f !important; color: #e8e8e8 !important; border-color: rgba(255,255,255,0.08) !important; }
|
| 703 |
+
[data-theme="dark"] .card-header { background-color: #242424 !important; color: #e8e8e8 !important; border-bottom-color: rgba(255,255,255,0.08) !important; }
|
| 704 |
+
[data-theme="dark"] .card .text-muted { color: #b0b0b0 !important; }
|
| 705 |
+
|
| 706 |
+
/* Alerts */
|
| 707 |
+
[data-theme="dark"] .alert { background-color: #262626 !important; color: #f0f0f0 !important; border-color: rgba(255,255,255,0.12) !important; }
|
| 708 |
+
[data-theme="dark"] .alert-info { background-color: rgba(33,150,243,0.12) !important; color: #dbe9ff !important; border-color: rgba(33,150,243,0.35) !important; }
|
| 709 |
+
[data-theme="dark"] .alert-warning { background-color: rgba(255,193,7,0.12) !important; color: #ffe6a3 !important; border-color: rgba(255,193,7,0.35) !important; }
|
| 710 |
+
|
| 711 |
+
/* Forms */
|
| 712 |
+
[data-theme="dark"] .form-control,
|
| 713 |
+
[data-theme="dark"] .form-select,
|
| 714 |
+
[data-theme="dark"] textarea.form-control {
|
| 715 |
+
background-color: #1e1e1e !important;
|
| 716 |
+
color: #f0f0f0 !important;
|
| 717 |
+
border-color: rgba(255,255,255,0.15) !important;
|
| 718 |
+
}
|
| 719 |
+
[data-theme="dark"] .form-control:focus,
|
| 720 |
+
[data-theme="dark"] .form-select:focus { box-shadow: 0 0 0 0.25rem rgba(100,181,246,0.25) !important; border-color: #64B5F6 !important; }
|
| 721 |
+
[data-theme="dark"] ::placeholder { color: #b8b8b8 !important; opacity: 1 !important; }
|
| 722 |
+
|
| 723 |
+
/* Tables */
|
| 724 |
+
[data-theme="dark"] .table { color: #e6e6e6 !important; }
|
| 725 |
+
[data-theme="dark"] .table-striped>tbody>tr:nth-of-type(odd) { --bs-table-accent-bg: rgba(255,255,255,0.04) !important; color: #e6e6e6 !important; }
|
| 726 |
+
[data-theme="dark"] .table-hover tbody tr:hover { background-color: rgba(255,255,255,0.06) !important; }
|
| 727 |
+
|
| 728 |
+
/* Badges and small chips */
|
| 729 |
+
[data-theme="dark"] .badge { filter: brightness(1.05) contrast(1.05); }
|
| 730 |
+
|
| 731 |
+
/* Home route: improve dark mode contrast for CHOOSE AN OPERATION header */
|
| 732 |
+
[data-theme="dark"] .card-header.bg-primary,
|
| 733 |
+
[data-theme="dark"] .card-header.bg-primary * {
|
| 734 |
+
color: #ffffff !important;
|
| 735 |
+
}
|
| 736 |
+
|
| 737 |
+
[data-theme="dark"] .card-header.bg-primary { filter: brightness(1.05) contrast(1.1); }
|
| 738 |
+
|
| 739 |
+
/* Dark mode navbar text visibility */
|
| 740 |
+
[data-theme="dark"] .navbar .navbar-brand,
|
| 741 |
+
[data-theme="dark"] .navbar .nav-link,
|
| 742 |
+
[data-theme="dark"] .navbar .dropdown-toggle,
|
| 743 |
+
[data-theme="dark"] .navbar .navbar-toggler-icon::after {
|
| 744 |
+
color: #ffffff !important;
|
| 745 |
+
}
|
| 746 |
+
|
| 747 |
+
/* Strengthen gradient navbar contrast in dark mode */
|
| 748 |
+
[data-theme="dark"] .navbar { filter: brightness(1.05) contrast(1.15); }
|
| 749 |
+
|
| 750 |
+
/* Home: CHOOSE AN OPERATION title contrast in dark mode */
|
| 751 |
+
[data-theme="dark"] .card-header.bg-primary h2,
|
| 752 |
+
[data-theme="dark"] .card-header.bg-primary .mb-0 {
|
| 753 |
+
color: #ffffff !important;
|
| 754 |
+
text-shadow: 0 1px 2px rgba(0,0,0,0.35);
|
| 755 |
+
}
|
| 756 |
+
|
| 757 |
+
/* Dark mode: ensure button text is readable */
|
| 758 |
+
[data-theme="dark"] .btn,
|
| 759 |
+
[data-theme="dark"] .btn * {
|
| 760 |
+
color: #ffffff !important;
|
| 761 |
+
}
|
| 762 |
+
[data-theme="dark"] .btn { text-shadow: 0 1px 2px rgba(0,0,0,0.35); }
|
static/js/api.js
ADDED
|
@@ -0,0 +1,335 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// API utilities for NLP Ultimate Tutorial Flask Application
|
| 2 |
+
|
| 3 |
+
/**
 * Thin JSON client for the NLP Ultimate Tutorial Flask API.
 *
 * Every public helper builds a payload and delegates to {@link NLPAPI#request},
 * which resolves with the parsed JSON body or rejects on a network failure or
 * a non-2xx HTTP status.
 */
class NLPAPI {
    /**
     * @param {string} [baseUrl=''] Prefix prepended to every endpoint path.
     */
    constructor(baseUrl = '') {
        this.baseUrl = baseUrl;
        this.endpoints = {
            // Text processing endpoints
            preprocessing: '/api/preprocessing',
            tokenization: '/api/tokenization',
            posTagging: '/api/pos-tagging',
            namedEntity: '/api/named-entity',
            sentiment: '/api/sentiment',
            summarization: '/api/summarization',
            topicAnalysis: '/api/topic-analysis',
            questionAnswering: '/api/question-answering',
            textGeneration: '/api/text-generation',
            translation: '/api/translation',
            classification: '/api/classification',
            vectorEmbeddings: '/api/vector-embeddings',

            // Utility endpoints
            updateText: '/api/update_current_text',
            getText: '/api/get_current_text',
            textStatistics: '/api/text_statistics'
        };
    }

    /**
     * Generic API request method.
     *
     * @param {string} endpoint Path appended to `baseUrl`.
     * @param {Object} [data={}] Payload, JSON-encoded into the request body.
     *     Ignored for GET/HEAD requests, which must not carry a body.
     * @param {string} [method='POST'] HTTP method.
     * @returns {Promise<*>} Parsed JSON response.
     * @throws {Error} `HTTP error! status: <code>` when the response is not ok;
     *     network and JSON parsing errors are logged and re-thrown unchanged.
     */
    async request(endpoint, data = {}, method = 'POST') {
        try {
            const init = {
                method: method,
                headers: {
                    'Content-Type': 'application/json',
                }
            };

            // fetch() throws a TypeError when a GET or HEAD request has a body,
            // which previously made getCurrentText() fail on every call.
            const verb = String(method).toUpperCase();
            if (verb !== 'GET' && verb !== 'HEAD') {
                init.body = JSON.stringify(data);
            }

            const response = await fetch(this.baseUrl + endpoint, init);

            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`);
            }

            return await response.json();
        } catch (error) {
            console.error(`API request failed for ${endpoint}:`, error);
            throw error;
        }
    }

    /** Text preprocessing; `options` keys are merged into the payload. */
    async preprocessText(text, options = {}) {
        return this.request(this.endpoints.preprocessing, {
            text: text,
            ...options
        });
    }

    /** Tokenization; sends `tokenizer_type`. */
    async tokenizeText(text, tokenizerType = 'word') {
        return this.request(this.endpoints.tokenization, {
            text: text,
            tokenizer_type: tokenizerType
        });
    }

    /** POS tagging; sends `tagger_type`. */
    async posTagText(text, taggerType = 'nltk') {
        return this.request(this.endpoints.posTagging, {
            text: text,
            tagger_type: taggerType
        });
    }

    /** Named entity recognition; sends `model_type`. */
    async recognizeEntities(text, modelType = 'spacy') {
        return this.request(this.endpoints.namedEntity, {
            text: text,
            model_type: modelType
        });
    }

    /** Sentiment analysis; sends `analyzer_type`. */
    async analyzeSentiment(text, analyzerType = 'vader') {
        return this.request(this.endpoints.sentiment, {
            text: text,
            analyzer_type: analyzerType
        });
    }

    /** Text summarization; `options` keys are merged into the payload. */
    async summarizeText(text, method = 'extractive', options = {}) {
        return this.request(this.endpoints.summarization, {
            text: text,
            method: method,
            ...options
        });
    }

    /** Topic analysis; sends `method`. */
    async analyzeTopics(text, method = 'lda') {
        return this.request(this.endpoints.topicAnalysis, {
            text: text,
            method: method
        });
    }

    /** Question answering over `context`; `options` keys are merged into the payload. */
    async answerQuestion(context, question, options = {}) {
        return this.request(this.endpoints.questionAnswering, {
            context: context,
            question: question,
            ...options
        });
    }

    /** Text generation from `prompt`; `options` keys are merged into the payload. */
    async generateText(prompt, options = {}) {
        return this.request(this.endpoints.textGeneration, {
            prompt: prompt,
            ...options
        });
    }

    /** Translation; sends `source_lang` and `target_lang`. */
    async translateText(text, sourceLang = 'auto', targetLang = 'en') {
        return this.request(this.endpoints.translation, {
            text: text,
            source_lang: sourceLang,
            target_lang: targetLang
        });
    }

    /** Classification for a `scenario`; `options` keys are merged into the payload. */
    async classifyText(text, scenario = 'sentiment', options = {}) {
        return this.request(this.endpoints.classification, {
            text: text,
            scenario: scenario,
            ...options
        });
    }

    /** Vector embeddings for `text`, with an optional `query`. */
    async getEmbeddings(text, query = '') {
        return this.request(this.endpoints.vectorEmbeddings, {
            text: text,
            query: query
        });
    }

    // Utility methods

    /** POSTs `text` to the update-current-text endpoint. */
    async updateCurrentText(text) {
        return this.request(this.endpoints.updateText, { text: text });
    }

    /** GETs the current text from the server (no request body is sent). */
    async getCurrentText() {
        return this.request(this.endpoints.getText, {}, 'GET');
    }

    /** POSTs `text` to the text-statistics endpoint. */
    async getTextStatistics(text) {
        return this.request(this.endpoints.textStatistics, { text: text });
    }
}
|
| 163 |
+
|
| 164 |
+
// Batch processing utility
|
| 165 |
+
/**
 * Runs queued tasks strictly one after another.
 *
 * A task is an object with an `execute()` function (sync or async) and
 * optional `onSuccess(result)` / `onError(error)` callbacks.
 */
class BatchProcessor {
    /**
     * @param {*} api Stored on `this.api`; not used by the processor itself.
     */
    constructor(api) {
        this.api = api;
        this.queue = [];
        this.processing = false;
    }

    /**
     * Append a task and start draining the queue if it is idle.
     * @param {{execute: Function, onSuccess?: Function, onError?: Function, result?: *}} task
     */
    addTask(task) {
        this.queue.push(task);
        if (!this.processing) {
            this.processQueue();
        }
    }

    /**
     * Drain the queue in FIFO order.
     *
     * The value returned by `execute()` is stored on `task.result` (unless the
     * task already set `task.result` itself) and passed to `onSuccess`.
     * Anything thrown by `execute()` or `onSuccess` is routed to `onError`.
     */
    async processQueue() {
        this.processing = true;

        try {
            while (this.queue.length > 0) {
                const task = this.queue.shift();
                try {
                    const value = await task.execute();
                    // Previously the return value was dropped, so onSuccess
                    // received undefined unless execute() wrote task.result.
                    if (task.result === undefined) {
                        task.result = value;
                    }
                    if (task.onSuccess) task.onSuccess(task.result);
                } catch (error) {
                    if (task.onError) task.onError(error);
                }
            }
        } finally {
            // Always release the flag: if onError itself throws, a stuck
            // `processing = true` would block every later addTask().
            this.processing = false;
        }
    }
}
|
| 195 |
+
|
| 196 |
+
// Caching utility
|
| 197 |
+
/**
 * Small least-recently-used cache backed by a Map.
 *
 * Map iteration order doubles as recency order: the first key is the least
 * recently used entry, the last key the most recently used one.
 */
class APICache {
    /**
     * @param {number} [maxSize=100] Entry count at which inserting a new key
     *     evicts the least recently used entry.
     */
    constructor(maxSize = 100) {
        this.cache = new Map();
        this.maxSize = maxSize;
    }

    /**
     * Look up `key` and mark it as most recently used.
     * @returns {*} The stored value, or `null` when the key is absent.
     */
    get(key) {
        if (!this.cache.has(key)) {
            return null;
        }

        const value = this.cache.get(key);
        // Re-insert so the key moves to the end of the Map (most recent).
        this.cache.delete(key);
        this.cache.set(key, value);
        return value;
    }

    /**
     * Store `value` under `key` as the most recently used entry.
     * A new key inserted into a full cache evicts the oldest entry first.
     */
    set(key, value) {
        // Map#delete reports whether the key existed.
        const replaced = this.cache.delete(key);

        if (!replaced && this.cache.size >= this.maxSize) {
            const [oldestKey] = this.cache.keys();
            this.cache.delete(oldestKey);
        }

        this.cache.set(key, value);
    }

    /** Remove every entry. */
    clear() {
        this.cache.clear();
    }
}
|
| 229 |
+
|
| 230 |
+
// Rate limiting utility
|
| 231 |
+
/**
 * Sliding-window rate limiter: at most `requestsPerMinute` recorded requests
 * within any trailing 60-second window.
 */
class RateLimiter {
    /**
     * @param {number} [requestsPerMinute=60] Maximum requests per 60 seconds.
     */
    constructor(requestsPerMinute = 60) {
        this.requestsPerMinute = requestsPerMinute;
        this.requests = [];
    }

    /**
     * Resolve once a request slot is available and record the request.
     *
     * After sleeping, the window is evaluated again and the request is stamped
     * with the time it is actually released. Previously the pre-wait timestamp
     * was recorded and the window was not re-checked, which let callers exceed
     * the limit.
     *
     * @returns {Promise<void>}
     */
    async waitIfNeeded() {
        for (;;) {
            const now = Date.now();
            const oneMinuteAgo = now - 60000;

            // Remove old requests
            this.requests = this.requests.filter(time => time > oneMinuteAgo);

            // An empty window never needs to wait; this also keeps a
            // non-positive limit from looping forever.
            if (this.requests.length === 0 || this.requests.length < this.requestsPerMinute) {
                this.requests.push(now);
                return;
            }

            const oldestRequest = Math.min(...this.requests);
            const waitTime = 60000 - (now - oldestRequest);
            await new Promise(resolve => setTimeout(resolve, Math.max(waitTime, 1)));
        }
    }
}
|
| 255 |
+
|
| 256 |
+
// Error handling utility
|
| 257 |
+
/**
 * Converts thrown values into the uniform error payload used by the UI:
 * `{ success: false, error, context, timestamp }`.
 */
class ErrorHandler {
    /**
     * Log `error` and map it to a user-facing error response.
     *
     * Accepts any thrown value: Error instances, plain strings, `null`, or
     * objects without a string `message`. Previously such values crashed
     * inside the handler on `error.message.includes(...)`.
     *
     * @param {*} error The caught value.
     * @param {string} [context=''] Label describing where the error occurred.
     * @returns {{success: boolean, error: string, context: string, timestamp: string}}
     */
    static handle(error, context = '') {
        console.error(`Error in ${context}:`, error);

        const name = error ? error.name : undefined;
        let detail = '';
        if (typeof error === 'string') {
            detail = error;
        } else if (error && typeof error.message === 'string') {
            detail = error.message;
        }

        let message = 'An unexpected error occurred';

        if (name === 'TypeError' && detail.includes('fetch')) {
            message = 'Network error: Unable to connect to the server';
        } else if (detail.includes('HTTP error')) {
            message = `Server error: ${detail}`;
        } else if (detail) {
            message = detail;
        }

        return ErrorHandler.createErrorResponse(message, context);
    }

    /**
     * Build an error payload stamped with the current time in ISO-8601 form.
     *
     * @param {string} message Text stored in the `error` field.
     * @param {string} [context=''] Label describing where the error occurred.
     * @returns {{success: boolean, error: string, context: string, timestamp: string}}
     */
    static createErrorResponse(message, context = '') {
        return {
            success: false,
            error: message,
            context: context,
            timestamp: new Date().toISOString()
        };
    }
}
|
| 288 |
+
|
| 289 |
+
// Progress tracking utility
|
| 290 |
+
/**
 * Tracks a progress counter against a total and notifies listeners on every
 * change with `(percentage, progress, total)`.
 */
class ProgressTracker {
    constructor() {
        this.progress = 0;
        this.total = 0;
        this.callbacks = [];
    }

    /** Set a new total, restart progress at zero and notify listeners. */
    setTotal(total) {
        this.total = total;
        this.progress = 0;
        this.notifyCallbacks();
    }

    /** Advance progress by `amount` (default 1) and notify listeners. */
    increment(amount = 1) {
        this.progress = this.progress + amount;
        this.notifyCallbacks();
    }

    /** Overwrite the progress value and notify listeners. */
    setProgress(progress) {
        this.progress = progress;
        this.notifyCallbacks();
    }

    /** Register a listener; it is only called on subsequent changes. */
    onProgress(callback) {
        this.callbacks.push(callback);
    }

    /** Call every listener with the percentage (0 while total is not positive). */
    notifyCallbacks() {
        let percentage = 0;
        if (this.total > 0) {
            percentage = (this.progress / this.total) * 100;
        }

        // Length is fixed up front so listeners registered during a
        // notification are not invoked in the same round.
        const count = this.callbacks.length;
        for (let i = 0; i < count; i++) {
            this.callbacks[i](percentage, this.progress, this.total);
        }
    }

    /** Zero both progress and total, then notify listeners. */
    reset() {
        this.progress = 0;
        this.total = 0;
        this.notifyCallbacks();
    }
}
|
| 328 |
+
|
| 329 |
+
// Export utilities
|
| 330 |
+
// Publish the API helpers on the global object so inline page scripts can use them.
Object.assign(window, {
    NLPAPI: NLPAPI,
    BatchProcessor: BatchProcessor,
    APICache: APICache,
    RateLimiter: RateLimiter,
    ErrorHandler: ErrorHandler,
    ProgressTracker: ProgressTracker
});
|
static/js/components.js
ADDED
|
@@ -0,0 +1,380 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Component-specific JavaScript for NLP Ultimate Tutorial
|
| 2 |
+
|
| 3 |
+
// POS Tagging functionality
|
| 4 |
+
/**
 * Rendering helpers for part-of-speech tagging results.
 */
class POSTagging {
    /**
     * Escape a value for safe use in HTML text and double-quoted attributes.
     * `null`/`undefined` become the empty string.
     */
    static escapeHtml(value) {
        return String(value == null ? '' : value)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&#39;');
    }

    /**
     * Render tokens as colored chips inside the element with id `containerId`.
     * Does nothing when the element does not exist.
     *
     * Token fields are HTML-escaped before being written through innerHTML,
     * so markup contained in the analysed text is shown literally instead of
     * being interpreted by the browser.
     *
     * @param {Array<{text: string, pos: string, explanation?: string}>} tokens
     * @param {string} containerId
     */
    static highlightTokens(tokens, containerId) {
        const container = document.getElementById(containerId);
        if (!container) return;

        container.innerHTML = tokens.map(token => {
            const color = this.getPOSColor(token.pos);
            const text = this.escapeHtml(token.text);
            const pos = this.escapeHtml(token.pos);
            const explanation = this.escapeHtml(token.explanation || '');
            return `<span class="pos-token" style="background-color: ${color};"
                title="${explanation}">${text}
                <small>(${pos})</small></span>`;
        }).join(' ');
    }

    /**
     * Background color for a POS tag; unknown tags get '#FAFAFA'.
     * @param {string} pos
     * @returns {string} CSS hex color.
     */
    static getPOSColor(pos) {
        const colors = {
            'NOUN': '#E3F2FD', 'PROPN': '#E3F2FD', 'VERB': '#E8F5E9',
            'ADJ': '#FFF8E1', 'ADV': '#F3E5F5', 'ADP': '#EFEBE9',
            'PRON': '#E8EAF6', 'DET': '#E0F7FA', 'CONJ': '#FBE9E7',
            'NUM': '#FFEBEE', 'PART': '#F1F8E9', 'INTJ': '#FFF3E0',
            'PUNCT': '#FAFAFA', 'SYM': '#FAFAFA', 'X': '#FAFAFA'
        };
        // Own-property check: a tag such as "constructor" must not resolve to
        // something inherited from Object.prototype.
        return Object.prototype.hasOwnProperty.call(colors, pos) ? colors[pos] : '#FAFAFA';
    }
}
|
| 28 |
+
|
| 29 |
+
// Named Entity Recognition functionality
|
| 30 |
+
/**
 * Rendering helpers for named entity recognition results.
 */
class NER {
    /**
     * Escape a value for safe use in HTML text and double-quoted attributes.
     * `null`/`undefined` become the empty string.
     */
    static escapeHtml(value) {
        return String(value == null ? '' : value)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&#39;');
    }

    /**
     * Render entities as colored chips inside the element with id
     * `containerId`. Does nothing when the element does not exist.
     *
     * Entity fields are HTML-escaped before being written through innerHTML,
     * so markup contained in the analysed text is shown literally instead of
     * being interpreted by the browser.
     *
     * @param {Array<{text: string, type: string, explanation?: string}>} entities
     * @param {string} containerId
     */
    static highlightEntities(entities, containerId) {
        const container = document.getElementById(containerId);
        if (!container) return;

        container.innerHTML = entities.map(entity => {
            const color = this.getEntityColor(entity.type);
            const text = this.escapeHtml(entity.text);
            const type = this.escapeHtml(entity.type);
            const explanation = this.escapeHtml(entity.explanation || '');
            return `<span class="entity-token" style="background-color: ${color};"
                title="${explanation}">${text}
                <small>(${type})</small></span>`;
        }).join(' ');
    }

    /**
     * Background color for an entity type; unknown types get '#FAFAFA'.
     * @param {string} type
     * @returns {string} CSS hex color.
     */
    static getEntityColor(type) {
        const colors = {
            'PERSON': '#E3F2FD', 'ORG': '#E8F5E9', 'GPE': '#FFF8E1',
            'LOC': '#F3E5F5', 'PRODUCT': '#EFEBE9', 'EVENT': '#E8EAF6',
            'WORK_OF_ART': '#E0F7FA', 'LAW': '#FBE9E7', 'LANGUAGE': '#FFEBEE',
            'DATE': '#F1F8E9', 'TIME': '#FFF3E0', 'PERCENT': '#FAFAFA',
            'MONEY': '#FAFAFA', 'QUANTITY': '#FAFAFA', 'ORDINAL': '#FAFAFA',
            'CARDINAL': '#FAFAFA'
        };
        // Own-property check: a type such as "constructor" must not resolve to
        // something inherited from Object.prototype.
        return Object.prototype.hasOwnProperty.call(colors, type) ? colors[type] : '#FAFAFA';
    }
}
|
| 55 |
+
|
| 56 |
+
// Sentiment Analysis functionality
|
| 57 |
+
/**
 * Rendering helpers for sentiment analysis results.
 * Scores above 0.1 are treated as positive, below -0.1 as negative,
 * everything in between as neutral.
 */
class SentimentAnalysis {
    /**
     * Render the score (three decimals) and its label, both tinted with the
     * sentiment color, into the element with id `containerId`.
     * Does nothing when the element does not exist.
     */
    static createGauge(score, containerId) {
        const target = document.getElementById(containerId);
        if (!target) return;

        const tint = this.getSentimentColor(score);
        const caption = this.getSentimentLabel(score);
        const shownScore = score.toFixed(3);

        target.innerHTML = `
            <div class="sentiment-gauge">
                <div class="sentiment-score" style="color: ${tint};">${shownScore}</div>
                <div class="sentiment-label" style="color: ${tint};">${caption}</div>
            </div>
        `;
    }

    /** Green for positive, red for negative, orange for neutral scores. */
    static getSentimentColor(score) {
        return score > 0.1 ? '#4CAF50'
            : score < -0.1 ? '#F44336'
            : '#FF9800';
    }

    /** 'Positive', 'Negative' or 'Neutral' using the same thresholds as the color. */
    static getSentimentLabel(score) {
        return score > 0.1 ? 'Positive'
            : score < -0.1 ? 'Negative'
            : 'Neutral';
    }
}
|
| 85 |
+
|
| 86 |
+
// Text Generation functionality
|
| 87 |
+
/**
 * Rendering helpers for the text generation page.
 */
class TextGeneration {
    /**
     * Escape a value for safe use in HTML text and double-quoted attributes.
     * `null`/`undefined` become the empty string.
     */
    static escapeHtml(value) {
        return String(value == null ? '' : value)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&#39;');
    }

    /**
     * Show the prompt followed by the generated continuation inside the
     * element with id `containerId`. Does nothing when the element is missing.
     *
     * Both strings are HTML-escaped: the prompt is typed by the user and the
     * continuation is free-form model output, so neither may be interpreted
     * as markup when written through innerHTML.
     */
    static displayGeneratedText(prompt, generated, containerId) {
        const container = document.getElementById(containerId);
        if (!container) return;

        container.innerHTML = `
            <div class="generated-text">
                <span class="prompt-text">${this.escapeHtml(prompt)}</span>
                <span class="generated-content">${this.escapeHtml(generated)}</span>
            </div>
        `;
    }

    /**
     * Render temperature / top-p / max-length range sliders into the element
     * with id `containerId` and keep each slider's value label in sync.
     * Does nothing when the element is missing.
     */
    static createParameterControls(containerId) {
        const container = document.getElementById(containerId);
        if (!container) return;

        container.innerHTML = `
            <div class="row">
                <div class="col-md-4">
                    <label for="temperature" class="form-label">Temperature</label>
                    <input type="range" class="form-range" id="temperature" min="0.1" max="1.5" value="0.7" step="0.1">
                    <div class="d-flex justify-content-between">
                        <small>0.1</small>
                        <small id="temperature-value">0.7</small>
                        <small>1.5</small>
                    </div>
                </div>
                <div class="col-md-4">
                    <label for="top-p" class="form-label">Top-p</label>
                    <input type="range" class="form-range" id="top-p" min="0.1" max="1.0" value="0.9" step="0.1">
                    <div class="d-flex justify-content-between">
                        <small>0.1</small>
                        <small id="top-p-value">0.9</small>
                        <small>1.0</small>
                    </div>
                </div>
                <div class="col-md-4">
                    <label for="max-length" class="form-label">Max Length</label>
                    <input type="range" class="form-range" id="max-length" min="30" max="250" value="100" step="10">
                    <div class="d-flex justify-content-between">
                        <small>30</small>
                        <small id="max-length-value">100</small>
                        <small>250</small>
                    </div>
                </div>
            </div>
        `;

        // Add event listeners for parameter updates; each slider has a
        // matching "<id>-value" label created by the markup above.
        ['temperature', 'top-p', 'max-length'].forEach(param => {
            const slider = document.getElementById(param);
            const valueDisplay = document.getElementById(`${param}-value`);
            if (slider && valueDisplay) {
                slider.addEventListener('input', () => {
                    valueDisplay.textContent = slider.value;
                });
            }
        });
    }
}
|
| 148 |
+
|
| 149 |
+
// Translation functionality
|
| 150 |
+
/**
 * Rendering helpers for the translation page.
 */
class Translation {
    /**
     * Escape a value for safe use in HTML text and double-quoted attributes.
     * `null`/`undefined` become the empty string.
     */
    static escapeHtml(value) {
        return String(value == null ? '' : value)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&#39;');
    }

    /**
     * Show source and translated text side by side, each under a language
     * badge, inside the element with id `containerId`. Does nothing when the
     * element is missing.
     *
     * All four strings are HTML-escaped before being written through
     * innerHTML so user text and model output are displayed literally.
     */
    static displayTranslationPair(sourceText, targetText, sourceLang, targetLang, containerId) {
        const container = document.getElementById(containerId);
        if (!container) return;

        const esc = value => this.escapeHtml(value);

        container.innerHTML = `
            <div class="translation-pair">
                <div class="source-text">
                    <div class="language-badge" style="background-color: var(--primary-color); color: white;">
                        ${esc(sourceLang)}
                    </div>
                    <p>${esc(sourceText)}</p>
                </div>
                <div class="target-text">
                    <div class="language-badge" style="background-color: var(--success-color); color: white;">
                        ${esc(targetLang)}
                    </div>
                    <p>${esc(targetText)}</p>
                </div>
            </div>
        `;
    }

    /**
     * Render source/target language dropdowns into the element with id
     * `containerId`. The source list starts with an "Auto-detect" entry; the
     * target list preselects English. Does nothing when the element is missing.
     */
    static createLanguageSelector(containerId) {
        const container = document.getElementById(containerId);
        if (!container) return;

        const languages = [
            { code: 'en', name: 'English' },
            { code: 'es', name: 'Spanish' },
            { code: 'fr', name: 'French' },
            { code: 'de', name: 'German' },
            { code: 'ru', name: 'Russian' },
            { code: 'zh', name: 'Chinese' },
            { code: 'ar', name: 'Arabic' },
            { code: 'hi', name: 'Hindi' },
            { code: 'ja', name: 'Japanese' },
            { code: 'pt', name: 'Portuguese' },
            { code: 'it', name: 'Italian' }
        ];

        container.innerHTML = `
            <div class="row">
                <div class="col-md-6">
                    <label for="source-lang" class="form-label">Source Language</label>
                    <select id="source-lang" class="form-select">
                        <option value="auto">Auto-detect</option>
                        ${languages.map(lang => `<option value="${lang.code}">${lang.name}</option>`).join('')}
                    </select>
                </div>
                <div class="col-md-6">
                    <label for="target-lang" class="form-label">Target Language</label>
                    <select id="target-lang" class="form-select">
                        ${languages.map(lang => `<option value="${lang.code}" ${lang.code === 'en' ? 'selected' : ''}>${lang.name}</option>`).join('')}
                    </select>
                </div>
            </div>
        `;
    }
}
|
| 210 |
+
|
| 211 |
+
// Classification functionality
|
| 212 |
+
/**
 * Rendering helpers for text classification results.
 */
class Classification {
    /**
     * Escape a value for safe use in HTML text and double-quoted attributes.
     * `null`/`undefined` become the empty string.
     */
    static escapeHtml(value) {
        return String(value == null ? '' : value)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&#39;');
    }

    /**
     * Render one row per result (label plus score as a percentage with one
     * decimal) inside the element with id `containerId`. Does nothing when
     * the element is missing.
     *
     * Labels are HTML-escaped before being written through innerHTML.
     *
     * @param {Array<{label: string, score: number}>} results
     * @param {string} containerId
     */
    static displayResults(results, containerId) {
        const container = document.getElementById(containerId);
        if (!container) return;

        container.innerHTML = results.map(result => `
            <div class="classification-result">
                <div class="classification-label">${this.escapeHtml(result.label)}</div>
                <div class="classification-score" style="color: ${this.getScoreColor(result.score)};">
                    ${(result.score * 100).toFixed(1)}%
                </div>
            </div>
        `).join('');
    }

    /** Green above 0.7, orange above 0.4, red otherwise. */
    static getScoreColor(score) {
        if (score > 0.7) return '#4CAF50';
        if (score > 0.4) return '#FF9800';
        return '#F44336';
    }
}
|
| 233 |
+
|
| 234 |
+
// Vector Embeddings functionality
|
| 235 |
+
/**
 * Rendering helpers for semantic search / vector embedding results.
 */
class VectorEmbeddings {
    /**
     * Escape a value for safe use in HTML text and double-quoted attributes.
     * `null`/`undefined` become the empty string.
     */
    static escapeHtml(value) {
        return String(value == null ? '' : value)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&#39;');
    }

    /**
     * Render one card per search hit (text, similarity percentage and a
     * progress bar) inside the element with id `containerId`. Does nothing
     * when the element is missing.
     *
     * Result text is HTML-escaped before being written through innerHTML.
     * The bar width is clamped to 0-100% so negative or >1 scores cannot
     * produce an invalid CSS width; the printed percentage is left as is.
     *
     * @param {Array<{text: string, score: number}>} results
     * @param {string} containerId
     */
    static displaySearchResults(results, containerId) {
        const container = document.getElementById(containerId);
        if (!container) return;

        container.innerHTML = results.map(result => {
            const percent = result.score * 100;
            const barWidth = Number.isFinite(percent) ? Math.min(100, Math.max(0, percent)) : 0;
            return `
            <div class="search-result">
                <div class="result-text">${this.escapeHtml(result.text)}</div>
                <div class="search-score">Similarity: ${percent.toFixed(1)}%</div>
                <div class="progress mt-2" style="height: 8px;">
                    <div class="progress-bar" role="progressbar"
                        style="width: ${barWidth}%; background-color: ${this.getScoreColor(result.score)};">
                    </div>
                </div>
            </div>
        `;
        }).join('');
    }

    /** Green above 0.7, orange above 0.4, red otherwise. */
    static getScoreColor(score) {
        if (score > 0.7) return '#4CAF50';
        if (score > 0.4) return '#FF9800';
        return '#F44336';
    }
}
|
| 259 |
+
|
| 260 |
+
// Chart utilities
|
| 261 |
+
/**
 * Convenience factories around the global Chart constructor.
 *
 * Every factory looks up the canvas by id, returns `null` when it is missing,
 * and otherwise creates a chart whose options are the per-type defaults
 * shallow-merged with the caller's `options` (caller keys win).
 */
class ChartUtils {
    /**
     * Shared implementation behind the public factories.
     * @private
     */
    static _build(type, canvasId, data, defaults, overrides) {
        const canvas = document.getElementById(canvasId);
        if (!canvas) {
            return null;
        }

        const config = {
            type: type,
            data: data,
            options: Object.assign({}, defaults, overrides)
        };
        return new Chart(canvas, config);
    }

    /** Bar chart: legend on top, y axis starting at zero. */
    static createBarChart(canvasId, data, options = {}) {
        return ChartUtils._build('bar', canvasId, data, {
            responsive: true,
            maintainAspectRatio: false,
            plugins: { legend: { position: 'top' } },
            scales: { y: { beginAtZero: true } }
        }, options);
    }

    /** Pie chart: legend at the bottom. */
    static createPieChart(canvasId, data, options = {}) {
        return ChartUtils._build('pie', canvasId, data, {
            responsive: true,
            maintainAspectRatio: false,
            plugins: { legend: { position: 'bottom' } }
        }, options);
    }

    /** Line chart: legend on top, y axis starting at zero. */
    static createLineChart(canvasId, data, options = {}) {
        return ChartUtils._build('line', canvasId, data, {
            responsive: true,
            maintainAspectRatio: false,
            plugins: { legend: { position: 'top' } },
            scales: { y: { beginAtZero: true } }
        }, options);
    }
}
|
| 335 |
+
|
| 336 |
+
// Animation utilities
|
| 337 |
+
/**
 * CSS-transition based entrance animations.
 *
 * Each helper first writes the starting style and the transition, then
 * writes the final style from a 10 ms timer so the transition has a start
 * state to animate from.
 */
class AnimationUtils {
    /** Fade `element` from transparent to opaque over `duration` ms. */
    static fadeIn(element, duration = 500) {
        const { style } = element;
        style.opacity = '0';
        style.transition = `opacity ${duration}ms ease-in`;

        setTimeout(() => { style.opacity = '1'; }, 10);
    }

    /**
     * Slide `element` in horizontally over `duration` ms.
     * `direction` 'left' starts off-screen left; any other value starts right.
     */
    static slideIn(element, direction = 'left', duration = 500) {
        const { style } = element;
        const startOffset = direction === 'left' ? '-100%' : '100%';
        style.transform = `translateX(${startOffset})`;
        style.transition = `transform ${duration}ms ease-out`;

        setTimeout(() => { style.transform = 'translateX(0)'; }, 10);
    }

    /** Scale `element` up from 30% while fading it in over `duration` ms. */
    static bounceIn(element, duration = 600) {
        const { style } = element;
        style.transform = 'scale(0.3)';
        style.opacity = '0';
        style.transition = `all ${duration}ms ease-out`;

        setTimeout(() => {
            style.transform = 'scale(1)';
            style.opacity = '1';
        }, 10);
    }
}
|
| 368 |
+
|
| 369 |
+
// Export classes for global use
|
| 370 |
+
// Single global namespace through which page scripts reach the component helpers.
window.NLPComponents = {
    POSTagging: POSTagging,
    NER: NER,
    SentimentAnalysis: SentimentAnalysis,
    TextGeneration: TextGeneration,
    Translation: Translation,
    Classification: Classification,
    VectorEmbeddings: VectorEmbeddings,
    ChartUtils: ChartUtils,
    AnimationUtils: AnimationUtils
};
|
static/js/main.js
ADDED
|
@@ -0,0 +1,370 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Main JavaScript for NLP Ultimate Tutorial
|
| 2 |
+
|
| 3 |
+
// Theme management
|
| 4 |
+
/**
 * Flip the page between the 'dark' and 'light' themes.
 *
 * Writes the new value to the `data-theme` attribute of the root element and
 * to localStorage under 'theme', then updates the #theme-icon element (sun
 * while dark, moon while light) when it exists.
 */
function toggleTheme() {
    const root = document.documentElement;
    const switchingToDark = root.getAttribute('data-theme') !== 'dark';
    const newTheme = switchingToDark ? 'dark' : 'light';

    root.setAttribute('data-theme', newTheme);
    localStorage.setItem('theme', newTheme);

    // Update theme icon
    const icon = document.getElementById('theme-icon');
    if (!icon) {
        return;
    }
    icon.className = switchingToDark ? 'fas fa-sun' : 'fas fa-moon';
}
|
| 17 |
+
|
| 18 |
+
// Initialize theme on page load
|
| 19 |
+
/**
 * Apply the persisted theme when the page loads.
 *
 * Uses the raw string stored under the `theme` localStorage key, falling
 * back to 'light' when nothing is stored, and keeps the `#theme-icon`
 * glyph (if present) in sync with it.
 */
function initializeTheme() {
    const stored = localStorage.getItem('theme');
    const activeTheme = stored || 'light';
    document.documentElement.setAttribute('data-theme', activeTheme);

    const icon = document.getElementById('theme-icon');
    if (!icon) {
        return;
    }
    if (activeTheme === 'dark') {
        icon.className = 'fas fa-sun';
    } else {
        icon.className = 'fas fa-moon';
    }
}
|
| 28 |
+
|
| 29 |
+
// Loading state management
|
| 30 |
+
/**
 * Replace the content of an element with a Bootstrap loading spinner.
 *
 * @param {string} elementId - id of the element to fill; a missing element is ignored.
 */
function showLoading(elementId) {
    const target = document.getElementById(elementId);
    if (!target) {
        return;
    }
    // hideLoading() recognises this markup by its 'spinner-border' class.
    target.innerHTML = `
        <div class="text-center py-4">
            <div class="spinner-border text-primary" role="status">
                <span class="visually-hidden">Loading...</span>
            </div>
            <p class="mt-2">Processing your request...</p>
        </div>
    `;
}
|
| 43 |
+
|
| 44 |
+
/**
 * Clear an element, but only while it still contains spinner markup.
 *
 * The content is left alone when it does not include 'spinner-border',
 * so results or error messages rendered in the meantime are not wiped.
 *
 * @param {string} elementId - id of the element to clear; a missing element is ignored.
 */
function hideLoading(elementId) {
    const target = document.getElementById(elementId);
    if (!target) {
        return;
    }
    const stillLoading = target.innerHTML.includes('spinner-border');
    if (stillLoading) {
        target.innerHTML = '';
    }
}
|
| 50 |
+
|
| 51 |
+
// Error handling
|
| 52 |
+
/**
 * Render a red Bootstrap alert with the given message.
 *
 * NOTE: `message` is interpolated into innerHTML as-is (no escaping), so
 * any markup it contains is rendered as HTML.
 *
 * @param {string} message - text (or markup) shown after the "Error:" label.
 * @param {string} [elementId='resultsContainer'] - id of the element to fill;
 *     a missing element is ignored.
 */
function showError(message, elementId = 'resultsContainer') {
    const target = document.getElementById(elementId);
    if (!target) {
        return;
    }
    const alertMarkup = `
        <div class="alert alert-danger fade-in">
            <i class="fas fa-exclamation-triangle"></i>
            <strong>Error:</strong> ${message}
        </div>
    `;
    target.innerHTML = alertMarkup;
}
|
| 63 |
+
|
| 64 |
+
// Success message
|
| 65 |
+
/**
 * Render a green Bootstrap alert with the given message.
 *
 * NOTE: `message` is interpolated into innerHTML as-is (no escaping), so
 * any markup it contains is rendered as HTML.
 *
 * @param {string} message - text (or markup) shown after the "Success:" label.
 * @param {string} [elementId='resultsContainer'] - id of the element to fill;
 *     a missing element is ignored.
 */
function showSuccess(message, elementId = 'resultsContainer') {
    const target = document.getElementById(elementId);
    if (!target) {
        return;
    }
    const alertMarkup = `
        <div class="alert alert-success fade-in">
            <i class="fas fa-check-circle"></i>
            <strong>Success:</strong> ${message}
        </div>
    `;
    target.innerHTML = alertMarkup;
}
|
| 76 |
+
|
| 77 |
+
// API request helper
|
| 78 |
+
/**
 * Send a JSON request and return the parsed JSON response.
 *
 * @param {string} url - endpoint to call.
 * @param {*} data - payload, serialised with JSON.stringify for verbs that
 *     carry a body; ignored for GET and HEAD.
 * @param {string} [method='POST'] - HTTP verb.
 * @returns {Promise<*>} the response body parsed as JSON.
 * @throws {Error} when the response status is not OK (message contains the
 *     status code), or whatever fetch()/response.json() rejects with. Every
 *     failure is logged to the console and then re-thrown.
 */
async function makeApiRequest(url, data, method = 'POST') {
    const init = { method: method };

    // fetch() rejects with a TypeError when a GET or HEAD request carries a
    // body, so the payload (and its Content-Type) is only attached to verbs
    // that may have one.
    const verb = String(method).toUpperCase();
    if (verb !== 'GET' && verb !== 'HEAD') {
        init.headers = {
            'Content-Type': 'application/json',
        };
        init.body = JSON.stringify(data);
    }

    try {
        const response = await fetch(url, init);

        if (!response.ok) {
            throw new Error(`HTTP error! status: ${response.status}`);
        }

        return await response.json();
    } catch (error) {
        console.error('API request failed:', error);
        throw error;
    }
}
|
| 98 |
+
|
| 99 |
+
// Text processing functions
|
| 100 |
+
/**
 * Post text to an NLP endpoint and render the outcome in #resultsContainer.
 *
 * Shows the loading spinner, sends `{ text, ...additionalData }` (a `text`
 * key inside additionalData overrides the `text` argument because it is
 * spread last), then renders `response.result` on success or an error alert
 * otherwise.
 *
 * @param {string} endpoint - URL passed to makeApiRequest.
 * @param {string} text - text to analyse.
 * @param {Object} [additionalData={}] - extra fields merged into the payload.
 * @returns {Promise<void>} settles once the result or error has been
 *     rendered, so callers can await completion (previously nothing was
 *     returned and the chain was discarded).
 */
function processText(endpoint, text, additionalData = {}) {
    const data = { text: text, ...additionalData };

    showLoading('resultsContainer');

    return makeApiRequest(endpoint, data)
        .then(response => {
            if (response.success) {
                displayResults(response.result);
            } else {
                showError(response.error || 'An error occurred while processing the text');
            }
        })
        .catch(error => {
            showError('Failed to process text: ' + error.message);
        })
        .finally(() => {
            // Only clears the container if the spinner is still being shown.
            hideLoading('resultsContainer');
        });
}
|
| 120 |
+
|
| 121 |
+
// Display results
|
| 122 |
+
/**
 * Put a result into #resultsContainer and mark it for the fade-in animation.
 *
 * NOTE: `result` is assigned to innerHTML as-is, so it is rendered as HTML.
 *
 * @param {string} result - markup to show; ignored when the container is missing.
 */
function displayResults(result) {
    const resultsBox = document.getElementById('resultsContainer');
    if (!resultsBox) {
        return;
    }
    resultsBox.innerHTML = result;
    resultsBox.classList.add('fade-in');
}
|
| 129 |
+
|
| 130 |
+
// Copy to clipboard
|
| 131 |
+
/**
 * Copy text to the clipboard and show a short-lived confirmation toast.
 *
 * Failures are logged to the console rather than thrown.
 *
 * @param {string} text - text to place on the clipboard.
 */
function copyToClipboard(text) {
    // navigator.clipboard is undefined outside secure contexts and in older
    // browsers; without this guard the call below throws synchronously and
    // never reaches the .catch() handler.
    if (!navigator.clipboard || typeof navigator.clipboard.writeText !== 'function') {
        console.error('Failed to copy text: ', new Error('Clipboard API is not available'));
        return;
    }

    navigator.clipboard.writeText(text).then(() => {
        // Show temporary success message
        const toast = document.createElement('div');
        toast.className = 'alert alert-success position-fixed';
        toast.style.cssText = 'top: 20px; right: 20px; z-index: 9999; min-width: 200px;';
        toast.innerHTML = '<i class="fas fa-check"></i> Copied to clipboard!';
        document.body.appendChild(toast);

        // Remove the toast again after two seconds.
        setTimeout(() => {
            toast.remove();
        }, 2000);
    }).catch(err => {
        console.error('Failed to copy text: ', err);
    });
}
|
| 147 |
+
|
| 148 |
+
// Download text as file
|
| 149 |
+
/**
 * Offer a string to the user as a plain-text file download.
 *
 * Wraps the text in a Blob, points a temporary <a download> element at its
 * object URL, clicks it programmatically, then removes the element and
 * revokes the URL.
 *
 * @param {string} text - file content.
 * @param {string} [filename='nlp_result.txt'] - suggested file name.
 */
function downloadText(text, filename = 'nlp_result.txt') {
    const objectUrl = window.URL.createObjectURL(
        new Blob([text], { type: 'text/plain' })
    );

    const link = document.createElement('a');
    link.href = objectUrl;
    link.download = filename;

    // The link is attached to the document only for the duration of the click.
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);

    window.URL.revokeObjectURL(objectUrl);
}
|
| 160 |
+
|
| 161 |
+
// Format JSON for display
|
| 162 |
+
/**
 * Serialise a value as pretty-printed JSON with two-space indentation.
 *
 * @param {*} obj - value to serialise.
 * @returns {string|undefined} the JSON text, or whatever JSON.stringify
 *     returns for values it cannot serialise (e.g. undefined).
 */
function formatJSON(obj) {
    const indentWidth = 2;
    const prettyJson = JSON.stringify(obj, null, indentWidth);
    return prettyJson;
}
|
| 165 |
+
|
| 166 |
+
// Create data table
|
| 167 |
+
/**
 * Build the HTML for a responsive, striped Bootstrap table.
 *
 * NOTE: header and cell values are interpolated as-is (no escaping), so
 * markup inside them is rendered as HTML.
 *
 * @param {Array<Array|Object>} data - rows; an array row contributes one
 *     cell per element, any other row one cell per Object.values() entry.
 *     A null/undefined `data` yields an empty body and a null/undefined row
 *     yields an empty `<tr>` instead of throwing.
 * @param {Array<string>} [headers] - column titles; the <thead> is omitted
 *     when this is falsy.
 * @returns {string} the table markup wrapped in a `.table-responsive` div.
 */
function createDataTable(data, headers) {
    const parts = ['<div class="table-responsive"><table class="table table-striped table-hover">'];

    // Header
    if (headers) {
        parts.push('<thead><tr>');
        headers.forEach(header => {
            parts.push(`<th>${header}</th>`);
        });
        parts.push('</tr></thead>');
    }

    // Body
    parts.push('<tbody>');
    // Anything exposing forEach is iterated exactly as before; a missing
    // data set simply produces an empty body.
    if (data && typeof data.forEach === 'function') {
        data.forEach(row => {
            // Object.values(null) would throw, so an absent row has no cells.
            let cells;
            if (Array.isArray(row)) {
                cells = row;
            } else if (row === null || row === undefined) {
                cells = [];
            } else {
                cells = Object.values(row);
            }

            parts.push('<tr>');
            cells.forEach(cell => {
                parts.push(`<td>${cell}</td>`);
            });
            parts.push('</tr>');
        });
    }
    parts.push('</tbody></table></div>');

    return parts.join('');
}
|
| 198 |
+
|
| 199 |
+
// Create chart
|
| 200 |
+
/**
 * Create a Chart.js chart on the canvas with the given id.
 *
 * Caller options are merged over the defaults with a shallow merge: a
 * top-level key such as `plugins` in `options` replaces the default value
 * for that key entirely.
 *
 * @param {string} canvasId - id of the target element.
 * @param {string} type - chart type passed to Chart.
 * @param {Object} data - chart data passed to Chart.
 * @param {Object} [options={}] - overrides for the default options.
 * @returns {Chart|null} the new chart, or null when the element is missing.
 */
function createChart(canvasId, type, data, options = {}) {
    const canvas = document.getElementById(canvasId);
    if (!canvas) {
        return null;
    }

    const baseOptions = {
        responsive: true,
        maintainAspectRatio: false,
        plugins: {
            legend: {
                position: 'top',
            }
        }
    };

    const config = {
        type: type,
        data: data,
        options: Object.assign({}, baseOptions, options)
    };

    return new Chart(canvas, config);
}
|
| 222 |
+
|
| 223 |
+
// Smooth scroll to element
|
| 224 |
+
/**
 * Smoothly scroll the page so the given element is aligned to the top.
 *
 * @param {string} elementId - id of the element to reveal; a missing element is ignored.
 */
function scrollToElement(elementId) {
    const target = document.getElementById(elementId);
    if (!target) {
        return;
    }
    const scrollOptions = { behavior: 'smooth', block: 'start' };
    target.scrollIntoView(scrollOptions);
}
|
| 233 |
+
|
| 234 |
+
// Debounce function for input handling
|
| 235 |
+
/**
 * Wrap a function so it runs only after calls have stopped for `wait` ms.
 *
 * Each call cancels the pending timer and starts a new one; when the timer
 * fires, `func` is invoked once with the arguments and the `this` value of
 * the most recent call. Preserving `this` matches throttle() and lets the
 * debounced function be used as a method or DOM event handler.
 *
 * @param {Function} func - function to debounce.
 * @param {number} wait - quiet period in milliseconds.
 * @returns {Function} the debounced wrapper (its own return value is undefined).
 */
function debounce(func, wait) {
    let timeout;
    return function executedFunction(...args) {
        const context = this;
        clearTimeout(timeout);
        timeout = setTimeout(() => {
            // The timer has fired, so there is nothing left to cancel.
            timeout = undefined;
            func.apply(context, args);
        }, wait);
    };
}
|
| 246 |
+
|
| 247 |
+
// Throttle function for scroll handling
|
| 248 |
+
/**
 * Wrap a function so it runs at most once per `limit` milliseconds.
 *
 * The first call runs immediately with the caller's `this` and arguments;
 * further calls are dropped until the timer started by that call expires.
 *
 * @param {Function} func - function to throttle.
 * @param {number} limit - minimum spacing between invocations, in milliseconds.
 * @returns {Function} the throttled wrapper (its own return value is undefined).
 */
function throttle(func, limit) {
    let blocked;
    return function(...callArgs) {
        if (blocked) {
            return;
        }
        func.apply(this, callArgs);
        blocked = true;
        setTimeout(() => {
            blocked = false;
        }, limit);
    };
}
|
| 260 |
+
|
| 261 |
+
// Local storage helpers
|
| 262 |
+
/**
 * Store a value in localStorage as JSON.
 *
 * Serialisation and storage errors are logged to the console, not thrown.
 *
 * @param {string} key - storage key.
 * @param {*} value - value to serialise with JSON.stringify.
 */
function saveToStorage(key, value) {
    try {
        const serialized = JSON.stringify(value);
        localStorage.setItem(key, serialized);
    } catch (storageError) {
        console.error('Failed to save to localStorage:', storageError);
    }
}
|
| 269 |
+
|
| 270 |
+
/**
 * Read a JSON value from localStorage.
 *
 * @param {string} key - storage key.
 * @param {*} [defaultValue=null] - returned when the key is missing, the
 *     stored string is empty, or reading/parsing fails (failures are logged).
 * @returns {*} the parsed value or `defaultValue`.
 */
function loadFromStorage(key, defaultValue = null) {
    try {
        const raw = localStorage.getItem(key);
        if (!raw) {
            return defaultValue;
        }
        return JSON.parse(raw);
    } catch (storageError) {
        console.error('Failed to load from localStorage:', storageError);
        return defaultValue;
    }
}
|
| 279 |
+
|
| 280 |
+
// Session storage helpers
|
| 281 |
+
/**
 * Store a value in sessionStorage as JSON.
 *
 * Serialisation and storage errors are logged to the console, not thrown.
 *
 * @param {string} key - storage key.
 * @param {*} value - value to serialise with JSON.stringify.
 */
function saveToSession(key, value) {
    try {
        const serialized = JSON.stringify(value);
        sessionStorage.setItem(key, serialized);
    } catch (storageError) {
        console.error('Failed to save to sessionStorage:', storageError);
    }
}
|
| 288 |
+
|
| 289 |
+
/**
 * Read a JSON value from sessionStorage.
 *
 * @param {string} key - storage key.
 * @param {*} [defaultValue=null] - returned when the key is missing, the
 *     stored string is empty, or reading/parsing fails (failures are logged).
 * @returns {*} the parsed value or `defaultValue`.
 */
function loadFromSession(key, defaultValue = null) {
    try {
        const raw = sessionStorage.getItem(key);
        if (!raw) {
            return defaultValue;
        }
        return JSON.parse(raw);
    } catch (storageError) {
        console.error('Failed to load from sessionStorage:', storageError);
        return defaultValue;
    }
}
|
| 298 |
+
|
| 299 |
+
// Initialize page
|
| 300 |
+
// Page bootstrap: applies the saved theme, staggers the card fade-in
// animation, wires the delegated copy/download buttons, suppresses native
// form submission and enables Bootstrap tooltips.
document.addEventListener('DOMContentLoaded', function() {
    // Initialize theme
    initializeTheme();

    // Add fade-in animation to cards, each one 0.1s later than the previous
    const cards = document.querySelectorAll('.card');
    cards.forEach((card, index) => {
        card.style.animationDelay = `${index * 0.1}s`;
        card.classList.add('fade-in');
    });

    // Resolve the button a click belongs to. closest() also matches when the
    // click lands on a child of the button (e.g. its <i> icon), which a
    // classList check on e.target alone would miss.
    function findButton(event, selector) {
        const origin = event.target;
        return origin && typeof origin.closest === 'function'
            ? origin.closest(selector)
            : null;
    }

    // Add click handlers for copy buttons (delegated, so buttons rendered
    // later are covered too)
    document.addEventListener('click', function(e) {
        const button = findButton(e, '.copy-btn');
        if (button) {
            const text = button.getAttribute('data-copy');
            if (text) {
                copyToClipboard(text);
            }
        }
    });

    // Add click handlers for download buttons
    document.addEventListener('click', function(e) {
        const button = findButton(e, '.download-btn');
        if (button) {
            const text = button.getAttribute('data-download');
            const filename = button.getAttribute('data-filename') || 'nlp_result.txt';
            if (text) {
                downloadText(text, filename);
            }
        }
    });

    // Handle form submissions: native submission is suppressed for every
    // form present at load time
    const forms = document.querySelectorAll('form');
    forms.forEach(form => {
        form.addEventListener('submit', function(e) {
            e.preventDefault();
            // Handle form submission here
        });
    });

    // Add tooltips. Skipped when the Bootstrap bundle is not available so a
    // failed script load does not raise a ReferenceError here.
    if (typeof bootstrap !== 'undefined' && bootstrap.Tooltip) {
        const tooltipTriggerList = [].slice.call(document.querySelectorAll('[data-bs-toggle="tooltip"]'));
        tooltipTriggerList.map(function (tooltipTriggerEl) {
            return new bootstrap.Tooltip(tooltipTriggerEl);
        });
    }
});
|
| 347 |
+
|
| 348 |
+
// Export functions for global use
|
| 349 |
+
// Public namespace: exposes the helpers defined in this file under
// window.NLPUtils, each under its own function name.
window.NLPUtils = {
    // Theme
    toggleTheme: toggleTheme,
    // Loading / status messages
    showLoading: showLoading,
    hideLoading: hideLoading,
    showError: showError,
    showSuccess: showSuccess,
    // API access and result rendering
    makeApiRequest: makeApiRequest,
    processText: processText,
    displayResults: displayResults,
    // Clipboard / download
    copyToClipboard: copyToClipboard,
    downloadText: downloadText,
    // Formatting and visualisation
    formatJSON: formatJSON,
    createDataTable: createDataTable,
    createChart: createChart,
    // Scrolling and rate limiting
    scrollToElement: scrollToElement,
    debounce: debounce,
    throttle: throttle,
    // Web storage
    saveToStorage: saveToStorage,
    loadFromStorage: loadFromStorage,
    saveToSession: saveToSession,
    loadFromSession: loadFromSession
};
|
templates/_analysis_nav.html
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<div class="card mb-4 quick-nav">
|
| 2 |
+
<div class="card-header">
|
| 3 |
+
<h5 class="mb-0"><i class="fas fa-compass me-2"></i>Quick Navigation</h5>
|
| 4 |
+
</div>
|
| 5 |
+
<div class="card-body">
|
| 6 |
+
<!-- Text Processing -->
|
| 7 |
+
<h6 class="mb-2"><i class="fas fa-edit me-2"></i>Text Processing</h6>
|
| 8 |
+
<div class="row mb-3">
|
| 9 |
+
<div class="col-md-3 mb-2">
|
| 10 |
+
<a href="{{ url_for('preprocessing') }}" class="btn btn-outline-primary w-100"><i class="fas fa-tools"></i> Preprocessing</a>
|
| 11 |
+
</div>
|
| 12 |
+
<div class="col-md-3 mb-2">
|
| 13 |
+
<a href="{{ url_for('tokenization') }}" class="btn btn-outline-primary w-100"><i class="fas fa-cut"></i> Tokenization</a>
|
| 14 |
+
</div>
|
| 15 |
+
<div class="col-md-3 mb-2">
|
| 16 |
+
<a href="{{ url_for('pos_tagging') }}" class="btn btn-outline-primary w-100"><i class="fas fa-tags"></i> POS</a>
|
| 17 |
+
</div>
|
| 18 |
+
<div class="col-md-3 mb-2">
|
| 19 |
+
<a href="{{ url_for('named_entity') }}" class="btn btn-outline-primary w-100"><i class="fas fa-user-tag"></i> NER</a>
|
| 20 |
+
</div>
|
| 21 |
+
</div>
|
| 22 |
+
|
| 23 |
+
<!-- Analysis -->
|
| 24 |
+
<h6 class="mb-2"><i class="fas fa-chart-line me-2"></i>Analysis</h6>
|
| 25 |
+
<div class="row mb-3">
|
| 26 |
+
<div class="col-md-4 mb-2">
|
| 27 |
+
<a href="{{ url_for('sentiment') }}" class="btn btn-outline-success w-100"><i class="fas fa-smile"></i> Sentiment</a>
|
| 28 |
+
</div>
|
| 29 |
+
<div class="col-md-4 mb-2">
|
| 30 |
+
<a href="{{ url_for('summarization') }}" class="btn btn-outline-success w-100"><i class="fas fa-compress"></i> Summarization</a>
|
| 31 |
+
</div>
|
| 32 |
+
<div class="col-md-4 mb-2">
|
| 33 |
+
<a href="{{ url_for('topic_analysis') }}" class="btn btn-outline-success w-100"><i class="fas fa-project-diagram"></i> Topics</a>
|
| 34 |
+
</div>
|
| 35 |
+
</div>
|
| 36 |
+
|
| 37 |
+
<!-- Advanced NLP -->
|
| 38 |
+
<h6 class="mb-2"><i class="fas fa-robot me-2"></i>Advanced NLP</h6>
|
| 39 |
+
<div class="row">
|
| 40 |
+
<div class="col-md-2 mb-2">
|
| 41 |
+
<a href="{{ url_for('question_answering') }}" class="btn btn-outline-info w-100"><i class="fas fa-question-circle"></i> QA</a>
|
| 42 |
+
</div>
|
| 43 |
+
<div class="col-md-2 mb-2">
|
| 44 |
+
<a href="{{ url_for('text_generation') }}" class="btn btn-outline-info w-100"><i class="fas fa-magic"></i> Generation</a>
|
| 45 |
+
</div>
|
| 46 |
+
<div class="col-md-2 mb-2">
|
| 47 |
+
<a href="{{ url_for('translation') }}" class="btn btn-outline-info w-100"><i class="fas fa-language"></i> Translate</a>
|
| 48 |
+
</div>
|
| 49 |
+
<div class="col-md-2 mb-2">
|
| 50 |
+
<a href="{{ url_for('classification') }}" class="btn btn-outline-info w-100"><i class="fas fa-sitemap"></i> Classify</a>
|
| 51 |
+
</div>
|
| 52 |
+
<div class="col-md-2 mb-2">
|
| 53 |
+
<a href="{{ url_for('vector_embeddings') }}" class="btn btn-outline-info w-100"><i class="fas fa-vector-square"></i> Embeddings</a>
|
| 54 |
+
</div>
|
| 55 |
+
</div>
|
| 56 |
+
</div>
|
| 57 |
+
</div>
|
| 58 |
+
|
| 59 |
+
<script>
|
| 60 |
+
// Remove Quick Nav functionality - let navbar handle all navigation
|
| 61 |
+
(function() {
    // Limit the handler to the Quick Navigation card rendered directly
    // before this script. Selecting from the script's parent element would
    // instead cover every link in whatever container includes this partial.
    const script = document.currentScript;
    const quickNav = script ? script.previousElementSibling : null;
    if (!quickNav || !quickNav.classList.contains('quick-nav')) {
        return;
    }

    quickNav.querySelectorAll('a[href]').forEach(link => {
        link.addEventListener('click', function(event) {
            // Prevent Quick Nav from doing anything special:
            // the click is kept away from ancestor listeners and the browser
            // performs its normal navigation, like the navbar links do.
            event.stopPropagation();
            // Don't set any sessionStorage flags - let it work like navbar
        });
    });
})();
|
| 72 |
+
</script>
|
templates/base.html
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>{% block title %}NLP Ultimate Tutorial{% endblock %}</title>
|
| 7 |
+
|
| 8 |
+
<!-- Bootstrap CSS -->
|
| 9 |
+
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5/dist/css/bootstrap.min.css" rel="stylesheet">
|
| 10 |
+
|
| 11 |
+
<!-- Custom CSS -->
|
| 12 |
+
<link rel="stylesheet" href="{{ url_for('static', filename='css/style.css') }}">
|
| 13 |
+
<link rel="stylesheet" href="{{ url_for('static', filename='css/components.css') }}">
|
| 14 |
+
|
| 15 |
+
<!-- Font Awesome -->
|
| 16 |
+
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
|
| 17 |
+
|
| 18 |
+
<!-- Chart.js -->
|
| 19 |
+
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
| 20 |
+
|
| 21 |
+
{% block extra_head %}{% endblock %}
|
| 22 |
+
</head>
|
| 23 |
+
<body>
|
| 24 |
+
<!-- Navigation -->
|
| 25 |
+
<nav class="navbar navbar-expand-lg navbar-dark bg-primary">
|
| 26 |
+
<div class="container-fluid">
|
| 27 |
+
<a class="navbar-brand" href="{{ url_for('index') }}">
|
| 28 |
+
<i class="fas fa-brain"></i> NLP Ultimate Tutorial
|
| 29 |
+
</a>
|
| 30 |
+
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#mainNavbar" aria-controls="mainNavbar" aria-expanded="false" aria-label="Toggle navigation">
|
| 31 |
+
<span class="navbar-toggler-icon"></span>
|
| 32 |
+
</button>
|
| 33 |
+
<div class="collapse navbar-collapse" id="mainNavbar">
|
| 34 |
+
<ul class="navbar-nav me-auto mb-2 mb-lg-0">
|
| 35 |
+
<li class="nav-item dropdown">
|
| 36 |
+
<a class="nav-link dropdown-toggle" href="#" id="navTextProcessing" role="button" data-bs-toggle="dropdown" aria-expanded="false">
|
| 37 |
+
<i class="fas fa-edit"></i> Text Processing
|
| 38 |
+
</a>
|
| 39 |
+
<ul class="dropdown-menu" aria-labelledby="navTextProcessing">
|
| 40 |
+
<li><a class="dropdown-item" href="{{ url_for('preprocessing') }}">Preprocessing</a></li>
|
| 41 |
+
<li><a class="dropdown-item" href="{{ url_for('tokenization') }}">Tokenization</a></li>
|
| 42 |
+
<li><a class="dropdown-item" href="{{ url_for('pos_tagging') }}">POS Tagging</a></li>
|
| 43 |
+
<li><a class="dropdown-item" href="{{ url_for('named_entity') }}">Named Entities</a></li>
|
| 44 |
+
</ul>
|
| 45 |
+
</li>
|
| 46 |
+
<li class="nav-item dropdown">
|
| 47 |
+
<a class="nav-link dropdown-toggle" href="#" id="navAnalysis" role="button" data-bs-toggle="dropdown" aria-expanded="false">
|
| 48 |
+
<i class="fas fa-chart-line"></i> Analysis
|
| 49 |
+
</a>
|
| 50 |
+
<ul class="dropdown-menu" aria-labelledby="navAnalysis">
|
| 51 |
+
<li><a class="dropdown-item" href="{{ url_for('sentiment') }}">Sentiment</a></li>
|
| 52 |
+
<li><a class="dropdown-item" href="{{ url_for('summarization') }}">Summarization</a></li>
|
| 53 |
+
<li><a class="dropdown-item" href="{{ url_for('topic_analysis') }}">Topic Analysis</a></li>
|
| 54 |
+
<li><a class="dropdown-item" href="{{ url_for('question_answering') }}">Question Answering</a></li>
|
| 55 |
+
</ul>
|
| 56 |
+
</li>
|
| 57 |
+
<li class="nav-item dropdown">
|
| 58 |
+
<a class="nav-link dropdown-toggle" href="#" id="navAdvanced" role="button" data-bs-toggle="dropdown" aria-expanded="false">
|
| 59 |
+
<i class="fas fa-robot"></i> Advanced NLP
|
| 60 |
+
</a>
|
| 61 |
+
<ul class="dropdown-menu dropdown-menu-end" aria-labelledby="navAdvanced">
|
| 62 |
+
<li><a class="dropdown-item" href="{{ url_for('text_generation') }}">Text Generation</a></li>
|
| 63 |
+
<li><a class="dropdown-item" href="{{ url_for('translation') }}">Translation</a></li>
|
| 64 |
+
<li><a class="dropdown-item" href="{{ url_for('classification') }}">Classification</a></li>
|
| 65 |
+
<li><a class="dropdown-item" href="{{ url_for('vector_embeddings') }}">Embeddings</a></li>
|
| 66 |
+
</ul>
|
| 67 |
+
</li>
|
| 68 |
+
</ul>
|
| 69 |
+
<div class="d-flex">
|
| 70 |
+
<button class="btn btn-outline-light btn-sm" onclick="toggleTheme()" title="Toggle theme">
|
| 71 |
+
<i class="fas fa-moon" id="theme-icon"></i>
|
| 72 |
+
</button>
|
| 73 |
+
</div>
|
| 74 |
+
</div>
|
| 75 |
+
</div>
|
| 76 |
+
</nav>
|
| 77 |
+
|
| 78 |
+
<!-- Main Content -->
|
| 79 |
+
<main class="container-fluid py-4">
|
| 80 |
+
{% block content %}{% endblock %}
|
| 81 |
+
</main>
|
| 82 |
+
|
| 83 |
+
<!-- Footer -->
|
| 84 |
+
<footer class="modern-footer">
|
| 85 |
+
<div class="container">
|
| 86 |
+
<div class="row align-items-center">
|
| 87 |
+
<div class="col-md-4">
|
| 88 |
+
<div class="footer-brand">
|
| 89 |
+
<h5><i class="fas fa-brain"></i> NLP Ultimate Tutorial</h5>
|
| 90 |
+
<p class="footer-description">Comprehensive guide to Natural Language Processing concepts and techniques.</p>
|
| 91 |
+
</div>
|
| 92 |
+
</div>
|
| 93 |
+
<div class="col-md-4 text-center">
|
| 94 |
+
<div class="footer-credit">
|
| 95 |
+
<div class="credit-badge">
|
| 96 |
+
<span class="credit-text">Designed and developed by</span>
|
| 97 |
+
<strong class="developer-name">Aradhya Pavan H S</strong>
|
| 98 |
+
</div>
|
| 99 |
+
</div>
|
| 100 |
+
</div>
|
| 101 |
+
<div class="col-md-4 text-md-end">
|
| 102 |
+
<div class="social-links">
|
| 103 |
+
<a href="https://github.com/aradhyapavan" target="_blank" rel="noopener noreferrer" class="social-link github-link">
|
| 104 |
+
<i class="fab fa-github"></i>
|
| 105 |
+
<span>GitHub</span>
|
| 106 |
+
</a>
|
| 107 |
+
<a href="https://www.linkedin.com/in/aradhya-pavan/" target="_blank" rel="noopener noreferrer" class="social-link linkedin-link">
|
| 108 |
+
<i class="fab fa-linkedin"></i>
|
| 109 |
+
<span>LinkedIn</span>
|
| 110 |
+
</a>
|
| 111 |
+
</div>
|
| 112 |
+
</div>
|
| 113 |
+
</div>
|
| 114 |
+
</div>
|
| 115 |
+
</footer>
|
| 116 |
+
|
| 117 |
+
<!-- Bootstrap JS -->
|
| 118 |
+
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5/dist/js/bootstrap.bundle.min.js"></script>
|
| 119 |
+
|
| 120 |
+
<!-- Custom JS -->
|
| 121 |
+
<script src="{{ url_for('static', filename='js/main.js') }}"></script>
|
| 122 |
+
|
| 123 |
+
{% block extra_scripts %}{% endblock %}
|
| 124 |
+
</body>
|
| 125 |
+
</html>
|
templates/classification.html
ADDED
|
@@ -0,0 +1,449 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block title %}Zero-shot Classification - NLP Ultimate Tutorial{% endblock %}
|
| 4 |
+
|
| 5 |
+
{% block content %}
|
| 6 |
+
<div class="container">
|
| 7 |
+
<!-- Header Section -->
|
| 8 |
+
<div class="row mb-4">
|
| 9 |
+
<div class="col-12">
|
| 10 |
+
<div class="card">
|
| 11 |
+
<div class="card-header">
|
| 12 |
+
<h1 class="mb-0">
|
| 13 |
+
<i class="fas fa-tags"></i>
|
| 14 |
+
Zero-shot Classification
|
| 15 |
+
</h1>
|
| 16 |
+
</div>
|
| 17 |
+
<div class="card-body">
|
| 18 |
+
<p class="lead">Classify text into arbitrary categories without training on specific examples.</p>
|
| 19 |
+
|
| 20 |
+
<div class="alert alert-info">
|
| 21 |
+
<i class="fas fa-info-circle"></i>
|
| 22 |
+
<strong>About:</strong> Zero-shot classification can categorize text into arbitrary classes without having been specifically trained on those categories.
|
| 23 |
+
</div>
|
| 24 |
+
</div>
|
| 25 |
+
</div>
|
| 26 |
+
</div>
|
| 27 |
+
</div>
|
| 28 |
+
|
| 29 |
+
{% include "_analysis_nav.html" %}
|
| 30 |
+
|
| 31 |
+
<!-- Input Section -->
|
| 32 |
+
<div class="row mb-4">
|
| 33 |
+
<div class="col-12">
|
| 34 |
+
<div class="card">
|
| 35 |
+
<div class="card-header">
|
| 36 |
+
<h3 class="mb-0">
|
| 37 |
+
<i class="fas fa-keyboard"></i>
|
| 38 |
+
Enter text to classify:
|
| 39 |
+
</h3>
|
| 40 |
+
</div>
|
| 41 |
+
<div class="card-body">
|
| 42 |
+
<div class="row mb-3">
|
| 43 |
+
<div class="col-md-8">
|
| 44 |
+
<textarea id="textInput" class="form-control" rows="6" placeholder="Enter text here...">I absolutely love this new product! It's amazing and works perfectly.</textarea>
|
| 45 |
+
</div>
|
| 46 |
+
<div class="col-md-4">
|
| 47 |
+
<label for="sampleSelect" class="form-label">Or choose a sample:</label>
|
| 48 |
+
<select id="sampleSelect" class="form-select">
|
| 49 |
+
<option value="Custom">Custom</option>
|
| 50 |
+
<option value="Sentiment">Sentiment</option>
|
| 51 |
+
<option value="Emotion">Emotion</option>
|
| 52 |
+
<option value="Writing Style">Writing Style</option>
|
| 53 |
+
<option value="Intent">Intent</option>
|
| 54 |
+
</select>
|
| 55 |
+
</div>
|
| 56 |
+
</div>
|
| 57 |
+
|
| 58 |
+
<div class="d-flex justify-content-between align-items-center">
|
| 59 |
+
<div>
|
| 60 |
+
<button id="processBtn" class="btn btn-primary btn-lg">
|
| 61 |
+
<i class="fas fa-tags"></i>
|
| 62 |
+
Classify Text
|
| 63 |
+
</button>
|
| 64 |
+
</div>
|
| 65 |
+
<div>
|
| 66 |
+
<button id="clearBtn" class="btn btn-outline-secondary">
|
| 67 |
+
<i class="fas fa-trash"></i>
|
| 68 |
+
Clear
|
| 69 |
+
</button>
|
| 70 |
+
</div>
|
| 71 |
+
</div>
|
| 72 |
+
</div>
|
| 73 |
+
</div>
|
| 74 |
+
</div>
|
| 75 |
+
</div>
|
| 76 |
+
|
| 77 |
+
<!-- Classification Settings Section -->
|
| 78 |
+
<div class="row mb-4">
|
| 79 |
+
<div class="col-12">
|
| 80 |
+
<div class="card">
|
| 81 |
+
<div class="card-header">
|
| 82 |
+
<h3 class="mb-0">
|
| 83 |
+
<i class="fas fa-cog"></i>
|
| 84 |
+
Classification Settings
|
| 85 |
+
</h3>
|
| 86 |
+
</div>
|
| 87 |
+
<div class="card-body">
|
| 88 |
+
<div class="row">
|
| 89 |
+
<div class="col-md-6">
|
| 90 |
+
<label for="scenario" class="form-label">Classification Scenario</label>
|
| 91 |
+
<select id="scenario" class="form-select">
|
| 92 |
+
<option value="Sentiment" selected>Sentiment</option>
|
| 93 |
+
<option value="Emotion">Emotion</option>
|
| 94 |
+
<option value="Writing Style">Writing Style</option>
|
| 95 |
+
<option value="Intent">Intent</option>
|
| 96 |
+
<option value="Content Type">Content Type</option>
|
| 97 |
+
<option value="Audience Level">Audience Level</option>
|
| 98 |
+
<option value="Custom">Custom</option>
|
| 99 |
+
</select>
|
| 100 |
+
</div>
|
| 101 |
+
<div class="col-md-6">
|
| 102 |
+
<div class="form-check form-switch mt-4">
|
| 103 |
+
<input class="form-check-input" type="checkbox" id="multiLabel">
|
| 104 |
+
<label class="form-check-label" for="multiLabel">
|
| 105 |
+
Multi-label classification
|
| 106 |
+
</label>
|
| 107 |
+
<small class="form-text text-muted">Allow multiple categories</small>
|
| 108 |
+
</div>
|
| 109 |
+
</div>
|
| 110 |
+
</div>
|
| 111 |
+
|
| 112 |
+
<!-- Custom labels input (hidden by default) -->
|
| 113 |
+
<div id="customLabelsDiv" class="mt-3" style="display: none;">
|
| 114 |
+
<label for="customLabels" class="form-label">Custom Categories (one per line)</label>
|
| 115 |
+
<textarea id="customLabels" class="form-control" rows="4" placeholder="Enter custom categories here..."></textarea>
|
| 116 |
+
</div>
|
| 117 |
+
</div>
|
| 118 |
+
</div>
|
| 119 |
+
</div>
|
| 120 |
+
</div>
|
| 121 |
+
|
| 122 |
+
<!-- Model Info Section -->
|
| 123 |
+
<div class="row mb-4">
|
| 124 |
+
<div class="col-12">
|
| 125 |
+
<div class="card">
|
| 126 |
+
<div class="card-header">
|
| 127 |
+
<h3 class="mb-0">
|
| 128 |
+
<i class="fas fa-info-circle"></i>
|
| 129 |
+
Model Information
|
| 130 |
+
</h3>
|
| 131 |
+
</div>
|
| 132 |
+
<div class="card-body">
|
| 133 |
+
<div class="row">
|
| 134 |
+
<div class="col-md-4">
|
| 135 |
+
<div class="card h-100">
|
| 136 |
+
<div class="card-body text-center">
|
| 137 |
+
<i class="fas fa-brain fa-2x text-primary mb-2"></i>
|
| 138 |
+
<h5>BART-large-mnli</h5>
|
| 139 |
+
<p class="small">BART model fine-tuned on MultiNLI dataset</p>
|
| 140 |
+
<ul class="list-unstyled small text-start">
|
| 141 |
+
<li>• Zero-shot classification</li>
|
| 142 |
+
<li>• Arbitrary categories</li>
|
| 143 |
+
<li>• High accuracy</li>
|
| 144 |
+
</ul>
|
| 145 |
+
</div>
|
| 146 |
+
</div>
|
| 147 |
+
</div>
|
| 148 |
+
<div class="col-md-4">
|
| 149 |
+
<div class="card h-100">
|
| 150 |
+
<div class="card-body text-center">
|
| 151 |
+
<i class="fas fa-sliders-h fa-2x text-success mb-2"></i>
|
| 152 |
+
<h5>Flexible Classification</h5>
|
| 153 |
+
<p class="small">Classify into any user-defined categories</p>
|
| 154 |
+
<ul class="list-unstyled small text-start">
|
| 155 |
+
<li>• Pre-defined scenarios</li>
|
| 156 |
+
<li>• Custom categories</li>
|
| 157 |
+
<li>• Multi-label support</li>
|
| 158 |
+
</ul>
|
| 159 |
+
</div>
|
| 160 |
+
</div>
|
| 161 |
+
</div>
|
| 162 |
+
<div class="col-md-4">
|
| 163 |
+
<div class="card h-100">
|
| 164 |
+
<div class="card-body text-center">
|
| 165 |
+
<i class="fas fa-chart-bar fa-2x text-info mb-2"></i>
|
| 166 |
+
<h5>Confidence Scoring</h5>
|
| 167 |
+
<p class="small">Detailed confidence scores for each category</p>
|
| 168 |
+
<ul class="list-unstyled small text-start">
|
| 169 |
+
<li>• Confidence visualization</li>
|
| 170 |
+
<li>• Ranking by score</li>
|
| 171 |
+
<li>• Multiple category detection</li>
|
| 172 |
+
</ul>
|
| 173 |
+
</div>
|
| 174 |
+
</div>
|
| 175 |
+
</div>
|
| 176 |
+
</div>
|
| 177 |
+
</div>
|
| 178 |
+
</div>
|
| 179 |
+
</div>
|
| 180 |
+
</div>
|
| 181 |
+
|
| 182 |
+
<!-- Example Texts Section -->
|
| 183 |
+
<div class="row mb-4">
|
| 184 |
+
<div class="col-12">
|
| 185 |
+
<div class="card">
|
| 186 |
+
<div class="card-header">
|
| 187 |
+
<h3 class="mb-0">
|
| 188 |
+
<i class="fas fa-list"></i>
|
| 189 |
+
Example Texts
|
| 190 |
+
</h3>
|
| 191 |
+
</div>
|
| 192 |
+
<div class="card-body">
|
| 193 |
+
<div class="row">
|
| 194 |
+
<div class="col-md-6">
|
| 195 |
+
<button class="btn btn-outline-primary w-100 mb-2" onclick="setExample('I absolutely love this new product! It\'s amazing and works perfectly.', 'Sentiment')">
|
| 196 |
+
I absolutely love this new product! It's amazing and works perfectly.
|
| 197 |
+
</button>
|
| 198 |
+
</div>
|
| 199 |
+
<div class="col-md-6">
|
| 200 |
+
<button class="btn btn-outline-primary w-100 mb-2" onclick="setExample('I am so excited about this opportunity!', 'Emotion')">
|
| 201 |
+
I am so excited about this opportunity!
|
| 202 |
+
</button>
|
| 203 |
+
</div>
|
| 204 |
+
<div class="col-md-6">
|
| 205 |
+
<button class="btn btn-outline-primary w-100 mb-2" onclick="setExample('The implementation requires careful consideration of the underlying architecture.', 'Writing Style')">
|
| 206 |
+
The implementation requires careful consideration of the underlying architecture.
|
| 207 |
+
</button>
|
| 208 |
+
</div>
|
| 209 |
+
<div class="col-md-6">
|
| 210 |
+
<button class="btn btn-outline-primary w-100 mb-2" onclick="setExample('You should definitely buy this product because it will solve all your problems.', 'Intent')">
|
| 211 |
+
You should definitely buy this product because it will solve all your problems.
|
| 212 |
+
</button>
|
| 213 |
+
</div>
|
| 214 |
+
</div>
|
| 215 |
+
</div>
|
| 216 |
+
</div>
|
| 217 |
+
</div>
|
| 218 |
+
</div>
|
| 219 |
+
|
| 220 |
+
<!-- Results Section -->
|
| 221 |
+
<div class="row">
|
| 222 |
+
<div class="col-12">
|
| 223 |
+
<div class="card">
|
| 224 |
+
<div class="card-header">
|
| 225 |
+
<h3 class="mb-0">
|
| 226 |
+
<i class="fas fa-chart-bar"></i>
|
| 227 |
+
Classification Results
|
| 228 |
+
</h3>
|
| 229 |
+
</div>
|
| 230 |
+
<div class="card-body">
|
| 231 |
+
<div id="resultsContainer">
|
| 232 |
+
<div class="text-center text-muted py-5">
|
| 233 |
+
<i class="fas fa-arrow-up fa-2x mb-3"></i>
|
| 234 |
+
<p>Click "Classify Text" to see classification results</p>
|
| 235 |
+
</div>
|
| 236 |
+
</div>
|
| 237 |
+
</div>
|
| 238 |
+
</div>
|
| 239 |
+
</div>
|
| 240 |
+
</div>
|
| 241 |
+
</div>
|
| 242 |
+
{% endblock %}
|
| 243 |
+
|
| 244 |
+
{% block extra_scripts %}
|
| 245 |
+
<script>
|
| 246 |
+
// Initialize page: wire up every control of the classification page once the
// DOM has been parsed.
document.addEventListener('DOMContentLoaded', function() {
    const textInput = document.getElementById('textInput');
    const processBtn = document.getElementById('processBtn');
    const clearBtn = document.getElementById('clearBtn');

    // Only carry over when using Quick Nav; otherwise leave defaults
    if (sessionStorage.getItem('carryTextOnNextPage') === '1') {
        const storedText = sessionStorage.getItem('analysisText');
        if (storedText) textInput.value = storedText;
        // The flag is single-use so a later plain page load keeps the default text.
        sessionStorage.removeItem('carryTextOnNextPage');
    }

    // Scenario change handler
    document.getElementById('scenario').addEventListener('change', function() {
        // The custom label editor is only relevant for the "Custom" scenario.
        document.getElementById('customLabelsDiv').style.display =
            this.value === 'Custom' ? 'block' : 'none';

        // Update multi-label checkbox based on scenario (it is only ever
        // switched on here; other scenarios leave the user's choice alone).
        if (['Emotion', 'Intent', 'Content Type'].includes(this.value)) {
            document.getElementById('multiLabel').checked = true;
        }
    });

    // Sample text dropdown handler
    document.getElementById('sampleSelect').addEventListener('change', function() {
        const sampleType = this.value;

        if (sampleType === 'Custom') {
            textInput.value = '';
            return;
        }

        // Sample prompts keyed by the scenario they demonstrate
        const samples = {
            'Sentiment': 'I absolutely love this new product! It\'s amazing and works perfectly.',
            'Emotion': 'I am so excited about this opportunity!',
            'Writing Style': 'The implementation requires careful consideration of the underlying architecture.',
            'Intent': 'You should definitely buy this product because it will solve all your problems.'
        };

        if (samples[sampleType]) {
            // Reuse setExample so the custom-labels editor is hidden again when
            // the scenario is switched away from "Custom" programmatically
            // (assigning select.value does not fire a "change" event).
            setExample(samples[sampleType], sampleType);
        }
    });

    // Process button handler
    processBtn.addEventListener('click', function() {
        if (!textInput.value.trim()) {
            alert('Please enter text to classify.');
            return;
        }

        // Show loading state
        processBtn.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Classifying...';
        processBtn.disabled = true;

        const restoreButton = () => {
            processBtn.innerHTML = '<i class="fas fa-tags"></i> Classify Text';
            processBtn.disabled = false;
        };

        // Process classification. When the call hands back a promise, keep the
        // button locked until the request really finishes; otherwise fall back
        // to the fixed delay.
        const pending = processClassification();
        if (pending && typeof pending.then === 'function') {
            pending.then(restoreButton, restoreButton);
        } else {
            setTimeout(restoreButton, 2000);
        }
    });

    // Clear button handler: empty the input and restore the placeholder panel
    clearBtn.addEventListener('click', function() {
        textInput.value = '';
        document.getElementById('resultsContainer').innerHTML = `
            <div class="text-center text-muted py-5">
                <i class="fas fa-arrow-up fa-2x mb-3"></i>
                <p>Click "Classify Text" to see classification results</p>
            </div>
        `;
    });

    // Keyboard shortcuts
    document.addEventListener('keydown', function(e) {
        // Ctrl+Enter to process
        if (e.ctrlKey && e.key === 'Enter') {
            processBtn.click();
        }

        // Ctrl+L to clear (preventDefault stops the browser's own Ctrl+L action)
        if (e.ctrlKey && e.key === 'l') {
            e.preventDefault();
            clearBtn.click();
        }
    });
});
|
| 344 |
+
|
| 345 |
+
// Load an example into the form: put `text` into the input box, select
// `scenario` in the scenario dropdown, and show the custom-labels editor only
// when that scenario is "Custom".
function setExample(text, scenario) {
    const inputBox = document.getElementById('textInput');
    inputBox.value = text;

    const scenarioPicker = document.getElementById('scenario');
    scenarioPicker.value = scenario;

    // Update custom labels visibility
    const labelsEditor = document.getElementById('customLabelsDiv');
    labelsEditor.style.display = scenario === 'Custom' ? 'block' : 'none';
}
|
| 358 |
+
|
| 359 |
+
// Process classification: read the form (#textInput, #scenario, #multiLabel,
// #customLabels), POST it as JSON to /api/classification and render either the
// returned result or an error into #resultsContainer.
//
// Returns a promise that settles once the request has been fully handled (it
// never rejects: failures are rendered via showError), so callers can tell
// when it is safe to re-enable their controls. Callers that ignore the return
// value keep working unchanged.
function processClassification() {
    const text = document.getElementById('textInput').value.trim();
    const scenario = document.getElementById('scenario').value;
    const multiLabel = document.getElementById('multiLabel').checked;
    const customLabels = document.getElementById('customLabels').value;

    if (!text) {
        alert('Please enter text to classify.');
        return Promise.resolve();
    }

    showLoading('resultsContainer');

    return fetch('/api/classification', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({
            text: text,
            scenario: scenario,
            multi_label: multiLabel,
            custom_labels: customLabels
        })
    })
    .then(response =>
        // An error page (e.g. an HTML 500) is not JSON; report the HTTP status
        // instead of surfacing a cryptic JSON parse error. JSON error bodies
        // still flow through so their `error` text can be shown below.
        response.json().catch(() => {
            throw new Error('server returned HTTP ' + response.status + ' with a non-JSON body');
        })
    )
    .then(data => {
        if (data.success) {
            displayResults(data.result);
        } else {
            showError(data.error || 'An error occurred while classifying text');
        }
    })
    .catch(error => {
        showError('Failed to classify text: ' + error.message);
    })
    .finally(() => {
        // Only clears the container if the spinner is still what it shows.
        hideLoading('resultsContainer');
    });
}
|
| 400 |
+
|
| 401 |
+
// Show loading state: replace the content of the element with id `elementId`
// by a spinner and a "Classifying text..." caption. Does nothing when no such
// element exists.
function showLoading(elementId) {
    const target = document.getElementById(elementId);
    if (!target) {
        return;
    }

    target.innerHTML = `
        <div class="text-center py-4">
            <div class="spinner-border text-primary" role="status">
                <span class="visually-hidden">Loading...</span>
            </div>
            <p class="mt-2">Classifying text...</p>
        </div>
    `;
}
|
| 415 |
+
|
| 416 |
+
// Hide loading state: empty the element with id `elementId`, but only while it
// still contains the spinner markup, so content rendered in the meantime is
// left in place. Does nothing when no such element exists.
function hideLoading(elementId) {
    const target = document.getElementById(elementId);
    if (!target) {
        return;
    }

    const stillLoading = target.innerHTML.includes('spinner-border');
    if (stillLoading) {
        target.innerHTML = '';
    }
}
|
| 423 |
+
|
| 424 |
+
// Show error message: render a Bootstrap danger alert containing `message`
// inside the element with id `elementId` (defaults to 'resultsContainer').
// Does nothing when no such element exists.
function showError(message, elementId = 'resultsContainer') {
    const element = document.getElementById(elementId);
    if (!element) {
        return;
    }

    // `message` can come from the server response or from an exception, so it
    // must be escaped before being interpolated into innerHTML; otherwise any
    // markup it contains would be parsed and executed by the browser.
    const htmlEntities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;'
    };
    const safeMessage = String(message).replace(/[&<>"']/g, (ch) => htmlEntities[ch]);

    element.innerHTML = `
        <div class="alert alert-danger fade-in">
            <i class="fas fa-exclamation-triangle"></i>
            <strong>Error:</strong> ${safeMessage}
        </div>
    `;
}
|
| 436 |
+
|
| 437 |
+
// Display results: inject the HTML fragment `result` into #resultsContainer,
// tag the container with the 'fade-in' class and smoothly scroll it into view.
// Does nothing when the container is not on the page.
function displayResults(result) {
    const resultsPanel = document.getElementById('resultsContainer');
    if (!resultsPanel) {
        return;
    }

    resultsPanel.innerHTML = result;
    resultsPanel.classList.add('fade-in');

    // Scroll to results
    const scrollOptions = { behavior: 'smooth', block: 'start' };
    resultsPanel.scrollIntoView(scrollOptions);
}
|
| 448 |
+
</script>
|
| 449 |
+
{% endblock %}
|
templates/index.html
ADDED
|
@@ -0,0 +1,322 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block title %}NLP Ultimate Tutorial - Home{% endblock %}
|
| 4 |
+
|
| 5 |
+
{% block content %}
|
| 6 |
+
<div class="container">
|
| 7 |
+
<!-- Header Section -->
|
| 8 |
+
<div class="row mb-5">
|
| 9 |
+
<div class="col-12 text-center">
|
| 10 |
+
<h1 class="display-4 mb-3">
|
| 11 |
+
<i class="fas fa-brain text-primary"></i>
|
| 12 |
+
Natural Language Processing Demo
|
| 13 |
+
</h1>
|
| 14 |
+
<p class="lead">Explore the capabilities of modern NLP models and techniques. Enter your text and select a task to analyze.</p>
|
| 15 |
+
|
| 16 |
+
<div class="alert alert-info">
|
| 17 |
+
<i class="fas fa-info-circle"></i>
|
| 18 |
+
Ultimate guide to all the NLP concepts - Designed and developed by <strong>Aradhya Pavan</strong>
|
| 19 |
+
</div>
|
| 20 |
+
</div>
|
| 21 |
+
</div>
|
| 22 |
+
|
| 23 |
+
<!-- Text Input Section -->
|
| 24 |
+
<div class="row mb-4">
|
| 25 |
+
<div class="col-12">
|
| 26 |
+
<div class="card">
|
| 27 |
+
<div class="card-header">
|
| 28 |
+
<h3 class="mb-0">
|
| 29 |
+
<i class="fas fa-keyboard"></i>
|
| 30 |
+
Enter your text:
|
| 31 |
+
</h3>
|
| 32 |
+
</div>
|
| 33 |
+
<div class="card-body">
|
| 34 |
+
<div class="row mb-3">
|
| 35 |
+
<div class="col-md-8">
|
| 36 |
+
<textarea id="textInput" class="form-control" rows="6" placeholder="Enter or paste your text here...">Climate change is the long-term alteration of temperature and typical weather patterns in a place. The cause of current climate change is largely human activity, like burning fossil fuels, which adds heat-trapping gases to Earth's atmosphere. The consequences of changing climate are already being felt worldwide.</textarea>
|
| 37 |
+
</div>
|
| 38 |
+
<div class="col-md-4">
|
| 39 |
+
<label for="sampleSelect" class="form-label">Or choose a sample:</label>
|
| 40 |
+
<select id="sampleSelect" class="form-select">
|
| 41 |
+
<option value="Custom">Custom</option>
|
| 42 |
+
{% for key, value in sample_texts.items() %}
|
| 43 |
+
<option value="{{ key }}" {% if key == 'Scientific Text' %}selected{% endif %}>{{ key }}</option>
|
| 44 |
+
{% endfor %}
|
| 45 |
+
</select>
|
| 46 |
+
</div>
|
| 47 |
+
</div>
|
| 48 |
+
|
| 49 |
+
<!-- Text Statistics -->
|
| 50 |
+
<div id="textStats" class="row mb-3" style="display: none;">
|
| 51 |
+
<div class="col-md-4">
|
| 52 |
+
<div class="card text-center">
|
| 53 |
+
<div class="card-body">
|
| 54 |
+
<h5 class="card-title">Characters</h5>
|
| 55 |
+
<h2 class="text-primary" id="charCount">0</h2>
|
| 56 |
+
</div>
|
| 57 |
+
</div>
|
| 58 |
+
</div>
|
| 59 |
+
<div class="col-md-4">
|
| 60 |
+
<div class="card text-center">
|
| 61 |
+
<div class="card-body">
|
| 62 |
+
<h5 class="card-title">Words</h5>
|
| 63 |
+
<h2 class="text-primary" id="wordCount">0</h2>
|
| 64 |
+
</div>
|
| 65 |
+
</div>
|
| 66 |
+
</div>
|
| 67 |
+
<div class="col-md-4">
|
| 68 |
+
<div class="card text-center">
|
| 69 |
+
<div class="card-body">
|
| 70 |
+
<h5 class="card-title">Sentences</h5>
|
| 71 |
+
<h2 class="text-primary" id="sentenceCount">0</h2>
|
| 72 |
+
</div>
|
| 73 |
+
</div>
|
| 74 |
+
</div>
|
| 75 |
+
</div>
|
| 76 |
+
|
| 77 |
+
<!-- Warning -->
|
| 78 |
+
<div id="warningBox" class="alert alert-warning" style="display: none;">
|
| 79 |
+
<i class="fas fa-exclamation-triangle"></i>
|
| 80 |
+
<strong>Warning:</strong> Text exceeds 500 words. Some models may truncate the input or perform slower.
|
| 81 |
+
</div>
|
| 82 |
+
</div>
|
| 83 |
+
</div>
|
| 84 |
+
</div>
|
| 85 |
+
</div>
|
| 86 |
+
|
| 87 |
+
<!-- Instructions -->
|
| 88 |
+
<div class="row mb-4">
|
| 89 |
+
<div class="col-12">
|
| 90 |
+
<div class="alert alert-warning">
|
| 91 |
+
<div class="d-flex align-items-start">
|
| 92 |
+
<i class="fas fa-exclamation-triangle fa-2x me-3"></i>
|
| 93 |
+
<div>
|
| 94 |
+
<h5 class="alert-heading">Important Instructions:</h5>
|
| 95 |
+
<ul class="mb-0">
|
| 96 |
+
<li>When you change the text, please reselect your analysis method and task to refresh the results</li>
|
| 97 |
+
<li>Please wait a moment while processing your task - this may take a few seconds</li>
|
| 98 |
+
<li>Scroll down to see all processed results for your text</li>
|
| 99 |
+
</ul>
|
| 100 |
+
<hr>
|
| 101 |
+
<small><i class="fas fa-clock"></i> Processing may take longer for larger texts</small>
|
| 102 |
+
</div>
|
| 103 |
+
</div>
|
| 104 |
+
</div>
|
| 105 |
+
</div>
|
| 106 |
+
</div>
|
| 107 |
+
|
| 108 |
+
<!-- Analysis Methods (Simplified, no tabs) -->
|
| 109 |
+
<div class="row mb-4">
|
| 110 |
+
<div class="col-12">
|
| 111 |
+
<div class="card">
|
| 112 |
+
<div class="card-header bg-primary text-white text-center">
|
| 113 |
+
<h2 class="mb-0">CHOOSE AN OPERATION</h2>
|
| 114 |
+
</div>
|
| 115 |
+
<div class="card-body">
|
| 116 |
+
<!-- Text Processing -->
|
| 117 |
+
<h4 class="mb-3"><i class="fas fa-edit me-2"></i>Text Processing</h4>
|
| 118 |
+
<div class="row mb-4">
|
| 119 |
+
<div class="col-md-3 mb-2">
|
| 120 |
+
<a href="{{ url_for('preprocessing') }}" class="btn btn-primary w-100">
|
| 121 |
+
<i class="fas fa-tools"></i> Text Preprocessing
|
| 122 |
+
</a>
|
| 123 |
+
</div>
|
| 124 |
+
<div class="col-md-3 mb-2">
|
| 125 |
+
<a href="{{ url_for('tokenization') }}" class="btn btn-primary w-100">
|
| 126 |
+
<i class="fas fa-cut"></i> Tokenization
|
| 127 |
+
</a>
|
| 128 |
+
</div>
|
| 129 |
+
<div class="col-md-3 mb-2">
|
| 130 |
+
<a href="{{ url_for('pos_tagging') }}" class="btn btn-primary w-100">
|
| 131 |
+
<i class="fas fa-tags"></i> POS Tagging
|
| 132 |
+
</a>
|
| 133 |
+
</div>
|
| 134 |
+
<div class="col-md-3 mb-2">
|
| 135 |
+
<a href="{{ url_for('named_entity') }}" class="btn btn-primary w-100">
|
| 136 |
+
<i class="fas fa-user-tag"></i> Named Entities
|
| 137 |
+
</a>
|
| 138 |
+
</div>
|
| 139 |
+
</div>
|
| 140 |
+
|
| 141 |
+
<!-- Analysis -->
|
| 142 |
+
<h4 class="mb-3"><i class="fas fa-chart-line me-2"></i>Analysis</h4>
|
| 143 |
+
<div class="row mb-4">
|
| 144 |
+
<div class="col-md-4 mb-2">
|
| 145 |
+
<a href="{{ url_for('sentiment') }}" class="btn btn-success w-100">
|
| 146 |
+
<i class="fas fa-smile"></i> Sentiment Analysis
|
| 147 |
+
</a>
|
| 148 |
+
</div>
|
| 149 |
+
<div class="col-md-4 mb-2">
|
| 150 |
+
<a href="{{ url_for('summarization') }}" class="btn btn-success w-100">
|
| 151 |
+
<i class="fas fa-compress"></i> Text Summarization
|
| 152 |
+
</a>
|
| 153 |
+
</div>
|
| 154 |
+
<div class="col-md-4 mb-2">
|
| 155 |
+
<a href="{{ url_for('topic_analysis') }}" class="btn btn-success w-100">
|
| 156 |
+
<i class="fas fa-project-diagram"></i> Topic Analysis
|
| 157 |
+
</a>
|
| 158 |
+
</div>
|
| 159 |
+
</div>
|
| 160 |
+
|
| 161 |
+
<!-- Advanced NLP -->
|
| 162 |
+
<h4 class="mb-3"><i class="fas fa-robot me-2"></i>Advanced NLP</h4>
|
| 163 |
+
<div class="row">
|
| 164 |
+
<div class="col-md-2 mb-2">
|
| 165 |
+
<a href="{{ url_for('question_answering') }}" class="btn btn-info w-100">
|
| 166 |
+
<i class="fas fa-question-circle"></i> QA
|
| 167 |
+
</a>
|
| 168 |
+
</div>
|
| 169 |
+
<div class="col-md-2 mb-2">
|
| 170 |
+
<a href="{{ url_for('text_generation') }}" class="btn btn-info w-100">
|
| 171 |
+
<i class="fas fa-magic"></i> Generation
|
| 172 |
+
</a>
|
| 173 |
+
</div>
|
| 174 |
+
<div class="col-md-2 mb-2">
|
| 175 |
+
<a href="{{ url_for('translation') }}" class="btn btn-info w-100">
|
| 176 |
+
<i class="fas fa-language"></i> Translation
|
| 177 |
+
</a>
|
| 178 |
+
</div>
|
| 179 |
+
<div class="col-md-2 mb-2">
|
| 180 |
+
<a href="{{ url_for('classification') }}" class="btn btn-info w-100">
|
| 181 |
+
<i class="fas fa-sitemap"></i> Classification
|
| 182 |
+
</a>
|
| 183 |
+
</div>
|
| 184 |
+
<div class="col-md-2 mb-2">
|
| 185 |
+
<a href="{{ url_for('vector_embeddings') }}" class="btn btn-info w-100">
|
| 186 |
+
<i class="fas fa-vector-square"></i> Embeddings
|
| 187 |
+
</a>
|
| 188 |
+
</div>
|
| 189 |
+
</div>
|
| 190 |
+
</div>
|
| 191 |
+
</div>
|
| 192 |
+
</div>
|
| 193 |
+
</div>
|
| 194 |
+
|
| 195 |
+
<!-- Results Section -->
|
| 196 |
+
<div class="row">
|
| 197 |
+
<div class="col-12">
|
| 198 |
+
<div class="card">
|
| 199 |
+
<div class="card-header">
|
| 200 |
+
<h3 class="mb-0">
|
| 201 |
+
<i class="fas fa-chart-bar"></i>
|
| 202 |
+
Results
|
| 203 |
+
</h3>
|
| 204 |
+
</div>
|
| 205 |
+
<div class="card-body">
|
| 206 |
+
<div id="resultsContainer">
|
| 207 |
+
<div class="text-center text-muted">
|
| 208 |
+
<i class="fas fa-arrow-up fa-2x mb-3"></i>
|
| 209 |
+
<p>Select an analysis method above to see results here</p>
|
| 210 |
+
</div>
|
| 211 |
+
</div>
|
| 212 |
+
</div>
|
| 213 |
+
</div>
|
| 214 |
+
</div>
|
| 215 |
+
</div>
|
| 216 |
+
</div>
|
| 217 |
+
{% endblock %}
|
| 218 |
+
|
| 219 |
+
{% block extra_scripts %}
|
| 220 |
+
<script>
|
| 221 |
+
// Provide SAMPLE_TEXTS inline to avoid network for dropdown updates
|
| 222 |
+
const SAMPLE_TEXTS = {{ sample_texts | tojson | safe }};
|
| 223 |
+
|
| 224 |
+
// Initialize with default text statistics
|
| 225 |
+
document.addEventListener('DOMContentLoaded', function() {
|
| 226 |
+
// Ensure carry flag is cleared on home to avoid unintended persistence
|
| 227 |
+
sessionStorage.removeItem('carryTextOnNextPage');
|
| 228 |
+
// If a sample (not Custom) is selected by default, load it into the textarea
|
| 229 |
+
const select = document.getElementById('sampleSelect');
|
| 230 |
+
const textInput = document.getElementById('textInput');
|
| 231 |
+
if (select && select.value !== 'Custom' && SAMPLE_TEXTS[select.value]) {
|
| 232 |
+
textInput.value = SAMPLE_TEXTS[select.value];
|
| 233 |
+
}
|
| 234 |
+
updateTextStats();
|
| 235 |
+
});
|
| 236 |
+
|
| 237 |
+
// Sample text dropdown handler (no fetch, instant update): "Custom" empties
// the textarea, any other choice loads the matching SAMPLE_TEXTS entry (or an
// empty string when there is none), then the statistics are refreshed.
const sampleSelectEl = document.getElementById('sampleSelect');
if (sampleSelectEl) {
    sampleSelectEl.addEventListener('change', function() {
        const chosen = this.value;
        const textArea = document.getElementById('textInput');
        textArea.value = chosen === 'Custom' ? '' : (SAMPLE_TEXTS[chosen] || '');
        updateTextStats();
    });
}
|
| 251 |
+
|
| 252 |
+
// Text input handler: refresh the statistics on every edit of the textarea.
const textAreaEl = document.getElementById('textInput');
if (textAreaEl) {
    textAreaEl.addEventListener('input', () => {
        updateTextStats();
    });
}
|
| 259 |
+
|
| 260 |
+
// Update text statistics: send the current textarea content to
// /api/text-stats and show the returned character, word and sentence counts.
// Blank text hides the statistics row and the length warning without
// contacting the server. The warning is shown when the reported word count
// exceeds 500. Failures are logged to the console and leave the display as-is.
function updateTextStats() {
    const textInput = document.getElementById('textInput');
    const statsRow = document.getElementById('textStats');
    const warningBox = document.getElementById('warningBox');
    const text = textInput.value;

    if (!text.trim()) {
        statsRow.style.display = 'none';
        warningBox.style.display = 'none';
        return;
    }

    fetch('/api/text-stats', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({text: text})
    })
    .then(response => {
        // Without this check an error body would be rendered as "undefined"
        // counts.
        if (!response.ok) {
            throw new Error('Text statistics request failed with HTTP ' + response.status);
        }
        return response.json();
    })
    .then(data => {
        // A request is sent on every edit and responses may arrive out of
        // order; drop any response that no longer matches what is in the box
        // (including the case where the text has since been cleared).
        if (textInput.value !== text) {
            return;
        }

        document.getElementById('charCount').textContent = data.chars;
        document.getElementById('wordCount').textContent = data.words;
        document.getElementById('sentenceCount').textContent = data.sentences;

        statsRow.style.display = 'flex';

        // Show warning if text is too long
        warningBox.style.display = data.words > 500 ? 'block' : 'none';
    })
    .catch(error => {
        console.error('Error:', error);
    });
}
|
| 296 |
+
|
| 297 |
+
// Save the current textarea content in sessionStorage under the key
// 'analysisText' so other pages can read it.
function storeTextForAnalysis() {
    const currentText = document.getElementById('textInput').value;
    sessionStorage.setItem('analysisText', currentText);
}
|
| 302 |
+
|
| 303 |
+
// Add click handlers to analysis buttons: every link whose resolved URL
// contains one of the analysis page paths stores the current text before the
// browser navigates away.
{
    const analysisPaths = [
        '/preprocessing',
        '/tokenization',
        '/pos-tagging',
        '/named-entity',
        '/sentiment',
        '/summarization',
        '/topic-analysis',
        '/question-answering',
        '/text-generation',
        '/translation',
        '/classification',
        '/vector-embeddings'
    ];

    document.querySelectorAll('a[href*="/"]').forEach(link => {
        const pointsToAnalysisPage = analysisPaths.some(path => link.href.includes(path));
        if (pointsToAnalysisPage) {
            link.addEventListener('click', storeTextForAnalysis);
        }
    });
}
|
| 321 |
+
</script>
|
| 322 |
+
{% endblock %}
|
templates/named_entity.html
ADDED
|
@@ -0,0 +1,362 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block title %}Named Entity Recognition - NLP Ultimate Tutorial{% endblock %}
|
| 4 |
+
|
| 5 |
+
{% block content %}
|
| 6 |
+
<div class="container">
|
| 7 |
+
<!-- Header Section -->
|
| 8 |
+
<div class="row mb-4">
|
| 9 |
+
<div class="col-12">
|
| 10 |
+
<div class="card">
|
| 11 |
+
<div class="card-header">
|
| 12 |
+
<h1 class="mb-0">
|
| 13 |
+
<i class="fas fa-user-tag"></i>
|
| 14 |
+
Named Entity Recognition
|
| 15 |
+
</h1>
|
| 16 |
+
</div>
|
| 17 |
+
<div class="card-body">
|
| 18 |
+
<p class="lead">Identify and classify key information in text such as people, organizations, locations, and more.</p>
|
| 19 |
+
|
| 20 |
+
<div class="alert alert-info">
|
| 21 |
+
<i class="fas fa-info-circle"></i>
|
| 22 |
+
<strong>About:</strong> Named Entity Recognition identifies and classifies key information in text into pre-defined categories such as person names, organizations, locations, etc.
|
| 23 |
+
</div>
|
| 24 |
+
</div>
|
| 25 |
+
</div>
|
| 26 |
+
</div>
|
| 27 |
+
</div>
|
| 28 |
+
|
| 29 |
+
{% include "_analysis_nav.html" %}
|
| 30 |
+
|
| 31 |
+
<!-- Text Input Section -->
|
| 32 |
+
<div class="row mb-4">
|
| 33 |
+
<div class="col-12">
|
| 34 |
+
<div class="card">
|
| 35 |
+
<div class="card-header">
|
| 36 |
+
<h3 class="mb-0">
|
| 37 |
+
<i class="fas fa-keyboard"></i>
|
| 38 |
+
Enter your text:
|
| 39 |
+
</h3>
|
| 40 |
+
</div>
|
| 41 |
+
<div class="card-body">
|
| 42 |
+
<div class="row mb-3">
|
| 43 |
+
<div class="col-md-8">
|
| 44 |
+
<textarea id="textInput" class="form-control" rows="6" placeholder="Enter or paste your text here...">Apple Inc. is planning to open a new campus in Austin, Texas next July. CEO Tim Cook announced the plan yesterday.</textarea>
|
| 45 |
+
</div>
|
| 46 |
+
<div class="col-md-4">
|
| 47 |
+
<label for="sampleSelect" class="form-label">Or choose a sample:</label>
|
| 48 |
+
<select id="sampleSelect" class="form-select">
|
| 49 |
+
<option value="Custom">Custom</option>
|
| 50 |
+
<option value="News Article">News Article</option>
|
| 51 |
+
<option value="Product Review">Product Review</option>
|
| 52 |
+
<option value="Scientific Text">Scientific Text</option>
|
| 53 |
+
<option value="Literary Text">Literary Text</option>
|
| 54 |
+
</select>
|
| 55 |
+
</div>
|
| 56 |
+
</div>
|
| 57 |
+
|
| 58 |
+
<div class="d-flex justify-content-between align-items-center">
|
| 59 |
+
<div>
|
| 60 |
+
<button id="processBtn" class="btn btn-primary btn-lg">
|
| 61 |
+
<i class="fas fa-user-tag"></i>
|
| 62 |
+
Identify Entities
|
| 63 |
+
</button>
|
| 64 |
+
</div>
|
| 65 |
+
<div>
|
| 66 |
+
<button id="clearBtn" class="btn btn-outline-secondary">
|
| 67 |
+
<i class="fas fa-trash"></i>
|
| 68 |
+
Clear
|
| 69 |
+
</button>
|
| 70 |
+
</div>
|
| 71 |
+
</div>
|
| 72 |
+
</div>
|
| 73 |
+
</div>
|
| 74 |
+
</div>
|
| 75 |
+
</div>
|
| 76 |
+
|
| 77 |
+
<!-- Entity Types Info -->
|
| 78 |
+
<div class="row mb-4">
|
| 79 |
+
<div class="col-12">
|
| 80 |
+
<div class="card">
|
| 81 |
+
<div class="card-header">
|
| 82 |
+
<h3 class="mb-0">
|
| 83 |
+
<i class="fas fa-info-circle"></i>
|
| 84 |
+
Entity Types Detected
|
| 85 |
+
</h3>
|
| 86 |
+
</div>
|
| 87 |
+
<div class="card-body">
|
| 88 |
+
<div class="row">
|
| 89 |
+
<div class="col-md-3">
|
| 90 |
+
<div class="card h-100">
|
| 91 |
+
<div class="card-body text-center">
|
| 92 |
+
<i class="fas fa-user fa-2x text-danger mb-2"></i>
|
| 93 |
+
<h5>PERSON</h5>
|
| 94 |
+
<p class="small">People, including fictional characters</p>
|
| 95 |
+
</div>
|
| 96 |
+
</div>
|
| 97 |
+
</div>
|
| 98 |
+
<div class="col-md-3">
|
| 99 |
+
<div class="card h-100">
|
| 100 |
+
<div class="card-body text-center">
|
| 101 |
+
<i class="fas fa-building fa-2x text-success mb-2"></i>
|
| 102 |
+
<h5>ORG</h5>
|
| 103 |
+
<p class="small">Organizations, companies, institutions</p>
|
| 104 |
+
</div>
|
| 105 |
+
</div>
|
| 106 |
+
</div>
|
| 107 |
+
<div class="col-md-3">
|
| 108 |
+
<div class="card h-100">
|
| 109 |
+
<div class="card-body text-center">
|
| 110 |
+
<i class="fas fa-map-marker-alt fa-2x text-primary mb-2"></i>
|
| 111 |
+
<h5>GPE</h5>
|
| 112 |
+
<p class="small">Countries, cities, states</p>
|
| 113 |
+
</div>
|
| 114 |
+
</div>
|
| 115 |
+
</div>
|
| 116 |
+
<div class="col-md-3">
|
| 117 |
+
<div class="card h-100">
|
| 118 |
+
<div class="card-body text-center">
|
| 119 |
+
<i class="fas fa-calendar fa-2x text-warning mb-2"></i>
|
| 120 |
+
<h5>DATE</h5>
|
| 121 |
+
<p class="small">Absolute or relative dates</p>
|
| 122 |
+
</div>
|
| 123 |
+
</div>
|
| 124 |
+
</div>
|
| 125 |
+
</div>
|
| 126 |
+
|
| 127 |
+
<div class="row mt-3">
|
| 128 |
+
<div class="col-md-3">
|
| 129 |
+
<div class="card h-100">
|
| 130 |
+
<div class="card-body text-center">
|
| 131 |
+
<i class="fas fa-dollar-sign fa-2x text-info mb-2"></i>
|
| 132 |
+
<h5>MONEY</h5>
|
| 133 |
+
<p class="small">Monetary values</p>
|
| 134 |
+
</div>
|
| 135 |
+
</div>
|
| 136 |
+
</div>
|
| 137 |
+
<div class="col-md-3">
|
| 138 |
+
<div class="card h-100">
|
| 139 |
+
<div class="card-body text-center">
|
| 140 |
+
<i class="fas fa-percentage fa-2x text-secondary mb-2"></i>
|
| 141 |
+
<h5>PERCENT</h5>
|
| 142 |
+
<p class="small">Percentage values</p>
|
| 143 |
+
</div>
|
| 144 |
+
</div>
|
| 145 |
+
</div>
|
| 146 |
+
<div class="col-md-3">
|
| 147 |
+
<div class="card h-100">
|
| 148 |
+
<div class="card-body text-center">
|
| 149 |
+
<i class="fas fa-cube fa-2x text-purple mb-2"></i>
|
| 150 |
+
<h5>PRODUCT</h5>
|
| 151 |
+
<p class="small">Products, objects, vehicles</p>
|
| 152 |
+
</div>
|
| 153 |
+
</div>
|
| 154 |
+
</div>
|
| 155 |
+
<div class="col-md-3">
|
| 156 |
+
<div class="card h-100">
|
| 157 |
+
<div class="card-body text-center">
|
| 158 |
+
<i class="fas fa-users fa-2x text-dark mb-2"></i>
|
| 159 |
+
<h5>NORP</h5>
|
| 160 |
+
<p class="small">Nationalities, religious groups</p>
|
| 161 |
+
</div>
|
| 162 |
+
</div>
|
| 163 |
+
</div>
|
| 164 |
+
</div>
|
| 165 |
+
</div>
|
| 166 |
+
</div>
|
| 167 |
+
</div>
|
| 168 |
+
</div>
|
| 169 |
+
|
| 170 |
+
<!-- Results Section -->
|
| 171 |
+
<div class="row">
|
| 172 |
+
<div class="col-12">
|
| 173 |
+
<div class="card">
|
| 174 |
+
<div class="card-header">
|
| 175 |
+
<h3 class="mb-0">
|
| 176 |
+
<i class="fas fa-chart-bar"></i>
|
| 177 |
+
Entity Recognition Results
|
| 178 |
+
</h3>
|
| 179 |
+
</div>
|
| 180 |
+
<div class="card-body">
|
| 181 |
+
<div id="resultsContainer">
|
| 182 |
+
<div class="text-center text-muted py-5">
|
| 183 |
+
<i class="fas fa-arrow-up fa-2x mb-3"></i>
|
| 184 |
+
<p>Click "Identify Entities" to see named entity recognition results</p>
|
| 185 |
+
</div>
|
| 186 |
+
</div>
|
| 187 |
+
</div>
|
| 188 |
+
</div>
|
| 189 |
+
</div>
|
| 190 |
+
</div>
|
| 191 |
+
</div>
|
| 192 |
+
{% endblock %}
|
| 193 |
+
|
| 194 |
+
{% block extra_scripts %}
|
| 195 |
+
<script>
|
| 196 |
+
// Initialize page
|
| 197 |
+
// Wire up the Named Entity Recognition page once the DOM is ready:
// optional text carry-over from the previous page, the sample dropdown,
// the process/clear buttons and the keyboard shortcuts.
document.addEventListener('DOMContentLoaded', function() {
    const textInput = document.getElementById('textInput');
    const sampleSelect = document.getElementById('sampleSelect');
    const processBtn = document.getElementById('processBtn');
    const clearBtn = document.getElementById('clearBtn');

    // Only carry text over when the previous page asked for it (Quick Nav);
    // otherwise leave the page defaults alone.
    if (sessionStorage.getItem('carryTextOnNextPage') === '1') {
        if (sampleSelect) sampleSelect.value = 'Custom';
        const storedText = sessionStorage.getItem('analysisText');
        if (storedText) textInput.value = storedText;
        // One-shot flag: consume it so a plain reload does not carry again.
        sessionStorage.removeItem('carryTextOnNextPage');
    }

    // Sample text dropdown handler
    sampleSelect.addEventListener('change', function() {
        const sampleType = sampleSelect.value;

        if (sampleType === 'Custom') {
            textInput.value = '';
            return;
        }

        // Get sample text from server
        fetch('/api/sample-text', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({sample_type: sampleType})
        })
        .then(response => response.json())
        .then(data => {
            // Drop a response that arrives after the user has already picked
            // a different entry, so it cannot overwrite their current text.
            if (sampleSelect.value !== sampleType) return;
            // Only accept a real string; assigning undefined would put the
            // literal text "undefined" into the textarea.
            if (data && typeof data.text === 'string') {
                textInput.value = data.text;
            }
        })
        .catch(error => {
            // Network or JSON failure: keep the current text and log it.
            console.error('Failed to load sample text:', error);
        });
    });

    // Process button handler
    processBtn.addEventListener('click', function() {
        const text = textInput.value.trim();

        if (!text) {
            alert('Please enter some text to analyze.');
            return;
        }

        // Show loading state
        processBtn.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Processing...';
        processBtn.disabled = true;

        const restoreButton = () => {
            processBtn.innerHTML = '<i class="fas fa-user-tag"></i> Identify Entities';
            processBtn.disabled = false;
        };

        // If the processing call hands back a promise, restore the button
        // when the request actually settles; otherwise fall back to the
        // fixed 2 second delay.
        const pending = processNamedEntities();
        if (pending && typeof pending.then === 'function') {
            pending.then(restoreButton, restoreButton);
        } else {
            setTimeout(restoreButton, 2000);
        }
    });

    // Clear button handler: empty the input and restore the placeholder.
    clearBtn.addEventListener('click', function() {
        textInput.value = '';
        document.getElementById('resultsContainer').innerHTML = `
            <div class="text-center text-muted py-5">
                <i class="fas fa-arrow-up fa-2x mb-3"></i>
                <p>Click "Identify Entities" to see named entity recognition results</p>
            </div>
        `;
    });

    // Keyboard shortcuts
    document.addEventListener('keydown', function(e) {
        // Ctrl+Enter to process
        if (e.ctrlKey && e.key === 'Enter') {
            processBtn.click();
        }

        // Ctrl+L to clear (preventDefault stops the browser's own Ctrl+L)
        if (e.ctrlKey && e.key === 'l') {
            e.preventDefault();
            clearBtn.click();
        }
    });
});
|
| 279 |
+
|
| 280 |
+
// Process named entity recognition
|
| 281 |
+
/**
 * Send the textarea content to /api/named-entity and render the outcome.
 *
 * Shows the loading spinner in #resultsContainer, POSTs the trimmed text as
 * JSON, then either renders `data.result` or shows an error message.
 *
 * @returns {Promise<void>} Settles once the request has been handled, so a
 *     caller can wait for completion. Never rejects: failures are reported
 *     through showError.
 */
function processNamedEntities() {
    const text = document.getElementById('textInput').value.trim();

    if (!text) {
        alert('Please enter some text to analyze.');
        return Promise.resolve();
    }

    showLoading('resultsContainer');

    return fetch('/api/named-entity', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({text: text})
    })
    .then(response => response.json().catch(() => {
        // A non-JSON body (e.g. an HTML error page) would otherwise surface
        // as a cryptic parser message; report the HTTP status instead.
        throw new Error('Server returned an invalid response (HTTP ' + response.status + ')');
    }))
    .then(data => {
        if (data && data.success) {
            displayResults(data.result);
        } else {
            showError((data && data.error) || 'An error occurred while processing the text');
        }
    })
    .catch(error => {
        showError('Failed to process text: ' + error.message);
    })
    .finally(() => {
        // Only removes the spinner if it is still showing.
        hideLoading('resultsContainer');
    });
}
|
| 313 |
+
|
| 314 |
+
// Show loading state
|
| 315 |
+
/**
 * Replace the content of the given element with a loading spinner.
 *
 * @param {string} elementId - id of the element to fill; nothing happens
 *     when no element with that id exists.
 */
function showLoading(elementId) {
    const target = document.getElementById(elementId);
    if (!target) {
        return;
    }

    const spinnerMarkup = `
<div class="text-center py-4">
<div class="spinner-border text-primary" role="status">
<span class="visually-hidden">Loading...</span>
</div>
<p class="mt-2">Identifying entities...</p>
</div>
`;
    target.innerHTML = spinnerMarkup;
}
|
| 328 |
+
|
| 329 |
+
// Hide loading state
|
| 330 |
+
/**
 * Remove the loading spinner from the given element, if it is still shown.
 *
 * The element is emptied only when it contains an actual element with the
 * `spinner-border` class. A plain substring test on the markup would also
 * wipe rendered results whose text merely mentions "spinner-border".
 *
 * @param {string} elementId - id of the element to clean up; nothing
 *     happens when no element with that id exists.
 */
function hideLoading(elementId) {
    const element = document.getElementById(elementId);
    if (element && element.querySelector('.spinner-border')) {
        element.innerHTML = '';
    }
}
|
| 336 |
+
|
| 337 |
+
// Show error message
|
| 338 |
+
/**
 * Render an error alert inside the given element.
 *
 * The message is HTML-escaped before insertion: it can contain server error
 * text or exception messages, and must not be interpreted as markup.
 *
 * @param {*} message - Text to show; converted to a string.
 * @param {string} [elementId='resultsContainer'] - id of the element that
 *     receives the alert; nothing happens when it does not exist.
 */
function showError(message, elementId = 'resultsContainer') {
    const element = document.getElementById(elementId);
    if (!element) {
        return;
    }

    const escapes = {'&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'};
    const safeMessage = String(message).replace(/[&<>"']/g, ch => escapes[ch]);

    element.innerHTML = `
        <div class="alert alert-danger fade-in">
            <i class="fas fa-exclamation-triangle"></i>
            <strong>Error:</strong> ${safeMessage}
        </div>
    `;
}
|
| 349 |
+
|
| 350 |
+
// Display results
|
| 351 |
+
/**
 * Show an analysis result in #resultsContainer and scroll it into view.
 *
 * @param {string} result - Markup to display. It is assigned to innerHTML
 *     as-is, so it is interpreted as HTML.
 */
function displayResults(result) {
    const target = document.getElementById('resultsContainer');
    if (!target) {
        return;
    }

    target.innerHTML = result;
    target.classList.add('fade-in');

    // Bring the freshly rendered results to the top of the viewport.
    target.scrollIntoView({ behavior: 'smooth', block: 'start' });
}
|
| 361 |
+
</script>
|
| 362 |
+
{% endblock %}
|
templates/pos_tagging.html
ADDED
|
@@ -0,0 +1,397 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block title %}Part-of-Speech Tagging - NLP Ultimate Tutorial{% endblock %}
|
| 4 |
+
|
| 5 |
+
{% block content %}
|
| 6 |
+
<div class="container">
|
| 7 |
+
<!-- Header Section -->
|
| 8 |
+
<div class="row mb-4">
|
| 9 |
+
<div class="col-12">
|
| 10 |
+
<div class="card">
|
| 11 |
+
<div class="card-header">
|
| 12 |
+
<h1 class="mb-0">
|
| 13 |
+
<i class="fas fa-tags"></i>
|
| 14 |
+
Part-of-Speech Tagging
|
| 15 |
+
</h1>
|
| 16 |
+
</div>
|
| 17 |
+
<div class="card-body">
|
| 18 |
+
<p class="lead">Analyze the grammatical structure of text by identifying parts of speech for each word.</p>
|
| 19 |
+
|
| 20 |
+
<div class="alert alert-info">
|
| 21 |
+
<i class="fas fa-info-circle"></i>
|
| 22 |
+
<strong>About:</strong> Part-of-Speech (POS) tagging is the process of marking up words in text according to their grammatical categories such as noun, verb, adjective, etc.
|
| 23 |
+
</div>
|
| 24 |
+
</div>
|
| 25 |
+
</div>
|
| 26 |
+
</div>
|
| 27 |
+
</div>
|
| 28 |
+
|
| 29 |
+
{% include "_analysis_nav.html" %}
|
| 30 |
+
|
| 31 |
+
<!-- Text Input Section -->
|
| 32 |
+
<div class="row mb-4">
|
| 33 |
+
<div class="col-12">
|
| 34 |
+
<div class="card">
|
| 35 |
+
<div class="card-header">
|
| 36 |
+
<h3 class="mb-0">
|
| 37 |
+
<i class="fas fa-keyboard"></i>
|
| 38 |
+
Enter your text:
|
| 39 |
+
</h3>
|
| 40 |
+
</div>
|
| 41 |
+
<div class="card-body">
|
| 42 |
+
<div class="row mb-3">
|
| 43 |
+
<div class="col-md-8">
|
| 44 |
+
<textarea id="textInput" class="form-control" rows="6" placeholder="Enter or paste your text here...">The quick brown fox jumps over the lazy dog.</textarea>
|
| 45 |
+
</div>
|
| 46 |
+
<div class="col-md-4">
|
| 47 |
+
<label for="sampleSelect" class="form-label">Or choose a sample:</label>
|
| 48 |
+
<select id="sampleSelect" class="form-select">
|
| 49 |
+
<option value="Custom">Custom</option>
|
| 50 |
+
<option value="News Article">News Article</option>
|
| 51 |
+
<option value="Product Review">Product Review</option>
|
| 52 |
+
<option value="Scientific Text">Scientific Text</option>
|
| 53 |
+
<option value="Literary Text">Literary Text</option>
|
| 54 |
+
</select>
|
| 55 |
+
</div>
|
| 56 |
+
</div>
|
| 57 |
+
|
| 58 |
+
<div class="d-flex justify-content-between align-items-center">
|
| 59 |
+
<div>
|
| 60 |
+
<button id="processBtn" class="btn btn-primary btn-lg">
|
| 61 |
+
<i class="fas fa-tags"></i>
|
| 62 |
+
Analyze POS Tags
|
| 63 |
+
</button>
|
| 64 |
+
</div>
|
| 65 |
+
<div>
|
| 66 |
+
<button id="clearBtn" class="btn btn-outline-secondary">
|
| 67 |
+
<i class="fas fa-trash"></i>
|
| 68 |
+
Clear
|
| 69 |
+
</button>
|
| 70 |
+
</div>
|
| 71 |
+
</div>
|
| 72 |
+
</div>
|
| 73 |
+
</div>
|
| 74 |
+
</div>
|
| 75 |
+
</div>
|
| 76 |
+
|
| 77 |
+
<!-- POS Tagging Methods Info -->
|
| 78 |
+
<div class="row mb-4">
|
| 79 |
+
<div class="col-12">
|
| 80 |
+
<div class="card">
|
| 81 |
+
<div class="card-header">
|
| 82 |
+
<h3 class="mb-0">
|
| 83 |
+
<i class="fas fa-info-circle"></i>
|
| 84 |
+
POS Tagging Methods
|
| 85 |
+
</h3>
|
| 86 |
+
</div>
|
| 87 |
+
<div class="card-body">
|
| 88 |
+
<div class="row g-4">
|
| 89 |
+
<div class="col-md-6">
|
| 90 |
+
<div class="card h-100 border-primary shadow-sm">
|
| 91 |
+
<div class="card-header bg-primary text-white text-center">
|
| 92 |
+
<i class="fas fa-brain fa-3x mb-2"></i>
|
| 93 |
+
<h4 class="mb-0">NLTK (Penn Treebank)</h4>
|
| 94 |
+
</div>
|
| 95 |
+
<div class="card-body">
|
| 96 |
+
<p class="text-center mb-3">Uses the Perceptron tagger trained on the Penn Treebank corpus with detailed grammatical categories.</p>
|
| 97 |
+
<ul class="list-group list-group-flush">
|
| 98 |
+
<li class="list-group-item d-flex align-items-center">
|
| 99 |
+
<i class="fas fa-tags text-primary me-2"></i>
|
| 100 |
+
<strong>36+ detailed tags</strong>
|
| 101 |
+
</li>
|
| 102 |
+
<li class="list-group-item d-flex align-items-center">
|
| 103 |
+
<i class="fas fa-flag-usa text-primary me-2"></i>
|
| 104 |
+
<strong>English-specific</strong>
|
| 105 |
+
</li>
|
| 106 |
+
<li class="list-group-item d-flex align-items-center">
|
| 107 |
+
<i class="fas fa-book text-primary me-2"></i>
|
| 108 |
+
<strong>Traditional NLP approach</strong>
|
| 109 |
+
</li>
|
| 110 |
+
</ul>
|
| 111 |
+
</div>
|
| 112 |
+
</div>
|
| 113 |
+
</div>
|
| 114 |
+
<div class="col-md-6">
|
| 115 |
+
<div class="card h-100 border-success shadow-sm">
|
| 116 |
+
<div class="card-header bg-success text-white text-center">
|
| 117 |
+
<i class="fas fa-globe fa-3x mb-2"></i>
|
| 118 |
+
<h4 class="mb-0">spaCy (Universal)</h4>
|
| 119 |
+
</div>
|
| 120 |
+
<div class="card-body">
|
| 121 |
+
<p class="text-center mb-3">Uses the en_core_web_sm model with Universal POS tags for cross-linguistic consistency.</p>
|
| 122 |
+
<ul class="list-group list-group-flush">
|
| 123 |
+
<li class="list-group-item d-flex align-items-center">
|
| 124 |
+
<i class="fas fa-tags text-success me-2"></i>
|
| 125 |
+
<strong>17 universal tags</strong>
|
| 126 |
+
</li>
|
| 127 |
+
<li class="list-group-item d-flex align-items-center">
|
| 128 |
+
<i class="fas fa-globe text-success me-2"></i>
|
| 129 |
+
<strong>Cross-linguistic</strong>
|
| 130 |
+
</li>
|
| 131 |
+
<li class="list-group-item d-flex align-items-center">
|
| 132 |
+
<i class="fas fa-rocket text-success me-2"></i>
|
| 133 |
+
<strong>Modern NLP approach</strong>
|
| 134 |
+
</li>
|
| 135 |
+
</ul>
|
| 136 |
+
</div>
|
| 137 |
+
</div>
|
| 138 |
+
</div>
|
| 139 |
+
</div>
|
| 140 |
+
</div>
|
| 141 |
+
</div>
|
| 142 |
+
</div>
|
| 143 |
+
</div>
|
| 144 |
+
|
| 145 |
+
<!-- Results Section -->
|
| 146 |
+
<div class="row">
|
| 147 |
+
<div class="col-12">
|
| 148 |
+
<div class="card">
|
| 149 |
+
<div class="card-header">
|
| 150 |
+
<h3 class="mb-0">
|
| 151 |
+
<i class="fas fa-chart-bar"></i>
|
| 152 |
+
POS Tagging Results
|
| 153 |
+
</h3>
|
| 154 |
+
</div>
|
| 155 |
+
<div class="card-body">
|
| 156 |
+
<div id="resultsContainer">
|
| 157 |
+
<div class="text-center text-muted py-5">
|
| 158 |
+
<i class="fas fa-arrow-up fa-2x mb-3"></i>
|
| 159 |
+
<p>Click "Analyze POS Tags" to see grammatical analysis results</p>
|
| 160 |
+
</div>
|
| 161 |
+
</div>
|
| 162 |
+
</div>
|
| 163 |
+
</div>
|
| 164 |
+
</div>
|
| 165 |
+
</div>
|
| 166 |
+
</div>
|
| 167 |
+
{% endblock %}
|
| 168 |
+
|
| 169 |
+
{% block extra_scripts %}
|
| 170 |
+
<script>
|
| 171 |
+
// Initialize page
|
| 172 |
+
// Wire up the POS tagging page once the DOM is ready: text carry-over,
// a per-route backup of the user's custom text in sessionStorage, the
// sample dropdown, the process/clear buttons and the keyboard shortcuts.
document.addEventListener('DOMContentLoaded', function() {
    const textInput = document.getElementById('textInput');
    const sampleSel = document.getElementById('sampleSelect');
    const processBtn = document.getElementById('processBtn');
    const clearBtn = document.getElementById('clearBtn');
    // The backup key is scoped to this page's path.
    const routeKey = 'customTextBackup:' + window.location.pathname;
    const shouldCarry = sessionStorage.getItem('carryTextOnNextPage') === '1';

    if (shouldCarry) {
        // When carrying text via Quick Nav, use the carried text and set to Custom
        const carriedText = sessionStorage.getItem('analysisText') || '';
        if (sampleSel) sampleSel.value = 'Custom';
        if (textInput) textInput.value = carriedText;
        // Save as this route's custom backup
        sessionStorage.setItem(routeKey, carriedText);
        sessionStorage.removeItem('carryTextOnNextPage');
    } else {
        // Normal page load - restore any existing custom backup for this route
        const routeBackup = sessionStorage.getItem(routeKey);
        if (routeBackup !== null && sampleSel && sampleSel.value === 'Custom') {
            if (textInput) textInput.value = routeBackup;
        }
    }

    // Save custom input changes to route-specific backup
    if (textInput) {
        textInput.addEventListener('input', function() {
            if (sampleSel && sampleSel.value === 'Custom') {
                sessionStorage.setItem(routeKey, textInput.value);
            }
        });
    }

    // Seed the "previous selection" tracker. Without this the very first
    // switch away from Custom sees an undefined previous value and skips
    // saving the custom text.
    sampleSel.previousValue = sampleSel.value;

    // Sample text dropdown handler
    sampleSel.addEventListener('change', function() {
        const sampleType = sampleSel.value;

        if (sampleType === 'Custom') {
            // Restore custom backup for this route
            textInput.value = sessionStorage.getItem(routeKey) || '';
        } else {
            // Before switching to sample, save current custom input
            if (sampleSel.previousValue === 'Custom' && textInput.value.trim()) {
                sessionStorage.setItem(routeKey, textInput.value);
            }

            // Get sample text from server
            fetch('/api/sample-text', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                },
                body: JSON.stringify({sample_type: sampleType})
            })
            .then(response => response.json())
            .then(data => {
                // Drop a response that arrives after the user has already
                // picked a different entry (e.g. switched back to Custom).
                if (sampleSel.value !== sampleType) return;
                // Only accept a real string; assigning undefined would put
                // the literal text "undefined" into the textarea.
                if (data && typeof data.text === 'string') {
                    textInput.value = data.text;
                }
            })
            .catch(error => {
                // Network or JSON failure: keep the current text and log it.
                console.error('Failed to load sample text:', error);
            });
        }

        // Remember previous value for next change
        sampleSel.previousValue = sampleType;
    });

    // Process button handler
    processBtn.addEventListener('click', function() {
        const text = textInput.value.trim();

        if (!text) {
            alert('Please enter some text to analyze.');
            return;
        }

        // Show loading state
        processBtn.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Processing...';
        processBtn.disabled = true;

        const restoreButton = () => {
            processBtn.innerHTML = '<i class="fas fa-tags"></i> Analyze POS Tags';
            processBtn.disabled = false;
        };

        // If the processing call hands back a promise, restore the button
        // when the request actually settles; otherwise fall back to the
        // fixed 2 second delay.
        const pending = processPOSTagging();
        if (pending && typeof pending.then === 'function') {
            pending.then(restoreButton, restoreButton);
        } else {
            setTimeout(restoreButton, 2000);
        }
    });

    // Clear button handler
    clearBtn.addEventListener('click', function() {
        textInput.value = '';
        // Setting .value in script fires no 'input' event, so clear the
        // backup explicitly; otherwise the cleared custom text reappears on
        // the next reload or when switching back to Custom.
        if (sampleSel.value === 'Custom') {
            sessionStorage.setItem(routeKey, '');
        }
        document.getElementById('resultsContainer').innerHTML = `
            <div class="text-center text-muted py-5">
                <i class="fas fa-arrow-up fa-2x mb-3"></i>
                <p>Click "Analyze POS Tags" to see grammatical analysis results</p>
            </div>
        `;
    });

    // Keyboard shortcuts
    document.addEventListener('keydown', function(e) {
        // Ctrl+Enter to process
        if (e.ctrlKey && e.key === 'Enter') {
            processBtn.click();
        }

        // Ctrl+L to clear (preventDefault stops the browser's own Ctrl+L)
        if (e.ctrlKey && e.key === 'l') {
            e.preventDefault();
            clearBtn.click();
        }
    });
});
|
| 285 |
+
|
| 286 |
+
// Process POS tagging
|
| 287 |
+
/**
 * Send the textarea content to /api/pos-tagging and render the outcome.
 *
 * Shows the loading spinner in #resultsContainer, POSTs the trimmed text as
 * JSON, then either renders `data.result` or shows an error message.
 *
 * @returns {Promise<void>} Settles once the request has been handled, so a
 *     caller can wait for completion. Never rejects: failures are reported
 *     through showError.
 */
function processPOSTagging() {
    const text = document.getElementById('textInput').value.trim();

    if (!text) {
        alert('Please enter some text to analyze.');
        return Promise.resolve();
    }

    showLoading('resultsContainer');

    return fetch('/api/pos-tagging', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({text: text})
    })
    .then(response => response.json().catch(() => {
        // A non-JSON body (e.g. an HTML error page) would otherwise surface
        // as a cryptic parser message; report the HTTP status instead.
        throw new Error('Server returned an invalid response (HTTP ' + response.status + ')');
    }))
    .then(data => {
        if (data && data.success) {
            displayResults(data.result);
        } else {
            showError((data && data.error) || 'An error occurred while processing the text');
        }
    })
    .catch(error => {
        showError('Failed to process text: ' + error.message);
    })
    .finally(() => {
        // Only removes the spinner if it is still showing.
        hideLoading('resultsContainer');
    });
}
|
| 319 |
+
|
| 320 |
+
// Show loading state
|
| 321 |
+
/**
 * Replace the content of the given element with a loading spinner.
 *
 * @param {string} elementId - id of the element to fill; nothing happens
 *     when no element with that id exists.
 */
function showLoading(elementId) {
    const target = document.getElementById(elementId);
    if (!target) {
        return;
    }

    const spinnerMarkup = `
<div class="text-center py-4">
<div class="spinner-border text-primary" role="status">
<span class="visually-hidden">Loading...</span>
</div>
<p class="mt-2">Analyzing grammatical structure...</p>
</div>
`;
    target.innerHTML = spinnerMarkup;
}
|
| 334 |
+
|
| 335 |
+
// Hide loading state
|
| 336 |
+
/**
 * Remove the loading spinner from the given element, if it is still shown.
 *
 * The element is emptied only when it contains an actual element with the
 * `spinner-border` class. A plain substring test on the markup would also
 * wipe rendered results whose text merely mentions "spinner-border".
 *
 * @param {string} elementId - id of the element to clean up; nothing
 *     happens when no element with that id exists.
 */
function hideLoading(elementId) {
    const element = document.getElementById(elementId);
    if (element && element.querySelector('.spinner-border')) {
        element.innerHTML = '';
    }
}
|
| 342 |
+
|
| 343 |
+
// Show error message
|
| 344 |
+
/**
 * Render an error alert inside the given element.
 *
 * The message is HTML-escaped before insertion: it can contain server error
 * text or exception messages, and must not be interpreted as markup.
 *
 * @param {*} message - Text to show; converted to a string.
 * @param {string} [elementId='resultsContainer'] - id of the element that
 *     receives the alert; nothing happens when it does not exist.
 */
function showError(message, elementId = 'resultsContainer') {
    const element = document.getElementById(elementId);
    if (!element) {
        return;
    }

    const escapes = {'&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'};
    const safeMessage = String(message).replace(/[&<>"']/g, ch => escapes[ch]);

    element.innerHTML = `
        <div class="alert alert-danger fade-in">
            <i class="fas fa-exclamation-triangle"></i>
            <strong>Error:</strong> ${safeMessage}
        </div>
    `;
}
|
| 355 |
+
|
| 356 |
+
// Display results
|
| 357 |
+
/**
 * Show an analysis result in #resultsContainer and scroll it into view.
 *
 * @param {string} result - Markup to display. It is assigned to innerHTML
 *     as-is, so it is interpreted as HTML.
 */
function displayResults(result) {
    const target = document.getElementById('resultsContainer');
    if (!target) {
        return;
    }

    target.innerHTML = result;
    target.classList.add('fade-in');

    // Bring the freshly rendered results to the top of the viewport.
    target.scrollIntoView({ behavior: 'smooth', block: 'start' });
}
|
| 367 |
+
|
| 368 |
+
// Function to switch between POS tag types
|
| 369 |
+
/**
 * Switch the POS tag legend to one tag set.
 *
 * Hides every `.pos-tags-section`, resets every button under
 * `.pos-legend-buttons` to the outline style, then shows the section with
 * id `<type>-tags` and highlights the button with id `<type>-btn`.
 *
 * @param {string} type - Prefix of the section and button ids to activate.
 *     If no matching section or button exists, that part is skipped.
 */
function showPOSTags(type) {
    // Start from a clean slate: nothing visible, nothing highlighted.
    for (const panel of document.querySelectorAll('.pos-tags-section')) {
        panel.style.display = 'none';
    }

    for (const button of document.querySelectorAll('.pos-legend-buttons .btn')) {
        button.classList.remove('btn-primary', 'active');
        button.classList.add('btn-outline-primary');
    }

    // Reveal the requested section.
    const chosenPanel = document.getElementById(`${type}-tags`);
    if (chosenPanel) {
        chosenPanel.style.display = 'block';
    }

    // Highlight the matching button.
    const chosenButton = document.getElementById(`${type}-btn`);
    if (chosenButton) {
        chosenButton.classList.remove('btn-outline-primary');
        chosenButton.classList.add('btn-primary', 'active');
    }
}
|
| 396 |
+
</script>
|
| 397 |
+
{% endblock %}
|
templates/preprocessing.html
ADDED
|
@@ -0,0 +1,269 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block title %}Text Preprocessing - NLP Ultimate Tutorial{% endblock %}
|
| 4 |
+
|
| 5 |
+
{% block content %}
|
| 6 |
+
<div class="container">
|
| 7 |
+
<!-- Header Section -->
|
| 8 |
+
<div class="row mb-4">
|
| 9 |
+
<div class="col-12">
|
| 10 |
+
<div class="card">
|
| 11 |
+
<div class="card-header">
|
| 12 |
+
<h1 class="mb-0">
|
| 13 |
+
<i class="fas fa-tools"></i>
|
| 14 |
+
Text Preprocessing
|
| 15 |
+
</h1>
|
| 16 |
+
</div>
|
| 17 |
+
<div class="card-body">
|
| 18 |
+
<p class="lead">Clean and transform raw text into a format suitable for NLP analysis.</p>
|
| 19 |
+
|
| 20 |
+
<div class="alert alert-info">
|
| 21 |
+
<i class="fas fa-info-circle"></i>
|
| 22 |
+
<strong>About:</strong> Text preprocessing is the first step in NLP pipelines that transforms raw text into a clean, structured format suitable for analysis.
|
| 23 |
+
</div>
|
| 24 |
+
</div>
|
| 25 |
+
</div>
|
| 26 |
+
</div>
|
| 27 |
+
</div>
|
| 28 |
+
|
| 29 |
+
{% include "_analysis_nav.html" %}
|
| 30 |
+
|
| 31 |
+
<!-- Text Input Section -->
|
| 32 |
+
<div class="row mb-4">
|
| 33 |
+
<div class="col-12">
|
| 34 |
+
<div class="card">
|
| 35 |
+
<div class="card-header">
|
| 36 |
+
<h3 class="mb-0">
|
| 37 |
+
<i class="fas fa-keyboard"></i>
|
| 38 |
+
Enter your text:
|
| 39 |
+
</h3>
|
| 40 |
+
</div>
|
| 41 |
+
<div class="card-body">
|
| 42 |
+
<div class="row mb-3">
|
| 43 |
+
<div class="col-md-8">
|
| 44 |
+
<textarea id="textInput" class="form-control" rows="6" placeholder="Enter or paste your text here...">Climate change is the long-term alteration of temperature and typical weather patterns in a place. The cause of current climate change is largely human activity, like burning fossil fuels, which adds heat-trapping gases to Earth's atmosphere. The consequences of changing climate are already being felt worldwide.</textarea>
|
| 45 |
+
</div>
|
| 46 |
+
<div class="col-md-4">
|
| 47 |
+
<label for="sampleSelect" class="form-label">Or choose a sample:</label>
|
| 48 |
+
<select id="sampleSelect" class="form-select">
|
| 49 |
+
<option value="Custom">Custom</option>
|
| 50 |
+
<option value="News Article">News Article</option>
|
| 51 |
+
<option value="Product Review">Product Review</option>
|
| 52 |
+
<option value="Scientific Text" selected>Scientific Text</option>
|
| 53 |
+
<option value="Literary Text">Literary Text</option>
|
| 54 |
+
</select>
|
| 55 |
+
</div>
|
| 56 |
+
</div>
|
| 57 |
+
|
| 58 |
+
<div class="d-flex justify-content-between align-items-center">
|
| 59 |
+
<div>
|
| 60 |
+
<button id="processBtn" class="btn btn-primary btn-lg">
|
| 61 |
+
<i class="fas fa-cogs"></i>
|
| 62 |
+
Process Text
|
| 63 |
+
</button>
|
| 64 |
+
</div>
|
| 65 |
+
<div>
|
| 66 |
+
<button id="clearBtn" class="btn btn-outline-secondary">
|
| 67 |
+
<i class="fas fa-trash"></i>
|
| 68 |
+
Clear
|
| 69 |
+
</button>
|
| 70 |
+
</div>
|
| 71 |
+
</div>
|
| 72 |
+
</div>
|
| 73 |
+
</div>
|
| 74 |
+
</div>
|
| 75 |
+
</div>
|
| 76 |
+
|
| 77 |
+
<!-- Results Section -->
|
| 78 |
+
<div class="row">
|
| 79 |
+
<div class="col-12">
|
| 80 |
+
<div class="card">
|
| 81 |
+
<div class="card-header">
|
| 82 |
+
<h3 class="mb-0">
|
| 83 |
+
<i class="fas fa-chart-bar"></i>
|
| 84 |
+
Preprocessing Results
|
| 85 |
+
</h3>
|
| 86 |
+
</div>
|
| 87 |
+
<div class="card-body">
|
| 88 |
+
<div id="resultsContainer">
|
| 89 |
+
<div class="text-center text-muted py-5">
|
| 90 |
+
<i class="fas fa-arrow-up fa-2x mb-3"></i>
|
| 91 |
+
<p>Click "Process Text" to see preprocessing results</p>
|
| 92 |
+
</div>
|
| 93 |
+
</div>
|
| 94 |
+
</div>
|
| 95 |
+
</div>
|
| 96 |
+
</div>
|
| 97 |
+
</div>
|
| 98 |
+
</div>
|
| 99 |
+
{% endblock %}
|
| 100 |
+
|
| 101 |
+
{% block extra_scripts %}
|
| 102 |
+
<script>
|
| 103 |
+
// Initialize page
|
| 104 |
+
// Wire up the Text Preprocessing page once the DOM is ready: optional text
// carry-over from the previous page, the sample dropdown, the process/clear
// buttons and the keyboard shortcuts.
document.addEventListener('DOMContentLoaded', function() {
    const textInput = document.getElementById('textInput');
    const sampleSelect = document.getElementById('sampleSelect');
    const processBtn = document.getElementById('processBtn');
    const clearBtn = document.getElementById('clearBtn');

    // Only carry when requested
    if (sessionStorage.getItem('carryTextOnNextPage') === '1') {
        if (sampleSelect) sampleSelect.value = 'Custom';
        const storedText = sessionStorage.getItem('analysisText');
        if (storedText) textInput.value = storedText;
        // One-shot flag: consume it so a plain reload does not carry again.
        sessionStorage.removeItem('carryTextOnNextPage');
    }

    // Sample text dropdown handler
    sampleSelect.addEventListener('change', function() {
        const sampleType = sampleSelect.value;

        if (sampleType === 'Custom') {
            textInput.value = '';
            return;
        }

        // Get sample text from server
        fetch('/api/sample-text', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({sample_type: sampleType})
        })
        .then(response => response.json())
        .then(data => {
            // Drop a response that arrives after the user has already picked
            // a different entry, so it cannot overwrite their current text.
            if (sampleSelect.value !== sampleType) return;
            // Only accept a real string; assigning undefined would put the
            // literal text "undefined" into the textarea.
            if (data && typeof data.text === 'string') {
                textInput.value = data.text;
            }
        })
        .catch(error => {
            // Network or JSON failure: keep the current text and log it.
            console.error('Failed to load sample text:', error);
        });
    });

    // Process button handler
    processBtn.addEventListener('click', function() {
        const text = textInput.value.trim();

        if (!text) {
            alert('Please enter some text to process.');
            return;
        }

        // Show loading state
        processBtn.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Processing...';
        processBtn.disabled = true;

        const restoreButton = () => {
            processBtn.innerHTML = '<i class="fas fa-cogs"></i> Process Text';
            processBtn.disabled = false;
        };

        // NLPUtils.processText is defined outside this template; whether it
        // returns a promise is not visible here. If it does, restore the
        // button when it settles; otherwise use the fixed 2 second delay.
        const pending = NLPUtils.processText('/api/preprocessing', text);
        if (pending && typeof pending.then === 'function') {
            pending.then(restoreButton, restoreButton);
        } else {
            setTimeout(restoreButton, 2000);
        }
    });

    // Clear button handler: empty the input and restore the placeholder.
    clearBtn.addEventListener('click', function() {
        textInput.value = '';
        document.getElementById('resultsContainer').innerHTML = `
            <div class="text-center text-muted py-5">
                <i class="fas fa-arrow-up fa-2x mb-3"></i>
                <p>Click "Process Text" to see preprocessing results</p>
            </div>
        `;
    });

    // Keyboard shortcuts
    document.addEventListener('keydown', function(e) {
        // Ctrl+Enter to process
        if (e.ctrlKey && e.key === 'Enter') {
            processBtn.click();
        }

        // Ctrl+L to clear (preventDefault stops the browser's own Ctrl+L)
        if (e.ctrlKey && e.key === 'l') {
            e.preventDefault();
            clearBtn.click();
        }
    });
});
|
| 186 |
+
|
| 187 |
+
/**
 * Send the text in #textInput to /api/preprocessing and render the outcome.
 *
 * While the request is in flight the loading placeholder is shown in
 * #resultsContainer; afterwards either `data.result` is rendered or an error
 * alert is shown.
 *
 * NOTE(review): the process-button handler visible in this template calls
 * NLPUtils.processText('/api/preprocessing', ...) rather than this function;
 * confirm whether this helper is still wired up anywhere.
 *
 * @returns {Promise<void>|undefined} Promise that settles once the response
 *     has been handled, or undefined when the input was empty and no request
 *     was sent.
 */
function processPreprocessing() {
    const text = document.getElementById('textInput').value.trim();

    if (!text) {
        alert('Please enter some text to process.');
        return;
    }

    showLoading('resultsContainer');

    return fetch('/api/preprocessing', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({text: text})
    })
    .then(response => response.json().catch(() => {
        // fetch() also resolves for HTTP error statuses, and such a body may
        // not be JSON (e.g. an HTML error page). Report the status rather
        // than a cryptic JSON parse error.
        throw new Error('Unexpected response from server (HTTP ' + response.status + ')');
    }))
    .then(data => {
        if (data.success) {
            displayResults(data.result);
        } else {
            showError(data.error || 'An error occurred while processing the text');
        }
    })
    .catch(error => {
        showError('Failed to process text: ' + error.message);
    })
    .finally(() => {
        // Only clears the container if the spinner is still being shown.
        hideLoading('resultsContainer');
    });
}
|
| 220 |
+
|
| 221 |
+
// Show loading state
|
| 222 |
+
/**
 * Replace the contents of an element with a spinner placeholder.
 * Does nothing when no element has the given id.
 *
 * @param {string} elementId - id of the container to fill.
 */
function showLoading(elementId) {
    const target = document.getElementById(elementId);
    if (!target) {
        return;
    }

    // hideLoading() looks for the "spinner-border" class to detect this markup.
    target.innerHTML = `
        <div class="text-center py-4">
            <div class="spinner-border text-primary" role="status">
                <span class="visually-hidden">Loading...</span>
            </div>
            <p class="mt-2">Processing your text...</p>
        </div>
    `;
}
|
| 235 |
+
|
| 236 |
+
// Hide loading state
|
| 237 |
+
/**
 * Empty the element, but only while it still shows the loading spinner.
 * Content that no longer contains "spinner-border" is left alone.
 *
 * @param {string} elementId - id of the container to check.
 */
function hideLoading(elementId) {
    const target = document.getElementById(elementId);
    const showsSpinner = Boolean(target) && target.innerHTML.includes('spinner-border');

    if (showsSpinner) {
        target.innerHTML = '';
    }
}
|
| 243 |
+
|
| 244 |
+
// Show error message
|
| 245 |
+
/**
 * Render an error alert inside the given element, replacing its contents.
 * Does nothing when no element has the given id.
 *
 * The message is inserted as a text node, not as markup, so HTML contained
 * in a server-provided or exception message is displayed literally instead
 * of being interpreted.
 *
 * @param {*} message - text to show after the "Error:" label.
 * @param {string} [elementId='resultsContainer'] - id of the target element.
 */
function showError(message, elementId = 'resultsContainer') {
    const element = document.getElementById(elementId);
    if (!element) {
        return;
    }

    const alertBox = document.createElement('div');
    alertBox.className = 'alert alert-danger fade-in';
    // Static, trusted markup only; the dynamic part is appended as text below.
    alertBox.innerHTML = '<i class="fas fa-exclamation-triangle"></i> <strong>Error:</strong> ';
    alertBox.appendChild(document.createTextNode(String(message)));

    element.innerHTML = '';
    element.appendChild(alertBox);
}
|
| 256 |
+
|
| 257 |
+
// Display results
|
| 258 |
+
/**
 * Show a result in #resultsContainer and scroll it into view.
 * Does nothing when the container is missing.
 *
 * @param {string} result - assigned to the container's innerHTML unchanged,
 *     so it is interpreted as HTML markup.
 */
function displayResults(result) {
    const resultsBox = document.getElementById('resultsContainer');
    if (!resultsBox) {
        return;
    }

    resultsBox.innerHTML = result;
    resultsBox.classList.add('fade-in');

    // Bring the freshly rendered results to the top of the viewport.
    resultsBox.scrollIntoView({ behavior: 'smooth', block: 'start' });
}
|
| 268 |
+
</script>
|
| 269 |
+
{% endblock %}
|
templates/question_answering.html
ADDED
|
@@ -0,0 +1,442 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block title %}Question Answering - NLP Ultimate Tutorial{% endblock %}
|
| 4 |
+
|
| 5 |
+
{% block content %}
|
| 6 |
+
<div class="container">
|
| 7 |
+
<!-- Header Section -->
|
| 8 |
+
<div class="row mb-4">
|
| 9 |
+
<div class="col-12">
|
| 10 |
+
<div class="card">
|
| 11 |
+
<div class="card-header">
|
| 12 |
+
<h1 class="mb-0">
|
| 13 |
+
<i class="fas fa-question-circle"></i>
|
| 14 |
+
Question Answering System
|
| 15 |
+
</h1>
|
| 16 |
+
</div>
|
| 17 |
+
<div class="card-body">
|
| 18 |
+
<p class="lead">Ask questions about any text context and get intelligent answers with confidence scores.</p>
|
| 19 |
+
|
| 20 |
+
<div class="alert alert-info">
|
| 21 |
+
<i class="fas fa-info-circle"></i>
|
| 22 |
+
<strong>About:</strong> Question Answering (QA) systems extract or generate answers to questions based on a given context or knowledge base.
|
| 23 |
+
</div>
|
| 24 |
+
</div>
|
| 25 |
+
</div>
|
| 26 |
+
</div>
|
| 27 |
+
</div>
|
| 28 |
+
|
| 29 |
+
{% include "_analysis_nav.html" %}
|
| 30 |
+
|
| 31 |
+
<!-- Input Section -->
|
| 32 |
+
<div class="row mb-4">
|
| 33 |
+
<div class="col-12">
|
| 34 |
+
<div class="card">
|
| 35 |
+
<div class="card-header">
|
| 36 |
+
<h3 class="mb-0">
|
| 37 |
+
<i class="fas fa-keyboard"></i>
|
| 38 |
+
Enter Context and Question:
|
| 39 |
+
</h3>
|
| 40 |
+
</div>
|
| 41 |
+
<div class="card-body">
|
| 42 |
+
<div class="row mb-3">
|
| 43 |
+
<div class="col-md-8">
|
| 44 |
+
<label for="contextInput" class="form-label">Context Text:</label>
|
| 45 |
+
<textarea id="contextInput" class="form-control" rows="8" placeholder="Enter the text context here...">The Amazon rainforest is a moist broadleaf tropical rainforest in the Amazon biome that covers most of the Amazon basin of South America. This basin encompasses 7,000,000 km2, of which 5,500,000 km2 are covered by the rainforest. The majority of the forest is contained within Brazil, with 60% of the rainforest, followed by Peru with 13%, Colombia with 10%, and with minor amounts in Bolivia, Ecuador, French Guiana, Guyana, Suriname, and Venezuela.</textarea>
|
| 46 |
+
</div>
|
| 47 |
+
<div class="col-md-4">
|
| 48 |
+
<label for="sampleSelect" class="form-label">Or choose a sample:</label>
|
| 49 |
+
<select id="sampleSelect" class="form-select">
|
| 50 |
+
<option value="Custom">Custom</option>
|
| 51 |
+
<option value="News Article">News Article</option>
|
| 52 |
+
<option value="Product Review">Product Review</option>
|
| 53 |
+
<option value="Scientific Text">Scientific Text</option>
|
| 54 |
+
<option value="Literary Text">Literary Text</option>
|
| 55 |
+
</select>
|
| 56 |
+
</div>
|
| 57 |
+
</div>
|
| 58 |
+
|
| 59 |
+
<div class="row mb-3">
|
| 60 |
+
<div class="col-md-8">
|
| 61 |
+
<label for="questionInput" class="form-label">Your Question:</label>
|
| 62 |
+
<input type="text" id="questionInput" class="form-control" placeholder="What would you like to know about the context?" value="What percentage of the Amazon rainforest is in Brazil?">
|
| 63 |
+
</div>
|
| 64 |
+
<div class="col-md-4">
|
| 65 |
+
<label for="confidenceThreshold" class="form-label">Confidence Threshold:</label>
|
| 66 |
+
<input type="range" class="form-range" id="confidenceThreshold" min="0.0" max="1.0" value="0.5" step="0.1">
|
| 67 |
+
<div class="d-flex justify-content-between">
|
| 68 |
+
<small>0.0</small>
|
| 69 |
+
<small id="confidenceValue">0.5</small>
|
| 70 |
+
<small>1.0</small>
|
| 71 |
+
</div>
|
| 72 |
+
</div>
|
| 73 |
+
</div>
|
| 74 |
+
|
| 75 |
+
<div class="d-flex justify-content-between align-items-center">
|
| 76 |
+
<div>
|
| 77 |
+
<button id="processBtn" class="btn btn-primary btn-lg">
|
| 78 |
+
<i class="fas fa-question-circle"></i>
|
| 79 |
+
Get Answer
|
| 80 |
+
</button>
|
| 81 |
+
</div>
|
| 82 |
+
<div>
|
| 83 |
+
<button id="clearBtn" class="btn btn-outline-secondary">
|
| 84 |
+
<i class="fas fa-trash"></i>
|
| 85 |
+
Clear
|
| 86 |
+
</button>
|
| 87 |
+
</div>
|
| 88 |
+
</div>
|
| 89 |
+
</div>
|
| 90 |
+
</div>
|
| 91 |
+
</div>
|
| 92 |
+
</div>
|
| 93 |
+
|
| 94 |
+
<!-- Tips and Features Section -->
|
| 95 |
+
<div class="row mb-4">
|
| 96 |
+
<div class="col-md-6">
|
| 97 |
+
<div class="card">
|
| 98 |
+
<div class="card-header">
|
| 99 |
+
<h3 class="mb-0">
|
| 100 |
+
<i class="fas fa-lightbulb"></i>
|
| 101 |
+
Tips for Better Results
|
| 102 |
+
</h3>
|
| 103 |
+
</div>
|
| 104 |
+
<div class="card-body">
|
| 105 |
+
<ul class="list-unstyled">
|
| 106 |
+
<li><i class="fas fa-check text-success"></i> <strong>Context:</strong> Provide relevant text containing the answer</li>
|
| 107 |
+
<li><i class="fas fa-check text-success"></i> <strong>Question:</strong> Be specific and clear</li>
|
| 108 |
+
<li><i class="fas fa-check text-success"></i> <strong>Extractive:</strong> Finds exact spans from the text</li>
|
| 109 |
+
<li><i class="fas fa-check text-success"></i> <strong>Confidence:</strong> Higher threshold = more selective answers</li>
|
| 110 |
+
</ul>
|
| 111 |
+
</div>
|
| 112 |
+
</div>
|
| 113 |
+
</div>
|
| 114 |
+
<div class="col-md-6">
|
| 115 |
+
<div class="card">
|
| 116 |
+
<div class="card-header">
|
| 117 |
+
<h3 class="mb-0">
|
| 118 |
+
<i class="fas fa-chart-line"></i>
|
| 119 |
+
System Features
|
| 120 |
+
</h3>
|
| 121 |
+
</div>
|
| 122 |
+
<div class="card-body">
|
| 123 |
+
<ul class="list-unstyled">
|
| 124 |
+
<li><i class="fas fa-brain text-primary"></i> <strong>Transformer QA:</strong> RoBERTa-SQuAD2 model</li>
|
| 125 |
+
<li><i class="fas fa-calculator text-info"></i> <strong>TF-IDF Matching:</strong> Similarity-based answer finding</li>
|
| 126 |
+
<li><i class="fas fa-chart-bar text-success"></i> <strong>Confidence Scoring:</strong> Reliability assessment</li>
|
| 127 |
+
<li><i class="fas fa-highlighter text-warning"></i> <strong>Context Highlighting:</strong> Visual answer location</li>
|
| 128 |
+
</ul>
|
| 129 |
+
</div>
|
| 130 |
+
</div>
|
| 131 |
+
</div>
|
| 132 |
+
</div>
|
| 133 |
+
|
| 134 |
+
<!-- Example Questions Section -->
|
| 135 |
+
<div class="row mb-4">
|
| 136 |
+
<div class="col-12">
|
| 137 |
+
<div class="card">
|
| 138 |
+
<div class="card-header">
|
| 139 |
+
<h3 class="mb-0">
|
| 140 |
+
<i class="fas fa-list"></i>
|
| 141 |
+
Example Questions
|
| 142 |
+
</h3>
|
| 143 |
+
</div>
|
| 144 |
+
<div class="card-body">
|
| 145 |
+
<div class="row">
|
| 146 |
+
<div class="col-md-4">
|
| 147 |
+
<button class="btn btn-outline-primary w-100 mb-2" onclick="setExample('What percentage of the Amazon rainforest is in Brazil?')">
|
| 148 |
+
What percentage of the Amazon rainforest is in Brazil?
|
| 149 |
+
</button>
|
| 150 |
+
</div>
|
| 151 |
+
<div class="col-md-4">
|
| 152 |
+
<button class="btn btn-outline-primary w-100 mb-2" onclick="setExample('Which countries contain parts of the Amazon rainforest?')">
|
| 153 |
+
Which countries contain parts of the Amazon rainforest?
|
| 154 |
+
</button>
|
| 155 |
+
</div>
|
| 156 |
+
<div class="col-md-4">
|
| 157 |
+
<button class="btn btn-outline-primary w-100 mb-2" onclick="setExample('How large is the Amazon basin?')">
|
| 158 |
+
How large is the Amazon basin?
|
| 159 |
+
</button>
|
| 160 |
+
</div>
|
| 161 |
+
</div>
|
| 162 |
+
</div>
|
| 163 |
+
</div>
|
| 164 |
+
</div>
|
| 165 |
+
</div>
|
| 166 |
+
|
| 167 |
+
<!-- Results Section -->
|
| 168 |
+
<div class="row">
|
| 169 |
+
<div class="col-12">
|
| 170 |
+
<div class="card">
|
| 171 |
+
<div class="card-header">
|
| 172 |
+
<h3 class="mb-0">
|
| 173 |
+
<i class="fas fa-chart-bar"></i>
|
| 174 |
+
Question Answering Results
|
| 175 |
+
</h3>
|
| 176 |
+
</div>
|
| 177 |
+
<div class="card-body">
|
| 178 |
+
<div id="resultsContainer">
|
| 179 |
+
<div class="text-center text-muted py-5">
|
| 180 |
+
<i class="fas fa-arrow-up fa-2x mb-3"></i>
|
| 181 |
+
<p>Click "Get Answer" to see question answering results</p>
|
| 182 |
+
</div>
|
| 183 |
+
</div>
|
| 184 |
+
</div>
|
| 185 |
+
</div>
|
| 186 |
+
</div>
|
| 187 |
+
</div>
|
| 188 |
+
</div>
|
| 189 |
+
{% endblock %}
|
| 190 |
+
|
| 191 |
+
{% block extra_scripts %}
|
| 192 |
+
<script>
|
| 193 |
+
// Initialize page
|
| 194 |
+
// Wires up the question-answering page once the DOM is ready: input
// prefill/reset, confidence slider label, sample selection, input
// persistence in sessionStorage, process/clear buttons, keyboard shortcuts.
document.addEventListener('DOMContentLoaded', function() {
    const contextInput = document.getElementById('contextInput');
    const questionInput = document.getElementById('questionInput');
    const sampleSelect = document.getElementById('sampleSelect');
    const processBtn = document.getElementById('processBtn');
    const clearBtn = document.getElementById('clearBtn');

    // Per-route storage key holding the user's own ("Custom") context text.
    const routeKey = 'customTextBackup:' + (window.location.pathname || '/question-answering');

    // Default question paired with each sample context.
    const SAMPLE_QUESTIONS = {
        'News Article': 'What action did the commission take?',
        'Product Review': 'What is the reviewer\'s overall opinion?',
        'Scientific Text': 'What is the primary cause of current climate change?',
        'Literary Text': 'Who is the main character?'
    };

    // Questions treated as "sample defaults" that are cleared when the user
    // edits the context. Derived from SAMPLE_QUESTIONS so the lists cannot drift.
    const SAMPLE_DEFAULTS = new Set([
        ...Object.values(SAMPLE_QUESTIONS),
        // NOTE(review): variant with a stray possessive, kept from the original
        // list; presumably an older default - confirm before removing.
        'What action did the commission\'s take?'
    ]);

    // Prefill only when explicitly navigating via quick-nav or same-route refresh
    const shouldCarry = sessionStorage.getItem('carryTextOnNextPage') === '1';
    if (shouldCarry) {
        const storedText = sessionStorage.getItem('analysisText');
        const storedQuestion = sessionStorage.getItem('qaQuestion');
        if (storedText) contextInput.value = storedText;
        if (storedQuestion) questionInput.value = storedQuestion;
        // Reset flag so normal navigation doesn't keep stale inputs
        sessionStorage.removeItem('carryTextOnNextPage');
    } else {
        // Fresh route load: context goes back to the template default,
        // the question is emptied.
        contextInput.value = contextInput.defaultValue || '';
        questionInput.value = '';
        sessionStorage.removeItem('qaQuestion');
    }

    // Update confidence threshold value
    document.getElementById('confidenceThreshold').addEventListener('input', function() {
        document.getElementById('confidenceValue').textContent = this.value;
    });

    // Sample text dropdown handler: keep context/question in sync and restore Custom.
    // The previous selection is tracked because a change event only exposes the
    // new value, and the backup must only be written when leaving "Custom".
    let previousSample = sampleSelect.value;
    sampleSelect.addEventListener('change', function() {
        const sampleType = this.value;
        const leavingCustom = previousSample === 'Custom';
        previousSample = sampleType;

        if (sampleType === 'Custom') {
            // Restore previously typed Custom text
            const backup = sessionStorage.getItem(routeKey);
            if (backup !== null) contextInput.value = backup;
            // Clear question to avoid mismatch
            questionInput.value = '';
            sessionStorage.setItem('qaQuestion', '');
            return;
        }

        // Save the custom text before switching away. When moving from one
        // sample to another the textarea holds sample text, which must not
        // overwrite the user's backup.
        if (leavingCustom) {
            sessionStorage.setItem(routeKey, contextInput.value);
        }

        fetch('/api/sample-text', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({ sample_type: sampleType })
        })
        .then(response => {
            // fetch() resolves for HTTP error statuses as well.
            if (!response.ok) {
                throw new Error('HTTP ' + response.status);
            }
            return response.json();
        })
        .then(data => {
            // Ignore a late response if the user has picked something else since.
            if (sampleSelect.value !== sampleType) {
                return;
            }
            contextInput.value = data.text || '';
            // Update question to match selected sample for consistency
            questionInput.value = SAMPLE_QUESTIONS[sampleType] || '';
            // Persist to session storage
            sessionStorage.setItem('analysisText', contextInput.value);
            sessionStorage.setItem('qaQuestion', questionInput.value);
        })
        .catch(error => {
            // Leave the current inputs untouched when the sample cannot be loaded.
            console.error('Failed to load sample text:', error);
        });
    });

    // Persist inputs while typing
    contextInput.addEventListener('input', function() {
        sessionStorage.setItem('analysisText', this.value);
        sessionStorage.setItem(routeKey, this.value);
        // If user starts editing context while a sample-default question is set, clear it to avoid mismatch
        if (SAMPLE_DEFAULTS.has(questionInput.value)) {
            questionInput.value = '';
            sessionStorage.setItem('qaQuestion', '');
        }
    });
    questionInput.addEventListener('input', function() {
        sessionStorage.setItem('qaQuestion', this.value);
    });

    // Process button handler
    processBtn.addEventListener('click', function() {
        const context = contextInput.value.trim();
        const question = questionInput.value.trim();

        if (!context) {
            alert('Please provide context text.');
            return;
        }

        if (!question) {
            alert('Please enter a question.');
            return;
        }

        // Show loading state
        processBtn.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Processing...';
        processBtn.disabled = true;

        // Persist before processing
        sessionStorage.setItem('analysisText', context);
        sessionStorage.setItem('qaQuestion', question);

        const resetButton = () => {
            processBtn.innerHTML = '<i class="fas fa-question-circle"></i> Get Answer';
            processBtn.disabled = false;
        };

        // Process question. If a promise comes back, re-enable the button when
        // the request settles; otherwise fall back to the fixed delay.
        const pending = processQuestionAnswering();
        if (pending && typeof pending.finally === 'function') {
            pending.finally(resetButton);
        } else {
            setTimeout(resetButton, 2000);
        }
    });

    // Clear button handler
    clearBtn.addEventListener('click', function() {
        contextInput.value = '';
        questionInput.value = '';
        // Assigning .value fires no input event, so update the persisted
        // copies explicitly; otherwise the cleared text could be carried over.
        sessionStorage.setItem('analysisText', '');
        sessionStorage.setItem('qaQuestion', '');
        document.getElementById('resultsContainer').innerHTML = `
            <div class="text-center text-muted py-5">
                <i class="fas fa-arrow-up fa-2x mb-3"></i>
                <p>Click "Get Answer" to see question answering results</p>
            </div>
        `;
    });

    // Keyboard shortcuts
    document.addEventListener('keydown', function(e) {
        // Ctrl+Enter to process
        if (e.ctrlKey && e.key === 'Enter') {
            processBtn.click();
        }

        // Ctrl+L to clear
        if (e.ctrlKey && e.key === 'l') {
            e.preventDefault();
            clearBtn.click();
        }
    });
});
|
| 343 |
+
|
| 344 |
+
// Set example question
|
| 345 |
+
/**
 * Put an example question into #questionInput.
 *
 * Assigning .value from script does not fire an input event, so the question
 * is also written to sessionStorage under 'qaQuestion' here, mirroring what
 * the input listener does for typed questions.
 *
 * @param {string} question - the example question to use.
 */
function setExample(question) {
    document.getElementById('questionInput').value = question;
    sessionStorage.setItem('qaQuestion', question);
}
|
| 348 |
+
|
| 349 |
+
// Process question answering
|
| 350 |
+
/**
 * Send the current context, question and confidence threshold to
 * /api/question-answering and render the outcome.
 *
 * While the request is in flight the loading placeholder is shown in
 * #resultsContainer; afterwards either `data.result` is rendered or an error
 * alert is shown.
 *
 * @returns {Promise<void>|undefined} Promise that settles once the response
 *     has been handled, or undefined when validation failed and no request
 *     was sent.
 */
function processQuestionAnswering() {
    const context = document.getElementById('contextInput').value.trim();
    const question = document.getElementById('questionInput').value.trim();
    const confidenceThreshold = parseFloat(document.getElementById('confidenceThreshold').value);

    if (!context) {
        alert('Please provide context text.');
        return;
    }

    if (!question) {
        alert('Please enter a question.');
        return;
    }

    showLoading('resultsContainer');

    return fetch('/api/question-answering', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({
            context: context,
            question: question,
            confidence_threshold: confidenceThreshold
        })
    })
    .then(response => response.json().catch(() => {
        // fetch() also resolves for HTTP error statuses, and such a body may
        // not be JSON (e.g. an HTML error page). Report the status rather
        // than a cryptic JSON parse error.
        throw new Error('Unexpected response from server (HTTP ' + response.status + ')');
    }))
    .then(data => {
        if (data.success) {
            displayResults(data.result);
        } else {
            showError(data.error || 'An error occurred while processing the question');
        }
    })
    .catch(error => {
        showError('Failed to process question: ' + error.message);
    })
    .finally(() => {
        // Only clears the container if the spinner is still being shown.
        hideLoading('resultsContainer');
    });
}
|
| 393 |
+
|
| 394 |
+
// Show loading state
|
| 395 |
+
/**
 * Replace the contents of an element with a spinner placeholder.
 * Does nothing when no element has the given id.
 *
 * @param {string} elementId - id of the container to fill.
 */
function showLoading(elementId) {
    const target = document.getElementById(elementId);
    if (!target) {
        return;
    }

    // hideLoading() looks for the "spinner-border" class to detect this markup.
    target.innerHTML = `
        <div class="text-center py-4">
            <div class="spinner-border text-primary" role="status">
                <span class="visually-hidden">Loading...</span>
            </div>
            <p class="mt-2">Processing your question...</p>
        </div>
    `;
}
|
| 408 |
+
|
| 409 |
+
// Hide loading state
|
| 410 |
+
/**
 * Empty the element, but only while it still shows the loading spinner.
 * Content that no longer contains "spinner-border" is left alone.
 *
 * @param {string} elementId - id of the container to check.
 */
function hideLoading(elementId) {
    const target = document.getElementById(elementId);
    const showsSpinner = Boolean(target) && target.innerHTML.includes('spinner-border');

    if (showsSpinner) {
        target.innerHTML = '';
    }
}
|
| 416 |
+
|
| 417 |
+
// Show error message
|
| 418 |
+
/**
 * Render an error alert inside the given element, replacing its contents.
 * Does nothing when no element has the given id.
 *
 * The message is inserted as a text node, not as markup, so HTML contained
 * in a server-provided or exception message is displayed literally instead
 * of being interpreted.
 *
 * @param {*} message - text to show after the "Error:" label.
 * @param {string} [elementId='resultsContainer'] - id of the target element.
 */
function showError(message, elementId = 'resultsContainer') {
    const element = document.getElementById(elementId);
    if (!element) {
        return;
    }

    const alertBox = document.createElement('div');
    alertBox.className = 'alert alert-danger fade-in';
    // Static, trusted markup only; the dynamic part is appended as text below.
    alertBox.innerHTML = '<i class="fas fa-exclamation-triangle"></i> <strong>Error:</strong> ';
    alertBox.appendChild(document.createTextNode(String(message)));

    element.innerHTML = '';
    element.appendChild(alertBox);
}
|
| 429 |
+
|
| 430 |
+
// Display results
|
| 431 |
+
/**
 * Show a result in #resultsContainer and scroll it into view.
 * Does nothing when the container is missing.
 *
 * @param {string} result - assigned to the container's innerHTML unchanged,
 *     so it is interpreted as HTML markup.
 */
function displayResults(result) {
    const resultsBox = document.getElementById('resultsContainer');
    if (!resultsBox) {
        return;
    }

    resultsBox.innerHTML = result;
    resultsBox.classList.add('fade-in');

    // Bring the freshly rendered results to the top of the viewport.
    resultsBox.scrollIntoView({ behavior: 'smooth', block: 'start' });
}
|
| 441 |
+
</script>
|
| 442 |
+
{% endblock %}
|
templates/sentiment.html
ADDED
|
@@ -0,0 +1,430 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block title %}Sentiment Analysis - NLP Ultimate Tutorial{% endblock %}
|
| 4 |
+
|
| 5 |
+
{% block content %}
|
| 6 |
+
<div class="container">
|
| 7 |
+
<!-- Header Section -->
|
| 8 |
+
<div class="row mb-4">
|
| 9 |
+
<div class="col-12">
|
| 10 |
+
<div class="card">
|
| 11 |
+
<div class="card-header">
|
| 12 |
+
<h1 class="mb-0">
|
| 13 |
+
<i class="fas fa-heart"></i>
|
| 14 |
+
Sentiment Analysis
|
| 15 |
+
</h1>
|
| 16 |
+
</div>
|
| 17 |
+
<div class="card-body">
|
| 18 |
+
<p class="lead">Analyze the emotional tone and sentiment of text using multiple advanced models.</p>
|
| 19 |
+
|
| 20 |
+
<div class="alert alert-info">
|
| 21 |
+
<i class="fas fa-info-circle"></i>
|
| 22 |
+
<strong>About:</strong> Sentiment analysis determines the emotional tone behind text to identify if it expresses positive, negative, or neutral sentiment.
|
| 23 |
+
</div>
|
| 24 |
+
</div>
|
| 25 |
+
</div>
|
| 26 |
+
</div>
|
| 27 |
+
</div>
|
| 28 |
+
|
| 29 |
+
{% include "_analysis_nav.html" %}
|
| 30 |
+
|
| 31 |
+
<!-- Text Input Section -->
|
| 32 |
+
<div class="row mb-4">
|
| 33 |
+
<div class="col-12">
|
| 34 |
+
<div class="card">
|
| 35 |
+
<div class="card-header">
|
| 36 |
+
<h3 class="mb-0">
|
| 37 |
+
<i class="fas fa-keyboard"></i>
|
| 38 |
+
Enter your text:
|
| 39 |
+
</h3>
|
| 40 |
+
</div>
|
| 41 |
+
<div class="card-body">
|
| 42 |
+
<div class="row mb-3">
|
| 43 |
+
<div class="col-md-8">
|
| 44 |
+
<textarea id="textInput" class="form-control" rows="6" placeholder="Enter or paste your text here...">I absolutely loved this movie! The acting was superb and the plot kept me on the edge of my seat.</textarea>
|
| 45 |
+
</div>
|
| 46 |
+
<div class="col-md-4">
|
| 47 |
+
<label for="sampleSelect" class="form-label">Or choose a sample:</label>
|
| 48 |
+
<select id="sampleSelect" class="form-select">
|
| 49 |
+
<option value="Custom">Custom</option>
|
| 50 |
+
<option value="News Article">News Article</option>
|
| 51 |
+
<option value="Product Review">Product Review</option>
|
| 52 |
+
<option value="Scientific Text">Scientific Text</option>
|
| 53 |
+
<option value="Literary Text">Literary Text</option>
|
| 54 |
+
</select>
|
| 55 |
+
</div>
|
| 56 |
+
</div>
|
| 57 |
+
|
| 58 |
+
<div class="d-flex justify-content-between align-items-center">
|
| 59 |
+
<div>
|
| 60 |
+
<button id="processBtn" class="btn btn-primary btn-lg">
|
| 61 |
+
<i class="fas fa-heart"></i>
|
| 62 |
+
Analyze Sentiment
|
| 63 |
+
</button>
|
| 64 |
+
</div>
|
| 65 |
+
<div>
|
| 66 |
+
<button id="clearBtn" class="btn btn-outline-secondary">
|
| 67 |
+
<i class="fas fa-trash"></i>
|
| 68 |
+
Clear
|
| 69 |
+
</button>
|
| 70 |
+
</div>
|
| 71 |
+
</div>
|
| 72 |
+
</div>
|
| 73 |
+
</div>
|
| 74 |
+
</div>
|
| 75 |
+
</div>
|
| 76 |
+
|
| 77 |
+
<!-- Sentiment Analysis Methods Info -->
|
| 78 |
+
<div class="row mb-4">
|
| 79 |
+
<div class="col-12">
|
| 80 |
+
<div class="card">
|
| 81 |
+
<div class="card-header">
|
| 82 |
+
<h3 class="mb-0">
|
| 83 |
+
<i class="fas fa-info-circle"></i>
|
| 84 |
+
Sentiment Analysis Methods
|
| 85 |
+
</h3>
|
| 86 |
+
</div>
|
| 87 |
+
<div class="card-body p-4">
|
| 88 |
+
<div class="row g-4">
|
| 89 |
+
<!-- VADER Card -->
|
| 90 |
+
<div class="col-lg-4 col-md-6">
|
| 91 |
+
<div class="sentiment-method-card h-100 vader-card">
|
| 92 |
+
<div class="method-header">
|
| 93 |
+
<div class="method-icon">
|
| 94 |
+
<i class="fas fa-book"></i>
|
| 95 |
+
</div>
|
| 96 |
+
<h4 class="method-title">VADER</h4>
|
| 97 |
+
<p class="method-subtitle">Rule-based Analyzer</p>
|
| 98 |
+
</div>
|
| 99 |
+
<div class="method-body">
|
| 100 |
+
<p class="method-description">Rule-based sentiment analyzer specifically tuned for social media text with compound scoring.</p>
|
| 101 |
+
<div class="method-features">
|
| 102 |
+
<div class="feature-item">
|
| 103 |
+
<i class="fas fa-check-circle"></i>
|
| 104 |
+
<span>Lexicon-based approach</span>
|
| 105 |
+
</div>
|
| 106 |
+
<div class="feature-item">
|
| 107 |
+
<i class="fas fa-check-circle"></i>
|
| 108 |
+
<span>Social media optimized</span>
|
| 109 |
+
</div>
|
| 110 |
+
<div class="feature-item">
|
| 111 |
+
<i class="fas fa-check-circle"></i>
|
| 112 |
+
<span>Fast and reliable</span>
|
| 113 |
+
</div>
|
| 114 |
+
</div>
|
| 115 |
+
</div>
|
| 116 |
+
</div>
|
| 117 |
+
</div>
|
| 118 |
+
|
| 119 |
+
<!-- DistilBERT Card -->
|
| 120 |
+
<div class="col-lg-4 col-md-6">
|
| 121 |
+
<div class="sentiment-method-card h-100 distilbert-card">
|
| 122 |
+
<div class="method-header">
|
| 123 |
+
<div class="method-icon">
|
| 124 |
+
<i class="fas fa-brain"></i>
|
| 125 |
+
</div>
|
| 126 |
+
<h4 class="method-title">DistilBERT</h4>
|
| 127 |
+
<p class="method-subtitle">Transformer Model</p>
|
| 128 |
+
</div>
|
| 129 |
+
<div class="method-body">
|
| 130 |
+
<p class="method-description">Transformer model fine-tuned on Stanford Sentiment Treebank dataset with high accuracy.</p>
|
| 131 |
+
<div class="method-features">
|
| 132 |
+
<div class="feature-item">
|
| 133 |
+
<i class="fas fa-check-circle"></i>
|
| 134 |
+
<span>Deep learning approach</span>
|
| 135 |
+
</div>
|
| 136 |
+
<div class="feature-item">
|
| 137 |
+
<i class="fas fa-check-circle"></i>
|
| 138 |
+
<span>~91% accuracy</span>
|
| 139 |
+
</div>
|
| 140 |
+
<div class="feature-item">
|
| 141 |
+
<i class="fas fa-check-circle"></i>
|
| 142 |
+
<span>Context-aware</span>
|
| 143 |
+
</div>
|
| 144 |
+
</div>
|
| 145 |
+
</div>
|
| 146 |
+
</div>
|
| 147 |
+
</div>
|
| 148 |
+
|
| 149 |
+
<!-- RoBERTa Emotion Card -->
|
| 150 |
+
<div class="col-lg-4 col-md-6">
|
| 151 |
+
<div class="sentiment-method-card h-100 roberta-card">
|
| 152 |
+
<div class="method-header">
|
| 153 |
+
<div class="method-icon">
|
| 154 |
+
<i class="fas fa-smile"></i>
|
| 155 |
+
</div>
|
| 156 |
+
<h4 class="method-title">RoBERTa Emotion</h4>
|
| 157 |
+
<p class="method-subtitle">Multi-label Emotion</p>
|
| 158 |
+
</div>
|
| 159 |
+
<div class="method-body">
|
| 160 |
+
<p class="method-description">Multi-label emotion detection model identifying specific emotions like joy, anger, sadness, etc.</p>
|
| 161 |
+
<div class="method-features">
|
| 162 |
+
<div class="feature-item">
|
| 163 |
+
<i class="fas fa-check-circle"></i>
|
| 164 |
+
<span>Emotion classification</span>
|
| 165 |
+
</div>
|
| 166 |
+
<div class="feature-item">
|
| 167 |
+
<i class="fas fa-check-circle"></i>
|
| 168 |
+
<span>Multi-label detection</span>
|
| 169 |
+
</div>
|
| 170 |
+
<div class="feature-item">
|
| 171 |
+
<i class="fas fa-check-circle"></i>
|
| 172 |
+
<span>Detailed emotional analysis</span>
|
| 173 |
+
</div>
|
| 174 |
+
</div>
|
| 175 |
+
</div>
|
| 176 |
+
</div>
|
| 177 |
+
</div>
|
| 178 |
+
</div>
|
| 179 |
+
</div>
|
| 180 |
+
</div>
|
| 181 |
+
</div>
|
| 182 |
+
</div>
|
| 183 |
+
|
| 184 |
+
<!-- Sentiment Scale Info -->
|
| 185 |
+
<div class="row mb-4">
|
| 186 |
+
<div class="col-12">
|
| 187 |
+
<div class="card">
|
| 188 |
+
<div class="card-header">
|
| 189 |
+
<h3 class="mb-0">
|
| 190 |
+
<i class="fas fa-chart-line"></i>
|
| 191 |
+
Sentiment Scale
|
| 192 |
+
</h3>
|
| 193 |
+
</div>
|
| 194 |
+
<div class="card-body">
|
| 195 |
+
<div class="row">
|
| 196 |
+
<div class="col-md-3">
|
| 197 |
+
<div class="card text-center">
|
| 198 |
+
<div class="card-body">
|
| 199 |
+
<i class="fas fa-frown fa-2x text-danger mb-2"></i>
|
| 200 |
+
<h5>Negative</h5>
|
| 201 |
+
<p class="small mb-0">Score: -1.0 to -0.05</p>
|
| 202 |
+
</div>
|
| 203 |
+
</div>
|
| 204 |
+
</div>
|
| 205 |
+
<div class="col-md-3">
|
| 206 |
+
<div class="card text-center">
|
| 207 |
+
<div class="card-body">
|
| 208 |
+
<i class="fas fa-meh fa-2x text-warning mb-2"></i>
|
| 209 |
+
<h5>Neutral</h5>
|
| 210 |
+
<p class="small mb-0">Score: -0.05 to 0.05</p>
|
| 211 |
+
</div>
|
| 212 |
+
</div>
|
| 213 |
+
</div>
|
| 214 |
+
<div class="col-md-3">
|
| 215 |
+
<div class="card text-center">
|
| 216 |
+
<div class="card-body">
|
| 217 |
+
<i class="fas fa-smile fa-2x text-success mb-2"></i>
|
| 218 |
+
<h5>Positive</h5>
|
| 219 |
+
<p class="small mb-0">Score: 0.05 to 1.0</p>
|
| 220 |
+
</div>
|
| 221 |
+
</div>
|
| 222 |
+
</div>
|
| 223 |
+
<div class="col-md-3">
|
| 224 |
+
<div class="card text-center">
|
| 225 |
+
<div class="card-body">
|
| 226 |
+
<i class="fas fa-heart fa-2x text-info mb-2"></i>
|
| 227 |
+
<h5>Emotions</h5>
|
| 228 |
+
<p class="small mb-0">Joy, Anger, Sadness, Fear, etc.</p>
|
| 229 |
+
</div>
|
| 230 |
+
</div>
|
| 231 |
+
</div>
|
| 232 |
+
</div>
|
| 233 |
+
</div>
|
| 234 |
+
</div>
|
| 235 |
+
</div>
|
| 236 |
+
</div>
|
| 237 |
+
|
| 238 |
+
<!-- Results Section -->
|
| 239 |
+
<div class="row">
|
| 240 |
+
<div class="col-12">
|
| 241 |
+
<div class="card">
|
| 242 |
+
<div class="card-header">
|
| 243 |
+
<h3 class="mb-0">
|
| 244 |
+
<i class="fas fa-chart-bar"></i>
|
| 245 |
+
Sentiment Analysis Results
|
| 246 |
+
</h3>
|
| 247 |
+
</div>
|
| 248 |
+
<div class="card-body">
|
| 249 |
+
<div id="resultsContainer">
|
| 250 |
+
<div class="text-center text-muted py-5">
|
| 251 |
+
<i class="fas fa-arrow-up fa-2x mb-3"></i>
|
| 252 |
+
<p>Click "Analyze Sentiment" to see sentiment analysis results</p>
|
| 253 |
+
</div>
|
| 254 |
+
</div>
|
| 255 |
+
</div>
|
| 256 |
+
</div>
|
| 257 |
+
</div>
|
| 258 |
+
</div>
|
| 259 |
+
</div>
|
| 260 |
+
{% endblock %}
|
| 261 |
+
|
| 262 |
+
{% block extra_scripts %}
|
| 263 |
+
<script>
|
| 264 |
+
// Initialize page
|
| 265 |
+
document.addEventListener('DOMContentLoaded', function() {
    // Page wiring for the sentiment view: optionally restores text carried
    // over from another page, then attaches the sample picker, the
    // Analyze/Clear buttons and the keyboard shortcuts.
    const textInput = document.getElementById('textInput');
    const sampleSelect = document.getElementById('sampleSelect');
    const processBtn = document.getElementById('processBtn');
    const clearBtn = document.getElementById('clearBtn');
    const resultsContainer = document.getElementById('resultsContainer');

    // Only carry over when using Quick Nav; otherwise leave defaults
    if (sessionStorage.getItem('carryTextOnNextPage') === '1') {
        if (sampleSelect) sampleSelect.value = 'Custom';
        const storedText = sessionStorage.getItem('analysisText');
        if (storedText) textInput.value = storedText;
        // The flag is one-shot: clear it so a later plain visit keeps defaults.
        sessionStorage.removeItem('carryTextOnNextPage');
    }

    // Sample text dropdown handler
    sampleSelect.addEventListener('change', function() {
        const sampleType = this.value;

        if (sampleType === 'Custom') {
            textInput.value = '';
            return;
        }

        // Get sample text from server
        fetch('/api/sample-text', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({sample_type: sampleType})
        })
        .then(response => response.json())
        .then(data => {
            // Drop a stale response if the user picked something else while
            // this request was in flight.
            if (sampleSelect.value !== sampleType) return;
            // Fall back to '' so a missing `text` field never renders the
            // literal "undefined" in the textarea.
            textInput.value = data.text || '';
        })
        .catch(error => {
            // Keep the current text; just surface the failure for debugging.
            console.error('Failed to load sample text:', error);
        });
    });

    // Process button handler
    processBtn.addEventListener('click', function() {
        const text = textInput.value.trim();

        if (!text) {
            alert('Please enter some text to analyze.');
            return;
        }

        // Show loading state
        const button = this;
        button.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Processing...';
        button.disabled = true;

        const restoreButton = () => {
            button.innerHTML = '<i class="fas fa-heart"></i> Analyze Sentiment';
            button.disabled = false;
        };

        // Process text. When processSentiment() hands back a promise, re-enable
        // the button exactly when the request settles; otherwise keep the
        // fixed two-second delay as a fallback.
        const pending = processSentiment();
        if (pending && typeof pending.finally === 'function') {
            pending.finally(restoreButton);
        } else {
            setTimeout(restoreButton, 2000);
        }
    });

    // Clear button handler: empties the input and restores the placeholder.
    clearBtn.addEventListener('click', function() {
        textInput.value = '';
        resultsContainer.innerHTML = `
            <div class="text-center text-muted py-5">
                <i class="fas fa-arrow-up fa-2x mb-3"></i>
                <p>Click "Analyze Sentiment" to see sentiment analysis results</p>
            </div>
        `;
    });

    // Keyboard shortcuts
    document.addEventListener('keydown', function(e) {
        // Ctrl+Enter to process
        if (e.ctrlKey && e.key === 'Enter') {
            processBtn.click();
        }

        // Ctrl+L to clear (preventDefault stops the browser's own Ctrl+L action)
        if (e.ctrlKey && e.key === 'l') {
            e.preventDefault();
            clearBtn.click();
        }
    });
});
|
| 347 |
+
|
| 348 |
+
// Process sentiment analysis
|
| 349 |
+
/**
 * Send the textarea content to the sentiment endpoint and render the outcome.
 *
 * Reads #textInput, POSTs `{text}` as JSON to /api/sentiment, then shows
 * `data.result` on success or an error alert otherwise.
 *
 * @returns {Promise<void>} Settles once the request has finished and the
 *     results area has been updated, so callers can restore their own UI
 *     state at the right moment. Resolves immediately when the input is empty.
 */
function processSentiment() {
    const text = document.getElementById('textInput').value.trim();

    if (!text) {
        alert('Please enter some text to analyze.');
        return Promise.resolve();
    }

    showLoading('resultsContainer');

    return fetch('/api/sentiment', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({text: text})
    })
    .then(response => response.json())
    .then(data => {
        if (data.success) {
            displayResults(data.result);
        } else {
            showError(data.error || 'An error occurred while processing the text');
        }
    })
    .catch(error => {
        // Network failures and unparsable responses both end up here.
        showError('Failed to process text: ' + error.message);
    })
    .finally(() => {
        // Only clears the container if the spinner is still what it shows.
        hideLoading('resultsContainer');
    });
}
|
| 381 |
+
|
| 382 |
+
// Show loading state
|
| 383 |
+
/**
 * Replace the content of the given element with a spinner and status line.
 * Does nothing when no element has that id.
 *
 * @param {string} elementId - id of the container to fill.
 */
function showLoading(elementId) {
    const target = document.getElementById(elementId);
    if (!target) {
        return;
    }

    target.innerHTML = `
        <div class="text-center py-4">
            <div class="spinner-border text-primary" role="status">
                <span class="visually-hidden">Loading...</span>
            </div>
            <p class="mt-2">Analyzing sentiment and emotions...</p>
        </div>
    `;
}
|
| 396 |
+
|
| 397 |
+
// Hide loading state
|
| 398 |
+
/**
 * Empty the given element, but only while it still shows the loading spinner.
 * Content rendered after the spinner (results or an error) is left alone.
 *
 * @param {string} elementId - id of the container to clear.
 */
function hideLoading(elementId) {
    const target = document.getElementById(elementId);
    const stillLoading = Boolean(target) && target.innerHTML.includes('spinner-border');

    if (stillLoading) {
        target.innerHTML = '';
    }
}
|
| 404 |
+
|
| 405 |
+
// Show error message
|
| 406 |
+
/**
 * Render an error alert inside the given element.
 *
 * The message is HTML-escaped before insertion: it can come from a server
 * response or an exception, so it must be displayed as text, never parsed
 * as markup.
 *
 * @param {*} message - Text to display; coerced to a string.
 * @param {string} [elementId='resultsContainer'] - id of the container.
 */
function showError(message, elementId = 'resultsContainer') {
    const element = document.getElementById(elementId);
    if (!element) {
        return;
    }

    const htmlEscapes = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;',
    };
    const safeMessage = String(message).replace(/[&<>"']/g, ch => htmlEscapes[ch]);

    element.innerHTML = `
        <div class="alert alert-danger fade-in">
            <i class="fas fa-exclamation-triangle"></i>
            <strong>Error:</strong> ${safeMessage}
        </div>
    `;
}
|
| 417 |
+
|
| 418 |
+
// Display results
|
| 419 |
+
/**
 * Put the analysis output into #resultsContainer and bring it into view.
 *
 * @param {string} result - Markup to insert as-is via innerHTML.
 *     NOTE(review): presumably server-rendered HTML; confirm it is built
 *     safely on the server, since it is not escaped here.
 */
function displayResults(result) {
    const container = document.getElementById('resultsContainer');
    if (!container) {
        return;
    }

    container.innerHTML = result;
    container.classList.add('fade-in');

    // Scroll to results
    container.scrollIntoView({ behavior: 'smooth', block: 'start' });
}
|
| 429 |
+
</script>
|
| 430 |
+
{% endblock %}
|
templates/summarization.html
ADDED
|
@@ -0,0 +1,396 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block title %}Text Summarization - NLP Ultimate Tutorial{% endblock %}
|
| 4 |
+
|
| 5 |
+
{% block content %}
|
| 6 |
+
<div class="container">
|
| 7 |
+
<!-- Header Section -->
|
| 8 |
+
<div class="row mb-4">
|
| 9 |
+
<div class="col-12">
|
| 10 |
+
<div class="card">
|
| 11 |
+
<div class="card-header">
|
| 12 |
+
<h1 class="mb-0">
|
| 13 |
+
<i class="fas fa-compress-alt"></i>
|
| 14 |
+
Text Summarization
|
| 15 |
+
</h1>
|
| 16 |
+
</div>
|
| 17 |
+
<div class="card-body">
|
| 18 |
+
<p class="lead">Condense text to capture its main points using both extractive and abstractive summarization techniques.</p>
|
| 19 |
+
|
| 20 |
+
<div class="alert alert-info">
|
| 21 |
+
<i class="fas fa-info-circle"></i>
|
| 22 |
+
<strong>About:</strong> Text summarization condenses text to capture its main points, enabling quicker comprehension of large volumes of information.
|
| 23 |
+
</div>
|
| 24 |
+
</div>
|
| 25 |
+
</div>
|
| 26 |
+
</div>
|
| 27 |
+
</div>
|
| 28 |
+
|
| 29 |
+
{% include "_analysis_nav.html" %}
|
| 30 |
+
|
| 31 |
+
<!-- Text Input Section -->
|
| 32 |
+
<div class="row mb-4">
|
| 33 |
+
<div class="col-12">
|
| 34 |
+
<div class="card">
|
| 35 |
+
<div class="card-header">
|
| 36 |
+
<h3 class="mb-0">
|
| 37 |
+
<i class="fas fa-keyboard"></i>
|
| 38 |
+
Enter your text:
|
| 39 |
+
</h3>
|
| 40 |
+
</div>
|
| 41 |
+
<div class="card-body">
|
| 42 |
+
<div class="row mb-3">
|
| 43 |
+
<div class="col-md-8">
|
| 44 |
+
<textarea id="textInput" class="form-control" rows="8" placeholder="Enter or paste your text here... (minimum 3 sentences, 40 words)">Climate change is the long-term alteration of temperature and typical weather patterns in a place. Climate change is already affecting our planet in many ways - from rising sea levels to more frequent extreme weather events. The primary cause of current climate change is largely human activity, like burning fossil fuels, which adds heat-trapping gases to Earth's atmosphere. The consequences of changing climate are already being felt worldwide, affecting ecosystems, agriculture, and human settlements. Addressing climate change requires immediate action to reduce greenhouse gas emissions and adapt to the changes that are already occurring.</textarea>
|
| 45 |
+
</div>
|
| 46 |
+
<div class="col-md-4">
|
| 47 |
+
<label for="sampleSelect" class="form-label">Or choose a sample:</label>
|
| 48 |
+
<select id="sampleSelect" class="form-select">
|
| 49 |
+
<option value="Custom">Custom</option>
|
| 50 |
+
<option value="News Article">News Article</option>
|
| 51 |
+
<option value="Product Review">Product Review</option>
|
| 52 |
+
<option value="Scientific Text">Scientific Text</option>
|
| 53 |
+
<option value="Literary Text">Literary Text</option>
|
| 54 |
+
</select>
|
| 55 |
+
</div>
|
| 56 |
+
</div>
|
| 57 |
+
|
| 58 |
+
<div class="d-flex justify-content-between align-items-center">
|
| 59 |
+
<div>
|
| 60 |
+
<button id="processBtn" class="btn btn-primary btn-lg">
|
| 61 |
+
<i class="fas fa-compress-alt"></i>
|
| 62 |
+
Generate Summary
|
| 63 |
+
</button>
|
| 64 |
+
</div>
|
| 65 |
+
<div>
|
| 66 |
+
<button id="clearBtn" class="btn btn-outline-secondary">
|
| 67 |
+
<i class="fas fa-trash"></i>
|
| 68 |
+
Clear
|
| 69 |
+
</button>
|
| 70 |
+
</div>
|
| 71 |
+
</div>
|
| 72 |
+
</div>
|
| 73 |
+
</div>
|
| 74 |
+
</div>
|
| 75 |
+
</div>
|
| 76 |
+
|
| 77 |
+
<!-- Summary Settings Section -->
|
| 78 |
+
<div class="row mb-4">
|
| 79 |
+
<div class="col-12">
|
| 80 |
+
<div class="card">
|
| 81 |
+
<div class="card-header">
|
| 82 |
+
<h3 class="mb-0">
|
| 83 |
+
<i class="fas fa-cog"></i>
|
| 84 |
+
Summary Settings
|
| 85 |
+
</h3>
|
| 86 |
+
</div>
|
| 87 |
+
<div class="card-body">
|
| 88 |
+
<div class="row">
|
| 89 |
+
<div class="col-md-4">
|
| 90 |
+
<label for="minLength" class="form-label">Minimum Length</label>
|
| 91 |
+
<input type="range" class="form-range" id="minLength" min="10" max="100" value="30" step="5">
|
| 92 |
+
<div class="d-flex justify-content-between">
|
| 93 |
+
<small>10</small>
|
| 94 |
+
<small id="minLengthValue">30</small>
|
| 95 |
+
<small>100</small>
|
| 96 |
+
</div>
|
| 97 |
+
</div>
|
| 98 |
+
<div class="col-md-4">
|
| 99 |
+
<label for="maxLength" class="form-label">Maximum Length</label>
|
| 100 |
+
<input type="range" class="form-range" id="maxLength" min="50" max="350" value="300" step="10">
|
| 101 |
+
<div class="d-flex justify-content-between">
|
| 102 |
+
<small>50</small>
|
| 103 |
+
<small id="maxLengthValue">300</small>
|
| 104 |
+
<small>350</small>
|
| 105 |
+
</div>
|
| 106 |
+
</div>
|
| 107 |
+
<div class="col-md-4">
|
| 108 |
+
<div class="form-check mt-4">
|
| 109 |
+
<input class="form-check-input" type="checkbox" id="useSampling">
|
| 110 |
+
<label class="form-check-label" for="useSampling">
|
| 111 |
+
Use sampling for diverse summaries
|
| 112 |
+
</label>
|
| 113 |
+
<div class="form-text">Enable to generate more creative summaries</div>
|
| 114 |
+
</div>
|
| 115 |
+
</div>
|
| 116 |
+
</div>
|
| 117 |
+
</div>
|
| 118 |
+
</div>
|
| 119 |
+
</div>
|
| 120 |
+
</div>
|
| 121 |
+
|
| 122 |
+
<!-- Summarization Methods Info -->
|
| 123 |
+
<div class="row mb-4">
|
| 124 |
+
<div class="col-12">
|
| 125 |
+
<div class="card">
|
| 126 |
+
<div class="card-header">
|
| 127 |
+
<h3 class="mb-0">
|
| 128 |
+
<i class="fas fa-info-circle"></i>
|
| 129 |
+
Summarization Methods
|
| 130 |
+
</h3>
|
| 131 |
+
</div>
|
| 132 |
+
<div class="card-body">
|
| 133 |
+
<div class="row">
|
| 134 |
+
<div class="col-md-6">
|
| 135 |
+
<div class="card h-100">
|
| 136 |
+
<div class="card-body text-center">
|
| 137 |
+
<i class="fas fa-cut fa-2x text-primary mb-2"></i>
|
| 138 |
+
<h5>Extractive Summarization</h5>
|
| 139 |
+
<p class="small">Selects important sentences from the original text using TextRank algorithm.</p>
|
| 140 |
+
<ul class="list-unstyled small text-start">
|
| 141 |
+
<li>• Preserves original wording</li>
|
| 142 |
+
<li>• Fast and reliable</li>
|
| 143 |
+
<li>• Based on sentence importance</li>
|
| 144 |
+
</ul>
|
| 145 |
+
</div>
|
| 146 |
+
</div>
|
| 147 |
+
</div>
|
| 148 |
+
<div class="col-md-6">
|
| 149 |
+
<div class="card h-100">
|
| 150 |
+
<div class="card-body text-center">
|
| 151 |
+
<i class="fas fa-brain fa-2x text-success mb-2"></i>
|
| 152 |
+
<h5>Abstractive Summarization</h5>
|
| 153 |
+
<p class="small">Generates new sentences using BART model fine-tuned on CNN/DM dataset.</p>
|
| 154 |
+
<ul class="list-unstyled small text-start">
|
| 155 |
+
<li>• Creates human-like summaries</li>
|
| 156 |
+
<li>• More natural language</li>
|
| 157 |
+
<li>• Higher compression ratios</li>
|
| 158 |
+
</ul>
|
| 159 |
+
</div>
|
| 160 |
+
</div>
|
| 161 |
+
</div>
|
| 162 |
+
</div>
|
| 163 |
+
</div>
|
| 164 |
+
</div>
|
| 165 |
+
</div>
|
| 166 |
+
</div>
|
| 167 |
+
|
| 168 |
+
<!-- Results Section -->
|
| 169 |
+
<div class="row">
|
| 170 |
+
<div class="col-12">
|
| 171 |
+
<div class="card">
|
| 172 |
+
<div class="card-header">
|
| 173 |
+
<h3 class="mb-0">
|
| 174 |
+
<i class="fas fa-chart-bar"></i>
|
| 175 |
+
Summarization Results
|
| 176 |
+
</h3>
|
| 177 |
+
</div>
|
| 178 |
+
<div class="card-body">
|
| 179 |
+
<div id="resultsContainer">
|
| 180 |
+
<div class="text-center text-muted py-5">
|
| 181 |
+
<i class="fas fa-arrow-up fa-2x mb-3"></i>
|
| 182 |
+
<p>Click "Generate Summary" to see summarization results</p>
|
| 183 |
+
</div>
|
| 184 |
+
</div>
|
| 185 |
+
</div>
|
| 186 |
+
</div>
|
| 187 |
+
</div>
|
| 188 |
+
</div>
|
| 189 |
+
</div>
|
| 190 |
+
{% endblock %}
|
| 191 |
+
|
| 192 |
+
{% block extra_scripts %}
|
| 193 |
+
<script>
|
| 194 |
+
// Initialize page
|
| 195 |
+
document.addEventListener('DOMContentLoaded', function() {
    // Page wiring for the summarization view: optionally restores text carried
    // over from another page, mirrors the slider values, and attaches the
    // sample picker, the Generate/Clear buttons and the keyboard shortcuts.
    const textInput = document.getElementById('textInput');
    const sampleSelect = document.getElementById('sampleSelect');
    const processBtn = document.getElementById('processBtn');
    const clearBtn = document.getElementById('clearBtn');
    const resultsContainer = document.getElementById('resultsContainer');

    // sessionStorage key holding the user's own ("Custom") text for this route.
    const routeKey = 'customTextBackup:' + (window.location.pathname || '/summarization');

    // Only carry over text when explicitly requested by quick-nav; on a fresh
    // route the default sample text already in the textarea is left untouched.
    if (sessionStorage.getItem('carryTextOnNextPage') === '1') {
        const storedText = sessionStorage.getItem('analysisText');
        if (storedText) textInput.value = storedText;
        // The flag is one-shot: clear it so a later plain visit keeps defaults.
        sessionStorage.removeItem('carryTextOnNextPage');
    }

    // Update slider values
    document.getElementById('minLength').addEventListener('input', function() {
        document.getElementById('minLengthValue').textContent = this.value;
    });

    document.getElementById('maxLength').addEventListener('input', function() {
        document.getElementById('maxLengthValue').textContent = this.value;
    });

    // Sample text dropdown handler with Custom restore.
    // Track the previous selection so the backup is written only when leaving
    // "Custom"; otherwise switching sample -> sample would store a sample's
    // text as if it were the user's own.
    let previousSample = sampleSelect.value;
    sampleSelect.addEventListener('change', function() {
        const sampleType = this.value;
        const leavingCustom = previousSample === 'Custom';
        previousSample = sampleType;

        if (sampleType === 'Custom') {
            // Restore previously typed custom text if any
            const backup = sessionStorage.getItem(routeKey);
            if (backup !== null) {
                textInput.value = backup;
            }
            return;
        }

        // Save current custom text before switching away
        if (leavingCustom) {
            sessionStorage.setItem(routeKey, textInput.value);
        }

        // Get sample text from server
        fetch('/api/sample-text', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({sample_type: sampleType})
        })
        .then(response => response.json())
        .then(data => {
            // Drop a stale response if the user picked something else while
            // this request was in flight.
            if (sampleSelect.value !== sampleType) return;
            textInput.value = data.text || '';
        })
        .catch(error => {
            // Keep the current text; just surface the failure for debugging.
            console.error('Failed to load sample text:', error);
        });
    });

    // While typing, keep a backup of custom text for this route. Edits made
    // while a sample is selected are not treated as custom text.
    textInput.addEventListener('input', function() {
        if (sampleSelect.value === 'Custom') {
            sessionStorage.setItem(routeKey, textInput.value);
        }
    });

    // Process button handler
    processBtn.addEventListener('click', function() {
        const text = textInput.value.trim();

        if (!text) {
            alert('Please enter some text to summarize.');
            return;
        }

        // Show loading state
        const button = this;
        button.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Processing...';
        button.disabled = true;

        const restoreButton = () => {
            button.innerHTML = '<i class="fas fa-compress-alt"></i> Generate Summary';
            button.disabled = false;
        };

        // Process text. When processSummarization() hands back a promise,
        // re-enable the button exactly when the request settles; otherwise
        // keep the fixed two-second delay as a fallback.
        const pending = processSummarization();
        if (pending && typeof pending.finally === 'function') {
            pending.finally(restoreButton);
        } else {
            setTimeout(restoreButton, 2000);
        }
    });

    // Clear button handler: empties the input and restores the placeholder.
    clearBtn.addEventListener('click', function() {
        textInput.value = '';
        resultsContainer.innerHTML = `
            <div class="text-center text-muted py-5">
                <i class="fas fa-arrow-up fa-2x mb-3"></i>
                <p>Click "Generate Summary" to see summarization results</p>
            </div>
        `;
    });

    // Keyboard shortcuts
    document.addEventListener('keydown', function(e) {
        // Ctrl+Enter to process
        if (e.ctrlKey && e.key === 'Enter') {
            processBtn.click();
        }

        // Ctrl+L to clear (preventDefault stops the browser's own Ctrl+L action)
        if (e.ctrlKey && e.key === 'l') {
            e.preventDefault();
            clearBtn.click();
        }
    });
});
|
| 304 |
+
|
| 305 |
+
// Process summarization
|
| 306 |
+
/**
 * Send the textarea content and the summary settings to the summarization
 * endpoint and render the outcome.
 *
 * Reads #textInput, #minLength, #maxLength and #useSampling, POSTs them as
 * JSON to /api/summarization, then shows `data.result` on success or an
 * error alert otherwise.
 *
 * @returns {Promise<void>} Settles once the request has finished and the
 *     results area has been updated, so callers can restore their own UI
 *     state at the right moment. Resolves immediately when the input is
 *     empty or the length settings are inconsistent.
 */
function processSummarization() {
    const text = document.getElementById('textInput').value.trim();

    if (!text) {
        alert('Please enter some text to summarize.');
        return Promise.resolve();
    }

    const minLength = parseInt(document.getElementById('minLength').value, 10);
    const maxLength = parseInt(document.getElementById('maxLength').value, 10);
    const useSampling = document.getElementById('useSampling').checked;

    // The two sliders overlap (minimum goes up to 100, maximum down to 50),
    // so an impossible range can be selected; reject it before calling the API.
    if (minLength > maxLength) {
        showError('Minimum length cannot be greater than maximum length.');
        return Promise.resolve();
    }

    showLoading('resultsContainer');

    return fetch('/api/summarization', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({
            text: text,
            min_length: minLength,
            max_length: maxLength,
            use_sampling: useSampling
        })
    })
    .then(response => response.json())
    .then(data => {
        if (data.success) {
            displayResults(data.result);
        } else {
            showError(data.error || 'An error occurred while processing the text');
        }
    })
    .catch(error => {
        // Network failures and unparsable responses both end up here.
        showError('Failed to process text: ' + error.message);
    })
    .finally(() => {
        // Only clears the container if the spinner is still what it shows.
        hideLoading('resultsContainer');
    });
}
|
| 347 |
+
|
| 348 |
+
// Show loading state
|
| 349 |
+
/**
 * Replace the content of the given element with a spinner and status line.
 * Does nothing when no element has that id.
 *
 * @param {string} elementId - id of the container to fill.
 */
function showLoading(elementId) {
    const target = document.getElementById(elementId);
    if (!target) {
        return;
    }

    target.innerHTML = `
        <div class="text-center py-4">
            <div class="spinner-border text-primary" role="status">
                <span class="visually-hidden">Loading...</span>
            </div>
            <p class="mt-2">Generating summaries...</p>
        </div>
    `;
}
|
| 362 |
+
|
| 363 |
+
// Hide loading state
|
| 364 |
+
/**
 * Empty the given element, but only while it still shows the loading spinner.
 * Content rendered after the spinner (results or an error) is left alone.
 *
 * @param {string} elementId - id of the container to clear.
 */
function hideLoading(elementId) {
    const target = document.getElementById(elementId);
    const stillLoading = Boolean(target) && target.innerHTML.includes('spinner-border');

    if (stillLoading) {
        target.innerHTML = '';
    }
}
|
| 370 |
+
|
| 371 |
+
// Show error message
|
| 372 |
+
/**
 * Render an error alert inside the given element.
 *
 * The message is HTML-escaped before insertion: it can come from a server
 * response or an exception, so it must be displayed as text, never parsed
 * as markup.
 *
 * @param {*} message - Text to display; coerced to a string.
 * @param {string} [elementId='resultsContainer'] - id of the container.
 */
function showError(message, elementId = 'resultsContainer') {
    const element = document.getElementById(elementId);
    if (!element) {
        return;
    }

    const htmlEscapes = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;',
    };
    const safeMessage = String(message).replace(/[&<>"']/g, ch => htmlEscapes[ch]);

    element.innerHTML = `
        <div class="alert alert-danger fade-in">
            <i class="fas fa-exclamation-triangle"></i>
            <strong>Error:</strong> ${safeMessage}
        </div>
    `;
}
|
| 383 |
+
|
| 384 |
+
// Display results
|
| 385 |
+
/**
 * Put the summarization output into #resultsContainer and bring it into view.
 *
 * @param {string} result - Markup to insert as-is via innerHTML.
 *     NOTE(review): presumably server-rendered HTML; confirm it is built
 *     safely on the server, since it is not escaped here.
 */
function displayResults(result) {
    const container = document.getElementById('resultsContainer');
    if (!container) {
        return;
    }

    container.innerHTML = result;
    container.classList.add('fade-in');

    // Scroll to results
    container.scrollIntoView({ behavior: 'smooth', block: 'start' });
}
|
| 395 |
+
</script>
|
| 396 |
+
{% endblock %}
|
templates/text_generation.html
ADDED
|
@@ -0,0 +1,469 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block title %}Text Generation - NLP Ultimate Tutorial{% endblock %}
|
| 4 |
+
|
| 5 |
+
{% block content %}
|
| 6 |
+
<div class="container">
|
| 7 |
+
<!-- Header Section -->
|
| 8 |
+
<div class="row mb-4">
|
| 9 |
+
<div class="col-12">
|
| 10 |
+
<div class="card">
|
| 11 |
+
<div class="card-header">
|
| 12 |
+
<h1 class="mb-0">
|
| 13 |
+
<i class="fas fa-magic"></i>
|
| 14 |
+
Text Generation
|
| 15 |
+
</h1>
|
| 16 |
+
</div>
|
| 17 |
+
<div class="card-body">
|
| 18 |
+
<p class="lead">Generate human-like text continuations using advanced language models.</p>
|
| 19 |
+
|
| 20 |
+
<div class="alert alert-info">
|
| 21 |
+
<i class="fas fa-info-circle"></i>
|
| 22 |
+
<strong>About:</strong> Text generation models can continue or expand on a given text prompt, creating new content that follows the style and context of the input.
|
| 23 |
+
</div>
|
| 24 |
+
</div>
|
| 25 |
+
</div>
|
| 26 |
+
</div>
|
| 27 |
+
</div>
|
| 28 |
+
|
| 29 |
+
{% include "_analysis_nav.html" %}
|
| 30 |
+
|
| 31 |
+
<!-- Input Section -->
|
| 32 |
+
<div class="row mb-4">
|
| 33 |
+
<div class="col-12">
|
| 34 |
+
<div class="card">
|
| 35 |
+
<div class="card-header">
|
| 36 |
+
<h3 class="mb-0">
|
| 37 |
+
<i class="fas fa-keyboard"></i>
|
| 38 |
+
Enter your prompt:
|
| 39 |
+
</h3>
|
| 40 |
+
</div>
|
| 41 |
+
<div class="card-body">
|
| 42 |
+
<div class="row mb-3">
|
| 43 |
+
<div class="col-md-8">
|
| 44 |
+
<textarea id="textInput" class="form-control" rows="6" placeholder="Enter a prompt to continue or expand on...">Once upon a time in a magical forest,</textarea>
|
| 45 |
+
</div>
|
| 46 |
+
<div class="col-md-4">
|
| 47 |
+
<label for="sampleSelect" class="form-label">Or choose a sample:</label>
|
| 48 |
+
<select id="sampleSelect" class="form-select">
|
| 49 |
+
<option value="Custom">Custom</option>
|
| 50 |
+
<option value="Story">Story</option>
|
| 51 |
+
<option value="Technical">Technical</option>
|
| 52 |
+
<option value="Email">Email</option>
|
| 53 |
+
<option value="Recipe">Recipe</option>
|
| 54 |
+
</select>
|
| 55 |
+
</div>
|
| 56 |
+
</div>
|
| 57 |
+
|
| 58 |
+
<div class="d-flex justify-content-between align-items-center">
|
| 59 |
+
<div>
|
| 60 |
+
<button id="processBtn" class="btn btn-primary btn-lg">
|
| 61 |
+
<i class="fas fa-magic"></i>
|
| 62 |
+
Generate Text
|
| 63 |
+
</button>
|
| 64 |
+
</div>
|
| 65 |
+
<div>
|
| 66 |
+
<button id="clearBtn" class="btn btn-outline-secondary">
|
| 67 |
+
<i class="fas fa-trash"></i>
|
| 68 |
+
Clear
|
| 69 |
+
</button>
|
| 70 |
+
</div>
|
| 71 |
+
</div>
|
| 72 |
+
</div>
|
| 73 |
+
</div>
|
| 74 |
+
</div>
|
| 75 |
+
</div>
|
| 76 |
+
|
| 77 |
+
<!-- Generation Settings Section -->
|
| 78 |
+
<div class="row mb-4">
|
| 79 |
+
<div class="col-12">
|
| 80 |
+
<div class="card">
|
| 81 |
+
<div class="card-header">
|
| 82 |
+
<h3 class="mb-0">
|
| 83 |
+
<i class="fas fa-cog"></i>
|
| 84 |
+
Generation Settings
|
| 85 |
+
</h3>
|
| 86 |
+
</div>
|
| 87 |
+
<div class="card-body">
|
| 88 |
+
<div class="row">
|
| 89 |
+
<div class="col-md-3">
|
| 90 |
+
<label for="maxLength" class="form-label">Output Length</label>
|
| 91 |
+
<input type="range" class="form-range" id="maxLength" min="30" max="250" value="100" step="10">
|
| 92 |
+
<div class="d-flex justify-content-between">
|
| 93 |
+
<small>30</small>
|
| 94 |
+
<small id="maxLengthValue">100</small>
|
| 95 |
+
<small>250</small>
|
| 96 |
+
</div>
|
| 97 |
+
<small class="text-muted">Maximum number of tokens to generate</small>
|
| 98 |
+
</div>
|
| 99 |
+
<div class="col-md-3">
|
| 100 |
+
<label for="temperature" class="form-label">Temperature</label>
|
| 101 |
+
<input type="range" class="form-range" id="temperature" min="0.1" max="1.5" value="0.7" step="0.1">
|
| 102 |
+
<div class="d-flex justify-content-between">
|
| 103 |
+
<small>0.1</small>
|
| 104 |
+
<small id="temperatureValue">0.7</small>
|
| 105 |
+
<small>1.5</small>
|
| 106 |
+
</div>
|
| 107 |
+
<small class="text-muted">Higher values make output more random</small>
|
| 108 |
+
</div>
|
| 109 |
+
<div class="col-md-3">
|
| 110 |
+
<label for="topP" class="form-label">Top-p Sampling</label>
|
| 111 |
+
<input type="range" class="form-range" id="topP" min="0.1" max="1.0" value="0.9" step="0.1">
|
| 112 |
+
<div class="d-flex justify-content-between">
|
| 113 |
+
<small>0.1</small>
|
| 114 |
+
<small id="topPValue">0.9</small>
|
| 115 |
+
<small>1.0</small>
|
| 116 |
+
</div>
|
| 117 |
+
<small class="text-muted">Controls diversity via nucleus sampling</small>
|
| 118 |
+
</div>
|
| 119 |
+
<div class="col-md-3">
|
| 120 |
+
<label for="numSequences" class="form-label">Number of Generations</label>
|
| 121 |
+
<input type="range" class="form-range" id="numSequences" min="1" max="3" value="1" step="1">
|
| 122 |
+
<div class="d-flex justify-content-between">
|
| 123 |
+
<small>1</small>
|
| 124 |
+
<small id="numSequencesValue">1</small>
|
| 125 |
+
<small>3</small>
|
| 126 |
+
</div>
|
| 127 |
+
<small class="text-muted">Generate multiple versions to choose from</small>
|
| 128 |
+
</div>
|
| 129 |
+
</div>
|
| 130 |
+
</div>
|
| 131 |
+
</div>
|
| 132 |
+
</div>
|
| 133 |
+
</div>
|
| 134 |
+
|
| 135 |
+
<!-- Model Info Section -->
|
| 136 |
+
<div class="row mb-4">
|
| 137 |
+
<div class="col-12">
|
| 138 |
+
<div class="card">
|
| 139 |
+
<div class="card-header">
|
| 140 |
+
<h3 class="mb-0">
|
| 141 |
+
<i class="fas fa-info-circle"></i>
|
| 142 |
+
Model Information
|
| 143 |
+
</h3>
|
| 144 |
+
</div>
|
| 145 |
+
<div class="card-body">
|
| 146 |
+
<div class="row">
|
| 147 |
+
<div class="col-md-4">
|
| 148 |
+
<div class="card h-100">
|
| 149 |
+
<div class="card-body text-center">
|
| 150 |
+
<i class="fas fa-brain fa-2x text-primary mb-2"></i>
|
| 151 |
+
<h5>GPT-2 Model</h5>
|
| 152 |
+
<p class="small">124M parameter language model trained on diverse internet text</p>
|
| 153 |
+
<ul class="list-unstyled small text-start">
|
| 154 |
+
<li>• Coherent text continuations</li>
|
| 155 |
+
<li>• Style-aware generation</li>
|
| 156 |
+
<li>• Context understanding</li>
|
| 157 |
+
</ul>
|
| 158 |
+
</div>
|
| 159 |
+
</div>
|
| 160 |
+
</div>
|
| 161 |
+
<div class="col-md-4">
|
| 162 |
+
<div class="card h-100">
|
| 163 |
+
<div class="card-body text-center">
|
| 164 |
+
<i class="fas fa-sliders-h fa-2x text-success mb-2"></i>
|
| 165 |
+
<h5>Generation Controls</h5>
|
| 166 |
+
<p class="small">Fine-tune output characteristics with advanced parameters</p>
|
| 167 |
+
<ul class="list-unstyled small text-start">
|
| 168 |
+
<li>• Temperature control</li>
|
| 169 |
+
<li>• Top-p sampling</li>
|
| 170 |
+
<li>• Length management</li>
|
| 171 |
+
</ul>
|
| 172 |
+
</div>
|
| 173 |
+
</div>
|
| 174 |
+
</div>
|
| 175 |
+
<div class="col-md-4">
|
| 176 |
+
<div class="card h-100">
|
| 177 |
+
<div class="card-body text-center">
|
| 178 |
+
<i class="fas fa-chart-line fa-2x text-info mb-2"></i>
|
| 179 |
+
<h5>Text Analysis</h5>
|
| 180 |
+
<p class="small">Comprehensive analysis of generated text quality and characteristics</p>
|
| 181 |
+
<ul class="list-unstyled small text-start">
|
| 182 |
+
<li>• Word length distribution</li>
|
| 183 |
+
<li>• Lexical diversity</li>
|
| 184 |
+
<li>• Generation statistics</li>
|
| 185 |
+
</ul>
|
| 186 |
+
</div>
|
| 187 |
+
</div>
|
| 188 |
+
</div>
|
| 189 |
+
</div>
|
| 190 |
+
</div>
|
| 191 |
+
</div>
|
| 192 |
+
</div>
|
| 193 |
+
</div>
|
| 194 |
+
|
| 195 |
+
<!-- Example Prompts Section -->
|
| 196 |
+
<div class="row mb-4">
|
| 197 |
+
<div class="col-12">
|
| 198 |
+
<div class="card">
|
| 199 |
+
<div class="card-header">
|
| 200 |
+
<h3 class="mb-0">
|
| 201 |
+
<i class="fas fa-list"></i>
|
| 202 |
+
Example Prompts
|
| 203 |
+
</h3>
|
| 204 |
+
</div>
|
| 205 |
+
<div class="card-body">
|
| 206 |
+
<div class="row">
|
| 207 |
+
<div class="col-md-3">
|
| 208 |
+
<button class="btn btn-outline-primary w-100 mb-2" onclick="setExample('Once upon a time in a magical forest,')">
|
| 209 |
+
Once upon a time in a magical forest,
|
| 210 |
+
</button>
|
| 211 |
+
</div>
|
| 212 |
+
<div class="col-md-3">
|
| 213 |
+
<button class="btn btn-outline-primary w-100 mb-2" onclick="setExample('The research findings indicate that')">
|
| 214 |
+
The research findings indicate that
|
| 215 |
+
</button>
|
| 216 |
+
</div>
|
| 217 |
+
<div class="col-md-3">
|
| 218 |
+
<button class="btn btn-outline-primary w-100 mb-2" onclick="setExample('Dear customer service team, I am writing regarding')">
|
| 219 |
+
Dear customer service team, I am writing regarding
|
| 220 |
+
</button>
|
| 221 |
+
</div>
|
| 222 |
+
<div class="col-md-3">
|
| 223 |
+
<button class="btn btn-outline-primary w-100 mb-2" onclick="setExample('The recipe for the perfect chocolate cake requires')">
|
| 224 |
+
The recipe for the perfect chocolate cake requires
|
| 225 |
+
</button>
|
| 226 |
+
</div>
|
| 227 |
+
</div>
|
| 228 |
+
</div>
|
| 229 |
+
</div>
|
| 230 |
+
</div>
|
| 231 |
+
</div>
|
| 232 |
+
|
| 233 |
+
<!-- Results Section -->
|
| 234 |
+
<div class="row">
|
| 235 |
+
<div class="col-12">
|
| 236 |
+
<div class="card">
|
| 237 |
+
<div class="card-header">
|
| 238 |
+
<h3 class="mb-0">
|
| 239 |
+
<i class="fas fa-chart-bar"></i>
|
| 240 |
+
Generated Text Results
|
| 241 |
+
</h3>
|
| 242 |
+
</div>
|
| 243 |
+
<div class="card-body">
|
| 244 |
+
<div id="resultsContainer">
|
| 245 |
+
<div class="text-center text-muted py-5">
|
| 246 |
+
<i class="fas fa-arrow-up fa-2x mb-3"></i>
|
| 247 |
+
<p>Click "Generate Text" to see text generation results</p>
|
| 248 |
+
</div>
|
| 249 |
+
</div>
|
| 250 |
+
</div>
|
| 251 |
+
</div>
|
| 252 |
+
</div>
|
| 253 |
+
</div>
|
| 254 |
+
</div>
|
| 255 |
+
{% endblock %}
|
| 256 |
+
|
| 257 |
+
{% block extra_scripts %}
|
| 258 |
+
<script>
|
| 259 |
+
// Initialize page
|
| 260 |
+
document.addEventListener('DOMContentLoaded', function() {
    // Page setup for the text-generation view: restores or resets the prompt,
    // then wires the sliders, sample dropdown, buttons and keyboard shortcuts.
    const textInput = document.getElementById('textInput');
    const sampleSelect = document.getElementById('sampleSelect');
    const processBtn = document.getElementById('processBtn');
    const clearBtn = document.getElementById('clearBtn');
    const routeKey = 'customTextBackup:' + (window.location.pathname || '/text-generation');

    // Text is carried over only when the 'carryTextOnNextPage' flag is set
    // in sessionStorage; the flag is consumed here. Otherwise reset to defaults.
    const shouldCarry = sessionStorage.getItem('carryTextOnNextPage') === '1';
    if (shouldCarry) {
        const storedText = sessionStorage.getItem('analysisText');
        if (storedText) textInput.value = storedText;
        sessionStorage.removeItem('carryTextOnNextPage');
    } else {
        textInput.value = textInput.defaultValue || '';
        if (sampleSelect) sampleSelect.value = 'Custom';
    }

    // Mirror each slider's current value into its "<id>Value" label.
    ['maxLength', 'temperature', 'topP', 'numSequences'].forEach(function(sliderId) {
        document.getElementById(sliderId).addEventListener('input', function() {
            document.getElementById(sliderId + 'Value').textContent = this.value;
        });
    });

    // Sample dropdown: "Custom" restores the backed-up prompt, any other
    // entry backs up the current prompt and loads the canned one.
    sampleSelect.addEventListener('change', function() {
        const sampleType = this.value;
        if (sampleType === 'Custom') {
            const backup = sessionStorage.getItem(routeKey);
            if (backup !== null) textInput.value = backup;
            return;
        }

        // Save current custom prompt before switching away
        sessionStorage.setItem(routeKey, textInput.value);

        const samples = {
            'Story': 'Once upon a time in a magical forest,',
            'Technical': 'The research findings indicate that',
            'Email': 'Dear customer service team, I am writing regarding',
            'Recipe': 'The recipe for the perfect chocolate cake requires'
        };
        textInput.value = samples[sampleType] || '';
        sessionStorage.setItem('analysisText', textInput.value);
    });

    // While typing, keep both the shared text and the per-route backup current.
    textInput.addEventListener('input', function() {
        sessionStorage.setItem('analysisText', textInput.value);
        sessionStorage.setItem(routeKey, textInput.value);
    });

    // Puts the Generate button back into its idle state.
    function restoreProcessButton() {
        processBtn.innerHTML = '<i class="fas fa-magic"></i> Generate Text';
        processBtn.disabled = false;
    }

    // Generate button: validate, show a busy state, run the request.
    processBtn.addEventListener('click', function() {
        const text = textInput.value.trim();
        if (!text) {
            alert('Please enter a prompt text.');
            return;
        }

        processBtn.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Generating...';
        processBtn.disabled = true;

        let pending;
        try {
            pending = processTextGeneration();
        } catch (err) {
            // Never leave the button stuck in the disabled/spinner state.
            restoreProcessButton();
            throw err;
        }

        if (pending && typeof pending.finally === 'function') {
            // Re-enable exactly when the request settles, so a slow generation
            // cannot be submitted twice and a fast one is not blocked.
            pending.finally(restoreProcessButton);
        } else {
            // No promise was returned: fall back to the fixed 2 s delay.
            setTimeout(restoreProcessButton, 2000);
        }
    });

    // Clear button: empty the prompt and restore the results placeholder.
    clearBtn.addEventListener('click', function() {
        textInput.value = '';
        document.getElementById('resultsContainer').innerHTML = `
            <div class="text-center text-muted py-5">
                <i class="fas fa-arrow-up fa-2x mb-3"></i>
                <p>Click "Generate Text" to see text generation results</p>
            </div>
        `;
    });

    // Keyboard shortcuts
    document.addEventListener('keydown', function(e) {
        // Ctrl+Enter to process
        if (e.ctrlKey && e.key === 'Enter') {
            processBtn.click();
        }

        // Ctrl+L to clear (preventDefault stops the browser's own Ctrl+L action)
        if (e.ctrlKey && e.key === 'l') {
            e.preventDefault();
            clearBtn.click();
        }
    });
});
|
| 371 |
+
|
| 372 |
+
// Set example prompt
|
| 373 |
+
/**
 * Load one of the example prompts into the prompt textarea.
 *
 * Assigning `.value` from script does not fire an `input` event, so the
 * shared 'analysisText' entry is updated here explicitly, the same way the
 * sample dropdown does after it replaces the prompt.
 *
 * @param {string} prompt - Text to place in the textarea.
 */
function setExample(prompt) {
    const textInput = document.getElementById('textInput');
    textInput.value = prompt;
    sessionStorage.setItem('analysisText', textInput.value);
}
|
| 376 |
+
|
| 377 |
+
// Process text generation
|
| 378 |
+
/**
 * Read the prompt and generation settings from the page, POST them to
 * /api/text-generation and render the outcome in #resultsContainer.
 *
 * @returns {Promise<void>} Settles once the request has finished and the
 *   result or error has been rendered; resolves immediately when the prompt
 *   is empty. Callers that ignore the return value are unaffected.
 */
function processTextGeneration() {
    const text = document.getElementById('textInput').value.trim();
    // Explicit radix so the slider values are always parsed as decimal.
    const maxLength = parseInt(document.getElementById('maxLength').value, 10);
    const temperature = parseFloat(document.getElementById('temperature').value);
    const topP = parseFloat(document.getElementById('topP').value);
    const numSequences = parseInt(document.getElementById('numSequences').value, 10);

    if (!text) {
        alert('Please enter a prompt text.');
        return Promise.resolve();
    }

    showLoading('resultsContainer');

    return fetch('/api/text-generation', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({
            text: text,
            max_length: maxLength,
            temperature: temperature,
            top_p: topP,
            num_sequences: numSequences
        })
    })
    .then(response => response.json().catch(() => {
        // A body that is not JSON would otherwise surface as a cryptic
        // parse error; report the HTTP status instead.
        throw new Error('server returned a non-JSON response (HTTP ' + response.status + ')');
    }))
    .then(data => {
        if (data.success) {
            displayResults(data.result);
        } else {
            showError(data.error || 'An error occurred while generating text');
        }
    })
    .catch(error => {
        showError('Failed to generate text: ' + error.message);
    })
    .finally(() => {
        // Only clears the container if the spinner is still showing.
        hideLoading('resultsContainer');
    });
}
|
| 420 |
+
|
| 421 |
+
// Show loading state
|
| 422 |
+
/**
 * Replace the contents of the given element with a spinner and a
 * "Generating text..." caption. Does nothing if the element is missing.
 *
 * @param {string} elementId - id of the element to fill.
 */
function showLoading(elementId) {
    const target = document.getElementById(elementId);
    if (!target) {
        return;
    }
    const spinnerMarkup = `
        <div class="text-center py-4">
            <div class="spinner-border text-primary" role="status">
                <span class="visually-hidden">Loading...</span>
            </div>
            <p class="mt-2">Generating text...</p>
        </div>
    `;
    target.innerHTML = spinnerMarkup;
}
|
| 435 |
+
|
| 436 |
+
// Hide loading state
|
| 437 |
+
/**
 * Empty the given element, but only while its markup still contains the
 * 'spinner-border' class name; any other content is left alone.
 *
 * @param {string} elementId - id of the element to check.
 */
function hideLoading(elementId) {
    const target = document.getElementById(elementId);
    if (!target) {
        return;
    }
    const spinnerStillShown = target.innerHTML.includes('spinner-border');
    if (spinnerStillShown) {
        target.innerHTML = '';
    }
}
|
| 443 |
+
|
| 444 |
+
// Show error message
|
| 445 |
+
/**
 * Render an error alert inside the given element.
 *
 * The message is inserted as a text node rather than interpolated into the
 * markup, so text coming from the server or from an exception cannot inject
 * HTML or script into the page.
 *
 * @param {*} message - Error text to display (converted to a string).
 * @param {string} [elementId='resultsContainer'] - id of the element to fill.
 */
function showError(message, elementId = 'resultsContainer') {
    const element = document.getElementById(elementId);
    if (!element) {
        return;
    }
    element.innerHTML = `
        <div class="alert alert-danger fade-in">
            <i class="fas fa-exclamation-triangle"></i>
            <strong>Error:</strong>
        </div>
    `;
    element.querySelector('.alert').appendChild(
        document.createTextNode(' ' + String(message))
    );
}
|
| 456 |
+
|
| 457 |
+
// Display results
|
| 458 |
+
/**
 * Put the given markup into #resultsContainer, tag the container with the
 * 'fade-in' class and scroll it into view. Does nothing if the container
 * is missing.
 *
 * @param {string} result - Markup assigned to the container's innerHTML.
 */
function displayResults(result) {
    const panel = document.getElementById('resultsContainer');
    if (!panel) {
        return;
    }
    panel.innerHTML = result;
    panel.classList.add('fade-in');

    // Bring the fresh results to the top of the viewport.
    const scrollOptions = { behavior: 'smooth', block: 'start' };
    panel.scrollIntoView(scrollOptions);
}
|
| 468 |
+
</script>
|
| 469 |
+
{% endblock %}
|
templates/tokenization.html
ADDED
|
@@ -0,0 +1,323 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block title %}Tokenization - NLP Ultimate Tutorial{% endblock %}
|
| 4 |
+
|
| 5 |
+
{% block content %}
|
| 6 |
+
<div class="container">
|
| 7 |
+
<!-- Header Section -->
|
| 8 |
+
<div class="row mb-4">
|
| 9 |
+
<div class="col-12">
|
| 10 |
+
<div class="card">
|
| 11 |
+
<div class="card-header">
|
| 12 |
+
<h1 class="mb-0">
|
| 13 |
+
<i class="fas fa-cut"></i>
|
| 14 |
+
Tokenization
|
| 15 |
+
</h1>
|
| 16 |
+
</div>
|
| 17 |
+
<div class="card-body">
|
| 18 |
+
<p class="lead">Break text into smaller units called tokens using various tokenization methods.</p>
|
| 19 |
+
|
| 20 |
+
<div class="alert alert-info">
|
| 21 |
+
<i class="fas fa-info-circle"></i>
|
| 22 |
+
<strong>About:</strong> Tokenization is the process of breaking text into smaller units called tokens, which can be words, characters, or subwords.
|
| 23 |
+
</div>
|
| 24 |
+
</div>
|
| 25 |
+
</div>
|
| 26 |
+
</div>
|
| 27 |
+
</div>
|
| 28 |
+
|
| 29 |
+
{% include "_analysis_nav.html" %}
|
| 30 |
+
|
| 31 |
+
<!-- Text Input Section -->
|
| 32 |
+
<div class="row mb-4">
|
| 33 |
+
<div class="col-12">
|
| 34 |
+
<div class="card">
|
| 35 |
+
<div class="card-header">
|
| 36 |
+
<h3 class="mb-0">
|
| 37 |
+
<i class="fas fa-keyboard"></i>
|
| 38 |
+
Enter your text:
|
| 39 |
+
</h3>
|
| 40 |
+
</div>
|
| 41 |
+
<div class="card-body">
|
| 42 |
+
<div class="row mb-3">
|
| 43 |
+
<div class="col-md-8">
|
| 44 |
+
<textarea id="textInput" class="form-control" rows="6" placeholder="Enter or paste your text here...">The quick brown fox jumps over the lazy dog. It was a beautiful day in May of 2023!</textarea>
|
| 45 |
+
</div>
|
| 46 |
+
<div class="col-md-4">
|
| 47 |
+
<label for="sampleSelect" class="form-label">Or choose a sample:</label>
|
| 48 |
+
<select id="sampleSelect" class="form-select">
|
| 49 |
+
<option value="Custom">Custom</option>
|
| 50 |
+
<option value="News Article">News Article</option>
|
| 51 |
+
<option value="Product Review">Product Review</option>
|
| 52 |
+
<option value="Scientific Text">Scientific Text</option>
|
| 53 |
+
<option value="Literary Text">Literary Text</option>
|
| 54 |
+
</select>
|
| 55 |
+
</div>
|
| 56 |
+
</div>
|
| 57 |
+
|
| 58 |
+
<div class="d-flex justify-content-between align-items-center">
|
| 59 |
+
<div>
|
| 60 |
+
<button id="processBtn" class="btn btn-primary btn-lg">
|
| 61 |
+
<i class="fas fa-cut"></i>
|
| 62 |
+
Analyze Tokens
|
| 63 |
+
</button>
|
| 64 |
+
</div>
|
| 65 |
+
<div>
|
| 66 |
+
<button id="clearBtn" class="btn btn-outline-secondary">
|
| 67 |
+
<i class="fas fa-trash"></i>
|
| 68 |
+
Clear
|
| 69 |
+
</button>
|
| 70 |
+
</div>
|
| 71 |
+
</div>
|
| 72 |
+
</div>
|
| 73 |
+
</div>
|
| 74 |
+
</div>
|
| 75 |
+
</div>
|
| 76 |
+
|
| 77 |
+
<!-- Tokenization Methods Info -->
|
| 78 |
+
<div class="row mb-4">
|
| 79 |
+
<div class="col-12">
|
| 80 |
+
<div class="card">
|
| 81 |
+
<div class="card-header">
|
| 82 |
+
<h3 class="mb-0">
|
| 83 |
+
<i class="fas fa-info-circle"></i>
|
| 84 |
+
Tokenization Methods
|
| 85 |
+
</h3>
|
| 86 |
+
</div>
|
| 87 |
+
<div class="card-body">
|
| 88 |
+
<div class="row">
|
| 89 |
+
<div class="col-md-3">
|
| 90 |
+
<div class="card h-100">
|
| 91 |
+
<div class="card-body text-center">
|
| 92 |
+
<i class="fas fa-font fa-2x text-primary mb-2"></i>
|
| 93 |
+
<h5>Word Tokenization</h5>
|
| 94 |
+
<p class="small">Splits text into individual words and punctuation marks using NLTK.</p>
|
| 95 |
+
</div>
|
| 96 |
+
</div>
|
| 97 |
+
</div>
|
| 98 |
+
<div class="col-md-3">
|
| 99 |
+
<div class="card h-100">
|
| 100 |
+
<div class="card-body text-center">
|
| 101 |
+
<i class="fas fa-paragraph fa-2x text-success mb-2"></i>
|
| 102 |
+
<h5>Sentence Tokenization</h5>
|
| 103 |
+
<p class="small">Divides text into sentences using punctuation and linguistic rules.</p>
|
| 104 |
+
</div>
|
| 105 |
+
</div>
|
| 106 |
+
</div>
|
| 107 |
+
<div class="col-md-3">
|
| 108 |
+
<div class="card h-100">
|
| 109 |
+
<div class="card-body text-center">
|
| 110 |
+
<i class="fas fa-brain fa-2x text-info mb-2"></i>
|
| 111 |
+
<h5>Linguistic Tokenization</h5>
|
| 112 |
+
<p class="small">Advanced tokenization with spaCy including POS tags and dependencies.</p>
|
| 113 |
+
</div>
|
| 114 |
+
</div>
|
| 115 |
+
</div>
|
| 116 |
+
<div class="col-md-3">
|
| 117 |
+
<div class="card h-100">
|
| 118 |
+
<div class="card-body text-center">
|
| 119 |
+
<i class="fas fa-puzzle-piece fa-2x text-warning mb-2"></i>
|
| 120 |
+
<h5>Subword Tokenization</h5>
|
| 121 |
+
<p class="small">Breaks words into smaller units using BERT WordPiece and GPT-2 BPE.</p>
|
| 122 |
+
</div>
|
| 123 |
+
</div>
|
| 124 |
+
</div>
|
| 125 |
+
</div>
|
| 126 |
+
</div>
|
| 127 |
+
</div>
|
| 128 |
+
</div>
|
| 129 |
+
</div>
|
| 130 |
+
|
| 131 |
+
<!-- Results Section -->
|
| 132 |
+
<div class="row">
|
| 133 |
+
<div class="col-12">
|
| 134 |
+
<div class="card">
|
| 135 |
+
<div class="card-header">
|
| 136 |
+
<h3 class="mb-0">
|
| 137 |
+
<i class="fas fa-chart-bar"></i>
|
| 138 |
+
Tokenization Results
|
| 139 |
+
</h3>
|
| 140 |
+
</div>
|
| 141 |
+
<div class="card-body">
|
| 142 |
+
<div id="resultsContainer">
|
| 143 |
+
<div class="text-center text-muted py-5">
|
| 144 |
+
<i class="fas fa-arrow-up fa-2x mb-3"></i>
|
| 145 |
+
<p>Click "Analyze Tokens" to see tokenization results</p>
|
| 146 |
+
</div>
|
| 147 |
+
</div>
|
| 148 |
+
</div>
|
| 149 |
+
</div>
|
| 150 |
+
</div>
|
| 151 |
+
</div>
|
| 152 |
+
</div>
|
| 153 |
+
{% endblock %}
|
| 154 |
+
|
| 155 |
+
{% block extra_scripts %}
|
| 156 |
+
<script>
|
| 157 |
+
// Initialize page
|
| 158 |
+
document.addEventListener('DOMContentLoaded', function() {
    // Page setup for the tokenization view: optionally restores carried-over
    // text, then wires the sample dropdown, buttons and keyboard shortcuts.
    const textInput = document.getElementById('textInput');
    const sampleSelect = document.getElementById('sampleSelect');
    const processBtn = document.getElementById('processBtn');
    const clearBtn = document.getElementById('clearBtn');

    // Text is carried over only when the 'carryTextOnNextPage' flag is set
    // in sessionStorage; the flag is consumed here. Otherwise defaults stay.
    const shouldCarry = sessionStorage.getItem('carryTextOnNextPage') === '1';
    if (shouldCarry) {
        if (sampleSelect) sampleSelect.value = 'Custom';
        const storedText = sessionStorage.getItem('analysisText');
        if (storedText) textInput.value = storedText;
        sessionStorage.removeItem('carryTextOnNextPage');
    }

    // Sample dropdown: "Custom" empties the textarea, any other entry is
    // fetched from the server.
    sampleSelect.addEventListener('change', function() {
        const sampleType = this.value;

        if (sampleType === 'Custom') {
            textInput.value = '';
            return;
        }

        fetch('/api/sample-text', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({sample_type: sampleType})
        })
        .then(response => {
            if (!response.ok) {
                throw new Error('HTTP ' + response.status);
            }
            return response.json();
        })
        .then(data => {
            // Ignore a late response if the selection changed meanwhile.
            if (sampleSelect.value !== sampleType) {
                return;
            }
            // Only write real text; a missing field would otherwise put the
            // literal string "undefined" into the textarea.
            if (data && typeof data.text === 'string') {
                textInput.value = data.text;
            }
        })
        .catch(error => {
            // Keep the current text; the failure is reported to the console
            // instead of becoming an unhandled rejection.
            console.error('Failed to load sample text:', error);
        });
    });

    // Puts the Analyze button back into its idle state.
    function restoreProcessButton() {
        processBtn.innerHTML = '<i class="fas fa-cut"></i> Analyze Tokens';
        processBtn.disabled = false;
    }

    // Analyze button: validate, show a busy state, run the request.
    processBtn.addEventListener('click', function() {
        const text = textInput.value.trim();
        if (!text) {
            alert('Please enter some text to tokenize.');
            return;
        }

        processBtn.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Processing...';
        processBtn.disabled = true;

        let pending;
        try {
            pending = processTokenization();
        } catch (err) {
            // Never leave the button stuck in the disabled/spinner state.
            restoreProcessButton();
            throw err;
        }

        if (pending && typeof pending.finally === 'function') {
            // Re-enable exactly when the request settles.
            pending.finally(restoreProcessButton);
        } else {
            // No promise was returned: fall back to the fixed 2 s delay.
            setTimeout(restoreProcessButton, 2000);
        }
    });

    // Clear button: empty the textarea and restore the results placeholder.
    clearBtn.addEventListener('click', function() {
        textInput.value = '';
        document.getElementById('resultsContainer').innerHTML = `
            <div class="text-center text-muted py-5">
                <i class="fas fa-arrow-up fa-2x mb-3"></i>
                <p>Click "Analyze Tokens" to see tokenization results</p>
            </div>
        `;
    });

    // Keyboard shortcuts
    document.addEventListener('keydown', function(e) {
        // Ctrl+Enter to process
        if (e.ctrlKey && e.key === 'Enter') {
            processBtn.click();
        }

        // Ctrl+L to clear (preventDefault stops the browser's own Ctrl+L action)
        if (e.ctrlKey && e.key === 'l') {
            e.preventDefault();
            clearBtn.click();
        }
    });
});
|
| 240 |
+
|
| 241 |
+
// Process tokenization
|
| 242 |
+
/**
 * POST the textarea contents to /api/tokenization and render the outcome
 * in #resultsContainer.
 *
 * @returns {Promise<void>} Settles once the request has finished and the
 *   result or error has been rendered; resolves immediately when the text
 *   is empty. Callers that ignore the return value are unaffected.
 */
function processTokenization() {
    const text = document.getElementById('textInput').value.trim();

    if (!text) {
        alert('Please enter some text to tokenize.');
        return Promise.resolve();
    }

    showLoading('resultsContainer');

    return fetch('/api/tokenization', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({text: text})
    })
    .then(response => response.json().catch(() => {
        // A body that is not JSON would otherwise surface as a cryptic
        // parse error; report the HTTP status instead.
        throw new Error('server returned a non-JSON response (HTTP ' + response.status + ')');
    }))
    .then(data => {
        if (data.success) {
            displayResults(data.result);
        } else {
            showError(data.error || 'An error occurred while processing the text');
        }
    })
    .catch(error => {
        showError('Failed to process text: ' + error.message);
    })
    .finally(() => {
        // Only clears the container if the spinner is still showing.
        hideLoading('resultsContainer');
    });
}
|
| 274 |
+
|
| 275 |
+
// Show loading state
|
| 276 |
+
/**
 * Fill the given element with a spinner and an "Analyzing tokens..."
 * caption. A missing element is silently ignored.
 *
 * @param {string} elementId - id of the element to fill.
 */
function showLoading(elementId) {
    const host = document.getElementById(elementId);
    if (host === null) {
        return;
    }
    const busyMarkup = `
        <div class="text-center py-4">
            <div class="spinner-border text-primary" role="status">
                <span class="visually-hidden">Loading...</span>
            </div>
            <p class="mt-2">Analyzing tokens...</p>
        </div>
    `;
    host.innerHTML = busyMarkup;
}
|
| 289 |
+
|
| 290 |
+
// Hide loading state
|
| 291 |
+
/**
 * Clear the given element if, and only if, its markup still contains the
 * 'spinner-border' class name.
 *
 * @param {string} elementId - id of the element to check.
 */
function hideLoading(elementId) {
    const host = document.getElementById(elementId);
    const showsSpinner = Boolean(host) && host.innerHTML.includes('spinner-border');
    if (!showsSpinner) {
        return;
    }
    host.innerHTML = '';
}
|
| 297 |
+
|
| 298 |
+
// Show error message
|
| 299 |
+
/**
 * Render an error alert inside the given element.
 *
 * The message is inserted as a text node rather than interpolated into the
 * markup, so text coming from the server or from an exception cannot inject
 * HTML or script into the page.
 *
 * @param {*} message - Error text to display (converted to a string).
 * @param {string} [elementId='resultsContainer'] - id of the element to fill.
 */
function showError(message, elementId = 'resultsContainer') {
    const element = document.getElementById(elementId);
    if (!element) {
        return;
    }
    element.innerHTML = `
        <div class="alert alert-danger fade-in">
            <i class="fas fa-exclamation-triangle"></i>
            <strong>Error:</strong>
        </div>
    `;
    element.querySelector('.alert').appendChild(
        document.createTextNode(' ' + String(message))
    );
}
|
| 310 |
+
|
| 311 |
+
// Display results
|
| 312 |
+
/**
 * Show the given markup in #resultsContainer, add the 'fade-in' class to
 * the container and scroll it into view. A missing container is ignored.
 *
 * @param {string} result - Markup assigned to the container's innerHTML.
 */
function displayResults(result) {
    const output = document.getElementById('resultsContainer');
    if (output === null) {
        return;
    }
    output.innerHTML = result;
    output.classList.add('fade-in');

    // Bring the fresh results to the top of the viewport.
    output.scrollIntoView({ behavior: 'smooth', block: 'start' });
}
|
| 322 |
+
</script>
|
| 323 |
+
{% endblock %}
|
templates/topic_analysis.html
ADDED
|
@@ -0,0 +1,399 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block title %}Topic Analysis - NLP Ultimate Tutorial{% endblock %}
|
| 4 |
+
|
| 5 |
+
{% block content %}
|
| 6 |
+
<div class="container">
|
| 7 |
+
<!-- Header Section -->
|
| 8 |
+
<div class="row mb-4">
|
| 9 |
+
<div class="col-12">
|
| 10 |
+
<div class="card">
|
| 11 |
+
<div class="card-header">
|
| 12 |
+
<h1 class="mb-0">
|
| 13 |
+
<i class="fas fa-project-diagram"></i>
|
| 14 |
+
Topic Analysis
|
| 15 |
+
</h1>
|
| 16 |
+
</div>
|
| 17 |
+
<div class="card-body">
|
| 18 |
+
<p class="lead">Identify main themes and subjects in text using advanced topic modeling techniques.</p>
|
| 19 |
+
|
| 20 |
+
<div class="alert alert-info">
|
| 21 |
+
<i class="fas fa-info-circle"></i>
|
| 22 |
+
<strong>About:</strong> Topic analysis identifies the main themes and subjects in a text, helping to categorize content and understand what it's about.
|
| 23 |
+
</div>
|
| 24 |
+
</div>
|
| 25 |
+
</div>
|
| 26 |
+
</div>
|
| 27 |
+
</div>
|
| 28 |
+
|
| 29 |
+
{% include "_analysis_nav.html" %}
|
| 30 |
+
|
| 31 |
+
<!-- Text Input Section -->
|
| 32 |
+
<div class="row mb-4">
|
| 33 |
+
<div class="col-12">
|
| 34 |
+
<div class="card">
|
| 35 |
+
<div class="card-header">
|
| 36 |
+
<h3 class="mb-0">
|
| 37 |
+
<i class="fas fa-keyboard"></i>
|
| 38 |
+
Enter your text:
|
| 39 |
+
</h3>
|
| 40 |
+
</div>
|
| 41 |
+
<div class="card-body">
|
| 42 |
+
<div class="row mb-3">
|
| 43 |
+
<div class="col-md-8">
|
| 44 |
+
<textarea id="textInput" class="form-control" rows="10" placeholder="Enter or paste your text here... (minimum 50 words for best results)">Artificial intelligence (AI) is intelligence demonstrated by machines, as opposed to the natural intelligence displayed by animals including humans. AI research has been defined as the field of study of intelligent agents, which refers to any system that perceives its environment and takes actions that maximize its chance of achieving its goals. The term "artificial intelligence" had previously been used to describe machines that mimic and display "human" cognitive skills that are associated with the human mind, such as "learning" and "problem-solving". This definition has since been rejected by major AI researchers who now describe AI in terms of rationality and acting rationally, which does not limit how intelligence can be articulated. AI applications include advanced web search engines, recommendation systems (used by YouTube, Amazon and Netflix), understanding human speech (such as Siri or Alexa), self-driving cars, and competing at the highest level in strategic game systems (such as chess and Go). As machines become increasingly capable, tasks considered to require "intelligence" are often removed from the definition of AI, a phenomenon known as the AI effect. For instance, optical character recognition is frequently excluded from things considered to be AI.</textarea>
|
| 45 |
+
</div>
|
| 46 |
+
<div class="col-md-4">
|
| 47 |
+
<label for="sampleSelect" class="form-label">Or choose a sample:</label>
|
| 48 |
+
<select id="sampleSelect" class="form-select">
|
| 49 |
+
<option value="Custom">Custom</option>
|
| 50 |
+
<option value="News Article">News Article</option>
|
| 51 |
+
<option value="Product Review">Product Review</option>
|
| 52 |
+
<option value="Scientific Text">Scientific Text</option>
|
| 53 |
+
<option value="Literary Text">Literary Text</option>
|
| 54 |
+
</select>
|
| 55 |
+
</div>
|
| 56 |
+
</div>
|
| 57 |
+
|
| 58 |
+
<div class="d-flex justify-content-between align-items-center">
|
| 59 |
+
<div>
|
| 60 |
+
<button id="processBtn" class="btn btn-primary btn-lg">
|
| 61 |
+
<i class="fas fa-project-diagram"></i>
|
| 62 |
+
Analyze Topics
|
| 63 |
+
</button>
|
| 64 |
+
</div>
|
| 65 |
+
<div>
|
| 66 |
+
<button id="clearBtn" class="btn btn-outline-secondary">
|
| 67 |
+
<i class="fas fa-trash"></i>
|
| 68 |
+
Clear
|
| 69 |
+
</button>
|
| 70 |
+
</div>
|
| 71 |
+
</div>
|
| 72 |
+
</div>
|
| 73 |
+
</div>
|
| 74 |
+
</div>
|
| 75 |
+
</div>
|
| 76 |
+
|
| 77 |
+
<!-- Topic Analysis Methods Info -->
|
| 78 |
+
<div class="row mb-4">
|
| 79 |
+
<div class="col-12">
|
| 80 |
+
<div class="card">
|
| 81 |
+
<div class="card-header">
|
| 82 |
+
<h3 class="mb-0">
|
| 83 |
+
<i class="fas fa-info-circle"></i>
|
| 84 |
+
Topic Analysis Methods
|
| 85 |
+
</h3>
|
| 86 |
+
</div>
|
| 87 |
+
<div class="card-body">
|
| 88 |
+
<div class="row">
|
| 89 |
+
<div class="col-md-3">
|
| 90 |
+
<div class="card h-100">
|
| 91 |
+
<div class="card-body text-center">
|
| 92 |
+
<i class="fas fa-tags fa-2x text-primary mb-2"></i>
|
| 93 |
+
<h5>Topic Classification</h5>
|
| 94 |
+
<p class="small">Identifies predefined topic categories using keyword matching and scoring.</p>
|
| 95 |
+
<ul class="list-unstyled small text-start">
|
| 96 |
+
<li>• Environment, Science, Business</li>
|
| 97 |
+
<li>• Health, Technology, Politics</li>
|
| 98 |
+
<li>• Sports, Entertainment, Travel</li>
|
| 99 |
+
</ul>
|
| 100 |
+
</div>
|
| 101 |
+
</div>
|
| 102 |
+
</div>
|
| 103 |
+
<div class="col-md-3">
|
| 104 |
+
<div class="card h-100">
|
| 105 |
+
<div class="card-body text-center">
|
| 106 |
+
<i class="fas fa-chart-bar fa-2x text-success mb-2"></i>
|
| 107 |
+
<h5>Term Frequency</h5>
|
| 108 |
+
<p class="small">Analyzes word frequencies to identify the most important terms and concepts.</p>
|
| 109 |
+
<ul class="list-unstyled small text-start">
|
| 110 |
+
<li>• Word frequency analysis</li>
|
| 111 |
+
<li>• Key phrase extraction</li>
|
| 112 |
+
<li>• N-gram analysis</li>
|
| 113 |
+
</ul>
|
| 114 |
+
</div>
|
| 115 |
+
</div>
|
| 116 |
+
</div>
|
| 117 |
+
<div class="col-md-3">
|
| 118 |
+
<div class="card h-100">
|
| 119 |
+
<div class="card-body text-center">
|
| 120 |
+
<i class="fas fa-calculator fa-2x text-info mb-2"></i>
|
| 121 |
+
<h5>TF-IDF Analysis</h5>
|
| 122 |
+
<p class="small">Identifies distinctive terms using Term Frequency-Inverse Document Frequency.</p>
|
| 123 |
+
<ul class="list-unstyled small text-start">
|
| 124 |
+
<li>• Sentence-level analysis</li>
|
| 125 |
+
<li>• Distinctive term identification</li>
|
| 126 |
+
<li>• Heatmap visualization</li>
|
| 127 |
+
</ul>
|
| 128 |
+
</div>
|
| 129 |
+
</div>
|
| 130 |
+
</div>
|
| 131 |
+
<div class="col-md-3">
|
| 132 |
+
<div class="card h-100">
|
| 133 |
+
<div class="card-body text-center">
|
| 134 |
+
<i class="fas fa-brain fa-2x text-warning mb-2"></i>
|
| 135 |
+
<h5>LDA Topic Modeling</h5>
|
| 136 |
+
<p class="small">Uses Latent Dirichlet Allocation to discover abstract topics in text.</p>
|
| 137 |
+
<ul class="list-unstyled small text-start">
|
| 138 |
+
<li>• Probabilistic modeling</li>
|
| 139 |
+
<li>• Topic distribution</li>
|
| 140 |
+
<li>• Network visualization</li>
|
| 141 |
+
</ul>
|
| 142 |
+
</div>
|
| 143 |
+
</div>
|
| 144 |
+
</div>
|
| 145 |
+
</div>
|
| 146 |
+
</div>
|
| 147 |
+
</div>
|
| 148 |
+
</div>
|
| 149 |
+
</div>
|
| 150 |
+
|
| 151 |
+
<!-- Analysis Features Info -->
|
| 152 |
+
<div class="row mb-4">
|
| 153 |
+
<div class="col-12">
|
| 154 |
+
<div class="card">
|
| 155 |
+
<div class="card-header">
|
| 156 |
+
<h3 class="mb-0">
|
| 157 |
+
<i class="fas fa-chart-line"></i>
|
| 158 |
+
Analysis Features
|
| 159 |
+
</h3>
|
| 160 |
+
</div>
|
| 161 |
+
<div class="card-body">
|
| 162 |
+
<div class="row">
|
| 163 |
+
<div class="col-md-4">
|
| 164 |
+
<div class="card text-center">
|
| 165 |
+
<div class="card-body">
|
| 166 |
+
<i class="fas fa-cloud fa-2x text-primary mb-2"></i>
|
| 167 |
+
<h5>Word Clouds</h5>
|
| 168 |
+
<p class="small mb-0">Visual representation of term frequencies</p>
|
| 169 |
+
</div>
|
| 170 |
+
</div>
|
| 171 |
+
</div>
|
| 172 |
+
<div class="col-md-4">
|
| 173 |
+
<div class="card text-center">
|
| 174 |
+
<div class="card-body">
|
| 175 |
+
<i class="fas fa-network-wired fa-2x text-success mb-2"></i>
|
| 176 |
+
<h5>Network Graphs</h5>
|
| 177 |
+
<p class="small mb-0">Topic-term relationship visualization</p>
|
| 178 |
+
</div>
|
| 179 |
+
</div>
|
| 180 |
+
</div>
|
| 181 |
+
<div class="col-md-4">
|
| 182 |
+
<div class="card text-center">
|
| 183 |
+
<div class="card-body">
|
| 184 |
+
<i class="fas fa-fire fa-2x text-danger mb-2"></i>
|
| 185 |
+
<h5>Heatmaps</h5>
|
| 186 |
+
<p class="small mb-0">TF-IDF term importance visualization</p>
|
| 187 |
+
</div>
|
| 188 |
+
</div>
|
| 189 |
+
</div>
|
| 190 |
+
</div>
|
| 191 |
+
</div>
|
| 192 |
+
</div>
|
| 193 |
+
</div>
|
| 194 |
+
</div>
|
| 195 |
+
|
| 196 |
+
<!-- Results Section -->
|
| 197 |
+
<div class="row">
|
| 198 |
+
<div class="col-12">
|
| 199 |
+
<div class="card">
|
| 200 |
+
<div class="card-header">
|
| 201 |
+
<h3 class="mb-0">
|
| 202 |
+
<i class="fas fa-chart-bar"></i>
|
| 203 |
+
Topic Analysis Results
|
| 204 |
+
</h3>
|
| 205 |
+
</div>
|
| 206 |
+
<div class="card-body">
|
| 207 |
+
<div id="resultsContainer">
|
| 208 |
+
<div class="text-center text-muted py-5">
|
| 209 |
+
<i class="fas fa-arrow-up fa-2x mb-3"></i>
|
| 210 |
+
<p>Click "Analyze Topics" to see topic analysis results</p>
|
| 211 |
+
</div>
|
| 212 |
+
</div>
|
| 213 |
+
</div>
|
| 214 |
+
</div>
|
| 215 |
+
</div>
|
| 216 |
+
</div>
|
| 217 |
+
</div>
|
| 218 |
+
{% endblock %}
|
| 219 |
+
|
| 220 |
+
{% block extra_scripts %}
|
| 221 |
+
<script>
|
| 222 |
+
// Initialize page
|
| 223 |
+
// Wire up the topic-analysis page once the DOM is ready: optional text
// carry-over, the sample dropdown, the process/clear buttons and the
// keyboard shortcuts.
document.addEventListener('DOMContentLoaded', function() {
    const textInput = document.getElementById('textInput');
    const sampleSelect = document.getElementById('sampleSelect');
    const processBtn = document.getElementById('processBtn');
    const clearBtn = document.getElementById('clearBtn');

    // Per-route sessionStorage key that holds the user's own ("Custom") text.
    const routeKey = 'customTextBackup:' + (window.location.pathname || '/topic-analysis');

    // Only carry over when requested
    const shouldCarry = sessionStorage.getItem('carryTextOnNextPage') === '1';
    if (shouldCarry) {
        const storedText = sessionStorage.getItem('analysisText');
        if (storedText) textInput.value = storedText;
        sessionStorage.removeItem('carryTextOnNextPage');
    }

    // Sample text dropdown handler with Custom restore.
    // Remember the previous selection so the backup is written only when the
    // user leaves "Custom"; saving on every change would overwrite the user's
    // text with the previously loaded sample when going sample -> sample.
    let previousSample = sampleSelect.value;
    sampleSelect.addEventListener('change', function() {
        const sampleType = this.value;
        const leavingCustom = previousSample === 'Custom';
        previousSample = sampleType;

        if (sampleType === 'Custom') {
            const backup = sessionStorage.getItem(routeKey);
            if (backup !== null) textInput.value = backup;
            return;
        }

        if (leavingCustom) {
            sessionStorage.setItem(routeKey, textInput.value);
        }

        fetch('/api/sample-text', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({sample_type: sampleType})
        })
        .then(response => {
            if (!response.ok) {
                throw new Error('HTTP ' + response.status);
            }
            return response.json();
        })
        .then(data => {
            // Ignore a late response if the user has picked something else meanwhile.
            if (sampleSelect.value !== sampleType) return;
            textInput.value = data.text || '';
        })
        .catch(error => {
            // Leave the current text in place; a failed sample load is not fatal.
            console.error('Failed to load sample text:', error);
        });
    });

    // Keep backup while typing
    textInput.addEventListener('input', function() {
        sessionStorage.setItem(routeKey, textInput.value);
    });

    // Process button handler
    processBtn.addEventListener('click', function() {
        const text = textInput.value.trim();

        if (!text) {
            alert('Please enter some text to analyze.');
            return;
        }

        // Show loading state
        processBtn.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Processing...';
        processBtn.disabled = true;

        const restoreButton = () => {
            processBtn.innerHTML = '<i class="fas fa-project-diagram"></i> Analyze Topics';
            processBtn.disabled = false;
        };

        // Process text. If the call hands back a promise, re-enable the button
        // when the request has actually finished; otherwise fall back to the
        // fixed two-second delay.
        const pending = processTopicAnalysis();
        if (pending && typeof pending.finally === 'function') {
            pending.finally(restoreButton);
        } else {
            setTimeout(restoreButton, 2000);
        }
    });

    // Clear button handler: empty the input and restore the placeholder hint.
    clearBtn.addEventListener('click', function() {
        textInput.value = '';
        document.getElementById('resultsContainer').innerHTML = [
            '',
            '<div class="text-center text-muted py-5">',
            '<i class="fas fa-arrow-up fa-2x mb-3"></i>',
            '<p>Click "Analyze Topics" to see topic analysis results</p>',
            '</div>',
            ''
        ].join('\n');
    });

    // Keyboard shortcuts
    document.addEventListener('keydown', function(e) {
        // Ctrl+Enter to process
        if (e.ctrlKey && e.key === 'Enter') {
            processBtn.click();
        }

        // Ctrl+L to clear (preventDefault stops the browser's own Ctrl+L action)
        if (e.ctrlKey && e.key === 'l') {
            e.preventDefault();
            clearBtn.click();
        }
    });
});
|
| 316 |
+
|
| 317 |
+
// Process topic analysis
|
| 318 |
+
/**
 * Send the text in #textInput to the topic-analysis endpoint and render the
 * outcome in #resultsContainer.
 *
 * Flow: validate that the trimmed text is non-empty, show the spinner, POST
 * `{text}` as JSON to `/api/topic-analysis`, then either display
 * `data.result` (when `data.success` is truthy) or an error alert. The
 * spinner is cleared afterwards in every case.
 *
 * @returns {Promise<void>|undefined} A promise that settles after the
 *     response has been handled and the spinner cleared, or `undefined` when
 *     the input was empty and no request was sent.
 */
function processTopicAnalysis() {
    const text = document.getElementById('textInput').value.trim();

    if (!text) {
        alert('Please enter some text to analyze.');
        return;
    }

    showLoading('resultsContainer');

    return fetch('/api/topic-analysis', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({text: text})
    })
    .then(response => response.json().catch(() => {
        // The body was not JSON (e.g. an HTML error page): report the HTTP
        // status instead of an opaque JSON parser message.
        throw new Error('Server returned HTTP ' + response.status);
    }))
    .then(data => {
        if (data && data.success) {
            displayResults(data.result);
        } else {
            showError((data && data.error) || 'An error occurred while processing the text');
        }
    })
    .catch(error => {
        showError('Failed to process text: ' + error.message);
    })
    .finally(() => {
        // Only removes the spinner; rendered results or errors are kept.
        hideLoading('resultsContainer');
    });
}
|
| 350 |
+
|
| 351 |
+
// Show loading state
|
| 352 |
+
/**
 * Replace the content of an element with a centered spinner and the caption
 * "Analyzing topics and themes...".
 *
 * Does nothing when no element with the given id exists.
 *
 * @param {string} elementId - Id of the element that receives the spinner markup.
 */
function showLoading(elementId) {
    const target = document.getElementById(elementId);
    if (!target) {
        return;
    }

    // Leading/trailing empty entries keep the newline that surrounds the markup.
    const spinnerMarkup = [
        '',
        '<div class="text-center py-4">',
        '<div class="spinner-border text-primary" role="status">',
        '<span class="visually-hidden">Loading...</span>',
        '</div>',
        '<p class="mt-2">Analyzing topics and themes...</p>',
        '</div>',
        ''
    ].join('\n');

    target.innerHTML = spinnerMarkup;
}
|
| 365 |
+
|
| 366 |
+
// Hide loading state
|
| 367 |
+
/**
 * Empty an element, but only while it still shows the loading spinner.
 *
 * The element's markup is searched for the `spinner-border` class name; when
 * that marker is absent the content is left untouched, so anything rendered
 * in place of the spinner survives this call.
 *
 * @param {string} elementId - Id of the element to clear.
 */
function hideLoading(elementId) {
    const target = document.getElementById(elementId);
    if (!target) {
        return;
    }

    const stillLoading = target.innerHTML.includes('spinner-border');
    if (stillLoading) {
        target.innerHTML = '';
    }
}
|
| 373 |
+
|
| 374 |
+
// Show error message
|
| 375 |
+
/**
 * Render an error alert inside the given container, replacing its content.
 *
 * The alert is assembled from DOM nodes and the message is inserted as plain
 * text. Interpolating it into an HTML string would let markup contained in
 * the message (for example a server error that echoes submitted text) be
 * parsed and executed by the browser.
 *
 * Does nothing when no element with the given id exists.
 *
 * @param {string} message - Text shown after the "Error:" label.
 * @param {string} [elementId='resultsContainer'] - Id of the element whose
 *     content is replaced by the alert.
 */
function showError(message, elementId = 'resultsContainer') {
    const element = document.getElementById(elementId);
    if (!element) {
        return;
    }

    const alertBox = document.createElement('div');
    alertBox.className = 'alert alert-danger fade-in';

    const icon = document.createElement('i');
    icon.className = 'fas fa-exclamation-triangle';

    const label = document.createElement('strong');
    label.textContent = 'Error:';

    // String arguments to append() become text nodes, so they are never parsed as HTML.
    alertBox.append(icon, ' ', label, ' ' + String(message));

    element.innerHTML = '';
    element.appendChild(alertBox);
}
|
| 386 |
+
|
| 387 |
+
// Display results
|
| 388 |
+
/**
 * Show an analysis result in the #resultsContainer element and scroll to it.
 *
 * The result is assigned to `innerHTML`, i.e. it is parsed as markup rather
 * than shown as text. Does nothing when the container is missing.
 *
 * @param {string} result - HTML fragment to display.
 */
function displayResults(result) {
    const panel = document.getElementById('resultsContainer');
    if (!panel) {
        return;
    }

    panel.innerHTML = result;
    panel.classList.add('fade-in');

    // Bring the freshly rendered results into view.
    const scrollOptions = { behavior: 'smooth', block: 'start' };
    panel.scrollIntoView(scrollOptions);
}
|
| 398 |
+
</script>
|
| 399 |
+
{% endblock %}
|
templates/translation.html
ADDED
|
@@ -0,0 +1,468 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block title %}Machine Translation - NLP Ultimate Tutorial{% endblock %}
|
| 4 |
+
|
| 5 |
+
{% block content %}
|
| 6 |
+
<div class="container">
|
| 7 |
+
<!-- Header Section -->
|
| 8 |
+
<div class="row mb-4">
|
| 9 |
+
<div class="col-12">
|
| 10 |
+
<div class="card">
|
| 11 |
+
<div class="card-header">
|
| 12 |
+
<h1 class="mb-0">
|
| 13 |
+
<i class="fas fa-language"></i>
|
| 14 |
+
Machine Translation
|
| 15 |
+
</h1>
|
| 16 |
+
</div>
|
| 17 |
+
<div class="card-body">
|
| 18 |
+
<p class="lead">Translate text between multiple languages using advanced neural machine translation models.</p>
|
| 19 |
+
|
| 20 |
+
<div class="alert alert-info">
|
| 21 |
+
<i class="fas fa-info-circle"></i>
|
| 22 |
+
<strong>About:</strong> Machine translation converts text from one language to another while preserving meaning and context as accurately as possible.
|
| 23 |
+
</div>
|
| 24 |
+
</div>
|
| 25 |
+
</div>
|
| 26 |
+
</div>
|
| 27 |
+
</div>
|
| 28 |
+
|
| 29 |
+
{% include "_analysis_nav.html" %}
|
| 30 |
+
|
| 31 |
+
<!-- Input Section -->
|
| 32 |
+
<div class="row mb-4">
|
| 33 |
+
<div class="col-12">
|
| 34 |
+
<div class="card">
|
| 35 |
+
<div class="card-header">
|
| 36 |
+
<h3 class="mb-0">
|
| 37 |
+
<i class="fas fa-keyboard"></i>
|
| 38 |
+
Enter text to translate:
|
| 39 |
+
</h3>
|
| 40 |
+
</div>
|
| 41 |
+
<div class="card-body">
|
| 42 |
+
<div class="row mb-3">
|
| 43 |
+
<div class="col-md-8">
|
| 44 |
+
<textarea id="textInput" class="form-control" rows="6" placeholder="Enter text here to translate...">Hello, how are you today? I hope you're doing well.</textarea>
|
| 45 |
+
</div>
|
| 46 |
+
<div class="col-md-4">
|
| 47 |
+
<label for="sampleSelect" class="form-label">Or choose a sample:</label>
|
| 48 |
+
<select id="sampleSelect" class="form-select">
|
| 49 |
+
<option value="Custom">Custom</option>
|
| 50 |
+
<option value="English">English</option>
|
| 51 |
+
<option value="Spanish">Spanish</option>
|
| 52 |
+
<option value="French">French</option>
|
| 53 |
+
<option value="German">German</option>
|
| 54 |
+
</select>
|
| 55 |
+
</div>
|
| 56 |
+
</div>
|
| 57 |
+
|
| 58 |
+
<div class="d-flex justify-content-between align-items-center">
|
| 59 |
+
<div>
|
| 60 |
+
<button id="processBtn" class="btn btn-primary btn-lg">
|
| 61 |
+
<i class="fas fa-language"></i>
|
| 62 |
+
Translate
|
| 63 |
+
</button>
|
| 64 |
+
</div>
|
| 65 |
+
<div>
|
| 66 |
+
<button id="clearBtn" class="btn btn-outline-secondary">
|
| 67 |
+
<i class="fas fa-trash"></i>
|
| 68 |
+
Clear
|
| 69 |
+
</button>
|
| 70 |
+
</div>
|
| 71 |
+
</div>
|
| 72 |
+
</div>
|
| 73 |
+
</div>
|
| 74 |
+
</div>
|
| 75 |
+
</div>
|
| 76 |
+
|
| 77 |
+
<!-- Language Selection Section -->
|
| 78 |
+
<div class="row mb-4">
|
| 79 |
+
<div class="col-12">
|
| 80 |
+
<div class="card">
|
| 81 |
+
<div class="card-header">
|
| 82 |
+
<h3 class="mb-0">
|
| 83 |
+
<i class="fas fa-cog"></i>
|
| 84 |
+
Language Selection
|
| 85 |
+
</h3>
|
| 86 |
+
</div>
|
| 87 |
+
<div class="card-body">
|
| 88 |
+
<div class="row">
|
| 89 |
+
<div class="col-md-6">
|
| 90 |
+
<label for="sourceLang" class="form-label">Source Language</label>
|
| 91 |
+
<select id="sourceLang" class="form-select">
|
| 92 |
+
<option value="auto" selected>Auto-detect</option>
|
| 93 |
+
<option value="en">English</option>
|
| 94 |
+
<option value="es">Spanish</option>
|
| 95 |
+
<option value="fr">French</option>
|
| 96 |
+
<option value="de">German</option>
|
| 97 |
+
<option value="ru">Russian</option>
|
| 98 |
+
<option value="zh">Chinese</option>
|
| 99 |
+
<option value="ar">Arabic</option>
|
| 100 |
+
<option value="hi">Hindi</option>
|
| 101 |
+
<option value="ja">Japanese</option>
|
| 102 |
+
<option value="pt">Portuguese</option>
|
| 103 |
+
<option value="it">Italian</option>
|
| 104 |
+
</select>
|
| 105 |
+
</div>
|
| 106 |
+
<div class="col-md-6">
|
| 107 |
+
<label for="targetLang" class="form-label">Target Language</label>
|
| 108 |
+
<select id="targetLang" class="form-select">
|
| 109 |
+
<option value="en" selected>English</option>
|
| 110 |
+
<option value="es">Spanish</option>
|
| 111 |
+
<option value="fr">French</option>
|
| 112 |
+
<option value="de">German</option>
|
| 113 |
+
<option value="ru">Russian</option>
|
| 114 |
+
<option value="zh">Chinese</option>
|
| 115 |
+
<option value="ar">Arabic</option>
|
| 116 |
+
<option value="hi">Hindi</option>
|
| 117 |
+
<option value="ja">Japanese</option>
|
| 118 |
+
<option value="pt">Portuguese</option>
|
| 119 |
+
<option value="it">Italian</option>
|
| 120 |
+
</select>
|
| 121 |
+
</div>
|
| 122 |
+
</div>
|
| 123 |
+
</div>
|
| 124 |
+
</div>
|
| 125 |
+
</div>
|
| 126 |
+
</div>
|
| 127 |
+
|
| 128 |
+
<!-- Model Info Section -->
|
| 129 |
+
<div class="row mb-4">
|
| 130 |
+
<div class="col-12">
|
| 131 |
+
<div class="card">
|
| 132 |
+
<div class="card-header">
|
| 133 |
+
<h3 class="mb-0">
|
| 134 |
+
<i class="fas fa-info-circle"></i>
|
| 135 |
+
Model Information
|
| 136 |
+
</h3>
|
| 137 |
+
</div>
|
| 138 |
+
<div class="card-body">
|
| 139 |
+
<div class="row">
|
| 140 |
+
<div class="col-md-4">
|
| 141 |
+
<div class="card h-100">
|
| 142 |
+
<div class="card-body text-center">
|
| 143 |
+
<i class="fas fa-brain fa-2x text-primary mb-2"></i>
|
| 144 |
+
<h5>Helsinki-NLP/opus-mt</h5>
|
| 145 |
+
<p class="small">Pre-trained neural machine translation models</p>
|
| 146 |
+
<ul class="list-unstyled small text-start">
|
| 147 |
+
<li>• Transformer-based architecture</li>
|
| 148 |
+
<li>• Multiple language pairs</li>
|
| 149 |
+
<li>• High accuracy translations</li>
|
| 150 |
+
</ul>
|
| 151 |
+
</div>
|
| 152 |
+
</div>
|
| 153 |
+
</div>
|
| 154 |
+
<div class="col-md-4">
|
| 155 |
+
<div class="card h-100">
|
| 156 |
+
<div class="card-body text-center">
|
| 157 |
+
<i class="fas fa-globe fa-2x text-success mb-2"></i>
|
| 158 |
+
<h5>Language Support</h5>
|
| 159 |
+
<p class="small">Comprehensive language pair coverage</p>
|
| 160 |
+
<ul class="list-unstyled small text-start">
|
| 161 |
+
<li>• 12+ languages supported</li>
|
| 162 |
+
<li>• Auto-detection capability</li>
|
| 163 |
+
<li>• Bidirectional translation</li>
|
| 164 |
+
</ul>
|
| 165 |
+
</div>
|
| 166 |
+
</div>
|
| 167 |
+
</div>
|
| 168 |
+
<div class="col-md-4">
|
| 169 |
+
<div class="card h-100">
|
| 170 |
+
<div class="card-body text-center">
|
| 171 |
+
<i class="fas fa-chart-line fa-2x text-info mb-2"></i>
|
| 172 |
+
<h5>Translation Analysis</h5>
|
| 173 |
+
<p class="small">Comprehensive analysis of translation quality</p>
|
| 174 |
+
<ul class="list-unstyled small text-start">
|
| 175 |
+
<li>• Length comparison</li>
|
| 176 |
+
<li>• Language characteristics</li>
|
| 177 |
+
<li>• Translation statistics</li>
|
| 178 |
+
</ul>
|
| 179 |
+
</div>
|
| 180 |
+
</div>
|
| 181 |
+
</div>
|
| 182 |
+
</div>
|
| 183 |
+
</div>
|
| 184 |
+
</div>
|
| 185 |
+
</div>
|
| 186 |
+
</div>
|
| 187 |
+
|
| 188 |
+
<!-- Example Texts Section -->
|
| 189 |
+
<div class="row mb-4">
|
| 190 |
+
<div class="col-12">
|
| 191 |
+
<div class="card">
|
| 192 |
+
<div class="card-header">
|
| 193 |
+
<h3 class="mb-0">
|
| 194 |
+
<i class="fas fa-list"></i>
|
| 195 |
+
Example Texts
|
| 196 |
+
</h3>
|
| 197 |
+
</div>
|
| 198 |
+
<div class="card-body">
|
| 199 |
+
<div class="row">
|
| 200 |
+
<div class="col-md-6">
|
| 201 |
+
<button class="btn btn-outline-primary w-100 mb-2" onclick="setExample('Hello, how are you today? I hope you\'re doing well.', 'auto', 'es')">
|
| 202 |
+
Hello, how are you today? I hope you're doing well.
|
| 203 |
+
</button>
|
| 204 |
+
</div>
|
| 205 |
+
<div class="col-md-6">
|
| 206 |
+
<button class="btn btn-outline-primary w-100 mb-2" onclick="setExample('La vie est belle et pleine de surprises.', 'auto', 'en')">
|
| 207 |
+
La vie est belle et pleine de surprises.
|
| 208 |
+
</button>
|
| 209 |
+
</div>
|
| 210 |
+
<div class="col-md-6">
|
| 211 |
+
<button class="btn btn-outline-primary w-100 mb-2" onclick="setExample('Der schnelle braune Fuchs springt über den faulen Hund.', 'auto', 'fr')">
|
| 212 |
+
Der schnelle braune Fuchs springt über den faulen Hund.
|
| 213 |
+
</button>
|
| 214 |
+
</div>
|
| 215 |
+
<div class="col-md-6">
|
| 216 |
+
<button class="btn btn-outline-primary w-100 mb-2" onclick="setExample('Me gustaría reservar una mesa para dos personas esta noche.', 'auto', 'en')">
|
| 217 |
+
Me gustaría reservar una mesa para dos personas esta noche.
|
| 218 |
+
</button>
|
| 219 |
+
</div>
|
| 220 |
+
</div>
|
| 221 |
+
</div>
|
| 222 |
+
</div>
|
| 223 |
+
</div>
|
| 224 |
+
</div>
|
| 225 |
+
|
| 226 |
+
<!-- Results Section -->
|
| 227 |
+
<div class="row">
|
| 228 |
+
<div class="col-12">
|
| 229 |
+
<div class="card">
|
| 230 |
+
<div class="card-header">
|
| 231 |
+
<h3 class="mb-0">
|
| 232 |
+
<i class="fas fa-chart-bar"></i>
|
| 233 |
+
Translation Results
|
| 234 |
+
</h3>
|
| 235 |
+
</div>
|
| 236 |
+
<div class="card-body">
|
| 237 |
+
<div id="resultsContainer">
|
| 238 |
+
<div class="text-center text-muted py-5">
|
| 239 |
+
<i class="fas fa-arrow-up fa-2x mb-3"></i>
|
| 240 |
+
<p>Click "Translate" to see translation results</p>
|
| 241 |
+
</div>
|
| 242 |
+
</div>
|
| 243 |
+
</div>
|
| 244 |
+
</div>
|
| 245 |
+
</div>
|
| 246 |
+
</div>
|
| 247 |
+
</div>
|
| 248 |
+
{% endblock %}
|
| 249 |
+
|
| 250 |
+
{% block extra_scripts %}
|
| 251 |
+
<script>
|
| 252 |
+
// Initialize page
|
| 253 |
+
document.addEventListener('DOMContentLoaded', function() {
    // Page bootstrap for the translation view: restore or reset the inputs,
    // then wire the sample dropdown, backup persistence, buttons and shortcuts.
    const textInput = document.getElementById('textInput');
    const sampleSelect = document.getElementById('sampleSelect');
    const sourceLang = document.getElementById('sourceLang');
    const targetLang = document.getElementById('targetLang');
    const processBtn = document.getElementById('processBtn');
    const clearBtn = document.getElementById('clearBtn');
    const routeKey = 'customTextBackup:' + (window.location.pathname || '/translation');

    if (!textInput) {
        // Every handler below reads or writes the text area; nothing to wire without it.
        return;
    }

    // Sample prompts offered by the dropdown (text plus the language pair to select).
    const samples = {
        'English': { text: 'Hello, how are you today? I hope you\'re doing well.', source: 'auto', target: 'es' },
        'Spanish': { text: 'La vida es bella y llena de sorpresas.', source: 'auto', target: 'en' },
        'French': { text: 'La vie est belle et pleine de surprises.', source: 'auto', target: 'en' },
        'German': { text: 'Der schnelle braune Fuchs springt über den faulen Hund.', source: 'auto', target: 'fr' }
    };

    // Persist the current text and language choices so selecting "Custom" can restore them.
    // Missing selects serialise as undefined, which JSON.stringify simply omits.
    function saveBackup() {
        sessionStorage.setItem(routeKey, JSON.stringify({
            text: textInput.value,
            source: sourceLang ? sourceLang.value : undefined,
            target: targetLang ? targetLang.value : undefined
        }));
    }

    // Put the last saved custom text and languages back into the form, if any were saved.
    function restoreBackup() {
        const backupJson = sessionStorage.getItem(routeKey);
        if (!backupJson) {
            return;
        }
        try {
            const backup = JSON.parse(backupJson);
            textInput.value = backup.text || '';
            if (backup.source && sourceLang) sourceLang.value = backup.source;
            if (backup.target && targetLang) targetLang.value = backup.target;
        } catch (err) {
            // A corrupt backup is not fatal: keep the current input, but leave a trace.
            console.warn('Ignoring unreadable custom text backup:', err);
        }
    }

    // Only carry inputs when navigating via Quick Nav
    const shouldCarry = sessionStorage.getItem('carryTextOnNextPage') === '1';
    if (shouldCarry) {
        const storedText = sessionStorage.getItem('analysisText');
        if (storedText) textInput.value = storedText;
        sessionStorage.removeItem('carryTextOnNextPage');
    } else {
        // Fresh route: reset to defaults
        textInput.value = textInput.defaultValue || '';
        if (sampleSelect) sampleSelect.value = 'Custom';
        if (sourceLang) sourceLang.value = sourceLang.querySelector('option[selected]')?.value || 'auto';
        if (targetLang) targetLang.value = targetLang.querySelector('option[selected]')?.value || 'en';
    }

    // Sample text dropdown handler with Custom restore
    if (sampleSelect) {
        let previousSample = sampleSelect.value;
        sampleSelect.addEventListener('change', function() {
            const sampleType = this.value;
            const cameFromCustom = previousSample === 'Custom';
            previousSample = sampleType;

            if (sampleType === 'Custom') {
                restoreBackup();
                return;
            }

            // Save the custom input only when actually leaving "Custom"; saving on a
            // sample-to-sample switch would overwrite it with the previous sample's text.
            if (cameFromCustom) {
                saveBackup();
            }

            const sample = samples[sampleType];
            if (sample) {
                textInput.value = sample.text;
                if (sourceLang) sourceLang.value = sample.source;
                if (targetLang) targetLang.value = sample.target;
                sessionStorage.setItem('analysisText', textInput.value);
            }
        });
    }

    // While typing or changing languages, keep a backup and analysisText
    textInput.addEventListener('input', function() {
        sessionStorage.setItem('analysisText', textInput.value);
        saveBackup();
    });
    if (sourceLang) sourceLang.addEventListener('change', saveBackup);
    if (targetLang) targetLang.addEventListener('change', saveBackup);

    // Process button handler
    if (processBtn) {
        processBtn.addEventListener('click', function() {
            if (!textInput.value.trim()) {
                alert('Please enter text to translate.');
                return;
            }

            // Show loading state
            processBtn.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Translating...';
            processBtn.disabled = true;

            processTranslation();

            // The button is re-enabled on a fixed timer, not when the request finishes.
            setTimeout(function() {
                processBtn.innerHTML = '<i class="fas fa-language"></i> Translate';
                processBtn.disabled = false;
            }, 2000);
        });
    }

    // Clear button handler
    if (clearBtn) {
        clearBtn.addEventListener('click', function() {
            textInput.value = '';
            const results = document.getElementById('resultsContainer');
            if (results) {
                results.innerHTML = `
                    <div class="text-center text-muted py-5">
                        <i class="fas fa-arrow-up fa-2x mb-3"></i>
                        <p>Click "Translate" to see translation results</p>
                    </div>
                `;
            }
        });
    }

    // Keyboard shortcuts
    document.addEventListener('keydown', function(e) {
        // Ctrl+Enter to process
        if (e.ctrlKey && e.key === 'Enter' && processBtn) {
            processBtn.click();
        }

        // Ctrl+L to clear (preventDefault stops the browser from focusing the address bar)
        if (e.ctrlKey && e.key === 'l') {
            e.preventDefault();
            if (clearBtn) clearBtn.click();
        }
    });
});
|
| 372 |
+
|
| 373 |
+
// Set example text and language settings
|
| 374 |
+
function setExample(text, sourceLang, targetLang) {
    // Fill the text area and both language selectors from a canned example.
    // Fields are assigned in the order: text, source language, target language.
    const fieldValues = [
        ['textInput', text],
        ['sourceLang', sourceLang],
        ['targetLang', targetLang]
    ];
    for (const [fieldId, fieldValue] of fieldValues) {
        document.getElementById(fieldId).value = fieldValue;
    }
}
|
| 379 |
+
|
| 380 |
+
// Process translation
|
| 381 |
+
function processTranslation() {
    // Read the form, POST it to /api/translation as JSON and render either the
    // returned result or an error message into #resultsContainer.
    const text = document.getElementById('textInput').value.trim();
    const sourceLang = document.getElementById('sourceLang').value;
    const targetLang = document.getElementById('targetLang').value;

    if (!text) {
        alert('Please enter text to translate.');
        return;
    }

    showLoading('resultsContainer');

    fetch('/api/translation', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({
            text: text,
            source_lang: sourceLang,
            target_lang: targetLang
        })
    })
    // A failing server may answer with a non-JSON body (e.g. an HTML error page);
    // report the HTTP status instead of a cryptic JSON parse error.
    .then(response => response.json().catch(() => {
        throw new Error('unexpected server response (HTTP ' + response.status + ')');
    }))
    .then(data => {
        if (data && data.success) {
            displayResults(data.result);
        } else {
            showError((data && data.error) || 'An error occurred while translating text');
        }
    })
    .catch(error => {
        showError('Failed to translate text: ' + error.message);
    })
    .finally(() => {
        // Only clears the container if the spinner is still what it shows.
        hideLoading('resultsContainer');
    });
}
|
| 419 |
+
|
| 420 |
+
// Show loading state
|
| 421 |
+
function showLoading(elementId) {
    // Replace the contents of the element with the given id by a spinner and
    // a "Translating text..." caption. Does nothing when no such element exists.
    const target = document.getElementById(elementId);
    if (!target) {
        return;
    }

    const spinnerMarkup = `
        <div class="text-center py-4">
            <div class="spinner-border text-primary" role="status">
                <span class="visually-hidden">Loading...</span>
            </div>
            <p class="mt-2">Translating text...</p>
        </div>
    `;
    target.innerHTML = spinnerMarkup;
}
|
| 434 |
+
|
| 435 |
+
// Hide loading state
|
| 436 |
+
function hideLoading(elementId) {
    // Empty the element only while it still contains the spinner markup, so
    // results or errors rendered in the meantime are left in place.
    const target = document.getElementById(elementId);
    const spinnerStillShown = Boolean(target) && target.innerHTML.includes('spinner-border');
    if (!spinnerStillShown) {
        return;
    }
    target.innerHTML = '';
}
|
| 442 |
+
|
| 443 |
+
// Show error message
|
| 444 |
+
function showError(message, elementId = 'resultsContainer') {
    // Render a Bootstrap danger alert containing `message` inside the element
    // with id `elementId` (default: the results container). No-op if it is missing.
    const element = document.getElementById(elementId);
    if (!element) {
        return;
    }

    const alertBox = document.createElement('div');
    alertBox.className = 'alert alert-danger fade-in';
    alertBox.innerHTML = '<i class="fas fa-exclamation-triangle"></i> <strong>Error:</strong> ';
    // The message comes from the server or from an exception, so insert it as
    // text rather than markup to keep stray "<" or "&" from being parsed as HTML.
    alertBox.appendChild(document.createTextNode(String(message)));

    element.innerHTML = '';
    element.appendChild(alertBox);
}
|
| 455 |
+
|
| 456 |
+
// Display results
|
| 457 |
+
function displayResults(result) {
    // Inject the result markup into #resultsContainer, tag it with the
    // fade-in class and scroll it into view. No-op if the container is missing.
    const panel = document.getElementById('resultsContainer');
    if (!panel) {
        return;
    }

    panel.innerHTML = result;
    panel.classList.add('fade-in');

    // Scroll to results
    const scrollOptions = { behavior: 'smooth', block: 'start' };
    panel.scrollIntoView(scrollOptions);
}
|
| 467 |
+
</script>
|
| 468 |
+
{% endblock %}
|
templates/vector_embeddings.html
ADDED
|
@@ -0,0 +1,499 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}
|
| 2 |
+
|
| 3 |
+
{% block title %}Vector Embeddings & Semantic Search - NLP Ultimate Tutorial{% endblock %}
|
| 4 |
+
|
| 5 |
+
{% block content %}
|
| 6 |
+
<div class="container">
|
| 7 |
+
<!-- Header Section -->
|
| 8 |
+
<div class="row mb-4">
|
| 9 |
+
<div class="col-12">
|
| 10 |
+
<div class="card">
|
| 11 |
+
<div class="card-header">
|
| 12 |
+
<h1 class="mb-0">
|
| 13 |
+
<i class="fas fa-project-diagram"></i>
|
| 14 |
+
Vector Embeddings & Semantic Search
|
| 15 |
+
</h1>
|
| 16 |
+
</div>
|
| 17 |
+
<div class="card-body">
|
| 18 |
+
<p class="lead">Convert text into numerical representations and perform semantic search to find meaningfully similar content.</p>
|
| 19 |
+
|
| 20 |
+
<div class="alert alert-info">
|
| 21 |
+
<i class="fas fa-info-circle"></i>
|
| 22 |
+
<strong>About:</strong> Vector embeddings convert text into numerical representations where similar texts are placed closer together in a high-dimensional space.
|
| 23 |
+
</div>
|
| 24 |
+
</div>
|
| 25 |
+
</div>
|
| 26 |
+
</div>
|
| 27 |
+
</div>
|
| 28 |
+
|
| 29 |
+
{% include "_analysis_nav.html" %}
|
| 30 |
+
|
| 31 |
+
<!-- Input Section -->
|
| 32 |
+
<div class="row mb-4">
|
| 33 |
+
<div class="col-12">
|
| 34 |
+
<div class="card">
|
| 35 |
+
<div class="card-header">
|
| 36 |
+
<h3 class="mb-0">
|
| 37 |
+
<i class="fas fa-keyboard"></i>
|
| 38 |
+
Enter text to analyze:
|
| 39 |
+
</h3>
|
| 40 |
+
</div>
|
| 41 |
+
<div class="card-body">
|
| 42 |
+
<div class="row mb-3">
|
| 43 |
+
<div class="col-md-8">
|
| 44 |
+
<textarea id="textInput" class="form-control" rows="6" placeholder="Enter text to analyze with vector embeddings...">The International Space Station (ISS) is a modular space station in low Earth orbit. It is a multinational collaborative project involving five space agencies: NASA (United States), Roscosmos (Russia), JAXA (Japan), ESA (Europe), and CSA (Canada). The ownership and use of the space station is established by intergovernmental treaties and agreements. The ISS serves as a microgravity and space environment research laboratory in which scientific research is conducted in astrobiology, astronomy, meteorology, physics, and other fields.</textarea>
|
| 45 |
+
</div>
|
| 46 |
+
<div class="col-md-4">
|
| 47 |
+
<label for="sampleSelect" class="form-label">Or choose a sample:</label>
|
| 48 |
+
<select id="sampleSelect" class="form-select">
|
| 49 |
+
<option value="Custom">Custom</option>
|
| 50 |
+
<option value="Space Station">Space Station</option>
|
| 51 |
+
<option value="Python">Python</option>
|
| 52 |
+
<option value="Climate">Climate</option>
|
| 53 |
+
</select>
|
| 54 |
+
</div>
|
| 55 |
+
</div>
|
| 56 |
+
|
| 57 |
+
<div class="d-flex justify-content-between align-items-center">
|
| 58 |
+
<div>
|
| 59 |
+
<button id="processBtn" class="btn btn-primary btn-lg">
|
| 60 |
+
<i class="fas fa-project-diagram"></i>
|
| 61 |
+
Generate Embeddings
|
| 62 |
+
</button>
|
| 63 |
+
</div>
|
| 64 |
+
<div>
|
| 65 |
+
<button id="clearBtn" class="btn btn-outline-secondary">
|
| 66 |
+
<i class="fas fa-trash"></i>
|
| 67 |
+
Clear
|
| 68 |
+
</button>
|
| 69 |
+
</div>
|
| 70 |
+
</div>
|
| 71 |
+
</div>
|
| 72 |
+
</div>
|
| 73 |
+
</div>
|
| 74 |
+
</div>
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
<!-- Results Section -->
|
| 80 |
+
<div class="row">
|
| 81 |
+
<div class="col-12">
|
| 82 |
+
<div class="card">
|
| 83 |
+
<div class="card-header">
|
| 84 |
+
<h3 class="mb-0">
|
| 85 |
+
<i class="fas fa-chart-bar"></i>
|
| 86 |
+
Embedding Analysis Results
|
| 87 |
+
</h3>
|
| 88 |
+
</div>
|
| 89 |
+
<div class="card-body">
|
| 90 |
+
<div id="resultsContainer">
|
| 91 |
+
<div class="text-center text-muted py-5">
|
| 92 |
+
<i class="fas fa-arrow-up fa-2x mb-3"></i>
|
| 93 |
+
<p>Click "Generate Embeddings" to see vector analysis results</p>
|
| 94 |
+
</div>
|
| 95 |
+
</div>
|
| 96 |
+
</div>
|
| 97 |
+
</div>
|
| 98 |
+
</div>
|
| 99 |
+
</div>
|
| 100 |
+
</div>
|
| 101 |
+
{% endblock %}
|
| 102 |
+
|
| 103 |
+
{% block extra_scripts %}
|
| 104 |
+
<script>
|
| 105 |
+
// Initialize page
|
| 106 |
+
document.addEventListener('DOMContentLoaded', function() {
    // Page bootstrap for the vector-embeddings view: optionally carry text over
    // from the previous page, then wire the dropdown, buttons and shortcuts.
    const textInput = document.getElementById('textInput');
    const sampleSelect = document.getElementById('sampleSelect');
    const processBtn = document.getElementById('processBtn');
    const searchBtn = document.getElementById('searchBtn');
    const clearBtn = document.getElementById('clearBtn');

    if (!textInput) {
        // Every handler below reads or writes the text area; nothing to wire without it.
        return;
    }

    // Sample texts offered by the dropdown.
    const samples = {
        'Space Station': 'The International Space Station (ISS) is a modular space station in low Earth orbit. It is a multinational collaborative project involving five space agencies: NASA (United States), Roscosmos (Russia), JAXA (Japan), ESA (Europe), and CSA (Canada). The ownership and use of the space station is established by intergovernmental treaties and agreements. The ISS serves as a microgravity and space environment research laboratory in which scientific research is conducted in astrobiology, astronomy, meteorology, physics, and other fields.',
        'Python': 'Python is a high-level, general-purpose programming language. Its design philosophy emphasizes code readability with the use of significant indentation. Python is dynamically typed and garbage-collected. It supports multiple programming paradigms, including structured, object-oriented and functional programming. It is often described as a \'batteries included\' language due to its comprehensive standard library.',
        'Climate': 'Climate change refers to long-term shifts in global temperatures and weather patterns. While climate variations are natural, human activities have been the main driver of climate change since the 1800s, primarily due to burning fossil fuels, which generates heat-trapping gases. The effects of climate change include rising sea levels, more frequent extreme weather events, and changes in precipitation patterns.'
    };

    // Only carry over when using Quick Nav; otherwise leave defaults
    const shouldCarry = sessionStorage.getItem('carryTextOnNextPage') === '1';
    if (shouldCarry) {
        const storedText = sessionStorage.getItem('analysisText');
        if (storedText) textInput.value = storedText;
        sessionStorage.removeItem('carryTextOnNextPage');
    }

    // Sample text dropdown handler: "Custom" empties the box, a sample fills it.
    if (sampleSelect) {
        sampleSelect.addEventListener('change', function() {
            const sampleType = this.value;
            if (sampleType === 'Custom') {
                textInput.value = '';
            } else if (samples[sampleType]) {
                textInput.value = samples[sampleType];
            }
        });
    }

    // Process button handler
    if (processBtn) {
        processBtn.addEventListener('click', function() {
            if (!textInput.value.trim()) {
                alert('Please enter text to analyze.');
                return;
            }

            // Show loading state
            processBtn.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Generating...';
            processBtn.disabled = true;

            processEmbeddings();

            // The button is re-enabled on a fixed timer, not when the request finishes.
            setTimeout(function() {
                processBtn.innerHTML = '<i class="fas fa-project-diagram"></i> Generate Embeddings';
                processBtn.disabled = false;
            }, 2000);
        });
    }

    // Search button handler. This template's markup has no #searchBtn or
    // #searchInput; calling addEventListener on the missing button threw a
    // TypeError that aborted the rest of this initialisation, so guard it.
    if (searchBtn) {
        searchBtn.addEventListener('click', function() {
            const searchInput = document.getElementById('searchInput');
            const query = searchInput ? searchInput.value.trim() : '';

            if (!textInput.value.trim()) {
                alert('Please enter text to analyze first.');
                return;
            }

            if (!query) {
                alert('Please enter a search query.');
                return;
            }

            // Show loading state
            searchBtn.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Searching...';
            searchBtn.disabled = true;

            processSearch();

            // Reset button after a delay
            setTimeout(function() {
                searchBtn.innerHTML = '<i class="fas fa-search"></i> Search';
                searchBtn.disabled = false;
            }, 2000);
        });
    }

    // Clear button handler
    if (clearBtn) {
        clearBtn.addEventListener('click', function() {
            textInput.value = '';

            // The search box is optional; skip it when it is not on the page.
            const searchInput = document.getElementById('searchInput');
            if (searchInput) searchInput.value = '';

            const results = document.getElementById('resultsContainer');
            if (results) {
                results.innerHTML = `
                    <div class="text-center text-muted py-5">
                        <i class="fas fa-arrow-up fa-2x mb-3"></i>
                        <p>Click "Generate Embeddings" to see vector analysis results</p>
                    </div>
                `;
            }
        });
    }

    // Keyboard shortcuts
    document.addEventListener('keydown', function(e) {
        // Ctrl+Enter to process
        if (e.ctrlKey && e.key === 'Enter' && processBtn) {
            processBtn.click();
        }

        // Ctrl+L to clear (preventDefault stops the browser from focusing the address bar)
        if (e.ctrlKey && e.key === 'l') {
            e.preventDefault();
            if (clearBtn) clearBtn.click();
        }
    });
});
|
| 214 |
+
|
| 215 |
+
// Set example text
|
| 216 |
+
function setExample(text) {
    // Put the given example text into the main text area.
    const inputBox = document.getElementById('textInput');
    inputBox.value = text;
}
|
| 219 |
+
|
| 220 |
+
// Process embeddings
|
| 221 |
+
function processEmbeddings() {
    // POST the text area's content to /api/vector-embeddings as JSON and render
    // either the returned result or an error message into #resultsContainer.
    const text = document.getElementById('textInput').value.trim();

    if (!text) {
        alert('Please enter text to analyze.');
        return;
    }

    showLoading('resultsContainer');

    fetch('/api/vector-embeddings', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({
            text: text
        })
    })
    // A failing server may answer with a non-JSON body (e.g. an HTML error page);
    // report the HTTP status instead of a cryptic JSON parse error.
    .then(response => response.json().catch(() => {
        throw new Error('unexpected server response (HTTP ' + response.status + ')');
    }))
    .then(data => {
        if (data && data.success) {
            displayResults(data.result);
        } else {
            const apiError = data && data.error;
            console.error('API Error:', apiError);
            showError(apiError || 'An error occurred while generating embeddings');
        }
    })
    .catch(error => {
        showError('Failed to generate embeddings: ' + error.message);
    })
    .finally(() => {
        // Only clears the container if the spinner is still what it shows.
        hideLoading('resultsContainer');
    });
}
|
| 258 |
+
|
| 259 |
+
// Process search
|
| 260 |
+
function processSearch() {
    // POST the text and the search query to /api/vector-embeddings as JSON and
    // render the returned matches, or an error message on failure.
    const text = document.getElementById('textInput').value.trim();
    // #searchInput may be absent from the page; treat that like an empty query.
    const searchInput = document.getElementById('searchInput');
    const query = searchInput ? searchInput.value.trim() : '';

    if (!text) {
        alert('Please enter text to analyze first.');
        return;
    }

    if (!query) {
        alert('Please enter a search query.');
        return;
    }

    showLoading('resultsContainer');

    fetch('/api/vector-embeddings', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({
            text: text,
            query: query
        })
    })
    // A failing server may answer with a non-JSON body (e.g. an HTML error page);
    // report the HTTP status instead of a cryptic JSON parse error.
    .then(response => response.json().catch(() => {
        throw new Error('unexpected server response (HTTP ' + response.status + ')');
    }))
    .then(data => {
        if (data && data.success) {
            displaySearchResults(data.results);
        } else {
            showError((data && data.error) || 'An error occurred while searching');
        }
    })
    .catch(error => {
        showError('Failed to perform search: ' + error.message);
    })
    .finally(() => {
        // Only clears the container if the spinner is still what it shows.
        hideLoading('resultsContainer');
    });
}
|
| 301 |
+
|
| 302 |
+
// Show loading state
|
| 303 |
+
function showLoading(elementId) {
    // Replace the contents of the element with the given id by a spinner and
    // a "Processing..." caption. Does nothing when no such element exists.
    const target = document.getElementById(elementId);
    if (!target) {
        return;
    }

    const spinnerMarkup = `
        <div class="text-center py-4">
            <div class="spinner-border text-primary" role="status">
                <span class="visually-hidden">Loading...</span>
            </div>
            <p class="mt-2">Processing...</p>
        </div>
    `;
    target.innerHTML = spinnerMarkup;
}
|
| 316 |
+
|
| 317 |
+
// Hide loading state
|
| 318 |
+
function hideLoading(elementId) {
    // Empty the element only while it still contains the spinner markup, so
    // results or errors rendered in the meantime are left in place.
    const target = document.getElementById(elementId);
    const spinnerStillShown = Boolean(target) && target.innerHTML.includes('spinner-border');
    if (!spinnerStillShown) {
        return;
    }
    target.innerHTML = '';
}
|
| 324 |
+
|
| 325 |
+
// Show error message
|
| 326 |
+
function showError(message, elementId = 'resultsContainer') {
    // Render a Bootstrap danger alert containing `message` inside the element
    // with id `elementId` (default: the results container). No-op if it is missing.
    const element = document.getElementById(elementId);
    if (!element) {
        return;
    }

    const alertBox = document.createElement('div');
    alertBox.className = 'alert alert-danger fade-in';
    alertBox.innerHTML = '<i class="fas fa-exclamation-triangle"></i> <strong>Error:</strong> ';
    // The message comes from the server or from an exception, so insert it as
    // text rather than markup to keep stray "<" or "&" from being parsed as HTML.
    alertBox.appendChild(document.createTextNode(String(message)));

    element.innerHTML = '';
    element.appendChild(alertBox);
}
|
| 337 |
+
|
| 338 |
+
// Display results
|
| 339 |
+
function displayResults(result) {
    // Inject the result markup into #resultsContainer, tag it with the
    // fade-in class and scroll it into view. No-op if the container is missing.
    //
    // The earlier debug logging called result.substring(), which throws for any
    // truthy non-string result before anything is rendered; it has been removed.
    const container = document.getElementById('resultsContainer');
    if (!container) {
        return;
    }

    container.innerHTML = result;
    container.classList.add('fade-in');

    // Scroll to results
    container.scrollIntoView({ behavior: 'smooth', block: 'start' });
}
|
| 352 |
+
|
| 353 |
+
// Display search results
|
| 354 |
+
function displaySearchResults(results) {
    // Render search matches as cards (text, percentage badge, progress bar)
    // inside #resultsContainer, or an info alert when there are none.
    //
    // NOTE(review): this script declares displaySearchResults a second time
    // further down. Function declarations are hoisted, so the later declaration
    // is the one that actually runs and this version is effectively dead code.
    // Consider removing or renaming one of the two.
    const container = document.getElementById('resultsContainer');
    if (!container) {
        return;
    }

    // Result text is user-derived content: escape it so "<" or "&" show up
    // literally instead of being parsed as markup.
    const escapeHtml = function(value) {
        const scratch = document.createElement('div');
        scratch.textContent = String(value);
        return scratch.innerHTML;
    };

    let html = '<h4>Search Results:</h4>';

    if (results && results.length > 0) {
        results.forEach(function(result) {
            const scorePercent = Math.round(result.score * 100);
            // Keep the bar width a valid CSS percentage even for scores outside 0..1.
            const barWidth = Math.min(100, Math.max(0, scorePercent || 0));
            html += `
                <div class="card mb-2">
                    <div class="card-body">
                        <div class="row">
                            <div class="col-md-8">
                                <p class="mb-1">${escapeHtml(result.text)}</p>
                            </div>
                            <div class="col-md-4">
                                <div class="text-end">
                                    <span class="badge bg-primary">${scorePercent}%</span>
                                </div>
                                <div class="progress mt-1" style="height: 8px;">
                                    <div class="progress-bar" role="progressbar" style="width: ${barWidth}%"></div>
                                </div>
                            </div>
                        </div>
                    </div>
                </div>
            `;
        });
    } else {
        html += '<div class="alert alert-info">No relevant results found. Try different search terms.</div>';
    }

    container.innerHTML = html;
    container.classList.add('fade-in');

    // Scroll to results
    container.scrollIntoView({ behavior: 'smooth', block: 'start' });
}
|
| 393 |
+
|
| 394 |
+
// Semantic search function (called from the generated search interface)
|
| 395 |
+
function performSemanticSearch() {
    // Semantic search entry point (called from the generated search interface).
    // Reads the query from #search-input and the context from #textInput, POSTs
    // both to /api/semantic-search and renders the matches or an error.
    const searchInput = document.getElementById('search-input');
    const textInput = document.getElementById('textInput');

    if (!searchInput || !textInput) {
        alert('Please generate embeddings first, then try searching.');
        return;
    }

    const query = searchInput.value.trim();
    const context = textInput.value.trim();

    if (!query) {
        alert('Please enter a search query.');
        return;
    }

    if (!context) {
        alert('Please enter text to analyze first.');
        return;
    }

    // Show loading
    const resultsDiv = document.getElementById('search-results');
    const resultsContainer = document.getElementById('results-container');

    if (resultsDiv) {
        resultsDiv.style.display = 'block';
    }
    // The two elements are looked up independently, so check each before use.
    if (resultsDiv && resultsContainer) {
        resultsContainer.innerHTML = `
            <div class="text-center py-3">
                <div class="spinner-border text-warning" role="status">
                    <span class="visually-hidden">Searching...</span>
                </div>
                <p class="mt-2">Searching for semantically similar content...</p>
            </div>
        `;
    }

    // Perform search
    fetch('/api/semantic-search', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({
            context: context,
            query: query
        })
    })
    // A failing server may answer with a non-JSON body (e.g. an HTML error page);
    // report the HTTP status instead of a cryptic JSON parse error.
    .then(response => response.json().catch(() => {
        throw new Error('unexpected server response (HTTP ' + response.status + ')');
    }))
    .then(data => {
        if (data && data.success) {
            displaySearchResults(data.results);
        } else {
            showSearchError((data && data.error) || 'Search failed');
        }
    })
    .catch(error => {
        showSearchError('Failed to perform search: ' + error.message);
    });
}
|
| 456 |
+
|
| 457 |
+
function displaySearchResults(results) {
    // Render semantic-search matches ("Result N" heading, match-percentage
    // badge, matched text), or a "No similar content found." placeholder.
    //
    // Output goes to #results-container. When that element is not on the page,
    // fall back to #resultsContainer instead of throwing on a null element.
    const resultsContainer = document.getElementById('results-container')
        || document.getElementById('resultsContainer');
    if (!resultsContainer) {
        return;
    }

    if (!results || results.length === 0) {
        resultsContainer.innerHTML = `
            <div class="text-center py-4">
                <i class="fas fa-search fa-2x text-muted mb-3"></i>
                <p class="text-muted">No similar content found.</p>
            </div>
        `;
        return;
    }

    // Result text is user-derived content: escape it so "<" or "&" show up
    // literally instead of being parsed as markup.
    const escapeHtml = function(value) {
        const scratch = document.createElement('div');
        scratch.textContent = String(value);
        return scratch.innerHTML;
    };

    let html = '';
    results.forEach(function(result, index) {
        // Coerce defensively so a missing or non-numeric score cannot throw in toFixed().
        const score = Number(result.score) || 0;
        const percentage = (score * 100).toFixed(1);
        const badgeClass = score > 0.8 ? 'bg-success' : score > 0.6 ? 'bg-warning' : 'bg-secondary';

        html += `
            <div class="mb-3 p-3 border rounded bg-white">
                <div class="d-flex justify-content-between align-items-start mb-2">
                    <h6 class="mb-0 text-primary">Result ${index + 1}</h6>
                    <span class="badge ${badgeClass}">${percentage}% match</span>
                </div>
                <p class="mb-0">${escapeHtml(result.text)}</p>
            </div>
        `;
    });

    resultsContainer.innerHTML = html;
}
|
| 488 |
+
|
| 489 |
+
function showSearchError(message) {
    // Show a "Search Error" danger alert in #results-container, falling back to
    // #resultsContainer when the search panel is not on the page. No-op if
    // neither element exists.
    const resultsContainer = document.getElementById('results-container')
        || document.getElementById('resultsContainer');
    if (!resultsContainer) {
        return;
    }

    const alertBox = document.createElement('div');
    alertBox.className = 'alert alert-danger';
    alertBox.innerHTML = '<i class="fas fa-exclamation-triangle me-2"></i><strong>Search Error:</strong> ';
    // The message comes from the server or from an exception, so insert it as
    // text rather than markup.
    alertBox.appendChild(document.createTextNode(String(message)));

    resultsContainer.innerHTML = '';
    resultsContainer.appendChild(alertBox);
}
|
| 498 |
+
</script>
|
| 499 |
+
{% endblock %}
|
utils/__init__.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Utils package for NLP Ultimate Tutorial Flask Application
|
| 2 |
+
|
| 3 |
+
from .helpers import (
|
| 4 |
+
fig_to_html,
|
| 5 |
+
df_to_html_table,
|
| 6 |
+
text_statistics,
|
| 7 |
+
create_text_length_chart,
|
| 8 |
+
format_pos_token,
|
| 9 |
+
create_entity_span,
|
| 10 |
+
create_sentiment_color,
|
| 11 |
+
format_sentiment_score,
|
| 12 |
+
create_progress_bar,
|
| 13 |
+
create_confidence_gauge
|
| 14 |
+
)
|
| 15 |
+
|
| 16 |
+
from .model_loader import (
|
| 17 |
+
download_nltk_resources,
|
| 18 |
+
load_spacy,
|
| 19 |
+
load_sentiment_analyzer,
|
| 20 |
+
load_emotion_classifier,
|
| 21 |
+
load_summarizer,
|
| 22 |
+
load_qa_pipeline,
|
| 23 |
+
load_translator,
|
| 24 |
+
load_text_generator,
|
| 25 |
+
load_zero_shot,
|
| 26 |
+
load_embedding_model,
|
| 27 |
+
initialize_all_models,
|
| 28 |
+
get_model_status,
|
| 29 |
+
clear_models
|
| 30 |
+
)
|
| 31 |
+
|
| 32 |
+
from .visualization import (
|
| 33 |
+
setup_mpl_style,
|
| 34 |
+
create_bar_chart,
|
| 35 |
+
create_horizontal_bar_chart,
|
| 36 |
+
create_pie_chart,
|
| 37 |
+
create_line_chart,
|
| 38 |
+
create_scatter_plot,
|
| 39 |
+
create_heatmap,
|
| 40 |
+
create_word_cloud_placeholder,
|
| 41 |
+
create_network_graph,
|
| 42 |
+
create_gauge_chart,
|
| 43 |
+
create_comparison_chart
|
| 44 |
+
)
|
| 45 |
+
|
| 46 |
+
__all__ = [
|
| 47 |
+
# Helpers
|
| 48 |
+
'fig_to_html',
|
| 49 |
+
'df_to_html_table',
|
| 50 |
+
'text_statistics',
|
| 51 |
+
'create_text_length_chart',
|
| 52 |
+
'format_pos_token',
|
| 53 |
+
'create_entity_span',
|
| 54 |
+
'create_sentiment_color',
|
| 55 |
+
'format_sentiment_score',
|
| 56 |
+
'create_progress_bar',
|
| 57 |
+
'create_confidence_gauge',
|
| 58 |
+
|
| 59 |
+
# Model Loader
|
| 60 |
+
'download_nltk_resources',
|
| 61 |
+
'load_spacy',
|
| 62 |
+
'load_sentiment_analyzer',
|
| 63 |
+
'load_emotion_classifier',
|
| 64 |
+
'load_summarizer',
|
| 65 |
+
'load_qa_pipeline',
|
| 66 |
+
'load_translator',
|
| 67 |
+
'load_text_generator',
|
| 68 |
+
'load_zero_shot',
|
| 69 |
+
'load_embedding_model',
|
| 70 |
+
'initialize_all_models',
|
| 71 |
+
'get_model_status',
|
| 72 |
+
'clear_models',
|
| 73 |
+
|
| 74 |
+
# Visualization
|
| 75 |
+
'setup_mpl_style',
|
| 76 |
+
'create_bar_chart',
|
| 77 |
+
'create_horizontal_bar_chart',
|
| 78 |
+
'create_pie_chart',
|
| 79 |
+
'create_line_chart',
|
| 80 |
+
'create_scatter_plot',
|
| 81 |
+
'create_heatmap',
|
| 82 |
+
'create_word_cloud_placeholder',
|
| 83 |
+
'create_network_graph',
|
| 84 |
+
'create_gauge_chart',
|
| 85 |
+
'create_comparison_chart'
|
| 86 |
+
]
|
utils/helpers.py
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import base64
|
| 2 |
+
import matplotlib.pyplot as plt
|
| 3 |
+
import pandas as pd
|
| 4 |
+
from io import BytesIO
|
| 5 |
+
import plotly.graph_objects as go
|
| 6 |
+
import nltk
|
| 7 |
+
|
| 8 |
+
def fig_to_html(fig, width=None):
    """Render a figure as an inline, base64-encoded PNG ``<img>`` tag.

    Args:
        fig: Object exposing ``savefig(buf, format=..., dpi=..., bbox_inches=...)``
            (a matplotlib figure).
        width: Optional CSS width for the image. A string is used verbatim
            (e.g. ``"80%"`` or ``"400px"``); a bare number is interpreted as
            pixels. When falsy, no ``style`` attribute is emitted.

    Returns:
        str: An ``<img>`` element whose ``src`` is a ``data:image/png`` URI.
    """
    # The context manager releases the in-memory buffer as soon as it is encoded.
    with BytesIO() as buf:
        fig.savefig(buf, format='png', dpi=100, bbox_inches='tight')
        b64 = base64.b64encode(buf.getvalue()).decode('ascii')

    style_attr = ""
    if width:
        # A unit-less number is not valid CSS, so plain numbers become pixels.
        is_number = isinstance(width, (int, float)) and not isinstance(width, bool)
        css_width = f"{width}px" if is_number else width
        style_attr = f' style="width: {css_width}; max-width: 100%;"'

    return f'<img{style_attr} src="data:image/png;base64,{b64}" alt="Plot">'
|
| 21 |
+
|
| 22 |
+
def df_to_html_table(df):
    """Return *df* rendered as a Bootstrap-styled HTML ``<table>``.

    The index column is omitted and the table gets the id ``data-table``.
    """
    # NOTE(review): escape=False means cell values are emitted as raw HTML.
    # Presumably callers rely on this to embed markup in cells; verify that
    # no untrusted text reaches this function.
    render_options = {
        "index": False,
        "classes": 'table table-striped table-hover',
        "escape": False,
        "table_id": 'data-table',
    }
    return df.to_html(**render_options)
|
| 25 |
+
|
| 26 |
+
def text_statistics(text):
    """Calculate basic statistics for *text*.

    Args:
        text: The string to analyse. Falsy values (``""``, ``None``) yield
            all-zero statistics.

    Returns:
        dict: ``{"chars": int, "words": int, "sentences": int}``. Words are
        whitespace-separated tokens; sentences are counted with
        ``nltk.sent_tokenize``.
    """
    if not text:
        return {"chars": 0, "words": 0, "sentences": 0}

    word_count = len(text.split())
    char_count = len(text)

    try:
        sentence_count = len(nltk.sent_tokenize(text))
    except Exception:
        # Best effort: if sentence tokenization fails for any reason the
        # sentence count falls back to 0. A bare ``except:`` would also have
        # swallowed KeyboardInterrupt/SystemExit.
        sentence_count = 0

    return {"chars": char_count, "words": word_count, "sentences": sentence_count}
|
| 40 |
+
|
| 41 |
+
def create_text_length_chart(text):
    """Build a Plotly bar chart of character, word and sentence counts.

    Words are whitespace-separated tokens and sentences come from
    ``nltk.sent_tokenize``. Returns the ``go.Figure``.
    """
    word_total = len(text.split())
    sentence_total = len(nltk.sent_tokenize(text))
    char_total = len(text)

    # One bar per metric, shaded from light to dark blue.
    bars = go.Bar(
        x=['Characters', 'Words', 'Sentences'],
        y=[char_total, word_total, sentence_total],
        marker_color=['#90CAF9', '#1E88E5', '#0D47A1']
    )

    chart = go.Figure()
    chart.add_trace(bars)
    chart.update_layout(
        title="Text Length Metrics",
        xaxis_title="Metric",
        yaxis_title="Count",
        template="plotly_white",
        height=400
    )
    return chart
|
| 64 |
+
|
| 65 |
+
def get_image_download_link(fig, filename, text):
    """Return the inline HTML for *fig* (Gradio-compatibility placeholder).

    ``filename`` and ``text`` are accepted for signature compatibility only
    and are not used.
    """
    html_image = fig_to_html(fig)
    return html_image
|
| 68 |
+
|
| 69 |
+
def get_table_download_link(df, filename, text):
    """Return the HTML table for *df* (Gradio-compatibility placeholder).

    ``filename`` and ``text`` are accepted for signature compatibility only
    and are not used.
    """
    html_table = df_to_html_table(df)
    return html_table
|
| 72 |
+
|
| 73 |
+
def format_pos_token(token, pos, explanation=""):
    """Format a token with its part-of-speech tag as an HTML ``<span>``.

    Args:
        token: The token text (converted with ``str``).
        pos: The part-of-speech tag; selects the background colour.
        explanation: Optional tooltip text placed in the ``title`` attribute.

    Returns:
        str: HTML snippet. Token, tag and tooltip are HTML-escaped so that
        characters such as ``<``, ``&`` or ``"`` in the analysed text cannot
        break the markup.
    """
    import html

    # Background colour per POS tag.
    pos_colors = {
        'NOUN': '#E3F2FD',   # Light blue
        'PROPN': '#E3F2FD',  # Light blue (same as NOUN)
        'VERB': '#E8F5E9',   # Light green
        'ADJ': '#FFF8E1',    # Light yellow
        'ADV': '#F3E5F5',    # Light purple
        'ADP': '#EFEBE9',    # Light brown
        'PRON': '#E8EAF6',   # Light indigo
        'DET': '#E0F7FA',    # Light cyan
        'CONJ': '#FBE9E7',   # Light deep orange
        'CCONJ': '#FBE9E7',  # Light deep orange (for compatibility)
        'SCONJ': '#FBE9E7',  # Light deep orange (for compatibility)
        'NUM': '#FFEBEE',    # Light red
        'PART': '#F1F8E9',   # Very light green
        'INTJ': '#FFF3E0',   # Light orange
        'PUNCT': '#FAFAFA',  # Light grey
        'SYM': '#FAFAFA',    # Light grey (same as PUNCT)
        'X': '#FAFAFA',      # Light grey (for other)
    }

    # Unknown tags fall back to light grey.
    bg_color = pos_colors.get(pos, '#FAFAFA')

    # The tooltip attribute is only emitted when an explanation is given.
    title_attr = f' title="{html.escape(str(explanation), quote=True)}"' if explanation else ''
    safe_token = html.escape(str(token))
    safe_pos = html.escape(str(pos))

    return (
        f'<span class="pos-token" style="background-color: {bg_color}; border: 1px solid #ccc; '
        f'padding: 3px 6px; margin: 2px; display: inline-block; border-radius: 3px;"{title_attr}>'
        f'{safe_token} <small style="color: #666; font-size: 0.8em;">({safe_pos})</small></span>'
    )
|
| 104 |
+
|
| 105 |
+
def create_entity_span(text, entity_type, explanation=""):
    """Format a named entity with its type as an HTML ``<span>``.

    Args:
        text: The entity text (converted with ``str``).
        entity_type: The entity label; selects the background colour.
        explanation: Optional tooltip text placed in the ``title`` attribute.

    Returns:
        str: HTML snippet. Entity text, label and tooltip are HTML-escaped so
        that characters such as ``<``, ``&`` or ``"`` cannot break the markup.
    """
    import html

    # Background colour per entity label.
    entity_colors = {
        'PERSON': '#E3F2FD',       # Light blue
        'ORG': '#E8F5E9',          # Light green
        'GPE': '#FFF8E1',          # Light yellow
        'LOC': '#F3E5F5',          # Light purple
        'PRODUCT': '#EFEBE9',      # Light brown
        'EVENT': '#E8EAF6',        # Light indigo
        'WORK_OF_ART': '#E0F7FA',  # Light cyan
        'LAW': '#FBE9E7',          # Light deep orange
        'LANGUAGE': '#FFEBEE',     # Light red
        'DATE': '#F1F8E9',         # Very light green
        'TIME': '#FFF3E0',         # Light orange
        'PERCENT': '#FAFAFA',      # Light grey
        'MONEY': '#FAFAFA',        # Light grey
        'QUANTITY': '#FAFAFA',     # Light grey
        'ORDINAL': '#FAFAFA',      # Light grey
        'CARDINAL': '#FAFAFA',     # Light grey
    }

    # Unknown labels fall back to light grey.
    bg_color = entity_colors.get(entity_type, '#FAFAFA')

    # The tooltip attribute is only emitted when an explanation is given.
    title_attr = f' title="{html.escape(str(explanation), quote=True)}"' if explanation else ''
    safe_text = html.escape(str(text))
    safe_type = html.escape(str(entity_type))

    return (
        f'<span class="entity-token" style="background-color: {bg_color}; border: 1px solid #ccc; '
        f'padding: 3px 6px; margin: 2px; display: inline-block; border-radius: 3px;"{title_attr}>'
        f'{safe_text} <small style="color: #666; font-size: 0.8em;">({safe_type})</small></span>'
    )
|
| 135 |
+
|
| 136 |
+
def create_sentiment_color(score):
    """Map a sentiment score to a hex colour.

    Scores above 0.1 are green, scores below -0.1 are red, and everything in
    between (inclusive) is orange.
    """
    positive, negative, neutral = '#4CAF50', '#F44336', '#FF9800'
    if score > 0.1:
        return positive
    return negative if score < -0.1 else neutral
|
| 144 |
+
|
| 145 |
+
def format_sentiment_score(score, label):
    """Format a sentiment label and score as a coloured, bold HTML ``<span>``.

    Args:
        score: Numeric sentiment score; rendered with three decimals and used
            to pick the colour via ``create_sentiment_color``.
        label: Text shown before the score; HTML-escaped before insertion.

    Returns:
        str: HTML snippet.
    """
    import html

    color = create_sentiment_color(score)
    safe_label = html.escape(str(label))
    return f'<span style="color: {color}; font-weight: bold;">{safe_label} ({score:.3f})</span>'
|
| 149 |
+
|
| 150 |
+
def create_progress_bar(value, max_value=1.0, color='#1976D2'):
    """Create a Bootstrap progress bar as an HTML string.

    Args:
        value: Current value; shown inside the bar with three decimals.
        max_value: Value corresponding to a full bar. A zero ``max_value``
            yields an empty bar instead of raising ``ZeroDivisionError``.
        color: CSS colour of the filled part.

    Returns:
        str: HTML snippet. The bar width is clamped to the 0-100 % range so
        out-of-range values cannot overflow the container.
    """
    if max_value:
        percentage = min(max((value / max_value) * 100, 0.0), 100.0)
    else:
        percentage = 0.0
    return f'''
    <div class="progress mb-2" style="height: 20px;">
        <div class="progress-bar" role="progressbar" style="width: {percentage}%; background-color: {color};"
             aria-valuenow="{value}" aria-valuemin="0" aria-valuemax="{max_value}">
            {value:.3f}
        </div>
    </div>
    '''
|
| 161 |
+
|
| 162 |
+
def create_confidence_gauge(score, label):
    """Create a simple confidence read-out as an HTML string.

    Args:
        score: Confidence value; rendered as a percentage with one decimal.
            Above 0.7 is green, above 0.4 is orange, otherwise red.
        label: Caption shown below the percentage; HTML-escaped before
            insertion.

    Returns:
        str: HTML snippet.
    """
    import html

    if score > 0.7:
        color = '#4CAF50'
    elif score > 0.4:
        color = '#FF9800'
    else:
        color = '#F44336'
    safe_label = html.escape(str(label))
    # NOTE(review): the "text-<hex>" class is kept for output compatibility;
    # the visible colour comes from the inline style.
    return f'''
    <div class="text-center">
        <div class="display-6 text-{color.replace('#', '')}" style="color: {color};">
            {score:.1%}
        </div>
        <div class="small text-muted">{safe_label}</div>
    </div>
    '''
|
utils/model_loader.py
ADDED
|
@@ -0,0 +1,222 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import nltk
|
| 2 |
+
import spacy
|
| 3 |
+
from transformers import pipeline
|
| 4 |
+
|
| 5 |
+
# Global models dictionary for persistent access
|
| 6 |
+
models = {
|
| 7 |
+
"nlp": None,
|
| 8 |
+
"sentiment_analyzer": None,
|
| 9 |
+
"emotion_classifier": None,
|
| 10 |
+
"summarizer": None,
|
| 11 |
+
"qa_pipeline": None,
|
| 12 |
+
"translation_pipeline": None,
|
| 13 |
+
"text_generator": None,
|
| 14 |
+
"zero_shot": None,
|
| 15 |
+
"embedding_model": None
|
| 16 |
+
}
|
| 17 |
+
|
| 18 |
+
def download_nltk_resources():
    """Make sure every NLTK resource used by the app is available.

    Each resource is looked up with ``nltk.data.find``; on ``LookupError`` it
    is fetched with ``nltk.download``.
    """
    # Resource name -> path checked by nltk.data.find (order is preserved).
    lookup_paths = {
        'punkt': 'tokenizers/punkt',
        'stopwords': 'corpora/stopwords',
        'vader_lexicon': 'sentiment/vader_lexicon',
        'wordnet': 'corpora/wordnet',
        'averaged_perceptron_tagger': 'taggers/averaged_perceptron_tagger',
        'sentiwordnet': 'corpora/sentiwordnet',
    }
    for resource, path in lookup_paths.items():
        try:
            nltk.data.find(path)
        except LookupError:
            print(f"Downloading required NLTK resource: {resource}")
            nltk.download(resource)
|
| 36 |
+
|
| 37 |
+
def load_spacy():
    """Load the spaCy ``en_core_web_sm`` model into the shared ``models`` dict.

    Returns:
        The cached model, or ``None`` if it could not be loaded. Failures are
        reported on stdout rather than raised.
    """
    if models["nlp"] is None:
        try:
            models["nlp"] = spacy.load("en_core_web_sm")
        except OSError:
            # Keep the install hint for the missing-model case.
            print("SpaCy model not found. Please run: python -m spacy download en_core_web_sm")
        except Exception as e:
            # Any other failure is still non-fatal, but report the real cause
            # instead of a misleading "not found" message.
            print(f"Failed to load spaCy model: {e}")
    return models["nlp"]
|
| 45 |
+
|
| 46 |
+
def load_sentiment_analyzer():
    """Return the shared sentiment-analysis pipeline, creating it on first use.

    Load failures are printed and leave the cached value as ``None``.
    """
    if models["sentiment_analyzer"] is not None:
        return models["sentiment_analyzer"]

    try:
        analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
        models["sentiment_analyzer"] = analyzer
    except Exception as e:
        print(f"Failed to load sentiment analyzer: {e}")
    return models["sentiment_analyzer"]
|
| 54 |
+
|
| 55 |
+
def load_emotion_classifier():
    """Return the shared emotion-classification pipeline, creating it on first use.

    Load failures are printed and leave the cached value as ``None``.
    """
    if models["emotion_classifier"] is not None:
        return models["emotion_classifier"]

    # NOTE(review): return_all_scores may be deprecated in newer transformers
    # releases; it is kept because callers presumably depend on its output shape.
    classifier_options = {
        "model": "cardiffnlp/twitter-roberta-base-emotion",
        "return_all_scores": True,
    }
    try:
        models["emotion_classifier"] = pipeline("text-classification", **classifier_options)
    except Exception as e:
        print(f"Failed to load emotion classifier: {e}")
    return models["emotion_classifier"]
|
| 67 |
+
|
| 68 |
+
def load_summarizer():
    """Return the shared summarization pipeline, creating it on first use.

    Load failures are printed and leave the cached value as ``None``.
    """
    if models["summarizer"] is not None:
        return models["summarizer"]

    try:
        summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
        models["summarizer"] = summarizer
    except Exception as e:
        print(f"Failed to load summarizer: {e}")
    return models["summarizer"]
|
| 76 |
+
|
| 77 |
+
def load_qa_pipeline():
    """Return the shared question-answering pipeline, creating it on first use.

    Unlike the other loaders in this module, a load failure is printed and
    then re-raised to the caller.
    """
    if models["qa_pipeline"] is not None:
        return models["qa_pipeline"]

    checkpoint = "deepset/roberta-base-squad2"  # swap for a different QA model if needed
    try:
        from transformers import pipeline

        models["qa_pipeline"] = pipeline(
            "question-answering",
            model=checkpoint,
            tokenizer=checkpoint
        )
    except Exception as e:
        print(f"Error loading QA pipeline: {e}")
        models["qa_pipeline"] = None
        raise e
    return models["qa_pipeline"]
|
| 94 |
+
|
| 95 |
+
def load_translation_pipeline():
    """Return the shared English-to-French translation pipeline.

    The pipeline is created on first use; load failures are printed and leave
    the cached value as ``None``.
    """
    if models["translation_pipeline"] is not None:
        return models["translation_pipeline"]

    try:
        translator = pipeline("translation_en_to_fr", model="Helsinki-NLP/opus-mt-en-fr")
        models["translation_pipeline"] = translator
    except Exception as e:
        print(f"Failed to load translation model: {e}")
    return models["translation_pipeline"]
|
| 103 |
+
|
| 104 |
+
def load_translator(source_lang="auto", target_lang="en"):
    """
    Load a machine translation model for the given language pair.

    Args:
        source_lang (str): Source language code, or 'auto' to use the
            multilingual-to-English model.
        target_lang (str): Target language code (ignored when
            ``source_lang`` is 'auto').

    Returns:
        A translation pipeline. If the requested model cannot be loaded, the
        multilingual-to-English model is tried; if that fails too, a callable
        is returned that yields a single ``{"translation_text": <error>}``
        entry describing both failures.
    """
    fallback_model = "Helsinki-NLP/opus-mt-mul-en"  # Multilingual to English

    try:
        if source_lang == "auto":
            return pipeline("translation", model=fallback_model)

        # Only the explicit-pair branch needs the Auto* classes.
        from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

        model_name = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        return pipeline("translation", model=model, tokenizer=tokenizer)
    except Exception as e:
        # Capture the message now: Python unbinds ``e`` when this clause
        # ends, so a closure created later must not refer to it.
        primary_error = str(e)

    try:
        return pipeline("translation", model=fallback_model)
    except Exception as nested_e:
        fallback_error = str(nested_e)

    error_message = (
        f"Error loading translation model: {primary_error}. "
        f"Fallback also failed: {fallback_error}"
    )

    class ErrorTranslator:
        """Stand-in translator that reports why no model could be loaded."""

        def __call__(self, text, **kwargs):
            return [{"translation_text": error_message}]

    return ErrorTranslator()
|
| 148 |
+
|
| 149 |
+
def load_text_generator():
    """Return the shared GPT-2 text-generation pipeline, creating it on first use.

    Load failures are printed and leave the cached value as ``None``.
    """
    if models["text_generator"] is not None:
        return models["text_generator"]

    try:
        generator = pipeline("text-generation", model="gpt2")
        models["text_generator"] = generator
    except Exception as e:
        print(f"Failed to load text generator: {e}")
    return models["text_generator"]
|
| 157 |
+
|
| 158 |
+
def load_zero_shot():
    """Return the shared zero-shot classification pipeline, creating it on first use.

    Load failures are printed and leave the cached value as ``None``.
    """
    if models["zero_shot"] is not None:
        return models["zero_shot"]

    try:
        classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
        models["zero_shot"] = classifier
    except Exception as e:
        print(f"Failed to load zero-shot classifier: {e}")
    return models["zero_shot"]
|
| 166 |
+
|
| 167 |
+
def load_embedding_model():
    """Return the shared sentence-embedding model used for semantic search.

    The ``all-MiniLM-L6-v2`` SentenceTransformer is created on first use;
    load failures (including a missing ``sentence_transformers`` package) are
    printed and leave the cached value as ``None``.
    """
    already_loaded = models.get("embedding_model") is not None
    if not already_loaded:
        try:
            from sentence_transformers import SentenceTransformer
            encoder = SentenceTransformer('all-MiniLM-L6-v2')
            models["embedding_model"] = encoder
        except Exception as e:
            print(f"Failed to load embedding model: {e}")
    return models["embedding_model"]
|
| 176 |
+
|
| 177 |
+
def initialize_all_models():
    """Eagerly load every model so later requests do not pay the load cost.

    NLTK resources are downloaded first, then spaCy and the transformer
    models are loaded one by one. Progress is printed; a failure in one model
    never prevents the remaining ones from loading.
    """
    print("Initializing NLP models...")

    # Download NLTK resources first
    download_nltk_resources()

    # Most loaders swallow their own errors and return None, so success is
    # judged by the returned value, not merely by the absence of an exception.
    try:
        if load_spacy() is None:
            print("✗ Failed to load spaCy: model unavailable")
        else:
            print("✓ spaCy model loaded")
    except Exception as e:
        print(f"✗ Failed to load spaCy: {e}")

    # Load transformer models (these might take time)
    models_to_load = [
        ("Sentiment Analyzer", load_sentiment_analyzer),
        ("Emotion Classifier", load_emotion_classifier),
        ("Summarizer", load_summarizer),
        ("QA Pipeline", load_qa_pipeline),
        ("Text Generator", load_text_generator),
        ("Zero-shot Classifier", load_zero_shot),
        ("Embedding Model", load_embedding_model)
    ]

    for name, loader_func in models_to_load:
        try:
            loaded = loader_func()
        except Exception as e:
            print(f"✗ Failed to load {name}: {e}")
            continue
        if loaded is None:
            print(f"✗ Failed to load {name}: loader returned no model")
        else:
            print(f"✓ {name} loaded")

    print("Model initialization complete!")
|
| 210 |
+
|
| 211 |
+
def get_model_status():
    """Return a ``{model_name: bool}`` map telling which models are loaded."""
    return {model_name: model is not None for model_name, model in models.items()}
|
| 217 |
+
|
| 218 |
+
def clear_models():
    """Drop every loaded model by resetting all ``models`` entries to ``None``."""
    # dict.fromkeys yields the same keys mapped to None; update() applies
    # them in place so other references to ``models`` observe the reset.
    models.update(dict.fromkeys(models))
    print("All models cleared from memory")
|
utils/model_loader_hf.py
ADDED
|
@@ -0,0 +1,267 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Optimized model loader for Hugging Face Spaces with memory management
|
| 3 |
+
"""
|
| 4 |
+
import os
|
| 5 |
+
import gc
|
| 6 |
+
import psutil
|
| 7 |
+
import nltk
|
| 8 |
+
import spacy
|
| 9 |
+
from transformers import pipeline, AutoTokenizer, AutoModel
|
| 10 |
+
import torch
|
| 11 |
+
from functools import lru_cache
|
| 12 |
+
import warnings
|
| 13 |
+
warnings.filterwarnings("ignore")
|
| 14 |
+
|
| 15 |
+
# Set device to CPU for HF Spaces (unless GPU is available)
|
| 16 |
+
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 17 |
+
print(f"Using device: {DEVICE}")
|
| 18 |
+
|
| 19 |
+
# Model cache with memory-conscious loading
|
| 20 |
+
class ModelCache:
    """Bounded in-memory cache for loaded models.

    At most ``max_models`` entries are kept. When room is needed, the entry
    with the lowest access count is evicted (a least-frequently-used policy).
    """

    def __init__(self, max_models=3):
        self.models = {}             # model name -> loaded model object
        self.max_models = max_models
        self.access_count = {}       # model name -> number of load_model() hits

    def get_memory_usage(self):
        """Get the resident memory of the current process in MB"""
        process = psutil.Process(os.getpid())
        return process.memory_info().rss / 1024 / 1024

    def cleanup_least_used(self):
        """Evict the least-accessed model(s) while the cache is full"""
        evicted = False
        # ``self.models`` is checked first so an empty cache (e.g. with
        # max_models=0) never reaches min() on an empty mapping.
        while self.models and len(self.models) >= self.max_models:
            model_name = min(self.access_count, key=self.access_count.get)

            print(f"Removing {model_name} from cache to free memory")
            del self.models[model_name]
            del self.access_count[model_name]
            evicted = True

        if evicted:
            # Force garbage collection
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

    def load_model(self, model_name, loader_func):
        """Return the cached model, loading it with *loader_func* if needed.

        Args:
            model_name: Cache key.
            loader_func: Zero-argument callable that builds the model.

        Returns:
            The model, or ``None`` when loading raised or produced nothing.
            Failed loads are not cached, so they neither occupy a slot nor
            show up as loaded, and a later call may retry.
        """
        if model_name in self.models:
            self.access_count[model_name] += 1
            return self.models[model_name]

        # Check memory before loading
        memory_before = self.get_memory_usage()
        print(f"Memory before loading {model_name}: {memory_before:.1f}MB")

        # Make room before the new model is brought into memory
        self.cleanup_least_used()

        try:
            model = loader_func()
        except Exception as e:
            print(f"Failed to load {model_name}: {str(e)}")
            return None

        if model is None:
            print(f"Failed to load {model_name}: loader returned no model")
            return None

        self.models[model_name] = model
        self.access_count[model_name] = 1

        memory_after = self.get_memory_usage()
        print(f"Memory after loading {model_name}: {memory_after:.1f}MB")

        return model
|
| 73 |
+
|
| 74 |
+
# Global model cache
|
| 75 |
+
model_cache = ModelCache(max_models=3)
|
| 76 |
+
|
| 77 |
+
@lru_cache(maxsize=1)
def download_nltk_resources():
    """Ensure the NLTK resources used by the app are present.

    Each resource is looked up with ``nltk.data.find`` and quietly downloaded
    on ``LookupError``. The ``lru_cache`` wrapper makes repeated calls no-ops.
    """
    # Resource name -> path checked by nltk.data.find (order is preserved).
    lookup_paths = {
        'punkt': 'tokenizers/punkt',
        'stopwords': 'corpora/stopwords',
        'vader_lexicon': 'sentiment/vader_lexicon',
        'wordnet': 'corpora/wordnet',
        'averaged_perceptron_tagger': 'taggers/averaged_perceptron_tagger',
    }

    for resource, path in lookup_paths.items():
        try:
            nltk.data.find(path)
        except LookupError:
            print(f"Downloading NLTK resource: {resource}")
            nltk.download(resource, quiet=True)
|
| 95 |
+
|
| 96 |
+
@lru_cache(maxsize=1)
def load_spacy():
    """Load the spaCy ``en_core_web_sm`` model through the shared model cache.

    Returns ``None`` (after printing an install hint) when the model is not
    installed.
    """
    # NOTE(review): lru_cache keeps its own reference to the returned model,
    # so evicting "spacy" from model_cache presumably does not free it -
    # confirm whether pinning spaCy is intended.
    def _spacy_loader():
        try:
            nlp = spacy.load("en_core_web_sm")
        except OSError:
            print("SpaCy model not found. Please install: python -m spacy download en_core_web_sm")
            nlp = None
        return nlp

    return model_cache.load_model("spacy", _spacy_loader)
|
| 107 |
+
|
| 108 |
+
def load_sentiment_analyzer():
    """Return the sentiment-analysis pipeline via the shared model cache."""
    def _build():
        # transformers uses device index 0 for the first GPU and -1 for CPU.
        device_index = 0 if DEVICE == "cuda" else -1
        options = {
            "model": "cardiffnlp/twitter-roberta-base-sentiment-latest",
            "device": device_index,
            "max_length": 512,
            "truncation": True,
        }
        return pipeline("sentiment-analysis", **options)

    return model_cache.load_model("sentiment", _build)
|
| 120 |
+
|
| 121 |
+
def load_summarizer():
    """Return the summarization pipeline via the shared model cache."""
    def _build():
        # transformers uses device index 0 for the first GPU and -1 for CPU.
        device_index = 0 if DEVICE == "cuda" else -1
        options = {
            "model": "facebook/bart-large-cnn",
            "device": device_index,
            "max_length": 512,
            "truncation": True,
        }
        return pipeline("summarization", **options)

    return model_cache.load_model("summarizer", _build)
|
| 133 |
+
|
| 134 |
+
def load_qa_pipeline():
    """Return the question-answering pipeline via the shared model cache."""
    def _build():
        # transformers uses device index 0 for the first GPU and -1 for CPU.
        device_index = 0 if DEVICE == "cuda" else -1
        options = {
            "model": "deepset/roberta-base-squad2",
            "device": device_index,
            "max_length": 512,
            "truncation": True,
        }
        return pipeline("question-answering", **options)

    return model_cache.load_model("qa", _build)
|
| 146 |
+
|
| 147 |
+
def load_text_generator():
    """Return the GPT-2 text-generation pipeline via the shared model cache."""
    def _build():
        # transformers uses device index 0 for the first GPU and -1 for CPU.
        device_index = 0 if DEVICE == "cuda" else -1
        options = {
            "model": "gpt2",
            "device": device_index,
            "max_length": 256,
            "truncation": True,
            "pad_token_id": 50256,
        }
        return pipeline("text-generation", **options)

    return model_cache.load_model("generator", _build)
|
| 160 |
+
|
| 161 |
+
def load_zero_shot():
    """Return the zero-shot classification pipeline via the shared model cache."""
    def _build():
        # transformers uses device index 0 for the first GPU and -1 for CPU.
        device_index = 0 if DEVICE == "cuda" else -1
        options = {
            "model": "facebook/bart-large-mnli",
            "device": device_index,
            "max_length": 512,
            "truncation": True,
        }
        return pipeline("zero-shot-classification", **options)

    return model_cache.load_model("zero_shot", _build)
|
| 173 |
+
|
| 174 |
+
def load_embedding_model():
    """Return the ``all-MiniLM-L6-v2`` sentence-embedding model via the shared cache."""
    def _build():
        # Imported lazily so the package is only required when embeddings are used.
        from sentence_transformers import SentenceTransformer
        encoder = SentenceTransformer('all-MiniLM-L6-v2', device=DEVICE)
        return encoder

    return model_cache.load_model("embedding", _build)
|
| 181 |
+
|
| 182 |
+
def load_translation_pipeline(source_lang="auto", target_lang="en"):
    """Return a translation pipeline for the language pair via the shared cache.

    The multilingual-to-English model is used when ``source_lang`` is
    ``"auto"`` or ``target_lang`` is ``"en"``; otherwise the pair-specific
    Helsinki-NLP model is used. Load errors are printed and yield ``None``.
    """
    cache_key = f"translation_{source_lang}_{target_lang}"

    def _build():
        try:
            use_multilingual = source_lang == "auto" or target_lang == "en"
            model_name = (
                "Helsinki-NLP/opus-mt-mul-en"
                if use_multilingual
                else f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
            )
            device_index = 0 if DEVICE == "cuda" else -1
            return pipeline(
                "translation",
                model=model_name,
                device=device_index,
                max_length=512,
                truncation=True
            )
        except Exception as e:
            print(f"Translation model error: {e}")
            return None

    return model_cache.load_model(cache_key, _build)
|
| 203 |
+
|
| 204 |
+
def get_memory_status():
    """Return memory statistics for this process plus the cache contents.

    Keys: ``rss_mb``, ``vms_mb``, ``percent``, ``loaded_models``,
    ``cache_size``.
    """
    bytes_per_mb = 1024 * 1024
    process = psutil.Process(os.getpid())
    usage = process.memory_info()
    cached_names = list(model_cache.models.keys())

    status = {}
    status["rss_mb"] = usage.rss / 1024 / 1024
    status["vms_mb"] = usage.vms / 1024 / 1024
    status["percent"] = process.memory_percent()
    status["loaded_models"] = cached_names
    status["cache_size"] = len(model_cache.models)
    return status
|
| 216 |
+
|
| 217 |
+
def clear_model_cache():
    """Empty the shared model cache and release memory where possible."""
    for registry in (model_cache.models, model_cache.access_count):
        registry.clear()

    # Reclaim the dropped objects now, and release cached GPU memory if any.
    gc.collect()
    cuda_present = torch.cuda.is_available()
    if cuda_present:
        torch.cuda.empty_cache()
    print("Model cache cleared")
|
| 225 |
+
|
| 226 |
+
def initialize_essential_models():
    """Warm up the small set of resources needed right at startup.

    Downloads the NLTK resources, then tries to load spaCy and the sentiment
    analyzer in that order. A failure in either loader is printed and does
    not stop initialization. Progress messages and the final memory status
    are printed to stdout.
    """
    print("Initializing essential models for Hugging Face Spaces...")

    download_nltk_resources()
    print("✓ NLTK resources downloaded")

    # (loader, message printed on success, label used in the failure message)
    startup_loaders = (
        (load_spacy, "✓ spaCy model loaded", "spaCy"),
        (load_sentiment_analyzer, "✓ Sentiment analyzer loaded", "Sentiment analyzer"),
    )
    for loader, success_message, label in startup_loaders:
        try:
            loader()
            print(success_message)
        except Exception as e:
            print(f"✗ {label} failed: {e}")

    print(f"Memory status: {get_memory_status()}")
    print("Essential models initialized!")
|
| 250 |
+
|
| 251 |
+
# Lazy loading functions for other models
|
| 252 |
+
def ensure_model_loaded(model_name, loader_func):
    """Return the cache entry for ``model_name``, invoking the loader if absent.

    Args:
        model_name: Key to look up in ``model_cache.models``.
        loader_func: Zero-argument callable run only when the key is missing.
            Its return value is ignored.

    Returns:
        The value stored in ``model_cache.models`` under ``model_name`` after
        the optional load, or ``None`` if nothing is stored under that key.
    """
    if model_name in model_cache.models:
        return model_cache.models.get(model_name)

    print(f"Loading {model_name} on demand...")
    loader_func()
    return model_cache.models.get(model_name)
|
| 258 |
+
|
| 259 |
+
# Model status for debugging
|
| 260 |
+
def get_model_status():
    """Collect a debugging snapshot of the model cache.

    Returns:
        dict: ``loaded_models`` (keys of ``model_cache.models``),
        ``access_counts`` (a copy of ``model_cache.access_count``),
        ``memory_usage`` (the result of ``get_memory_status()``) and
        ``device`` (the module-level ``DEVICE`` value).
    """
    snapshot = {}
    snapshot["loaded_models"] = list(model_cache.models.keys())
    snapshot["access_counts"] = model_cache.access_count.copy()
    snapshot["memory_usage"] = get_memory_status()
    snapshot["device"] = DEVICE
    return snapshot
|
utils/visualization.py
ADDED
|
@@ -0,0 +1,242 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
|
| 3 |
+
|
| 4 |
+
def apply_custom_css():
    """Return the text of the custom stylesheet used by the Flask interface.

    The path ``static/css/style.css`` is relative, so it is resolved against
    the current working directory of the process.

    Returns:
        str: The stylesheet contents, or an empty string (after printing a
        warning) when the file cannot be opened, read or decoded.
    """
    css_file_path = "static/css/style.css"
    try:
        # Read explicitly as UTF-8 rather than the platform default encoding
        # so non-ASCII characters load the same way on every host.
        with open(css_file_path, "r", encoding="utf-8") as f:
            return f.read()
    except (OSError, UnicodeDecodeError) as e:
        # Best effort: a missing or unreadable stylesheet must not break the app.
        print(f"Warning: Could not load custom CSS: {e}")
        return ""
|
| 13 |
+
|
| 14 |
+
def setup_mpl_style():
    """Apply the shared matplotlib/seaborn look used by the chart helpers.

    Tries the ``seaborn-v0_8-whitegrid`` style sheet together with seaborn's
    ``whitegrid`` style and falls back to matplotlib's ``default`` style if
    either call fails. Afterwards the default figure size and font sizes are
    set and the top and right axes spines are turned off via ``plt.rcParams``.
    """
    try:
        plt.style.use('seaborn-v0_8-whitegrid')
        sns.set_style("whitegrid")
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt and SystemExit
        # must propagate instead of silently triggering the fallback.
        plt.style.use('default')

    # Configure matplotlib for better visuals
    plt.rcParams.update({
        'figure.figsize': (10, 6),
        'axes.labelsize': 12,
        'axes.titlesize': 14,
        'xtick.labelsize': 10,
        'ytick.labelsize': 10,
        'legend.fontsize': 10,
        'axes.spines.top': False,
        'axes.spines.right': False,
    })
|
| 32 |
+
|
| 33 |
+
def create_bar_chart(labels, values, title, xlabel, ylabel, color='#1976D2'):
    """Build a vertical bar chart and return its figure.

    Args:
        labels: Category labels placed along the x axis.
        values: Bar heights, one per label.
        title: Chart title.
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.
        color: Fill color of the bars.

    Returns:
        The matplotlib figure holding the chart.
    """
    setup_mpl_style()
    figure, axes = plt.subplots(figsize=(10, 6))
    axes.bar(labels, values, color=color)
    axes.set(title=title, xlabel=xlabel, ylabel=ylabel)

    # Slant the tick labels and right-align them under their ticks.
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    return figure
|
| 46 |
+
|
| 47 |
+
def create_horizontal_bar_chart(labels, values, title, xlabel, ylabel, color='#1976D2'):
    """Build a horizontal bar chart and return its figure.

    Args:
        labels: Category labels placed along the y axis.
        values: Bar lengths, one per label.
        title: Chart title.
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.
        color: Fill color of the bars.

    Returns:
        The matplotlib figure holding the chart.
    """
    setup_mpl_style()
    figure, axes = plt.subplots(figsize=(10, 6))
    axes.barh(labels, values, color=color)
    axes.set(title=title, xlabel=xlabel, ylabel=ylabel)

    # Flip the y axis so the first entry of ``labels`` is drawn at the top.
    axes.invert_yaxis()
    plt.tight_layout()
    return figure
|
| 60 |
+
|
| 61 |
+
def create_pie_chart(labels, values, title, colors=None):
    """Build a pie chart with percentage annotations and return its figure.

    Args:
        labels: Wedge labels.
        values: Wedge sizes, one per label.
        title: Chart title.
        colors: Optional list of wedge colors; a built-in eight-color palette
            is used when omitted.

    Returns:
        The matplotlib figure holding the chart.
    """
    setup_mpl_style()
    figure, axes = plt.subplots(figsize=(8, 8))

    palette = colors
    if palette is None:
        palette = ['#1976D2', '#4CAF50', '#FF9800', '#F44336', '#9C27B0', '#00BCD4', '#FFC107', '#795548']

    _wedges, _label_texts, percent_texts = axes.pie(
        values, labels=labels, autopct='%1.1f%%', colors=palette
    )
    axes.set_title(title)

    # White bold percentages stay legible on top of the colored wedges.
    for percent_text in percent_texts:
        percent_text.set_color('white')
        percent_text.set_fontweight('bold')

    plt.tight_layout()
    return figure
|
| 79 |
+
|
| 80 |
+
def create_line_chart(x_values, y_values, title, xlabel, ylabel, color='#1976D2'):
    """Build a line chart with circular point markers and return its figure.

    Args:
        x_values: Coordinates along the x axis.
        y_values: Coordinates along the y axis, one per x value.
        title: Chart title.
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.
        color: Color of the line and its markers.

    Returns:
        The matplotlib figure holding the chart.
    """
    setup_mpl_style()
    figure, axes = plt.subplots(figsize=(10, 6))
    axes.plot(x_values, y_values, color=color, linewidth=2, marker='o')
    axes.set(title=title, xlabel=xlabel, ylabel=ylabel)

    # Keep the grid faint so it does not compete with the data.
    axes.grid(True, alpha=0.3)
    plt.tight_layout()
    return figure
|
| 93 |
+
|
| 94 |
+
def create_scatter_plot(x_values, y_values, title, xlabel, ylabel, color='#1976D2'):
    """Build a scatter plot of semi-transparent points and return its figure.

    Args:
        x_values: Point coordinates along the x axis.
        y_values: Point coordinates along the y axis, one per x value.
        title: Chart title.
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.
        color: Color of the points.

    Returns:
        The matplotlib figure holding the chart.
    """
    setup_mpl_style()
    figure, axes = plt.subplots(figsize=(10, 6))
    axes.scatter(x_values, y_values, color=color, alpha=0.6, s=50)
    axes.set(title=title, xlabel=xlabel, ylabel=ylabel)

    # Keep the grid faint so it does not compete with the data.
    axes.grid(True, alpha=0.3)
    plt.tight_layout()
    return figure
|
| 107 |
+
|
| 108 |
+
def create_heatmap(data, title, xlabel, ylabel, cmap='YlGnBu'):
    """Render ``data`` as a heatmap with a colorbar and return its figure.

    Args:
        data: Values handed to ``imshow``.
        title: Chart title.
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.
        cmap: Name of the matplotlib colormap.

    Returns:
        The matplotlib figure holding the chart.
    """
    setup_mpl_style()
    figure, axes = plt.subplots(figsize=(10, 8))

    # aspect='auto' lets the cells stretch to fill the axes.
    image = axes.imshow(data, cmap=cmap, aspect='auto')

    colorbar = axes.figure.colorbar(image, ax=axes)
    colorbar.ax.set_ylabel('Value', rotation=-90, va="bottom")

    axes.set(title=title, xlabel=xlabel, ylabel=ylabel)

    plt.tight_layout()
    return figure
|
| 125 |
+
|
| 126 |
+
def create_word_cloud_placeholder(text, title="Word Cloud"):
    """Draw a stand-in for a word cloud: a boxed preview of the text.

    No word cloud is computed. The figure shows the title and the first 100
    characters of ``text`` (always followed by an ellipsis) centered inside a
    rounded box, with the axes hidden.

    Args:
        text: Source text; only its first 100 characters are shown.
        title: Used both in the preview and as the axes title.

    Returns:
        The matplotlib figure holding the placeholder.
    """
    setup_mpl_style()
    figure, axes = plt.subplots(figsize=(10, 6))

    preview = f"Word Cloud: {title}\n\n{text[:100]}..."
    box_style = dict(boxstyle="round,pad=0.3", facecolor="lightblue", alpha=0.7)
    axes.text(0.5, 0.5, preview,
              ha='center', va='center', fontsize=12,
              bbox=box_style)

    # Fix the axes to the unit square so (0.5, 0.5) is the center.
    axes.set_xlim(0, 1)
    axes.set_ylim(0, 1)
    axes.axis('off')
    axes.set_title(title)

    plt.tight_layout()
    return figure
|
| 143 |
+
|
| 144 |
+
def create_network_graph(edges, nodes, title="Network Graph"):
    """Draw nodes evenly spaced on a circle, joined by straight-line edges.

    Args:
        edges: Iterable of sequences whose first two items are node
            identifiers. Entries with fewer than two items, or naming a node
            that is not in ``nodes``, are skipped. May be empty or ``None``.
        nodes: Sequence of hashable node identifiers. Each one is drawn as a
            labelled marker, whether or not any edge touches it.
        title: Axes title.

    Returns:
        The matplotlib figure holding the graph. The figure has empty axes
        when ``nodes`` is empty.
    """
    setup_mpl_style()
    fig, ax = plt.subplots(figsize=(12, 8))

    if nodes:
        # Circular layout: radius 0.3 around the center of the unit square.
        angles = np.linspace(0.0, 2 * np.pi, num=len(nodes), endpoint=False)
        pos = {
            node: (0.5 + 0.3 * np.cos(angle), 0.5 + 0.3 * np.sin(angle))
            for node, angle in zip(nodes, angles)
        }

        # Draw edges
        for edge in edges or ():
            if len(edge) < 2:
                continue
            start, end = pos.get(edge[0]), pos.get(edge[1])
            if start is None or end is None:
                # Unknown endpoint: skip instead of drawing a line to (0, 0).
                continue
            ax.plot([start[0], end[0]], [start[1], end[1]],
                    'k-', alpha=0.5, linewidth=1, zorder=1)

        # Draw nodes; the explicit zorder keeps markers and labels above the
        # edges (lines otherwise default to a higher zorder than scatter).
        for node, (x, y) in pos.items():
            ax.scatter(x, y, s=200, c='lightblue', edgecolors='black',
                       linewidth=2, zorder=2)
            ax.text(x, y, str(node), ha='center', va='center', fontsize=8,
                    zorder=3)

    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.axis('off')
    ax.set_title(title)

    plt.tight_layout()
    return fig
|
| 176 |
+
|
| 177 |
+
def create_gauge_chart(value, max_value=1.0, title="Gauge Chart"):
    """Draw a half-circle gauge showing ``value`` relative to ``max_value``.

    The dial is green above 70% of ``max_value``, orange above 40% and red
    otherwise. The needle position is clamped to the dial, so values outside
    ``[0, max_value]`` point at the nearest end; the printed number is always
    the raw ``value``.

    Args:
        value: Reading to display.
        max_value: Reading that corresponds to a full dial. Must not be zero.
        title: Axes title.

    Returns:
        The matplotlib figure holding the gauge.

    Raises:
        ValueError: If ``max_value`` is zero.
    """
    # Validate before creating the figure so a bad call leaves nothing open.
    if max_value == 0:
        raise ValueError("max_value must be non-zero")

    setup_mpl_style()
    fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(projection='polar'))

    # Create gauge
    theta = np.linspace(0, np.pi, 100)
    r = np.ones_like(theta)

    # Fraction of the dial that is filled, kept inside [0, 1].
    ratio = min(max(value / max_value, 0.0), 1.0)

    # Color based on value
    if ratio > 0.7:
        color = '#4CAF50'  # Green
    elif ratio > 0.4:
        color = '#FF9800'  # Orange
    else:
        color = '#F44336'  # Red

    ax.fill_between(theta, 0, r, alpha=0.3, color=color)
    ax.plot(theta, r, color=color, linewidth=3)

    # Add value indicator: angle pi is an empty dial, angle 0 a full one.
    indicator_theta = np.pi * (1 - ratio)
    ax.plot([indicator_theta, indicator_theta], [0, 1], color='black', linewidth=4)

    ax.set_ylim(0, 1)
    ax.set_title(title, pad=20)
    ax.set_xticks([])
    ax.set_yticks([])

    # Add value text
    ax.text(0, 0, f'{value:.2f}', ha='center', va='center', fontsize=20, fontweight='bold')

    plt.tight_layout()
    return fig
|
| 211 |
+
|
| 212 |
+
def create_comparison_chart(categories, values1, values2, title, xlabel, ylabel,
                            label1="Series 1", label2="Series 2", color1='#1976D2', color2='#4CAF50'):
    """Build a grouped bar chart comparing two series and return its figure.

    Each category gets two side-by-side bars, and every bar is annotated with
    its height to one decimal place.

    Args:
        categories: Category names, one group of bars per entry.
        values1: Heights of the first series.
        values2: Heights of the second series.
        title: Chart title.
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.
        label1: Legend entry for the first series.
        label2: Legend entry for the second series.
        color1: Bar color of the first series.
        color2: Bar color of the second series.

    Returns:
        The matplotlib figure holding the chart.
    """
    setup_mpl_style()
    figure, axes = plt.subplots(figsize=(12, 6))

    positions = np.arange(len(categories))
    bar_width = 0.35

    # The two series sit half a bar width to either side of each tick.
    first_series = axes.bar(positions - bar_width/2, values1, bar_width, label=label1, color=color1)
    second_series = axes.bar(positions + bar_width/2, values2, bar_width, label=label2, color=color2)

    axes.set(title=title, xlabel=xlabel, ylabel=ylabel)
    axes.set_xticks(positions)
    axes.set_xticklabels(categories, rotation=45, ha='right')
    axes.legend()

    # Write each bar's height just above its top edge.
    for series in (first_series, second_series):
        for rect in series:
            top = rect.get_height()
            anchor = (rect.get_x() + rect.get_width() / 2, top)
            axes.annotate(f'{top:.1f}',
                          xy=anchor,
                          xytext=(0, 3),  # 3 points vertical offset
                          textcoords="offset points",
                          ha='center', va='bottom')

    plt.tight_layout()
    return figure
|