KevanSoon committed · Commit f147852 · Parent(s): 89ca815

first project init

Files changed:
- app.py +610 -0
- auth/__pycache__/clerk.cpython-310.pyc +0 -0
- auth/clerk.py +32 -0
- requirements.txt +96 -0
- tools/TOOLS_README.md +21 -0
- tools/__pycache__/tools.cpython-310.pyc +0 -0
- tools/extraction_results.jsonl +1 -0
- tools/langextract_tool.py +62 -0
- tools/tools.py +321 -0
- tools/visualization.html +189 -0
app.py
ADDED
```python
import base64
import json
import asyncio
import re
import os
import html
import requests
import httpx
import uuid
from fastapi import FastAPI, File, Form, UploadFile, HTTPException, Request, Header
from fastapi import Depends
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from pydantic import BaseModel
from requests.exceptions import RequestException
from dotenv import load_dotenv
import google.generativeai as genai
from google.api_core import exceptions as google_exceptions
from auth.clerk import verify_clerk_jwt
from tools.tools import extract_text_from_html, generate_document_insights, analyze_keywords_with_web_search


security = HTTPBearer()

# Load environment variables from a .env file
load_dotenv()

SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_SERVICE_ROLE_KEY = os.getenv("SUPABASE_SERVICE_ROLE_KEY")

app = FastAPI(
    title="Document Translator (Final Architecture)",
    description="Pipeline: Nemo (JSON) -> Sea-Lion (Translate JSON) -> Gemini (HTML)",
    version="10.0.1",  # Final Architecture, patched
)

# Allow requests from the default React frontend port
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:3000"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


def wrap_words_with_spans(html: str) -> str:
    """Wrap each word inside the target tags in a <span> carrying data attributes."""

    def replacer(match):
        replacer.counter += 1
        word = match.group(0)
        return f'<span data-clickable="true" data-id="word-{replacer.counter}">{word}</span>'

    replacer.counter = 0

    pattern = r'\b\w+[.,?!]?\b'  # matches words with optional trailing punctuation

    for tag in ['p', 'h1', 'h2', 'td']:
        # Regex to capture the content inside these tags
        regex = re.compile(fr'(<{tag}[^>]*>)(.*?)(</{tag}>)', re.DOTALL)

        def replacer_func(m):
            open_tag, inner_text, close_tag = m.groups()
            wrapped_text = re.sub(pattern, replacer, inner_text)
            return open_tag + wrapped_text + close_tag

        html = regex.sub(replacer_func, html)

    return html


def inject_dropdown_script(html: str) -> str:
    script = """
<script>
window.addEventListener('DOMContentLoaded', () => {

  function createDropdown(x, y, wordEl, word) {
    // Remove any existing dropdown
    const oldDropdown = document.getElementById('translation-dropdown');
    if (oldDropdown) oldDropdown.remove();

    // Create dropdown select element
    const dropdown = document.createElement('select');
    dropdown.id = 'translation-dropdown';
    dropdown.style.position = 'absolute';
    dropdown.style.left = x + 'px';
    dropdown.style.top = y + 'px';
    dropdown.style.zIndex = 9999;

    // Language options
    const languages = ['English', 'Chinese', 'Tamil', 'Hindi'];
    languages.forEach(lang => {
      const option = document.createElement('option');
      option.value = lang.toLowerCase();
      option.innerText = lang;
      dropdown.appendChild(option);
    });

    // Placeholder option
    const defaultOption = document.createElement('option');
    defaultOption.value = '';
    defaultOption.innerText = 'Select language';
    defaultOption.selected = true;
    defaultOption.disabled = true;
    dropdown.insertBefore(defaultOption, dropdown.firstChild);

    document.body.appendChild(dropdown);
    dropdown.focus();

    dropdown.addEventListener('change', () => {
      const selectedLang = dropdown.value;
      if (!selectedLang) return;

      // Call the backend to translate the word
      fetch('http://localhost:8080/api/translate_frontend', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ text: word, target_language: selectedLang }),
      })
      .then(res => {
        if (!res.ok) throw new Error('Translation API error');
        return res.json();
      })
      .then(data => {
        const translated = data.translated_text || word;
        wordEl.innerText = translated;

        // Add or update the language label
        let label = wordEl.nextSibling;
        if (!label || !label.classList || !label.classList.contains('language-label')) {
          label = document.createElement('span');
          label.className = 'language-label';
          label.style.marginLeft = '6px';
          label.style.fontSize = '0.8em';
          label.style.color = '#555';
          wordEl.after(label);
        }
        label.textContent = `(${dropdown.options[dropdown.selectedIndex].text})`;
      })
      .catch(err => {
        console.error('Translation error:', err);
        alert('Translation failed, please try again.');
      });

      dropdown.remove();
    });

    // Clicking outside closes the dropdown
    document.addEventListener('click', function onDocClick(e) {
      if (!dropdown.contains(e.target)) {
        dropdown.remove();
        document.removeEventListener('click', onDocClick);
      }
    });
  }

  // Add click handlers to all words wrapped in spans with data-clickable="true"
  document.querySelectorAll('span[data-clickable="true"]').forEach(el => {
    el.style.cursor = 'pointer';
    el.addEventListener('click', event => {
      event.stopPropagation();
      const word = el.innerText;
      const rect = el.getBoundingClientRect();
      const x = rect.left + window.scrollX;
      const y = rect.bottom + window.scrollY;
      createDropdown(x, y, el, word);
    });
  });

});
</script>
"""
    if "</body>" in html:
        return html.replace("</body>", script + "\n</body>")
    else:
        return html + script


# Define a Pydantic model to enforce the structure of the incoming request body
class HtmlAnalysisRequest(BaseModel):
    html: str


@app.post("/api/analyze_html")
async def analyze_html_file(file: UploadFile = File(...)):
    """
    Receives an uploaded HTML file, extracts its text content, and uses the
    Gemini tool to generate a summary and key informational points.
    """
    # Check that the uploaded file is an HTML file
    if file.content_type != "text/html":
        raise HTTPException(status_code=400, detail="Unsupported file type. Please upload a .html file.")

    try:
        # Step 1: Read the content of the uploaded file
        html_content_bytes = await file.read()
        html_content = html_content_bytes.decode('utf-8')

        # Step 2: Extract text from the HTML using our tool
        document_text = extract_text_from_html(html_content)

        # Step 3: Get insights from the Gemini tool
        analysis_results = await generate_document_insights(document_text)

        # Check if the tool returned a functional error
        if 'error' in analysis_results:
            raise HTTPException(status_code=500, detail=analysis_results['error'])

        return analysis_results

    except Exception as e:
        # Catch any other unexpected errors
        raise HTTPException(status_code=500, detail=f"An unexpected error occurred: {e}")


@app.post("/api/translate_frontend")
async def translate_frontend(request: Request):
    """Word-level translation used by the dropdown injected into generated HTML."""
    try:
        data = await request.json()
        text = data.get("text")
        target_language = data.get("target_language")

        if not text or not target_language:
            raise HTTPException(status_code=400, detail="Missing 'text' or 'target_language' in request body")

        url = "https://api.sea-lion.ai/v1/chat/completions"
        api_key = os.getenv("SEALION_API_KEY")

        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
            # No "accept" header, or set it to "application/json"
        }

        prompt = (
            f"Please translate the following text to {target_language} and return "
            "ONLY the translated text without any explanations or extra formatting:\n\n"
            f"\"{text}\""
        )

        payload = {
            "max_completion_tokens": 1024,
            "messages": [
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            "model": "aisingapore/Gemma-SEA-LION-v3-9B-IT"
        }

        response = requests.post(url, headers=headers, data=json.dumps(payload))
        response.raise_for_status()

        # Parse the JSON response
        response_json = response.json()

        # Extract the translated text from the response JSON
        translated_text = response_json["choices"][0]["message"]["content"].strip()

        if not translated_text:
            raise HTTPException(status_code=500, detail="Empty response from translation model.")

        return {"translated_text": translated_text}

    except requests.exceptions.RequestException as e:
        raise HTTPException(status_code=502, detail=f"Translation API request failed: {e}")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Unexpected error: {e}")


# --- Model 2: Sea-Lion (The JSON Translator) ---
@app.post("/api/translate")
async def translate_text(text: str, target_language: str):
    """
    Receives text and a target language, and returns the translated text
    using the SEA-LION model.
    """
    # The API endpoint URL for translation
    url = "https://api.sea-lion.ai/v1/chat/completions"

    # It's recommended to store API keys securely, e.g., in environment variables
    api_key = os.getenv("SEALION_API_KEY")

    # The headers for the request
    headers = {
        "accept": "text/plain",
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }

    # Create a dynamic prompt for the translation task
    prompt = f"Translate the following text to {target_language}: \"{text}\""

    # The JSON data payload for the request
    data = {
        "max_completion_tokens": 4096,  # Increased token limit for longer translations
        "messages": [
            {
                "role": "user",
                "content": prompt
            }
        ],
        "model": "aisingapore/Llama-SEA-LION-v3-70B-IT"
    }

    try:
        # Make the POST request to the SEA-LION API
        response = requests.post(url, headers=headers, data=json.dumps(data))
        response.raise_for_status()  # Raise an HTTPError for bad responses (4xx or 5xx)

        # The response from this specific API is plain text, not JSON.
        # We wrap it in a JSON structure for consistency in our API.
        translated_text = response.text

        # It's good practice to check whether the response is empty
        if not translated_text:
            raise HTTPException(status_code=500, detail="Received an empty response from the translation model.")

        return {"translated_text": translated_text}

    except requests.exceptions.RequestException as e:
        # Handle network-related errors
        raise HTTPException(status_code=502, detail=f"Failed to communicate with the translation AI model: {e}")
    except Exception as e:
        # Handle other potential errors
        raise HTTPException(status_code=500, detail=f"An unexpected error occurred during translation: {e}")


# --- Model 3: Gemini (The HTML Generator) ---
async def generate_html_from_translated_json(translated_json: dict) -> str:
    """
    Receives a translated JSON object and uses Gemini to generate the final
    structured HTML document.
    """
    try:
        api_key = os.getenv("GEMINI_API_KEY")
        if not api_key:
            raise ValueError("GEMINI_API_KEY not found in environment variables.")

        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(model_name='gemini-2.0-flash')
        json_string_for_prompt = json.dumps(translated_json, indent=2)

        prompt = f"""
        You are an expert system that converts a JSON object containing PRE-TRANSLATED text into a clean, semantic HTML document.

        **Your Task:**
        1. Analyze the following JSON object. Its text content has already been translated.
        2. The core document data is located at the path: `choices[0]['message']['tool_calls'][0]['function']['arguments']`.
        3. The value of 'arguments' is a JSON STRING. You must parse this inner string to access the list of document chunks.
        4. Using the translated data from the 'text' fields, generate a single, complete HTML5 document. Use appropriate tags like <h1>, <h2>, <p>, and <table>.
        5. If the JSON contains "tabular" data, render it as a table with a grey border and some styling.
        6. Your final output must ONLY be the raw HTML code. Do not add comments or markdown.

        **Translated JSON object to process:**
        ```json
        {json_string_for_prompt}
        ```
        """

        def do_request():
            response = model.generate_content(prompt)

            # Extract the raw HTML from the Gemini markdown code block
            match = re.search(r'```html\n(.*?)\n```', response.text, re.DOTALL)
            raw_html = match.group(1).strip() if match else response.text.strip()

            # Wrap each word in clickable spans
            wrapped_html = wrap_words_with_spans(raw_html)

            # Inject the dropdown script
            final_html = inject_dropdown_script(wrapped_html)

            return final_html

        return await asyncio.to_thread(do_request)
    except google_exceptions.ResourceExhausted:
        error_message = "The request to the document processor (Gemini) was rejected due to API quota limits. Please wait or upgrade your API plan."
        return f"<html><body><h1>API Quota Error</h1><p>{html.escape(error_message)}</p></body></html>"
    except Exception as e:
        error_message = f"An error occurred while generating the HTML structure with Gemini: {str(e)}"
        return f"<html><body><h1>HTML Generation Error</h1><p>{html.escape(error_message)}</p></body></html>"


# --- API Endpoint Orchestrating the Pipeline ---
@app.post("/api/translate_file", response_class=HTMLResponse)
async def translate_document_to_raw_html(target_language: str = Form(...), file: UploadFile = File(...)):
    """
    Processes a document using the final, robust pipeline:
    1. Nemo extracts content to JSON.
    2. Sea-Lion translates the text within the JSON.
    3. Gemini generates the final HTML from the translated JSON.
    """
    content_type = file.content_type
    if content_type not in ["application/pdf", "image/png", "image/jpeg"]:
        raise HTTPException(status_code=400, detail="Unsupported file type.")

    try:
        # === STEP 1: Get raw JSON from Nemo (The Parser) ===
        file_content = await file.read()
        file_b64 = base64.b64encode(file_content).decode("utf-8")
        nemo_data = {
            "model": "nvidia/nemoretriever-parse",
            "messages": [{"role": "user", "content": [{"type": "image_url", "image_url": {"url": f"data:{content_type};base64,{file_b64}"}}]}],
            "max_tokens": 2048,
        }
        headers = {'accept': 'application/json', 'Content-Type': 'application/json'}
        model_response = requests.post('http://localhost:8000/v1/chat/completions', headers=headers, data=json.dumps(nemo_data))
        model_response.raise_for_status()
        nemo_response_json = model_response.json()
        print(nemo_response_json)
        print("*********** Step 1 Done ***********")

        print("*********** Step 2 in Progress ***********")
        # === STEP 2: Get the translated JSON from Sea-Lion (The Translator) ===
        translated_json = await translate_text(nemo_response_json, target_language)
        print(translated_json)
        print("*********** Step 2 Done ***********")

        print("*********** Step 3 in Progress ***********")
        # === STEP 3: Generate the final HTML with Gemini (The HTML Generator) ===
        final_html = await generate_html_from_translated_json(translated_json)
        print(final_html)
        print("*********** Step 3 Done ***********")

        # === STEP 4: Return the final result to the frontend ===
        # (If Gemini itself produced an error page, it is returned as-is.)
        return HTMLResponse(content=final_html)

    except requests.exceptions.RequestException as e:
        raise HTTPException(status_code=502, detail=f"Failed to communicate with a downstream AI model: {e}")
    except Exception as e:
        # This will catch any errors, including the ValueError from the Sea-Lion function
        raise HTTPException(status_code=500, detail=f"An unexpected error occurred during processing: {e}")


@app.post("/api/verify_document_keywords")
async def verify_document_keywords(
    file: UploadFile = File(...),
    analysis_type: str = Form("legality"),
    search_context: str = Form("Singapore employment law")
):
    """
    Receives an HTML file and a configuration via form data, then uses the
    agent-to-agent RAG workflow to identify and verify key claims.
    """
    # Check that the uploaded file is an HTML file
    if file.content_type != "text/html":
        raise HTTPException(status_code=400, detail="Unsupported file type. Please upload a .html file.")

    try:
        # Step 1: Read content from the uploaded file and extract text
        html_content_bytes = await file.read()
        html_content = html_content_bytes.decode('utf-8')
        document_text = extract_text_from_html(html_content)

        if not document_text.strip():
            raise HTTPException(
                status_code=400,
                detail="Could not extract any meaningful text from the provided HTML content."
            )

        # Step 2: Prepare the configuration and call the analysis tool
        config = {
            "analysis_type": analysis_type,
            "search_context": search_context
        }
        analysis_results = await analyze_keywords_with_web_search(document_text, config)

        # Step 3: Handle potential errors from the tool
        if 'error' in analysis_results:
            raise HTTPException(status_code=500, detail=analysis_results['error'])

        # Step 4: Return the successful analysis
        return analysis_results

    except Exception as e:
        # Catch any other unexpected errors during the process
        raise HTTPException(status_code=500, detail=f"An unexpected error occurred: {str(e)}")


@app.post("/upload")
async def upload_file(
    authorization: str = Header(...),
    file: UploadFile = File(...)
):
    if not authorization.startswith("Bearer "):
        raise HTTPException(status_code=401, detail="Missing Bearer token")

    token = authorization.split(" ")[1]
    claims = await verify_clerk_jwt(token)

    user_id = claims.get("sub")  # Clerk user ID
    filename = f"{user_id}/{uuid.uuid4()}.png"

    # Upload to Supabase Storage
    async with httpx.AsyncClient() as client:
        upload_resp = await client.post(
            f"{SUPABASE_URL}/storage/v1/object/user-documents/{filename}",
            headers={
                "Authorization": f"Bearer {SUPABASE_SERVICE_ROLE_KEY}",
                "Content-Type": file.content_type,
            },
            content=await file.read()
        )

    if upload_resp.status_code != 200:
        raise HTTPException(status_code=500, detail="Failed to upload to Supabase Storage")

    file_url = f"user-documents/{filename}"

    # Insert metadata into the `documents` table
    async with httpx.AsyncClient() as client:
        insert_resp = await client.post(
            f"{SUPABASE_URL}/rest/v1/documents",
            headers={
                "Authorization": f"Bearer {SUPABASE_SERVICE_ROLE_KEY}",
                "apikey": SUPABASE_SERVICE_ROLE_KEY,
                "Content-Type": "application/json",
                "Prefer": "return=representation"
            },
            json={
                "user_id": user_id,
                "filename": filename.split("/")[-1],
                "file_url": file_url
            }
        )

    if insert_resp.status_code >= 300:
        raise HTTPException(status_code=500, detail="Failed to insert document metadata")

    return {"message": f"File uploaded as {filename}"}


@app.get("/api/documents")
async def get_user_documents(credentials: HTTPAuthorizationCredentials = Depends(security)):
    token = credentials.credentials
    claims = await verify_clerk_jwt(token)
    user_id = claims.get("sub")
    if not user_id:
        raise HTTPException(status_code=401, detail="Invalid user")

    # Step 1: Get the user's documents from Supabase
    async with httpx.AsyncClient() as client:
        resp = await client.get(
            f"{SUPABASE_URL}/rest/v1/documents?user_id=eq.{user_id}",
            headers={
                "apikey": SUPABASE_SERVICE_ROLE_KEY,
                "Authorization": f"Bearer {SUPABASE_SERVICE_ROLE_KEY}",
                "Accept": "application/json",
            },
        )

    if resp.status_code != 200:
        raise HTTPException(status_code=500, detail="Failed to fetch documents")

    documents = resp.json()

    # Step 2: Get a signed URL for each file
    async with httpx.AsyncClient() as client:
        for doc in documents:
            file_path = doc["file_url"].split("user-documents/", 1)[-1]
            if not file_path:
                doc["signed_url"] = None
                continue

            signed_url_resp = await client.post(
                f"{SUPABASE_URL}/storage/v1/object/sign/user-documents/{file_path}",
                headers={
                    "apikey": SUPABASE_SERVICE_ROLE_KEY,
                    "Authorization": f"Bearer {SUPABASE_SERVICE_ROLE_KEY}",
                },
                json={"expiresIn": 3600},  # 1 hour
            )

            if signed_url_resp.status_code == 200:
                print(f"{SUPABASE_URL}/storage/v1{signed_url_resp.json().get('signedURL')}")
                doc["signed_url"] = f"{SUPABASE_URL}/storage/v1{signed_url_resp.json().get('signedURL')}"
            else:
                doc["signed_url"] = None
    print(documents)

    return documents
```
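For a quick smoke test of the two translation endpoints above, a minimal client sketch (not part of this commit; it assumes the app is served on localhost:8080, the port hard-coded in the injected dropdown script, and that the Nemo parser from Step 1 is reachable on port 8000; `sample.pdf` is a hypothetical input file):

```python
# Hypothetical smoke test, not part of the repo.
import requests

# Single-word translation used by the dropdown in the generated HTML:
resp = requests.post(
    "http://localhost:8080/api/translate_frontend",
    json={"text": "hello", "target_language": "chinese"},
)
print(resp.json())  # expected shape: {"translated_text": "..."}

# Full Nemo -> SEA-LION -> Gemini pipeline (needs the Nemo server on port 8000):
with open("sample.pdf", "rb") as f:  # hypothetical sample file
    resp = requests.post(
        "http://localhost:8080/api/translate_file",
        data={"target_language": "English"},
        files={"file": ("sample.pdf", f, "application/pdf")},
    )
print(resp.text)  # the final clickable HTML document
```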
auth/__pycache__/clerk.cpython-310.pyc
ADDED
Binary file (1.25 kB).
auth/clerk.py
ADDED
```python
# auth/clerk.py
from jose import jwt
import httpx
from fastapi import HTTPException

CLERK_ISSUER = "https://enabling-terrapin-28.clerk.accounts.dev"  # Clerk instance issuer URL
CLERK_AUDIENCE = "http://localhost:3000"  # Your frontend origin


async def verify_clerk_jwt(token: str) -> dict:
    try:
        # Fetch the JWKS (public signing keys) from the Clerk instance
        async with httpx.AsyncClient() as client:
            jwks_url = f"{CLERK_ISSUER}/.well-known/jwks.json"
            resp = await client.get(jwks_url)
            jwks = resp.json()["keys"]

        # Pick the key whose key ID matches the token header
        unverified_header = jwt.get_unverified_header(token)
        kid = unverified_header.get("kid")

        key = next((k for k in jwks if k["kid"] == kid), None)
        if not key:
            raise HTTPException(status_code=401, detail="Public key not found")

        payload = jwt.decode(
            token,
            key,
            algorithms=["RS256"],
            audience=CLERK_AUDIENCE,
            issuer=CLERK_ISSUER
        )
        return payload
    except Exception as e:
        raise HTTPException(status_code=401, detail=f"Invalid Clerk JWT: {str(e)}")
```
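app.py consumes this helper in two ways: by splitting the `Authorization` header by hand (`/upload`) and via `HTTPBearer` credentials (`/api/documents`). The same check can be factored into a reusable dependency; a minimal sketch, assuming the project layout above (`get_current_user` is a hypothetical helper, not part of this commit):

```python
# Hypothetical dependency wrapper around verify_clerk_jwt; not part of this commit.
from fastapi import Depends, FastAPI, HTTPException
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer

from auth.clerk import verify_clerk_jwt

security = HTTPBearer()
app = FastAPI()


async def get_current_user(
    credentials: HTTPAuthorizationCredentials = Depends(security),
) -> str:
    # Verify the bearer token and return the Clerk user ID ("sub" claim)
    claims = await verify_clerk_jwt(credentials.credentials)
    user_id = claims.get("sub")
    if not user_id:
        raise HTTPException(status_code=401, detail="Invalid user")
    return user_id


@app.get("/api/whoami")
async def whoami(user_id: str = Depends(get_current_user)):
    return {"user_id": user_id}
```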
requirements.txt
ADDED
```
accelerate==1.9.0
annotated-types==0.7.0
anyio==4.9.0
beautifulsoup4==4.13.4
cachetools==5.5.2
certifi==2025.7.14
cffi==1.17.1
charset-normalizer==3.4.2
click==8.2.1
colorama==0.4.6
cryptography==45.0.5
dnspython==2.7.0
dotenv==0.9.9
ecdsa==0.19.1
email_validator==2.2.0
exceptiongroup==1.3.0
fastapi==0.116.1
fastapi-cli==0.0.8
fastapi-cloud-cli==0.1.4
filelock==3.13.1
fsspec==2024.6.1
google-ai-generativelanguage==0.6.15
google-api-core==2.25.1
google-api-python-client==2.177.0
google-auth==2.40.3
google-auth-httplib2==0.2.0
google-generativeai==0.8.5
googleapis-common-protos==1.70.0
grpcio==1.74.0
grpcio-status==1.71.2
h11==0.16.0
httpcore==1.0.9
httplib2==0.22.0
httptools==0.6.4
httpx==0.28.1
huggingface-hub==0.34.3
idna==3.10
itsdangerous==2.2.0
Jinja2==3.1.6
langdetect==1.0.9
markdown-it-py==3.0.0
MarkupSafe==2.1.5
mdurl==0.1.2
mpmath==1.3.0
networkx==3.3
numpy==2.1.2
orjson==3.11.0
packaging==25.0
pillow==11.0.0
proto-plus==1.26.1
protobuf==5.29.5
psutil==7.0.0
pyasn1==0.6.1
pyasn1_modules==0.4.2
pycparser==2.22
pydantic==2.11.7
pydantic-extra-types==2.10.5
pydantic-settings==2.10.1
pydantic_core==2.33.2
Pygments==2.19.2
PyMuPDF==1.26.3
pyparsing==3.2.3
python-dotenv==1.1.1
python-jose==3.5.0
python-multipart==0.0.20
PyYAML==6.0.2
regex==2025.7.31
requests==2.32.4
rich==14.0.0
rich-toolkit==0.14.8
rignore==0.6.4
rsa==4.9.1
safetensors==0.5.3
sentry-sdk==2.33.2
shellingham==1.5.4
six==1.17.0
sniffio==1.3.1
soupsieve==2.7
starlette==0.47.2
sympy==1.13.3
tokenizers==0.21.4
torch==2.7.1+cu126
torchaudio==2.7.1+cu126
torchvision==0.22.1+cu126
tqdm==4.67.1
transformers==4.54.1
typer==0.16.0
typing-inspection==0.4.1
typing_extensions==4.12.2
ujson==5.10.0
uritemplate==4.2.0
urllib3==2.5.0
uvicorn==0.35.0
watchfiles==1.1.0
websockets==15.0.1
langextract
```
tools/TOOLS_README.md
ADDED
Test Summary Tool:
```bash
curl -X POST "http://localhost:8080/api/analyze_html" -F "file=@<your-file>.html;type=text/html"
```

response:
```json
{"summary":"This payslip shows earnings including base salary, allowances, and overtime pay, with deductions for advances, loans, and CPF, resulting in a net pay of 2363.40 or 4213.40. CPF contributions are also detailed.","earnings":["基本工资 (Basic Salary): 1800.00","总加班费 (Total Overtime): 368.16","[加班1.5倍] (Overtime 1.5x): 141.60","[加班2.0倍] (Overtime 2.0x): 226.56","住宿补贴 (Housing Allowance): 450.00","特别津贴 (Special Allowance): 100.00","交通津贴 (Transport Allowance): 300.00","雇主公积金 (Employer CPF): 180.00"],"deductions":["第一周预支 (First Week Advance): -300.00","员工贷款 (Employee Loan): -80.00","CDAC: -1.00","员工公积金 (Employee CPF): -191.00","总无薪假 (Total Unpaid Leave): -82.76"],"additional_info":{"gross_pay":["2935.40","4935.40"],"net_pay":["2363.40","4213.40"],"cpf_salary":["2555.40","4555.40"],"total_cpf":["371.00","661.00"],"annual_leave":{"used":"1.00","balance":"48.00"},"medical_leave":{"used":"0.00","balance":"14.00"},"bank_details":{"bank":"华侨银行 (OCBC Bank)","account_number":"151179932"}}}
```

Test Keyword Tool:

```bash
curl -X POST "http://localhost:8080/api/verify_document_keywords" -F "file=@<your-file>.html;type=text/html" -F "analysis_type=legality" -F "search_context=Singapore employment law"
```

response:
```json
{"analysis_configuration":{"analysis_type":"legality","search_context":"Singapore employment law"},"verification_results":[{"claim":"基本工资 1800.00","summary":"Claim states a base salary of 1800.00. No evidence to verify.","status":"Needs Manual Review"},{"claim":"[ 加班1. 5倍 ] 10. 00小时 x $ 14. 16 = 141. 60","summary":"Claim states overtime pay at 1.5x rate for 10 hours. The calculation needs verification. No evidence to verify the hourly rate or overtime policy.","status":"Needs Manual Review"},{"claim":"[ 加班2. 0倍 ] 12. 00小时 x $ 18. 88 = 226. 56","summary":"Claim states overtime pay at 2.0x rate for 12 hours. The calculation needs verification. No evidence to verify the hourly rate or overtime policy.","status":"Needs Manual Review"},{"claim":"员工公积金 - 191. 00","summary":"Claim states a deduction of 191.00 for employee housing fund. No evidence to verify the legality or accuracy.","status":"Needs Manual Review"},{"claim":"年假 / 已用 / 余额 : 1. 00 / 1. 00 / 48. 00","summary":"Claim states vacation leave information. No evidence to verify the accuracy or legality of the leave policy.","status":"Needs Manual Review"}]}
```
tools/__pycache__/tools.cpython-310.pyc
ADDED
Binary file (9.67 kB).
tools/extraction_results.jsonl
ADDED
```json
{"extractions": [{"extraction_class": "character", "extraction_text": "Lady Juliet", "char_interval": {"start_pos": 0, "end_pos": 11}, "alignment_status": "match_exact", "extraction_index": 1, "group_index": 0, "description": null, "attributes": {"emotional_state": "longing"}}, {"extraction_class": "emotion", "extraction_text": "heart aching", "char_interval": {"start_pos": 46, "end_pos": 58}, "alignment_status": "match_exact", "extraction_index": 2, "group_index": 1, "description": null, "attributes": {"feeling": "ache"}}, {"extraction_class": "relationship", "extraction_text": "for Romeo", "char_interval": {"start_pos": 59, "end_pos": 68}, "alignment_status": "match_exact", "extraction_index": 3, "group_index": 2, "description": null, "attributes": {"type": "love"}}], "text": "Lady Juliet gazed longingly at the stars, her heart aching for Romeo", "document_id": "doc_211712b3"}
```
tools/langextract_tool.py
ADDED
```python
import os
import langextract as lx
import textwrap
from dotenv import load_dotenv

# Step 1: Load environment variables from a .env file
load_dotenv()
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
os.environ["LANGEXTRACT_API_KEY"] = GEMINI_API_KEY

# 1. Define the prompt and extraction rules
prompt = textwrap.dedent(
    """\
    Extract characters, emotions, and relationships in order of appearance.
    Use exact text for extractions. Do not paraphrase or overlap entities.
    Provide meaningful attributes for each entity to add context."""
)

# 2. Provide a high-quality example to guide the model
examples = [
    lx.data.ExampleData(
        text="ROMEO. But soft! What light through yonder window breaks? It is the east, and Juliet is the sun.",
        extractions=[
            lx.data.Extraction(
                extraction_class="character",
                extraction_text="ROMEO",
                attributes={"emotional_state": "wonder"},
            ),
            lx.data.Extraction(
                extraction_class="emotion",
                extraction_text="But soft!",
                attributes={"feeling": "gentle awe"},
            ),
            lx.data.Extraction(
                extraction_class="relationship",
                extraction_text="Juliet is the sun",
                attributes={"type": "metaphor"},
            ),
        ],
    )
]

# The input text to be processed
input_text = "Lady Juliet gazed longingly at the stars, her heart aching for Romeo"

# Run the extraction
result = lx.extract(
    text_or_documents=input_text,
    prompt_description=prompt,
    examples=examples,
    model_id="gemini-2.5-flash",
)

# Save the results to a JSONL file
lx.io.save_annotated_documents(
    [result], output_name="extraction_results.jsonl", output_dir="."
)

# Generate the visualization from the file
html_content = lx.visualize("extraction_results.jsonl")
with open("visualization.html", "w", encoding="utf-8") as f:
    f.write(html_content)
```
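The saved JSONL can also be consumed directly. A small sketch that walks the file written by the script above, using the record layout shown in tools/extraction_results.jsonl:

```python
# Reads the extraction_results.jsonl produced by the script above.
import json

with open("extraction_results.jsonl", "r", encoding="utf-8") as f:
    for line in f:
        doc = json.loads(line)  # one annotated document per line
        for ex in doc["extractions"]:
            print(f'{ex["extraction_class"]}: {ex["extraction_text"]} {ex["attributes"]}')
```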
tools/tools.py
ADDED
|
@@ -0,0 +1,321 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ./tools/tools.py
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import json
|
| 5 |
+
import logging
|
| 6 |
+
import asyncio
|
| 7 |
+
import itertools
|
| 8 |
+
from functools import partial
|
| 9 |
+
from concurrent.futures import ThreadPoolExecutor
|
| 10 |
+
|
| 11 |
+
import google.generativeai as genai
|
| 12 |
+
from google.api_core import exceptions as google_exceptions
|
| 13 |
+
from googleapiclient.discovery import build
|
| 14 |
+
from bs4 import BeautifulSoup
|
| 15 |
+
from dotenv import load_dotenv
|
| 16 |
+
|
| 17 |
+
# Step 1: Load environment variables from a .env file
|
| 18 |
+
load_dotenv()
|
| 19 |
+
|
| 20 |
+
# Configure a logger for the tool
|
| 21 |
+
logging.basicConfig(level=logging.INFO)
|
| 22 |
+
logger = logging.getLogger(__name__)
|
| 23 |
+
|
| 24 |
+
# Step 2: Configure the Gemini API key right after loading it.
|
| 25 |
+
# This is the crucial fix.
|
| 26 |
+
try:
|
| 27 |
+
api_key = os.getenv("GEMINI_API_KEY")
|
| 28 |
+
if not api_key:
|
| 29 |
+
raise ValueError("GEMINI_API_KEY not found in environment variables.")
|
| 30 |
+
genai.configure(api_key=api_key)
|
| 31 |
+
except (ValueError, TypeError) as e:
|
| 32 |
+
# This will print a clear warning if the server starts without a key.
|
| 33 |
+
print(f"WARNING: Gemini API not configured. Tool will fail. Reason: {e}")
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def extract_text_from_html(html_content: str) -> str:
|
| 37 |
+
"""
|
| 38 |
+
Parses an HTML string and extracts all human-readable text from the body.
|
| 39 |
+
"""
|
| 40 |
+
if not html_content:
|
| 41 |
+
return ""
|
| 42 |
+
|
| 43 |
+
soup = BeautifulSoup(html_content, "html.parser")
|
| 44 |
+
|
| 45 |
+
for script_or_style in soup(["script", "style"]):
|
| 46 |
+
script_or_style.decompose()
|
| 47 |
+
|
| 48 |
+
text = soup.get_text(separator=" ", strip=True)
|
| 49 |
+
return text
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
async def generate_document_insights(document_text: str) -> dict:
|
| 53 |
+
"""
|
| 54 |
+
Analyzes a string of text using Gemini to provide a summary and key points.
|
| 55 |
+
"""
|
| 56 |
+
try:
|
| 57 |
+
if not document_text.strip():
|
| 58 |
+
return {
|
| 59 |
+
"error": "Could not extract any meaningful text from the provided content."
|
| 60 |
+
}
|
| 61 |
+
|
| 62 |
+
# CORRECTED MODEL: Using gemini-1.5-flash, a powerful and efficient model.
|
| 63 |
+
model = genai.GenerativeModel(model_name="gemini-2.5-flash")
|
| 64 |
+
|
| 65 |
+
prompt = f"""
|
| 66 |
+
You are an expert financial analyst who specializes in interpreting payslips and financial documents.
|
| 67 |
+
Based on the text below, which was extracted from a payslip, perform two tasks:
|
| 68 |
+
1. **Summarize**: Create a concise, one-sentence summary of the payslip, focusing on the final net pay.
|
| 69 |
+
2. **Extract Key Figures**: Identify and list the most important financial figures as bullet points. Categorize them into "Earnings," and "Deductions."
|
| 70 |
+
|
| 71 |
+
**Document Text:**
|
| 72 |
+
---
|
| 73 |
+
{document_text}
|
| 74 |
+
---
|
| 75 |
+
|
| 76 |
+
Please format your response as a valid JSON object with three keys: "summary" (a string), "earnings" (an array of strings), and "deductions" (an array of strings).
|
| 77 |
+
|
| 78 |
+
Example Format:
|
| 79 |
+
{{
|
| 80 |
+
"summary": "This payslip shows a net pay of [Net Pay Amount] after calculating total earnings and deductions.",
|
| 81 |
+
"earnings": [
|
| 82 |
+
"Basic Salary: 1800.00",
|
| 83 |
+
"Total Overtime: 368.16",
|
| 84 |
+
"Housing Allowance: 450.00"
|
| 85 |
+
],
|
| 86 |
+
"deductions": [
|
| 87 |
+
"Advance (Week 1): -300.00",
|
| 88 |
+
"Employee Loan: -80.00",
|
| 89 |
+
"Employee CPF: -191.00"
|
| 90 |
+
]
|
| 91 |
+
}}
|
| 92 |
+
"""
|
| 93 |
+
|
| 94 |
+
response = await model.generate_content_async(prompt)
|
| 95 |
+
|
| 96 |
+
cleaned_response_text = (
|
| 97 |
+
response.text.strip().replace("```json", "").replace("```", "").strip()
|
| 98 |
+
)
|
| 99 |
+
|
| 100 |
+
insights = json.loads(cleaned_response_text)
|
| 101 |
+
return insights
|
| 102 |
+
|
| 103 |
+
except google_exceptions.ResourceExhausted as e:
|
| 104 |
+
return {
|
| 105 |
+
"error": f"Gemini API quota exceeded. Please try again later. Details: {e}"
|
| 106 |
+
}
|
| 107 |
+
except json.JSONDecodeError:
|
| 108 |
+
return {
|
| 109 |
+
"summary": "Could not parse the AI's response.",
|
| 110 |
+
"key_points": [response.text],
|
| 111 |
+
}
|
| 112 |
+
except Exception as e:
|
| 113 |
+
# This will now catch the ValueError from the configuration step if the key is missing.
|
| 114 |
+
return {
|
| 115 |
+
"error": f"An unexpected error occurred during document analysis: {str(e)}"
|
| 116 |
+
}
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
def _execute_single_google_search(query: str, max_results: int) -> list[dict]:
|
| 120 |
+
"""(Internal Helper) Performs a single synchronous web search using Google."""
|
| 121 |
+
logger.info(f"Executing web search for query: '{query}'...")
|
| 122 |
+
try:
|
| 123 |
+
# ADAPTATION: Fetch keys directly from environment variables.
|
| 124 |
+
api_key = os.getenv("GEMINI_API_KEY")
|
| 125 |
+
cse_id = os.getenv("GOOGLE_CSE_ID")
|
| 126 |
+
|
| 127 |
+
if not api_key or not cse_id:
|
| 128 |
+
raise ValueError(
|
| 129 |
+
"GEMINI_API_KEY and GOOGLE_CSE_ID must be set in the environment."
|
| 130 |
+
)
|
| 131 |
+
|
| 132 |
+
service = build("customsearch", "v1", developerKey=api_key)
|
| 133 |
+
|
| 134 |
+
params = {"q": query, "cx": cse_id, "num": max_results}
|
| 135 |
+
|
| 136 |
+
res = service.cse().list(**params).execute()
|
| 137 |
+
|
| 138 |
+
search_items = res.get("items", [])
|
| 139 |
+
# MODIFICATION: Added 'snippet' for better RAG context.
|
| 140 |
+
results = [
|
| 141 |
+
{
|
| 142 |
+
"title": item.get("title", "Untitled"),
|
| 143 |
+
"href": item.get("link"),
|
| 144 |
+
"snippet": item.get("snippet", "No snippet available."),
|
| 145 |
+
}
|
| 146 |
+
for item in search_items
|
| 147 |
+
if item.get("link")
|
| 148 |
+
]
|
| 149 |
+
logger.info(f"Found {len(results)} web results for query: '{query}'")
|
| 150 |
+
return results
|
| 151 |
+
except Exception as e:
|
| 152 |
+
logger.error(f"An error occurred during web search for '{query}': {e}")
|
| 153 |
+
return []
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
async def perform_searches_and_get_hits(
|
| 157 |
+
queries: list[str], executor: ThreadPoolExecutor, max_results_per_query: int = 3
|
| 158 |
+
) -> list[dict]:
|
| 159 |
+
"""Asynchronously runs multiple Google searches and returns a de-duplicated list of hits."""
|
| 160 |
+
if not queries:
|
| 161 |
+
return []
|
| 162 |
+
logger.info(f"\n--- Starting concurrent web search for {len(queries)} queries ---")
|
| 163 |
+
loop = asyncio.get_running_loop()
|
| 164 |
+
|
| 165 |
+
# ADAPTATION: Removed settings dependency.
|
| 166 |
+
search_tasks = [
|
| 167 |
+
partial(_execute_single_google_search, query, max_results_per_query)
|
| 168 |
+
for query in queries
|
| 169 |
+
]
|
| 170 |
+
search_coroutines = [loop.run_in_executor(executor, task) for task in search_tasks]
|
| 171 |
+
list_of_hit_lists = await asyncio.gather(*search_coroutines)
|
| 172 |
+
|
| 173 |
+
unique_hits = {
|
| 174 |
+
hit["href"]: hit for hit in itertools.chain.from_iterable(list_of_hit_lists)
|
| 175 |
+
}
|
| 176 |
+
|
| 177 |
+
final_hits = list(unique_hits.values())
|
| 178 |
+
logger.info(
|
| 179 |
+
f"--- Web search complete. Found {len(final_hits)} unique items in total. ---"
|
| 180 |
+
)
|
| 181 |
+
return final_hits
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
# --- MODIFIED: Keyword Analysis Tool now uses the full search pipeline ---
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
async def analyze_keywords_with_web_search(document_text: str, config: dict) -> dict:
|
| 188 |
+
"""
|
| 189 |
+
Analyzes and verifies keywords using a two-agent RAG process with
|
| 190 |
+
efficient batching for verification to avoid rate limits.
|
| 191 |
+
"""
|
| 192 |
+
try:
|
| 193 |
+
model = genai.GenerativeModel(model_name="gemini-2.0-flash")
|
| 194 |
+
analysis_type = config.get("analysis_type", "accuracy")
|
| 195 |
+
search_context = config.get("search_context", "public records")
|
| 196 |
+
|
| 197 |
+
# --- Agent 1: Keyword/Claim Extraction (1 API Call) ---
|
| 198 |
+
logger.info("Agent 1: Extracting keywords from document...")
|
| 199 |
+
keyword_extraction_prompt = f"""
|
| 200 |
+
You are an expert analyst specializing in document verification. Based on the document text below,
|
| 201 |
+
identify and extract up to 5 critical keywords, figures, or claims that must be verified for {analysis_type}
|
| 202 |
+
within the context of "{search_context}".
|
| 203 |
+
|
| 204 |
+
Focus on terms that are verifiable against external sources.
|
| 205 |
+
Return your findings as a valid JSON array of strings.
|
| 206 |
+
|
| 207 |
+
Document Text:
|
| 208 |
+
---
|
| 209 |
+
{document_text}
|
| 210 |
+
---
|
| 211 |
+
"""
|
| 212 |
+
response_agent1 = await model.generate_content_async(keyword_extraction_prompt)
|
| 213 |
+
cleaned_agent1_response = (
|
| 214 |
+
response_agent1.text.strip()
|
| 215 |
+
.replace("```json", "")
|
| 216 |
+
.replace("```", "")
|
| 217 |
+
.strip()
|
| 218 |
+
)
|
| 219 |
+
|
| 220 |
+
try:
|
| 221 |
+
keywords_to_verify = json.loads(cleaned_agent1_response)
|
| 222 |
+
except json.JSONDecodeError:
|
| 223 |
+
return {
|
| 224 |
+
"error": "Agent 1 (Keyword Extractor) failed to return valid JSON.",
|
| 225 |
+
"raw_response": cleaned_agent1_response,
|
| 226 |
+
}
|
| 227 |
+
|
| 228 |
+
if not keywords_to_verify:
|
| 229 |
+
return {
|
| 230 |
+
"message": "No keywords were identified for verification.",
|
| 231 |
+
"verification_results": [],
|
| 232 |
+
}
|
| 233 |
+
|
| 234 |
+
logger.info(
|
| 235 |
+
f"Agent 1 found {len(keywords_to_verify)} keywords: {keywords_to_verify}"
|
| 236 |
+
)
|
| 237 |
+
|
| 238 |
+
# --- (Optional) Polite Delay ---
|
| 239 |
+
# A small pause between the two main API calls. Not strictly needed for rate
|
| 240 |
+
# limiting anymore, but can be good practice.
|
| 241 |
+
await asyncio.sleep(2)
|
| 242 |
+
|
| 243 |
+
# --- Live Web Search (No API Calls to Gemini) ---
|
| 244 |
+
dork_queries = [
|
| 245 |
+
f'"{keyword}" AND "{search_context}"' for keyword in keywords_to_verify
|
| 246 |
+
]
|
| 247 |
+
with ThreadPoolExecutor() as executor:
|
| 248 |
+
all_search_hits = await perform_searches_and_get_hits(
|
| 249 |
+
dork_queries, executor
|
| 250 |
+
)
|
| 251 |
+
|
| 252 |
+
# --- Agent 2: Batch Verification (1 API Call for all keywords) ---
|
| 253 |
+
logger.info("Agent 2: Starting batch verification for all keywords...")
|
| 254 |
+
|
| 255 |
+
# Step 1: Prepare the evidence for each claim
|
| 256 |
+
verification_items_for_prompt = []
|
| 257 |
+
for keyword in keywords_to_verify:
|
| 258 |
+
relevant_hits = [
|
| 259 |
+
hit
|
| 260 |
+
for hit in all_search_hits
|
| 261 |
+
if keyword.lower() in hit.get("title", "").lower()
|
| 262 |
+
or keyword.lower() in hit.get("snippet", "").lower()
|
| 263 |
+
]
|
| 264 |
+
web_snippets = (
|
| 265 |
+
"\n".join([f"- {hit['snippet']}" for hit in relevant_hits[:3]])
|
| 266 |
+
if relevant_hits
|
| 267 |
+
else "No specific information found on the web."
|
| 268 |
+
)
|
| 269 |
+
|
| 270 |
+
# Create a formatted block for each item to be verified
|
| 271 |
+
item_block = f'Claim: "{keyword}"\n' f"Evidence:\n{web_snippets}\n" f"---"
|
| 272 |
+
verification_items_for_prompt.append(item_block)

        # Step 2: Create a single, powerful batch prompt
        batch_verification_prompt = f"""
        You are a verification agent. For EACH of the following claims, assess its {analysis_type} based ONLY on the provided evidence.
        Your response MUST be a valid JSON array, where each object has three keys: "claim", "summary", and "status".
        The status must be one of: "Verified", "Contradicted", or "Needs Manual Review".

        Here are the claims to verify:

        {''.join(verification_items_for_prompt)}

        Provide only the JSON array as your final answer. Do not include markdown backticks.
        """

        # Step 3: Make a single API call for all verifications
        response_agent2 = await model.generate_content_async(batch_verification_prompt)
        cleaned_agent2_response = (
            response_agent2.text.strip()
            .replace("```json", "")
            .replace("```", "")
            .strip()
        )

        # Step 4: Parse the batch response
        try:
            verification_results = json.loads(cleaned_agent2_response)
        except json.JSONDecodeError:
            logger.error(
                f"Agent 2 (Verifier) failed to return valid JSON in batch mode. Raw response: {cleaned_agent2_response}"
            )
            return {
                "error": "Agent 2 (Verifier) failed to return valid JSON in batch mode.",
                "raw_response": cleaned_agent2_response,
            }
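        # Example (illustrative): a successful batch reply parses to a list of
        # dicts such as:
        #   [{"claim": "Acme Pte Ltd",
        #     "summary": "Name matches a registered Singapore company.",
        #     "status": "Verified"}]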

        logger.info("Agent 2: Batch verification complete.")
        return {
            "analysis_configuration": config,
            "verification_results": verification_results,
        }

    except Exception as e:
        logger.error(
            f"An unexpected error occurred in the keyword analysis tool: {str(e)}",
            exc_info=True,
        )
        return {
            "error": f"An unexpected error occurred in the keyword analysis tool: {str(e)}"
        }
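For orientation, a minimal driver for the tool above might look like the sketch below. The entry-point name run_keyword_analysis and the argument names are placeholders, since the tool's real signature is defined earlier in tools/tools.py.

import asyncio

async def main():
    # Hypothetical call; the function name and parameters are illustrative only.
    result = await run_keyword_analysis(
        document_text="Acme Pte Ltd agrees to pay S$4,200 monthly ...",
        search_context="employment contract",
        analysis_type="accuracy",
    )
    print(result.get("verification_results") or result)

asyncio.run(main())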
tools/visualization.html
ADDED
@@ -0,0 +1,189 @@
<style>
.lx-highlight { position: relative; border-radius:3px; padding:1px 2px;}
.lx-highlight .lx-tooltip {
  visibility: hidden;
  opacity: 0;
  transition: opacity 0.2s ease-in-out;
  background: #333;
  color: #fff;
  text-align: left;
  border-radius: 4px;
  padding: 6px 8px;
  position: absolute;
  z-index: 1000;
  bottom: 125%;
  left: 50%;
  transform: translateX(-50%);
  font-size: 12px;
  max-width: 240px;
  white-space: normal;
  box-shadow: 0 2px 6px rgba(0,0,0,0.3);
}
.lx-highlight:hover .lx-tooltip { visibility: visible; opacity:1; }
.lx-animated-wrapper { max-width: 100%; font-family: Arial, sans-serif; }
.lx-controls {
  background: #fafafa; border: 1px solid #90caf9; border-radius: 8px;
  padding: 12px; margin-bottom: 16px;
}
.lx-button-row {
  display: flex; justify-content: center; gap: 8px; margin-bottom: 12px;
}
.lx-control-btn {
  background: #4285f4; color: white; border: none; border-radius: 4px;
  padding: 8px 16px; cursor: pointer; font-size: 13px; font-weight: 500;
  transition: background-color 0.2s;
}
.lx-control-btn:hover { background: #3367d6; }
.lx-progress-container {
  margin-bottom: 8px;
}
.lx-progress-slider {
  width: 100%; margin: 0; appearance: none; height: 6px;
  background: #ddd; border-radius: 3px; outline: none;
}
.lx-progress-slider::-webkit-slider-thumb {
  appearance: none; width: 18px; height: 18px; background: #4285f4;
  border-radius: 50%; cursor: pointer;
}
.lx-progress-slider::-moz-range-thumb {
  width: 18px; height: 18px; background: #4285f4; border-radius: 50%;
  cursor: pointer; border: none;
}
.lx-status-text {
  text-align: center; font-size: 12px; color: #666; margin-top: 4px;
}
.lx-text-window {
  font-family: monospace; white-space: pre-wrap; border: 1px solid #90caf9;
  padding: 12px; max-height: 260px; overflow-y: auto; margin-bottom: 12px;
  line-height: 1.6;
}
.lx-attributes-panel {
  background: #fafafa; border: 1px solid #90caf9; border-radius: 6px;
  padding: 8px 10px; margin-top: 8px; font-size: 13px;
}
.lx-current-highlight {
  border-bottom: 4px solid #ff4444;
  font-weight: bold;
  animation: lx-pulse 1s ease-in-out;
}
@keyframes lx-pulse {
  0% { text-decoration-color: #ff4444; }
  50% { text-decoration-color: #ff0000; }
  100% { text-decoration-color: #ff4444; }
}
.lx-legend {
  font-size: 12px; margin-bottom: 8px;
  padding-bottom: 8px; border-bottom: 1px solid #e0e0e0;
}
.lx-label {
  display: inline-block;
  padding: 2px 4px;
  border-radius: 3px;
  margin-right: 4px;
  color: #000;
}
.lx-attr-key {
  font-weight: 600;
  color: #1565c0;
  letter-spacing: 0.3px;
}
.lx-attr-value {
  font-weight: 400;
  opacity: 0.85;
  letter-spacing: 0.2px;
}

/* Add optimizations with larger fonts and better readability for GIFs */
.lx-gif-optimized .lx-text-window { font-size: 16px; line-height: 1.8; }
.lx-gif-optimized .lx-attributes-panel { font-size: 15px; }
.lx-gif-optimized .lx-current-highlight { text-decoration-thickness: 4px; }
</style>
<div class="lx-animated-wrapper lx-gif-optimized">
  <div class="lx-attributes-panel">
    <div class="lx-legend">Highlights Legend: <span class="lx-label" style="background-color:#D2E3FC;">character</span> <span class="lx-label" style="background-color:#C8E6C9;">emotion</span> <span class="lx-label" style="background-color:#FEF0C3;">relationship</span></div>
    <div id="attributesContainer"></div>
  </div>
  <div class="lx-text-window" id="textWindow">
    <span class="lx-highlight lx-current-highlight" data-idx="0" style="background-color:#D2E3FC;">Lady Juliet</span> gazed longingly at the stars, her <span class="lx-highlight" data-idx="1" style="background-color:#C8E6C9;">heart aching</span> <span class="lx-highlight" data-idx="2" style="background-color:#FEF0C3;">for Romeo</span>
  </div>
  <div class="lx-controls">
    <div class="lx-button-row">
      <button class="lx-control-btn" onclick="playPause()">▶️ Play</button>
      <button class="lx-control-btn" onclick="prevExtraction()">⏮ Previous</button>
      <button class="lx-control-btn" onclick="nextExtraction()">⏭ Next</button>
    </div>
    <div class="lx-progress-container">
      <input type="range" id="progressSlider" class="lx-progress-slider"
             min="0" max="2" value="0"
             onchange="jumpToExtraction(this.value)">
    </div>
    <div class="lx-status-text">
      Entity <span id="entityInfo">1/3</span> |
      Pos <span id="posInfo">[0-11]</span>
    </div>
  </div>
</div>
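<!-- Each highlighted span in the text window carries a data-idx attribute that
     indexes into the extractions array defined in the script below; the
     playback controls simply move currentIndex through that array. -->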

<script>
  (function() {
    const extractions = [{"index": 0, "class": "character", "text": "Lady Juliet", "color": "#D2E3FC", "startPos": 0, "endPos": 11, "beforeText": "", "extractionText": "Lady Juliet", "afterText": " gazed longingly at the stars, her heart aching for Romeo", "attributesHtml": "<div><strong>class:</strong> character</div><div><strong>attributes:</strong> {<span class=\"lx-attr-key\">emotional_state</span>: <span class=\"lx-attr-value\">longing</span>}</div>"}, {"index": 1, "class": "emotion", "text": "heart aching", "color": "#C8E6C9", "startPos": 46, "endPos": 58, "beforeText": "Lady Juliet gazed longingly at the stars, her ", "extractionText": "heart aching", "afterText": " for Romeo", "attributesHtml": "<div><strong>class:</strong> emotion</div><div><strong>attributes:</strong> {<span class=\"lx-attr-key\">feeling</span>: <span class=\"lx-attr-value\">ache</span>}</div>"}, {"index": 2, "class": "relationship", "text": "for Romeo", "color": "#FEF0C3", "startPos": 59, "endPos": 68, "beforeText": "Lady Juliet gazed longingly at the stars, her heart aching ", "extractionText": "for Romeo", "afterText": "", "attributesHtml": "<div><strong>class:</strong> relationship</div><div><strong>attributes:</strong> {<span class=\"lx-attr-key\">type</span>: <span class=\"lx-attr-value\">love</span>}</div>"}];
    let currentIndex = 0;
    let isPlaying = false;
    let animationInterval = null;
    let animationSpeed = 1.0;

    function updateDisplay() {
      const extraction = extractions[currentIndex];
      if (!extraction) return;

      document.getElementById('attributesContainer').innerHTML = extraction.attributesHtml;
      document.getElementById('entityInfo').textContent = (currentIndex + 1) + '/' + extractions.length;
      document.getElementById('posInfo').textContent = '[' + extraction.startPos + '-' + extraction.endPos + ']';
      document.getElementById('progressSlider').value = currentIndex;

      const playBtn = document.querySelector('.lx-control-btn');
      if (playBtn) playBtn.textContent = isPlaying ? '⏸ Pause' : '▶️ Play';

      const prevHighlight = document.querySelector('.lx-text-window .lx-current-highlight');
      if (prevHighlight) prevHighlight.classList.remove('lx-current-highlight');
      const currentSpan = document.querySelector('.lx-text-window span[data-idx="' + currentIndex + '"]');
      if (currentSpan) {
        currentSpan.classList.add('lx-current-highlight');
        currentSpan.scrollIntoView({block: 'center', behavior: 'smooth'});
      }
    }

    function nextExtraction() {
      currentIndex = (currentIndex + 1) % extractions.length;
      updateDisplay();
    }

    function prevExtraction() {
      currentIndex = (currentIndex - 1 + extractions.length) % extractions.length;
      updateDisplay();
    }

    function jumpToExtraction(index) {
      currentIndex = parseInt(index);
      updateDisplay();
    }

    function playPause() {
      if (isPlaying) {
        clearInterval(animationInterval);
        isPlaying = false;
      } else {
        animationInterval = setInterval(nextExtraction, animationSpeed * 1000);
        isPlaying = true;
      }
      updateDisplay();
    }

    window.playPause = playPause;
    window.nextExtraction = nextExtraction;
    window.prevExtraction = prevExtraction;
    window.jumpToExtraction = jumpToExtraction;

    updateDisplay();
  })();
</script>
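This page follows the layout of langextract's built-in visualizer. Assuming the extractions in tools/extraction_results.jsonl were produced by tools/langextract_tool.py, a file like this can be regenerated with a sketch along these lines; the lx.visualize call is per langextract's documented API, and the file paths are this repo's.

import langextract as lx

# Regenerate the animated highlight page from the saved extractions.
# lx.visualize() renders JSONL extraction results as self-contained HTML.
html_obj = lx.visualize("tools/extraction_results.jsonl")
with open("tools/visualization.html", "w", encoding="utf-8") as f:
    # Recent langextract versions return an IPython HTML object; older ones a str.
    f.write(html_obj.data if hasattr(html_obj, "data") else html_obj)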