KevanSoon
committed on
Commit
·
30f61f6
1
Parent(s):
b002983
adjusted app.py and tools
Browse files- app.py +323 -84
- requirements.txt +2 -1
- tools/__pycache__/tools.cpython-310.pyc +0 -0
app.py
CHANGED
|
@@ -18,8 +18,15 @@ from dotenv import load_dotenv
|
|
| 18 |
import google.generativeai as genai
|
| 19 |
from google.api_core import exceptions as google_exceptions
|
| 20 |
from pydantic import BaseModel
|
|
|
|
|
|
|
|
|
|
| 21 |
from auth.clerk import verify_clerk_jwt
|
| 22 |
-
from tools.tools import
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
|
| 25 |
security = HTTPBearer()
|
|
@@ -32,7 +39,7 @@ SUPABASE_SERVICE_ROLE_KEY = os.getenv("SUPABASE_SERVICE_ROLE_KEY")
|
|
| 32 |
app = FastAPI(
|
| 33 |
title="Document Translator (Final Architecture)",
|
| 34 |
description="Pipeline: Nemo (JSON) -> Sea-Lion (Translate JSON) -> Gemini (HTML)",
|
| 35 |
-
version="10.0.1",
|
| 36 |
)
|
| 37 |
|
| 38 |
# Allow requests from the default React frontend port
|
|
@@ -51,21 +58,25 @@ def wrap_words_with_spans(html: str) -> str:
|
|
| 51 |
replacer.counter += 1
|
| 52 |
word = match.group(0)
|
| 53 |
return f'<span data-clickable="true" data-id="word-{replacer.counter}">{word}</span>'
|
|
|
|
| 54 |
replacer.counter = 0
|
| 55 |
|
| 56 |
-
pattern = r
|
| 57 |
|
| 58 |
-
for tag in [
|
| 59 |
# regex to capture content inside these tags
|
| 60 |
-
regex = re.compile(
|
|
|
|
| 61 |
def replacer_func(m):
|
| 62 |
open_tag, inner_text, close_tag = m.groups()
|
| 63 |
wrapped_text = re.sub(pattern, replacer, inner_text)
|
| 64 |
return open_tag + wrapped_text + close_tag
|
|
|
|
| 65 |
html = regex.sub(replacer_func, html)
|
| 66 |
|
| 67 |
return html
|
| 68 |
|
|
|
|
| 69 |
def inject_dropdown_script(html: str) -> str:
|
| 70 |
script = """
|
| 71 |
<script>
|
|
@@ -171,25 +182,29 @@ window.addEventListener('DOMContentLoaded', () => {
|
|
| 171 |
return html.replace("</body>", script + "\n</body>")
|
| 172 |
else:
|
| 173 |
return html + script
|
| 174 |
-
|
|
|
|
| 175 |
# Define a Pydantic model to enforce the structure of the incoming request body
|
| 176 |
class HtmlAnalysisRequest(BaseModel):
|
| 177 |
html: str
|
| 178 |
|
|
|
|
| 179 |
@app.post("/api/analyze_html")
|
| 180 |
async def analyze_html_file(file: UploadFile = File(...)):
|
| 181 |
"""
|
| 182 |
-
Receives an uploaded HTML file, extracts its text content, and uses the
|
| 183 |
Gemini tool to generate a summary and key informational points.
|
| 184 |
"""
|
| 185 |
# Check if the uploaded file is an HTML file
|
| 186 |
if file.content_type != "text/html":
|
| 187 |
-
raise HTTPException(
|
|
|
|
|
|
|
| 188 |
|
| 189 |
try:
|
| 190 |
# Step 1: Read the content of the uploaded file
|
| 191 |
html_content_bytes = await file.read()
|
| 192 |
-
html_content = html_content_bytes.decode(
|
| 193 |
|
| 194 |
# Step 2: Extract text from the HTML using our tool
|
| 195 |
document_text = extract_text_from_html(html_content)
|
|
@@ -198,16 +213,18 @@ async def analyze_html_file(file: UploadFile = File(...)):
|
|
| 198 |
analysis_results = await generate_document_insights(document_text)
|
| 199 |
|
| 200 |
# Check if the tool returned a functional error
|
| 201 |
-
if
|
| 202 |
-
raise HTTPException(status_code=500, detail=analysis_results[
|
| 203 |
-
|
| 204 |
return analysis_results
|
| 205 |
|
| 206 |
except Exception as e:
|
| 207 |
# Catch any other unexpected errors
|
| 208 |
-
raise HTTPException(
|
| 209 |
-
|
| 210 |
-
|
|
|
|
|
|
|
| 211 |
@app.post("/api/translate_frontend")
|
| 212 |
async def translate_text(request: Request):
|
| 213 |
try:
|
|
@@ -216,32 +233,30 @@ async def translate_text(request: Request):
|
|
| 216 |
target_language = data.get("target_language")
|
| 217 |
|
| 218 |
if not text or not target_language:
|
| 219 |
-
raise HTTPException(
|
|
|
|
|
|
|
|
|
|
| 220 |
|
| 221 |
url = "https://api.sea-lion.ai/v1/chat/completions"
|
| 222 |
api_key = os.getenv("SEALION_API_KEY")
|
| 223 |
|
| 224 |
headers = {
|
| 225 |
"Authorization": f"Bearer {api_key}",
|
| 226 |
-
"Content-Type": "application/json"
|
| 227 |
# No "accept" header or set to "application/json"
|
| 228 |
}
|
| 229 |
|
| 230 |
prompt = (
|
| 231 |
f"Please translate the following text to {target_language} and return "
|
| 232 |
"ONLY the translated text without any explanations or extra formatting:\n\n"
|
| 233 |
-
f"
|
| 234 |
)
|
| 235 |
|
| 236 |
payload = {
|
| 237 |
"max_completion_tokens": 1024,
|
| 238 |
-
"messages": [
|
| 239 |
-
|
| 240 |
-
"role": "user",
|
| 241 |
-
"content": prompt
|
| 242 |
-
}
|
| 243 |
-
],
|
| 244 |
-
"model": "aisingapore/Gemma-SEA-LION-v3-9B-IT"
|
| 245 |
}
|
| 246 |
|
| 247 |
response = requests.post(url, headers=headers, data=json.dumps(payload))
|
|
@@ -254,15 +269,19 @@ async def translate_text(request: Request):
|
|
| 254 |
translated_text = response_json["choices"][0]["message"]["content"].strip()
|
| 255 |
|
| 256 |
if not translated_text:
|
| 257 |
-
raise HTTPException(
|
|
|
|
|
|
|
| 258 |
|
| 259 |
return {"translated_text": translated_text}
|
| 260 |
|
| 261 |
except requests.exceptions.RequestException as e:
|
| 262 |
-
raise HTTPException(
|
|
|
|
|
|
|
| 263 |
except Exception as e:
|
| 264 |
raise HTTPException(status_code=500, detail=f"Unexpected error: {e}")
|
| 265 |
-
|
| 266 |
|
| 267 |
# --- Model 2: Sea-Lion (The JSON Translator) ---
|
| 268 |
@app.post("/api/translate")
|
|
@@ -275,28 +294,23 @@ async def translate_text(text: str, target_language: str):
|
|
| 275 |
url = "https://api.sea-lion.ai/v1/chat/completions"
|
| 276 |
|
| 277 |
# It's recommended to store API keys securely, e.g., in environment variables
|
| 278 |
-
api_key =
|
| 279 |
|
| 280 |
# The headers for the request
|
| 281 |
headers = {
|
| 282 |
"accept": "text/plain",
|
| 283 |
"Authorization": f"Bearer {api_key}",
|
| 284 |
-
"Content-Type": "application/json"
|
| 285 |
}
|
| 286 |
|
| 287 |
# Create a dynamic prompt for the translation task
|
| 288 |
-
prompt = f
|
| 289 |
|
| 290 |
# The JSON data payload for the request
|
| 291 |
data = {
|
| 292 |
"max_completion_tokens": 4096, # Increased token limit for longer translations
|
| 293 |
-
"messages": [
|
| 294 |
-
|
| 295 |
-
"role": "user",
|
| 296 |
-
"content": prompt
|
| 297 |
-
}
|
| 298 |
-
],
|
| 299 |
-
"model": "aisingapore/Llama-SEA-LION-v3-70B-IT"
|
| 300 |
}
|
| 301 |
|
| 302 |
try:
|
|
@@ -307,19 +321,28 @@ async def translate_text(text: str, target_language: str):
|
|
| 307 |
# The response from this specific API is plain text, not JSON.
|
| 308 |
# We will wrap it in a JSON structure for consistency in our API.
|
| 309 |
translated_text = response.text
|
| 310 |
-
|
| 311 |
# It's good practice to check if the response is empty
|
| 312 |
if not translated_text:
|
| 313 |
-
|
|
|
|
|
|
|
|
|
|
| 314 |
|
| 315 |
return {"translated_text": translated_text}
|
| 316 |
|
| 317 |
except requests.exceptions.RequestException as e:
|
| 318 |
# Handle network-related errors
|
| 319 |
-
raise HTTPException(
|
|
|
|
|
|
|
|
|
|
| 320 |
except Exception as e:
|
| 321 |
# Handle other potential errors
|
| 322 |
-
raise HTTPException(
|
|
|
|
|
|
|
|
|
|
| 323 |
|
| 324 |
|
| 325 |
# --- Model 3: Gemini (The HTML Generator) ---
|
|
@@ -334,7 +357,7 @@ async def generate_html_from_translated_json(translated_json: dict) -> str:
|
|
| 334 |
raise ValueError("GEMINI_API_KEY not found in environment variables.")
|
| 335 |
|
| 336 |
genai.configure(api_key=api_key)
|
| 337 |
-
model = genai.GenerativeModel(model_name=
|
| 338 |
json_string_for_prompt = json.dumps(translated_json, indent=2)
|
| 339 |
|
| 340 |
prompt = f"""
|
|
@@ -366,7 +389,7 @@ async def generate_html_from_translated_json(translated_json: dict) -> str:
|
|
| 366 |
response = model.generate_content(prompt)
|
| 367 |
|
| 368 |
# Extract raw HTML from Gemini markdown code block
|
| 369 |
-
match = re.search(r
|
| 370 |
raw_html = match.group(1).strip() if match else response.text.strip()
|
| 371 |
|
| 372 |
# Wrap each word in clickable spans
|
|
@@ -388,7 +411,9 @@ async def generate_html_from_translated_json(translated_json: dict) -> str:
|
|
| 388 |
|
| 389 |
# --- API Endpoint Orchestrating the Pipeline ---
|
| 390 |
@app.post("/api/translate_file", response_class=HTMLResponse)
|
| 391 |
-
async def translate_document_to_raw_html(
|
|
|
|
|
|
|
| 392 |
"""
|
| 393 |
Processes a document using the final, robust pipeline:
|
| 394 |
1. Nemo extracts content to JSON.
|
|
@@ -405,16 +430,31 @@ async def translate_document_to_raw_html(target_language: str = Form(...), file:
|
|
| 405 |
file_b64 = base64.b64encode(file_content).decode("utf-8")
|
| 406 |
nemo_data = {
|
| 407 |
"model": "nvidia/nemoretriever-parse",
|
| 408 |
-
"messages": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 409 |
"max_tokens": 2048,
|
| 410 |
}
|
| 411 |
-
headers = {
|
| 412 |
-
model_response = requests.post(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 413 |
model_response.raise_for_status()
|
| 414 |
nemo_response_json = model_response.json()
|
| 415 |
print(nemo_response_json)
|
| 416 |
print("*********** Step 1 Done ***********")
|
| 417 |
-
|
| 418 |
|
| 419 |
print("*********** Step 2 in Progress ***********")
|
| 420 |
# === STEP 2: Get translated JSON from Sea-Lion (The Translator) ===
|
|
@@ -435,58 +475,249 @@ async def translate_document_to_raw_html(target_language: str = Form(...), file:
|
|
| 435 |
return HTMLResponse(content=final_html)
|
| 436 |
|
| 437 |
except requests.exceptions.RequestException as e:
|
| 438 |
-
raise HTTPException(
|
|
|
|
|
|
|
|
|
|
| 439 |
except Exception as e:
|
| 440 |
# This will catch any errors, including the ValueError from the Sea-Lion function
|
| 441 |
-
raise HTTPException(
|
| 442 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 443 |
@app.post("/api/verify_document_keywords")
|
| 444 |
async def verify_document_keywords(
|
| 445 |
-
file: UploadFile = File(...),
|
| 446 |
-
analysis_type: str = Form("legality"),
|
| 447 |
-
search_context: str = Form("Singapore employment law")
|
| 448 |
):
|
| 449 |
"""
|
| 450 |
-
Receives an HTML file and a configuration via form data, then uses the
|
| 451 |
agent-to-agent RAG workflow to identify and verify key claims.
|
| 452 |
"""
|
| 453 |
# Check if the uploaded file is an HTML file
|
| 454 |
if file.content_type != "text/html":
|
| 455 |
-
raise HTTPException(
|
|
|
|
|
|
|
| 456 |
|
| 457 |
try:
|
| 458 |
# Step 1: Read content from the uploaded file and extract text
|
| 459 |
html_content_bytes = await file.read()
|
| 460 |
-
html_content = html_content_bytes.decode(
|
| 461 |
document_text = extract_text_from_html(html_content)
|
| 462 |
|
| 463 |
if not document_text.strip():
|
| 464 |
raise HTTPException(
|
| 465 |
status_code=400,
|
| 466 |
-
detail="Could not extract any meaningful text from the provided HTML content."
|
| 467 |
)
|
| 468 |
|
| 469 |
# Step 2: Prepare the configuration and call the new analysis tool
|
| 470 |
-
config = {
|
| 471 |
-
"analysis_type": analysis_type,
|
| 472 |
-
"search_context": search_context
|
| 473 |
-
}
|
| 474 |
analysis_results = await analyze_keywords_with_web_search(document_text, config)
|
| 475 |
|
| 476 |
# Step 3: Handle potential errors from the tool
|
| 477 |
-
if
|
| 478 |
-
raise HTTPException(status_code=500, detail=analysis_results[
|
| 479 |
-
|
| 480 |
# Step 4: Return the successful analysis
|
| 481 |
return analysis_results
|
| 482 |
|
| 483 |
except Exception as e:
|
| 484 |
# Catch any other unexpected errors during the process
|
| 485 |
-
raise HTTPException(
|
| 486 |
-
|
|
|
|
| 487 |
|
| 488 |
|
| 489 |
-
#testing clerk backend authentication
|
| 490 |
# @app.post("/upload")
|
| 491 |
# async def upload_file(
|
| 492 |
# authorization: str = Header(...),
|
|
@@ -504,11 +735,9 @@ async def verify_document_keywords(
|
|
| 504 |
# # You can securely store this file, e.g., to Supabase or local
|
| 505 |
# return {"message": f"File uploaded by Clerk user {user_id}"}
|
| 506 |
|
|
|
|
| 507 |
@app.post("/upload")
|
| 508 |
-
async def upload_file(
|
| 509 |
-
authorization: str = Header(...),
|
| 510 |
-
file: UploadFile = File(...)
|
| 511 |
-
):
|
| 512 |
if not authorization.startswith("Bearer "):
|
| 513 |
raise HTTPException(status_code=401, detail="Missing Bearer token")
|
| 514 |
|
|
@@ -526,11 +755,13 @@ async def upload_file(
|
|
| 526 |
"Authorization": f"Bearer {SUPABASE_SERVICE_ROLE_KEY}",
|
| 527 |
"Content-Type": file.content_type,
|
| 528 |
},
|
| 529 |
-
content=await file.read()
|
| 530 |
)
|
| 531 |
|
| 532 |
if upload_resp.status_code != 200:
|
| 533 |
-
raise HTTPException(
|
|
|
|
|
|
|
| 534 |
|
| 535 |
file_url = f"user-documents/{filename}"
|
| 536 |
|
|
@@ -542,23 +773,27 @@ async def upload_file(
|
|
| 542 |
"Authorization": f"Bearer {SUPABASE_SERVICE_ROLE_KEY}",
|
| 543 |
"apikey": SUPABASE_SERVICE_ROLE_KEY,
|
| 544 |
"Content-Type": "application/json",
|
| 545 |
-
"Prefer": "return=representation"
|
| 546 |
},
|
| 547 |
json={
|
| 548 |
"user_id": user_id,
|
| 549 |
"filename": filename.split("/")[-1],
|
| 550 |
-
"file_url": file_url
|
| 551 |
-
}
|
| 552 |
)
|
| 553 |
|
| 554 |
if insert_resp.status_code >= 300:
|
| 555 |
-
raise HTTPException(
|
|
|
|
|
|
|
| 556 |
|
| 557 |
return {"message": f"File uploaded as {filename}"}
|
| 558 |
|
| 559 |
|
| 560 |
@app.get("/api/documents")
|
| 561 |
-
async def get_user_documents(
|
|
|
|
|
|
|
| 562 |
token = credentials.credentials
|
| 563 |
claims = await verify_clerk_jwt(token)
|
| 564 |
user_id = claims.get("sub")
|
|
@@ -600,11 +835,15 @@ async def get_user_documents(credentials: HTTPAuthorizationCredentials = Depends
|
|
| 600 |
)
|
| 601 |
|
| 602 |
if signed_url_resp.status_code == 200:
|
| 603 |
-
print(
|
| 604 |
-
|
| 605 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 606 |
else:
|
| 607 |
doc["signed_url"] = None
|
| 608 |
print(documents)
|
| 609 |
|
| 610 |
-
return documents
|
|
|
|
| 18 |
import google.generativeai as genai
|
| 19 |
from google.api_core import exceptions as google_exceptions
|
| 20 |
from pydantic import BaseModel
|
| 21 |
+
from gradio_client import Client, handle_file
|
| 22 |
+
import tempfile
|
| 23 |
+
|
| 24 |
from auth.clerk import verify_clerk_jwt
|
| 25 |
+
from tools.tools import (
|
| 26 |
+
extract_text_from_html,
|
| 27 |
+
generate_document_insights,
|
| 28 |
+
analyze_keywords_with_web_search,
|
| 29 |
+
)
|
| 30 |
|
| 31 |
|
| 32 |
security = HTTPBearer()
|
|
|
|
| 39 |
app = FastAPI(
|
| 40 |
title="Document Translator (Final Architecture)",
|
| 41 |
description="Pipeline: Nemo (JSON) -> Sea-Lion (Translate JSON) -> Gemini (HTML)",
|
| 42 |
+
version="10.0.1", # Final Architecture, patched
|
| 43 |
)
|
| 44 |
|
| 45 |
# Allow requests from the default React frontend port
|
|
|
|
| 58 |
replacer.counter += 1
|
| 59 |
word = match.group(0)
|
| 60 |
return f'<span data-clickable="true" data-id="word-{replacer.counter}">{word}</span>'
|
| 61 |
+
|
| 62 |
replacer.counter = 0
|
| 63 |
|
| 64 |
+
pattern = r"\b\w+[.,?!]?\b" # matches words with optional trailing punctuation
|
| 65 |
|
| 66 |
+
for tag in ["p", "h1", "h2", "td"]:
|
| 67 |
# regex to capture content inside these tags
|
| 68 |
+
regex = re.compile(rf"(<{tag}[^>]*>)(.*?)(</{tag}>)", re.DOTALL)
|
| 69 |
+
|
| 70 |
def replacer_func(m):
|
| 71 |
open_tag, inner_text, close_tag = m.groups()
|
| 72 |
wrapped_text = re.sub(pattern, replacer, inner_text)
|
| 73 |
return open_tag + wrapped_text + close_tag
|
| 74 |
+
|
| 75 |
html = regex.sub(replacer_func, html)
|
| 76 |
|
| 77 |
return html
|
| 78 |
|
| 79 |
+
|
| 80 |
def inject_dropdown_script(html: str) -> str:
|
| 81 |
script = """
|
| 82 |
<script>
|
|
|
|
| 182 |
return html.replace("</body>", script + "\n</body>")
|
| 183 |
else:
|
| 184 |
return html + script
|
| 185 |
+
|
| 186 |
+
|
| 187 |
# Define a Pydantic model to enforce the structure of the incoming request body
|
| 188 |
class HtmlAnalysisRequest(BaseModel):
|
| 189 |
html: str
|
| 190 |
|
| 191 |
+
|
| 192 |
@app.post("/api/analyze_html")
|
| 193 |
async def analyze_html_file(file: UploadFile = File(...)):
|
| 194 |
"""
|
| 195 |
+
Receives an uploaded HTML file, extracts its text content, and uses the
|
| 196 |
Gemini tool to generate a summary and key informational points.
|
| 197 |
"""
|
| 198 |
# Check if the uploaded file is an HTML file
|
| 199 |
if file.content_type != "text/html":
|
| 200 |
+
raise HTTPException(
|
| 201 |
+
status_code=400, detail="Unsupported file type. Please upload a .html file."
|
| 202 |
+
)
|
| 203 |
|
| 204 |
try:
|
| 205 |
# Step 1: Read the content of the uploaded file
|
| 206 |
html_content_bytes = await file.read()
|
| 207 |
+
html_content = html_content_bytes.decode("utf-8")
|
| 208 |
|
| 209 |
# Step 2: Extract text from the HTML using our tool
|
| 210 |
document_text = extract_text_from_html(html_content)
|
|
|
|
| 213 |
analysis_results = await generate_document_insights(document_text)
|
| 214 |
|
| 215 |
# Check if the tool returned a functional error
|
| 216 |
+
if "error" in analysis_results:
|
| 217 |
+
raise HTTPException(status_code=500, detail=analysis_results["error"])
|
| 218 |
+
|
| 219 |
return analysis_results
|
| 220 |
|
| 221 |
except Exception as e:
|
| 222 |
# Catch any other unexpected errors
|
| 223 |
+
raise HTTPException(
|
| 224 |
+
status_code=500, detail=f"An unexpected error occurred: {e}"
|
| 225 |
+
)
|
| 226 |
+
|
| 227 |
+
|
| 228 |
@app.post("/api/translate_frontend")
|
| 229 |
async def translate_text(request: Request):
|
| 230 |
try:
|
|
|
|
| 233 |
target_language = data.get("target_language")
|
| 234 |
|
| 235 |
if not text or not target_language:
|
| 236 |
+
raise HTTPException(
|
| 237 |
+
status_code=400,
|
| 238 |
+
detail="Missing 'text' or 'target_language' in request body",
|
| 239 |
+
)
|
| 240 |
|
| 241 |
url = "https://api.sea-lion.ai/v1/chat/completions"
|
| 242 |
api_key = os.getenv("SEALION_API_KEY")
|
| 243 |
|
| 244 |
headers = {
|
| 245 |
"Authorization": f"Bearer {api_key}",
|
| 246 |
+
"Content-Type": "application/json",
|
| 247 |
# No "accept" header or set to "application/json"
|
| 248 |
}
|
| 249 |
|
| 250 |
prompt = (
|
| 251 |
f"Please translate the following text to {target_language} and return "
|
| 252 |
"ONLY the translated text without any explanations or extra formatting:\n\n"
|
| 253 |
+
f'"{text}"'
|
| 254 |
)
|
| 255 |
|
| 256 |
payload = {
|
| 257 |
"max_completion_tokens": 1024,
|
| 258 |
+
"messages": [{"role": "user", "content": prompt}],
|
| 259 |
+
"model": "aisingapore/Gemma-SEA-LION-v3-9B-IT",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
}
|
| 261 |
|
| 262 |
response = requests.post(url, headers=headers, data=json.dumps(payload))
|
|
|
|
| 269 |
translated_text = response_json["choices"][0]["message"]["content"].strip()
|
| 270 |
|
| 271 |
if not translated_text:
|
| 272 |
+
raise HTTPException(
|
| 273 |
+
status_code=500, detail="Empty response from translation model."
|
| 274 |
+
)
|
| 275 |
|
| 276 |
return {"translated_text": translated_text}
|
| 277 |
|
| 278 |
except requests.exceptions.RequestException as e:
|
| 279 |
+
raise HTTPException(
|
| 280 |
+
status_code=502, detail=f"Translation API request failed: {e}"
|
| 281 |
+
)
|
| 282 |
except Exception as e:
|
| 283 |
raise HTTPException(status_code=500, detail=f"Unexpected error: {e}")
|
| 284 |
+
|
| 285 |
|
| 286 |
# --- Model 2: Sea-Lion (The JSON Translator) ---
|
| 287 |
@app.post("/api/translate")
|
|
|
|
| 294 |
url = "https://api.sea-lion.ai/v1/chat/completions"
|
| 295 |
|
| 296 |
# It's recommended to store API keys securely, e.g., in environment variables
|
| 297 |
+
api_key = os.getenv("SEALION_API_KEY")
|
| 298 |
|
| 299 |
# The headers for the request
|
| 300 |
headers = {
|
| 301 |
"accept": "text/plain",
|
| 302 |
"Authorization": f"Bearer {api_key}",
|
| 303 |
+
"Content-Type": "application/json",
|
| 304 |
}
|
| 305 |
|
| 306 |
# Create a dynamic prompt for the translation task
|
| 307 |
+
prompt = f'Translate the following text to {text}: "{target_language}"'
|
| 308 |
|
| 309 |
# The JSON data payload for the request
|
| 310 |
data = {
|
| 311 |
"max_completion_tokens": 4096, # Increased token limit for longer translations
|
| 312 |
+
"messages": [{"role": "user", "content": prompt}],
|
| 313 |
+
"model": "aisingapore/Llama-SEA-LION-v3-70B-IT",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 314 |
}
|
| 315 |
|
| 316 |
try:
|
|
|
|
| 321 |
# The response from this specific API is plain text, not JSON.
|
| 322 |
# We will wrap it in a JSON structure for consistency in our API.
|
| 323 |
translated_text = response.text
|
| 324 |
+
|
| 325 |
# It's good practice to check if the response is empty
|
| 326 |
if not translated_text:
|
| 327 |
+
raise HTTPException(
|
| 328 |
+
status_code=500,
|
| 329 |
+
detail="Received an empty response from the translation model.",
|
| 330 |
+
)
|
| 331 |
|
| 332 |
return {"translated_text": translated_text}
|
| 333 |
|
| 334 |
except requests.exceptions.RequestException as e:
|
| 335 |
# Handle network-related errors
|
| 336 |
+
raise HTTPException(
|
| 337 |
+
status_code=502,
|
| 338 |
+
detail=f"Failed to communicate with the translation AI model: {e}",
|
| 339 |
+
)
|
| 340 |
except Exception as e:
|
| 341 |
# Handle other potential errors
|
| 342 |
+
raise HTTPException(
|
| 343 |
+
status_code=500,
|
| 344 |
+
detail=f"An unexpected error occurred during translation: {e}",
|
| 345 |
+
)
|
| 346 |
|
| 347 |
|
| 348 |
# --- Model 3: Gemini (The HTML Generator) ---
|
|
|
|
| 357 |
raise ValueError("GEMINI_API_KEY not found in environment variables.")
|
| 358 |
|
| 359 |
genai.configure(api_key=api_key)
|
| 360 |
+
model = genai.GenerativeModel(model_name="gemini-2.0-flash")
|
| 361 |
json_string_for_prompt = json.dumps(translated_json, indent=2)
|
| 362 |
|
| 363 |
prompt = f"""
|
|
|
|
| 389 |
response = model.generate_content(prompt)
|
| 390 |
|
| 391 |
# Extract raw HTML from Gemini markdown code block
|
| 392 |
+
match = re.search(r"```html\n(.*?)\n```", response.text, re.DOTALL)
|
| 393 |
raw_html = match.group(1).strip() if match else response.text.strip()
|
| 394 |
|
| 395 |
# Wrap each word in clickable spans
|
|
|
|
| 411 |
|
| 412 |
# --- API Endpoint Orchestrating the Pipeline ---
|
| 413 |
@app.post("/api/translate_file", response_class=HTMLResponse)
|
| 414 |
+
async def translate_document_to_raw_html(
|
| 415 |
+
target_language: str = Form(...), file: UploadFile = File(...)
|
| 416 |
+
):
|
| 417 |
"""
|
| 418 |
Processes a document using the final, robust pipeline:
|
| 419 |
1. Nemo extracts content to JSON.
|
|
|
|
| 430 |
file_b64 = base64.b64encode(file_content).decode("utf-8")
|
| 431 |
nemo_data = {
|
| 432 |
"model": "nvidia/nemoretriever-parse",
|
| 433 |
+
"messages": [
|
| 434 |
+
{
|
| 435 |
+
"role": "user",
|
| 436 |
+
"content": [
|
| 437 |
+
{
|
| 438 |
+
"type": "image_url",
|
| 439 |
+
"image_url": {
|
| 440 |
+
"url": f"data:{content_type};base64,{file_b64}"
|
| 441 |
+
},
|
| 442 |
+
}
|
| 443 |
+
],
|
| 444 |
+
}
|
| 445 |
+
],
|
| 446 |
"max_tokens": 2048,
|
| 447 |
}
|
| 448 |
+
headers = {"accept": "application/json", "Content-Type": "application/json"}
|
| 449 |
+
model_response = requests.post(
|
| 450 |
+
"http://localhost:8000/v1/chat/completions",
|
| 451 |
+
headers=headers,
|
| 452 |
+
data=json.dumps(nemo_data),
|
| 453 |
+
)
|
| 454 |
model_response.raise_for_status()
|
| 455 |
nemo_response_json = model_response.json()
|
| 456 |
print(nemo_response_json)
|
| 457 |
print("*********** Step 1 Done ***********")
|
|
|
|
| 458 |
|
| 459 |
print("*********** Step 2 in Progress ***********")
|
| 460 |
# === STEP 2: Get translated JSON from Sea-Lion (The Translator) ===
|
|
|
|
| 475 |
return HTMLResponse(content=final_html)
|
| 476 |
|
| 477 |
except requests.exceptions.RequestException as e:
|
| 478 |
+
raise HTTPException(
|
| 479 |
+
status_code=502,
|
| 480 |
+
detail=f"Failed to communicate with a downstream AI model: {e}",
|
| 481 |
+
)
|
| 482 |
except Exception as e:
|
| 483 |
# This will catch any errors, including the ValueError from the Sea-Lion function
|
| 484 |
+
raise HTTPException(
|
| 485 |
+
status_code=500,
|
| 486 |
+
detail=f"An unexpected error occurred during processing: {e}",
|
| 487 |
+
)
|
| 488 |
+
|
| 489 |
+
|
| 490 |
+
# <<< --- START OF MVP PIPELINE ADDITIONS (Layout-Aware Version) --- >>>
|
| 491 |
+
|
| 492 |
+
|
| 493 |
+
async def extract_text_and_boxes_with_paddle(file_content: bytes) -> list[dict]:
    """
    Extract text and bounding boxes from an image using the PaddleOCR Space.

    The image bytes are written to a temporary ``.png`` file that is handed to
    the ``kevansoon/PaddleOCR`` Gradio client. The blocking client call runs in
    the event loop's default executor so the loop is not blocked while OCR
    is in progress.

    Args:
        file_content: Raw bytes of the uploaded image.

    Returns:
        The value returned by the Space's ``/predict`` endpoint, expected to be
        a list of dictionaries such as ``[{'text': '...', 'box': [...]}]``.
    """
    # delete=False: the path is passed to the OCR client after this handle is
    # closed, so removal is done explicitly in the ``finally`` below.
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
    temp_filepath = temp_file.name

    try:
        # The write happens inside the try so that a failed write still
        # reaches the cleanup and does not leave a stray temp file behind.
        with temp_file:
            temp_file.write(file_content)

        def do_ocr() -> list[dict]:
            """Synchronous function to be run in a separate thread."""
            client = Client("kevansoon/PaddleOCR")
            return client.predict(
                img=handle_file(temp_filepath),
                lang="en",
                api_name="/predict",
            )

        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, do_ocr)
    finally:
        os.unlink(temp_filepath)
|
| 520 |
+
|
| 521 |
+
|
| 522 |
+
async def translate_paddle_data_concurrently(
    paddle_data: list[dict], target_language: str
) -> list[dict]:
    """
    Translate the 'text' field of each item in ``paddle_data`` concurrently.

    One Sea-Lion chat-completion request is issued per item and all requests
    are awaited together with ``asyncio.gather``.

    Args:
        paddle_data: OCR items; each must provide ``"text"`` and ``"box"`` keys.
        target_language: Language name inserted into the translation prompt.

    Returns:
        A new list, in the same order as ``paddle_data``, of
        ``{"text": <translated text>, "box": <original box>}`` dictionaries.

    Raises:
        KeyError: If an item lacks ``"text"`` or ``"box"``.
        httpx.HTTPStatusError: If the API answers with an error status
            (raised by ``response.raise_for_status()``).
    """
    # Request settings are identical for every item, so build them once
    # instead of once per translation call.
    url = "https://api.sea-lion.ai/v1/chat/completions"
    api_key = os.getenv("SEALION_API_KEY")
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    async def call_sealion_for_translation(text_to_translate: str, lang: str) -> str:
        """Helper function to call the translation API for a single piece of text."""
        prompt = (
            f"Translate the following phrase to {lang} and return ONLY the "
            "translated text without explanations or extra formatting:"
            f'\n\n"{text_to_translate}"'
        )
        payload = {
            "max_completion_tokens": 256,  # Tokens for a single phrase, not a whole doc
            "messages": [{"role": "user", "content": prompt}],
            "model": "aisingapore/Gemma-SEA-LION-v3-9B-IT",
        }
        # A client per request is kept deliberately: it leaves the number of
        # simultaneous connections unconstrained by a shared pool limit.
        async with httpx.AsyncClient() as client:
            response = await client.post(
                url, headers=headers, json=payload, timeout=30.0
            )
            response.raise_for_status()
            return response.json()["choices"][0]["message"]["content"].strip()

    # Execute all translation tasks in parallel; gather preserves input order.
    translated_texts = await asyncio.gather(
        *(
            call_sealion_for_translation(item["text"], target_language)
            for item in paddle_data
        )
    )

    # Pair each translation with the bounding box of the item it came from.
    return [
        {"text": translated, "box": item["box"]}
        for translated, item in zip(translated_texts, paddle_data)
    ]
|
| 566 |
+
|
| 567 |
+
|
| 568 |
+
async def generate_html_from_paddle_data(translated_data: list[dict]) -> str:
    """
    Generate a layout-aware HTML document from translated OCR data via Gemini.

    The data is serialised to JSON, embedded in a prompt, and sent to the
    ``gemini-2.5-flash`` model on a worker thread. The reply is stripped of any
    surrounding markdown code fence, then passed through
    ``wrap_words_with_spans`` and ``inject_dropdown_script``.

    Args:
        translated_data: Items carrying translated ``text`` and ``box``
            coordinates, as described in the prompt.

    Returns:
        The final HTML string. On any failure (including a missing
        ``GEMINI_API_KEY``) an HTML error page describing the problem is
        returned instead of raising.
    """
    try:
        api_key = os.getenv("GEMINI_API_KEY")
        if not api_key:
            raise ValueError("GEMINI_API_KEY not found in environment variables.")

        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(model_name="gemini-2.5-flash")

        # ensure_ascii=False keeps translated non-ASCII text readable in the
        # prompt instead of turning it into \uXXXX escapes.
        json_data_for_prompt = json.dumps(translated_data, indent=2, ensure_ascii=False)

        # Assembled line by line; the resulting text is the same prompt as
        # before, one source line per prompt line.
        prompt = (
            "\n"
            "You are an expert system specializing in converting structured OCR data into a well-formatted HTML document that preserves the original layout.\n"
            "\n"
            "**Your Task:**\n"
            "1. Analyze the following JSON array. Each object contains a `text` field (pre-translated) and a `box` field (four [x, y] coordinates of its bounding box).\n"
            "2. Use the `box` coordinates to understand the document's spatial structure.\n"
            "- Elements with similar y-coordinates are likely on the same row.\n"
            "- Elements aligned vertically form columns.\n"
            "3. Reconstruct the visual layout using semantic HTML.\n"
            "- Use `<table>` for grid-like data (rows and columns). This is critical for payslips.\n"
            "- Use `<h1>`, `<h2>`, `<p>` for headings and paragraphs.\n"
            '- Do NOT use absolute positioning (e.g., `style="position: absolute; left: ..."`). Create a clean, flowing HTML structure.\n'
            "4. Your final output must ONLY be the raw HTML code. Do not add comments, markdown backticks, or any other explanatory text.\n"
            "\n"
            "**OCR Data to process:**\n"
            "```json\n"
            f"{json_data_for_prompt}\n"
            "```\n"
        )

        def do_request():
            """Synchronous function to be run in a separate thread."""
            response = model.generate_content(prompt)
            reply = response.text
            # The model is asked for raw HTML but may still wrap it in a code
            # fence. Accept "```html", "```HTML" or a bare "```" opener and
            # either LF or CRLF line endings, so no backticks leak into the page.
            fenced = re.search(
                r"```(?:html)?[ \t]*\r?\n(.*?)\r?\n[ \t]*```",
                reply,
                re.DOTALL | re.IGNORECASE,
            )
            raw_html = fenced.group(1).strip() if fenced else reply.strip()
            # Reuse existing functions to make the HTML interactive
            wrapped_html = wrap_words_with_spans(raw_html)
            return inject_dropdown_script(wrapped_html)

        return await asyncio.to_thread(do_request)
    except Exception as e:
        # Best-effort by design: callers always receive an HTML document, so
        # failures are reported in-page with the message HTML-escaped.
        error_message = f"An error occurred while generating the HTML structure with Gemini: {e}"
        return f"<html><body><h1>HTML Generation Error</h1><p>{html.escape(error_message)}</p></body></html>"
|
| 619 |
+
|
| 620 |
+
|
| 621 |
+
@app.post("/api/translate_file_mvp", response_class=HTMLResponse)
async def translate_document_mvp(
    target_language: str = Form(...), file: UploadFile = File(...)
):
    """
    Processes a document using the Layout-Aware MVP pipeline:
    1. PaddleOCR extracts text and coordinates.
    2. Sea-Lion translates each text block concurrently.
    3. Gemini uses the translated text and original coordinates to generate layout-aware HTML.

    Args:
        target_language: Language passed through to the translation step.
        file: Uploaded image; only "image/png" and "image/jpeg" are accepted.

    Returns:
        An HTMLResponse wrapping the HTML produced by the final pipeline step.

    Raises:
        HTTPException: 400 for an unsupported content type or when OCR yields
            no data; the downstream status code when an httpx call fails with
            an HTTP error status; 500 for any other unexpected failure.
    """
    content_type = file.content_type
    if content_type not in ["image/png", "image/jpeg"]:
        raise HTTPException(
            status_code=400,
            detail="Unsupported file type for MVP pipeline. Please use PNG or JPG.",
        )

    try:
        file_content = await file.read()

        # === MVP STEP 1: Extract text and coordinates with PaddleOCR ===
        paddle_data = await extract_text_and_boxes_with_paddle(file_content)
        if not paddle_data:
            raise HTTPException(
                status_code=400,
                detail="PaddleOCR could not extract any text from the image.",
            )

        # === MVP STEP 2: Translate each text block concurrently ===
        translated_data = await translate_paddle_data_concurrently(
            paddle_data, target_language
        )

        # === MVP STEP 3: Generate final, layout-aware HTML from Gemini ===
        final_html = await generate_html_from_paddle_data(translated_data)

        return HTMLResponse(content=final_html)

    except HTTPException:
        # Deliberate HTTP errors raised above (e.g. the 400 for empty OCR
        # output) must reach the client unchanged; without this clause the
        # generic handler below would rewrite them as a 500.
        raise
    except httpx.HTTPStatusError as e:
        raise HTTPException(
            status_code=e.response.status_code,
            detail=f"Error from a downstream AI service: {e.response.text}",
        ) from e
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"An unexpected error occurred during MVP processing: {str(e)}",
        ) from e
|
| 669 |
+
|
| 670 |
+
|
| 671 |
+
# <<< --- END OF MVP PIPELINE ADDITIONS (Layout-Aware Version) --- >>>
|
| 672 |
+
|
| 673 |
+
|
| 674 |
@app.post("/api/verify_document_keywords")
async def verify_document_keywords(
    file: UploadFile = File(...),
    analysis_type: str = Form("legality"),
    search_context: str = Form("Singapore employment law"),
):
    """
    Receives an HTML file and a configuration via form data, then uses the
    agent-to-agent RAG workflow to identify and verify key claims.

    Args:
        file: Uploaded document; its content type must be "text/html".
        analysis_type: Forwarded to the analysis tool as config["analysis_type"].
        search_context: Forwarded to the analysis tool as config["search_context"].

    Returns:
        The result object produced by analyze_keywords_with_web_search.

    Raises:
        HTTPException: 400 when the file is not HTML or no text can be
            extracted from it; 500 when the analysis tool reports an "error"
            entry or any other unexpected failure occurs.
    """
    # Check if the uploaded file is an HTML file
    if file.content_type != "text/html":
        raise HTTPException(
            status_code=400, detail="Unsupported file type. Please upload a .html file."
        )

    try:
        # Step 1: Read content from the uploaded file and extract text
        html_content_bytes = await file.read()
        html_content = html_content_bytes.decode("utf-8")
        document_text = extract_text_from_html(html_content)

        if not document_text.strip():
            raise HTTPException(
                status_code=400,
                detail="Could not extract any meaningful text from the provided HTML content.",
            )

        # Step 2: Prepare the configuration and call the new analysis tool
        config = {"analysis_type": analysis_type, "search_context": search_context}
        analysis_results = await analyze_keywords_with_web_search(document_text, config)

        # Step 3: Handle potential errors from the tool
        if "error" in analysis_results:
            raise HTTPException(status_code=500, detail=analysis_results["error"])

        # Step 4: Return the successful analysis
        return analysis_results

    except HTTPException:
        # Let the intentional 400/500 responses raised above pass through
        # unchanged instead of being re-wrapped as a generic 500 below.
        raise
    except Exception as e:
        # Catch any other unexpected errors during the process
        raise HTTPException(
            status_code=500, detail=f"An unexpected error occurred: {str(e)}"
        ) from e
|
| 718 |
|
| 719 |
|
| 720 |
+
# testing clerk backend authentication
|
| 721 |
# @app.post("/upload")
|
| 722 |
# async def upload_file(
|
| 723 |
# authorization: str = Header(...),
|
|
|
|
| 735 |
# # You can securely store this file, e.g., to Supabase or local
|
| 736 |
# return {"message": f"File uploaded by Clerk user {user_id}"}
|
| 737 |
|
| 738 |
+
|
| 739 |
@app.post("/upload")
|
| 740 |
+
async def upload_file(authorization: str = Header(...), file: UploadFile = File(...)):
|
|
|
|
|
|
|
|
|
|
| 741 |
if not authorization.startswith("Bearer "):
|
| 742 |
raise HTTPException(status_code=401, detail="Missing Bearer token")
|
| 743 |
|
|
|
|
| 755 |
"Authorization": f"Bearer {SUPABASE_SERVICE_ROLE_KEY}",
|
| 756 |
"Content-Type": file.content_type,
|
| 757 |
},
|
| 758 |
+
content=await file.read(),
|
| 759 |
)
|
| 760 |
|
| 761 |
if upload_resp.status_code != 200:
|
| 762 |
+
raise HTTPException(
|
| 763 |
+
status_code=500, detail="Failed to upload to Supabase Storage"
|
| 764 |
+
)
|
| 765 |
|
| 766 |
file_url = f"user-documents/{filename}"
|
| 767 |
|
|
|
|
| 773 |
"Authorization": f"Bearer {SUPABASE_SERVICE_ROLE_KEY}",
|
| 774 |
"apikey": SUPABASE_SERVICE_ROLE_KEY,
|
| 775 |
"Content-Type": "application/json",
|
| 776 |
+
"Prefer": "return=representation",
|
| 777 |
},
|
| 778 |
json={
|
| 779 |
"user_id": user_id,
|
| 780 |
"filename": filename.split("/")[-1],
|
| 781 |
+
"file_url": file_url,
|
| 782 |
+
},
|
| 783 |
)
|
| 784 |
|
| 785 |
if insert_resp.status_code >= 300:
|
| 786 |
+
raise HTTPException(
|
| 787 |
+
status_code=500, detail="Failed to insert document metadata"
|
| 788 |
+
)
|
| 789 |
|
| 790 |
return {"message": f"File uploaded as {filename}"}
|
| 791 |
|
| 792 |
|
| 793 |
@app.get("/api/documents")
|
| 794 |
+
async def get_user_documents(
|
| 795 |
+
credentials: HTTPAuthorizationCredentials = Depends(security),
|
| 796 |
+
):
|
| 797 |
token = credentials.credentials
|
| 798 |
claims = await verify_clerk_jwt(token)
|
| 799 |
user_id = claims.get("sub")
|
|
|
|
| 835 |
)
|
| 836 |
|
| 837 |
if signed_url_resp.status_code == 200:
|
| 838 |
+
print(
|
| 839 |
+
f"{SUPABASE_URL}/storage/v1{signed_url_resp.json().get('signedURL')}"
|
| 840 |
+
)
|
| 841 |
+
doc["signed_url"] = (
|
| 842 |
+
f"{SUPABASE_URL}/storage/v1{signed_url_resp.json().get('signedURL')}"
|
| 843 |
+
)
|
| 844 |
+
|
| 845 |
else:
|
| 846 |
doc["signed_url"] = None
|
| 847 |
print(documents)
|
| 848 |
|
| 849 |
+
return documents
|
requirements.txt
CHANGED
|
@@ -26,7 +26,6 @@ google-auth==2.40.3
|
|
| 26 |
google-auth-httplib2==0.2.0
|
| 27 |
google-generativeai==0.8.5
|
| 28 |
googleapis-common-protos==1.70.0
|
| 29 |
-
gradio_client==1.11.0
|
| 30 |
grpcio==1.74.0
|
| 31 |
grpcio-status==1.71.2
|
| 32 |
h11==0.16.0
|
|
@@ -94,3 +93,5 @@ urllib3==2.5.0
|
|
| 94 |
uvicorn==0.35.0
|
| 95 |
watchfiles==1.1.0
|
| 96 |
websockets==15.0.1
|
|
|
|
|
|
|
|
|
| 26 |
google-auth-httplib2==0.2.0
|
| 27 |
google-generativeai==0.8.5
|
| 28 |
googleapis-common-protos==1.70.0
|
|
|
|
| 29 |
grpcio==1.74.0
|
| 30 |
grpcio-status==1.71.2
|
| 31 |
h11==0.16.0
|
|
|
|
| 93 |
uvicorn==0.35.0
|
| 94 |
watchfiles==1.1.0
|
| 95 |
websockets==15.0.1
|
| 96 |
+
langextract
|
| 97 |
+
gradio_client
|
tools/__pycache__/tools.cpython-310.pyc
CHANGED
|
Binary files a/tools/__pycache__/tools.cpython-310.pyc and b/tools/__pycache__/tools.cpython-310.pyc differ
|
|
|