KevanSoon committed on
Commit 30f61f6 · 1 Parent(s): b002983

adjusted app.py and tools

app.py CHANGED
@@ -18,8 +18,15 @@ from dotenv import load_dotenv
18
  import google.generativeai as genai
19
  from google.api_core import exceptions as google_exceptions
20
  from pydantic import BaseModel
21
  from auth.clerk import verify_clerk_jwt
22
- from tools.tools import extract_text_from_html, generate_document_insights, analyze_keywords_with_web_search
23
 
24
 
25
  security = HTTPBearer()
@@ -32,7 +39,7 @@ SUPABASE_SERVICE_ROLE_KEY = os.getenv("SUPABASE_SERVICE_ROLE_KEY")
32
  app = FastAPI(
33
  title="Document Translator (Final Architecture)",
34
  description="Pipeline: Nemo (JSON) -> Sea-Lion (Translate JSON) -> Gemini (HTML)",
35
- version="10.0.1", # Final Architecture, patched
36
  )
37
 
38
  # Allow requests from the default React frontend port
@@ -51,21 +58,25 @@ def wrap_words_with_spans(html: str) -> str:
51
  replacer.counter += 1
52
  word = match.group(0)
53
  return f'<span data-clickable="true" data-id="word-{replacer.counter}">{word}</span>'
 
54
  replacer.counter = 0
55
 
56
- pattern = r'\b\w+[.,?!]?\b' # matches words with optional trailing punctuation
57
 
58
- for tag in ['p', 'h1', 'h2', 'td']:
59
  # regex to capture content inside these tags
60
- regex = re.compile(fr'(<{tag}[^>]*>)(.*?)(</{tag}>)', re.DOTALL)
 
61
  def replacer_func(m):
62
  open_tag, inner_text, close_tag = m.groups()
63
  wrapped_text = re.sub(pattern, replacer, inner_text)
64
  return open_tag + wrapped_text + close_tag
 
65
  html = regex.sub(replacer_func, html)
66
 
67
  return html
68
 
 
69
  def inject_dropdown_script(html: str) -> str:
70
  script = """
71
  <script>
@@ -171,25 +182,29 @@ window.addEventListener('DOMContentLoaded', () => {
171
  return html.replace("</body>", script + "\n</body>")
172
  else:
173
  return html + script
174
-
 
175
  # Define a Pydantic model to enforce the structure of the incoming request body
176
  class HtmlAnalysisRequest(BaseModel):
177
  html: str
178
 
 
179
  @app.post("/api/analyze_html")
180
  async def analyze_html_file(file: UploadFile = File(...)):
181
  """
182
- Receives an uploaded HTML file, extracts its text content, and uses the
183
  Gemini tool to generate a summary and key informational points.
184
  """
185
  # Check if the uploaded file is an HTML file
186
  if file.content_type != "text/html":
187
- raise HTTPException(status_code=400, detail="Unsupported file type. Please upload a .html file.")
188
 
189
  try:
190
  # Step 1: Read the content of the uploaded file
191
  html_content_bytes = await file.read()
192
- html_content = html_content_bytes.decode('utf-8')
193
 
194
  # Step 2: Extract text from the HTML using our tool
195
  document_text = extract_text_from_html(html_content)
@@ -198,16 +213,18 @@ async def analyze_html_file(file: UploadFile = File(...)):
198
  analysis_results = await generate_document_insights(document_text)
199
 
200
  # Check if the tool returned a functional error
201
- if 'error' in analysis_results:
202
- raise HTTPException(status_code=500, detail=analysis_results['error'])
203
-
204
  return analysis_results
205
 
206
  except Exception as e:
207
  # Catch any other unexpected errors
208
- raise HTTPException(status_code=500, detail=f"An unexpected error occurred: {e}")
209
-
210
-
211
  @app.post("/api/translate_frontend")
212
  async def translate_text(request: Request):
213
  try:
@@ -216,32 +233,30 @@ async def translate_text(request: Request):
216
  target_language = data.get("target_language")
217
 
218
  if not text or not target_language:
219
- raise HTTPException(status_code=400, detail="Missing 'text' or 'target_language' in request body")
220
 
221
  url = "https://api.sea-lion.ai/v1/chat/completions"
222
  api_key = os.getenv("SEALION_API_KEY")
223
 
224
  headers = {
225
  "Authorization": f"Bearer {api_key}",
226
- "Content-Type": "application/json"
227
  # No "accept" header or set to "application/json"
228
  }
229
 
230
  prompt = (
231
  f"Please translate the following text to {target_language} and return "
232
  "ONLY the translated text without any explanations or extra formatting:\n\n"
233
- f"\"{text}\""
234
  )
235
 
236
  payload = {
237
  "max_completion_tokens": 1024,
238
- "messages": [
239
- {
240
- "role": "user",
241
- "content": prompt
242
- }
243
- ],
244
- "model": "aisingapore/Gemma-SEA-LION-v3-9B-IT"
245
  }
246
 
247
  response = requests.post(url, headers=headers, data=json.dumps(payload))
@@ -254,15 +269,19 @@ async def translate_text(request: Request):
254
  translated_text = response_json["choices"][0]["message"]["content"].strip()
255
 
256
  if not translated_text:
257
- raise HTTPException(status_code=500, detail="Empty response from translation model.")
258
 
259
  return {"translated_text": translated_text}
260
 
261
  except requests.exceptions.RequestException as e:
262
- raise HTTPException(status_code=502, detail=f"Translation API request failed: {e}")
263
  except Exception as e:
264
  raise HTTPException(status_code=500, detail=f"Unexpected error: {e}")
265
-
266
 
267
  # --- Model 2: Sea-Lion (The JSON Translator) ---
268
  @app.post("/api/translate")
@@ -275,28 +294,23 @@ async def translate_text(text: str, target_language: str):
275
  url = "https://api.sea-lion.ai/v1/chat/completions"
276
 
277
  # It's recommended to store API keys securely, e.g., in environment variables
278
- api_key = os.getenv("SEALION_API_KEY")
279
 
280
  # The headers for the request
281
  headers = {
282
  "accept": "text/plain",
283
  "Authorization": f"Bearer {api_key}",
284
- "Content-Type": "application/json"
285
  }
286
 
287
  # Create a dynamic prompt for the translation task
288
- prompt = f"Translate the following text to {text}: \"{target_language}\""
289
 
290
  # The JSON data payload for the request
291
  data = {
292
  "max_completion_tokens": 4096, # Increased token limit for longer translations
293
- "messages": [
294
- {
295
- "role": "user",
296
- "content": prompt
297
- }
298
- ],
299
- "model": "aisingapore/Llama-SEA-LION-v3-70B-IT"
300
  }
301
 
302
  try:
@@ -307,19 +321,28 @@ async def translate_text(text: str, target_language: str):
307
  # The response from this specific API is plain text, not JSON.
308
  # We will wrap it in a JSON structure for consistency in our API.
309
  translated_text = response.text
310
-
311
  # It's good practice to check if the response is empty
312
  if not translated_text:
313
- raise HTTPException(status_code=500, detail="Received an empty response from the translation model.")
314
 
315
  return {"translated_text": translated_text}
316
 
317
  except requests.exceptions.RequestException as e:
318
  # Handle network-related errors
319
- raise HTTPException(status_code=502, detail=f"Failed to communicate with the translation AI model: {e}")
320
  except Exception as e:
321
  # Handle other potential errors
322
- raise HTTPException(status_code=500, detail=f"An unexpected error occurred during translation: {e}")
323
 
324
 
325
  # --- Model 3: Gemini (The HTML Generator) ---
@@ -334,7 +357,7 @@ async def generate_html_from_translated_json(translated_json: dict) -> str:
334
  raise ValueError("GEMINI_API_KEY not found in environment variables.")
335
 
336
  genai.configure(api_key=api_key)
337
- model = genai.GenerativeModel(model_name='gemini-2.0-flash')
338
  json_string_for_prompt = json.dumps(translated_json, indent=2)
339
 
340
  prompt = f"""
@@ -366,7 +389,7 @@ async def generate_html_from_translated_json(translated_json: dict) -> str:
366
  response = model.generate_content(prompt)
367
 
368
  # Extract raw HTML from Gemini markdown code block
369
- match = re.search(r'```html\n(.*?)\n```', response.text, re.DOTALL)
370
  raw_html = match.group(1).strip() if match else response.text.strip()
371
 
372
  # Wrap each word in clickable spans
@@ -388,7 +411,9 @@ async def generate_html_from_translated_json(translated_json: dict) -> str:
388
 
389
  # --- API Endpoint Orchestrating the Pipeline ---
390
  @app.post("/api/translate_file", response_class=HTMLResponse)
391
- async def translate_document_to_raw_html(target_language: str = Form(...), file: UploadFile = File(...)):
392
  """
393
  Processes a document using the final, robust pipeline:
394
  1. Nemo extracts content to JSON.
@@ -405,16 +430,31 @@ async def translate_document_to_raw_html(target_language: str = Form(...), file:
405
  file_b64 = base64.b64encode(file_content).decode("utf-8")
406
  nemo_data = {
407
  "model": "nvidia/nemoretriever-parse",
408
- "messages": [{"role": "user", "content": [{"type": "image_url", "image_url": {"url": f"data:{content_type};base64,{file_b64}"}}]}],
409
  "max_tokens": 2048,
410
  }
411
- headers = {'accept': 'application/json', 'Content-Type': 'application/json'}
412
- model_response = requests.post('http://localhost:8000/v1/chat/completions', headers=headers, data=json.dumps(nemo_data))
413
  model_response.raise_for_status()
414
  nemo_response_json = model_response.json()
415
  print(nemo_response_json)
416
  print("*********** Step 1 Done ***********")
417
-
418
 
419
  print("*********** Step 2 in Progress ***********")
420
  # === STEP 2: Get translated JSON from Sea-Lion (The Translator) ===
@@ -435,58 +475,249 @@ async def translate_document_to_raw_html(target_language: str = Form(...), file:
435
  return HTMLResponse(content=final_html)
436
 
437
  except requests.exceptions.RequestException as e:
438
- raise HTTPException(status_code=502, detail=f"Failed to communicate with a downstream AI model: {e}")
439
  except Exception as e:
440
  # This will catch any errors, including the ValueError from the Sea-Lion function
441
- raise HTTPException(status_code=500, detail=f"An unexpected error occurred during processing: {e}")
442
-
443
  @app.post("/api/verify_document_keywords")
444
  async def verify_document_keywords(
445
- file: UploadFile = File(...),
446
- analysis_type: str = Form("legality"),
447
- search_context: str = Form("Singapore employment law")
448
  ):
449
  """
450
- Receives an HTML file and a configuration via form data, then uses the
451
  agent-to-agent RAG workflow to identify and verify key claims.
452
  """
453
  # Check if the uploaded file is an HTML file
454
  if file.content_type != "text/html":
455
- raise HTTPException(status_code=400, detail="Unsupported file type. Please upload a .html file.")
456
 
457
  try:
458
  # Step 1: Read content from the uploaded file and extract text
459
  html_content_bytes = await file.read()
460
- html_content = html_content_bytes.decode('utf-8')
461
  document_text = extract_text_from_html(html_content)
462
 
463
  if not document_text.strip():
464
  raise HTTPException(
465
  status_code=400,
466
- detail="Could not extract any meaningful text from the provided HTML content."
467
  )
468
 
469
  # Step 2: Prepare the configuration and call the new analysis tool
470
- config = {
471
- "analysis_type": analysis_type,
472
- "search_context": search_context
473
- }
474
  analysis_results = await analyze_keywords_with_web_search(document_text, config)
475
 
476
  # Step 3: Handle potential errors from the tool
477
- if 'error' in analysis_results:
478
- raise HTTPException(status_code=500, detail=analysis_results['error'])
479
-
480
  # Step 4: Return the successful analysis
481
  return analysis_results
482
 
483
  except Exception as e:
484
  # Catch any other unexpected errors during the process
485
- raise HTTPException(status_code=500, detail=f"An unexpected error occurred: {str(e)}")
486
-
 
487
 
488
 
489
- #testing clerk backend authentication
490
  # @app.post("/upload")
491
  # async def upload_file(
492
  # authorization: str = Header(...),
@@ -504,11 +735,9 @@ async def verify_document_keywords(
504
  # # You can securely store this file, e.g., to Supabase or local
505
  # return {"message": f"File uploaded by Clerk user {user_id}"}
506
 
 
507
  @app.post("/upload")
508
- async def upload_file(
509
- authorization: str = Header(...),
510
- file: UploadFile = File(...)
511
- ):
512
  if not authorization.startswith("Bearer "):
513
  raise HTTPException(status_code=401, detail="Missing Bearer token")
514
 
@@ -526,11 +755,13 @@ async def upload_file(
526
  "Authorization": f"Bearer {SUPABASE_SERVICE_ROLE_KEY}",
527
  "Content-Type": file.content_type,
528
  },
529
- content=await file.read()
530
  )
531
 
532
  if upload_resp.status_code != 200:
533
- raise HTTPException(status_code=500, detail="Failed to upload to Supabase Storage")
534
 
535
  file_url = f"user-documents/{filename}"
536
 
@@ -542,23 +773,27 @@ async def upload_file(
542
  "Authorization": f"Bearer {SUPABASE_SERVICE_ROLE_KEY}",
543
  "apikey": SUPABASE_SERVICE_ROLE_KEY,
544
  "Content-Type": "application/json",
545
- "Prefer": "return=representation"
546
  },
547
  json={
548
  "user_id": user_id,
549
  "filename": filename.split("/")[-1],
550
- "file_url": file_url
551
- }
552
  )
553
 
554
  if insert_resp.status_code >= 300:
555
- raise HTTPException(status_code=500, detail="Failed to insert document metadata")
556
 
557
  return {"message": f"File uploaded as {filename}"}
558
 
559
 
560
  @app.get("/api/documents")
561
- async def get_user_documents(credentials: HTTPAuthorizationCredentials = Depends(security)):
562
  token = credentials.credentials
563
  claims = await verify_clerk_jwt(token)
564
  user_id = claims.get("sub")
@@ -600,11 +835,15 @@ async def get_user_documents(credentials: HTTPAuthorizationCredentials = Depends
600
  )
601
 
602
  if signed_url_resp.status_code == 200:
603
- print(f"{SUPABASE_URL}/storage/v1{signed_url_resp.json().get('signedURL')}")
604
- doc["signed_url"] = f"{SUPABASE_URL}/storage/v1{signed_url_resp.json().get('signedURL')}"
605
-
606
  else:
607
  doc["signed_url"] = None
608
  print(documents)
609
 
610
- return documents
 
18
  import google.generativeai as genai
19
  from google.api_core import exceptions as google_exceptions
20
  from pydantic import BaseModel
21
+ from gradio_client import Client, handle_file
22
+ import tempfile
23
+
24
  from auth.clerk import verify_clerk_jwt
25
+ from tools.tools import (
26
+ extract_text_from_html,
27
+ generate_document_insights,
28
+ analyze_keywords_with_web_search,
29
+ )
30
 
31
 
32
  security = HTTPBearer()
 
39
  app = FastAPI(
40
  title="Document Translator (Final Architecture)",
41
  description="Pipeline: Nemo (JSON) -> Sea-Lion (Translate JSON) -> Gemini (HTML)",
42
+ version="10.0.1", # Final Architecture, patched
43
  )
44
 
45
  # Allow requests from the default React frontend port
 
58
  replacer.counter += 1
59
  word = match.group(0)
60
  return f'<span data-clickable="true" data-id="word-{replacer.counter}">{word}</span>'
61
+
62
  replacer.counter = 0
63
 
64
+ pattern = r"\b\w+[.,?!]?\b" # matches words with optional trailing punctuation
65
 
66
+ for tag in ["p", "h1", "h2", "td"]:
67
  # regex to capture content inside these tags
68
+ regex = re.compile(rf"(<{tag}[^>]*>)(.*?)(</{tag}>)", re.DOTALL)
69
+
70
  def replacer_func(m):
71
  open_tag, inner_text, close_tag = m.groups()
72
  wrapped_text = re.sub(pattern, replacer, inner_text)
73
  return open_tag + wrapped_text + close_tag
74
+
75
  html = regex.sub(replacer_func, html)
76
 
77
  return html
78
 
79
+
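For readers skimming the diff, here is a stand-alone sketch of the span-wrapping idea used by `wrap_words_with_spans` above. Only the word pattern and the `<span>` attributes are taken from the commit; the sample text and the `wrap()` helper are illustrative.

```python
import re

# Word pattern as it appears in wrap_words_with_spans above; everything else
# in this sketch (sample text, helper name) is illustrative.
pattern = r"\b\w+[.,?!]?\b"
inner_text = "Basic salary paid monthly."

counter = 0

def wrap(match: re.Match) -> str:
    """Wrap one matched word in the same clickable <span> markup."""
    global counter
    counter += 1
    return f'<span data-clickable="true" data-id="word-{counter}">{match.group(0)}</span>'

print(re.sub(pattern, wrap, inner_text))
# e.g. <span data-clickable="true" data-id="word-1">Basic</span> <span ...>salary</span> ...
```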
80
  def inject_dropdown_script(html: str) -> str:
81
  script = """
82
  <script>
 
182
  return html.replace("</body>", script + "\n</body>")
183
  else:
184
  return html + script
185
+
186
+
187
  # Define a Pydantic model to enforce the structure of the incoming request body
188
  class HtmlAnalysisRequest(BaseModel):
189
  html: str
190
 
191
+
192
  @app.post("/api/analyze_html")
193
  async def analyze_html_file(file: UploadFile = File(...)):
194
  """
195
+ Receives an uploaded HTML file, extracts its text content, and uses the
196
  Gemini tool to generate a summary and key informational points.
197
  """
198
  # Check if the uploaded file is an HTML file
199
  if file.content_type != "text/html":
200
+ raise HTTPException(
201
+ status_code=400, detail="Unsupported file type. Please upload a .html file."
202
+ )
203
 
204
  try:
205
  # Step 1: Read the content of the uploaded file
206
  html_content_bytes = await file.read()
207
+ html_content = html_content_bytes.decode("utf-8")
208
 
209
  # Step 2: Extract text from the HTML using our tool
210
  document_text = extract_text_from_html(html_content)
 
213
  analysis_results = await generate_document_insights(document_text)
214
 
215
  # Check if the tool returned a functional error
216
+ if "error" in analysis_results:
217
+ raise HTTPException(status_code=500, detail=analysis_results["error"])
218
+
219
  return analysis_results
220
 
221
  except Exception as e:
222
  # Catch any other unexpected errors
223
+ raise HTTPException(
224
+ status_code=500, detail=f"An unexpected error occurred: {e}"
225
+ )
226
+
227
+
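A hedged client-side sketch for exercising the `/api/analyze_html` endpoint above; the base URL, port, and sample file name are assumptions, not part of the commit.

```python
import requests

BASE_URL = "http://localhost:7860"  # placeholder; adjust to your deployment

with open("contract.html", "rb") as f:
    resp = requests.post(
        f"{BASE_URL}/api/analyze_html",
        # The endpoint rejects anything other than text/html uploads.
        files={"file": ("contract.html", f, "text/html")},
    )

resp.raise_for_status()
print(resp.json())  # summary and key points returned by generate_document_insights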
228
  @app.post("/api/translate_frontend")
229
  async def translate_text(request: Request):
230
  try:
 
233
  target_language = data.get("target_language")
234
 
235
  if not text or not target_language:
236
+ raise HTTPException(
237
+ status_code=400,
238
+ detail="Missing 'text' or 'target_language' in request body",
239
+ )
240
 
241
  url = "https://api.sea-lion.ai/v1/chat/completions"
242
  api_key = os.getenv("SEALION_API_KEY")
243
 
244
  headers = {
245
  "Authorization": f"Bearer {api_key}",
246
+ "Content-Type": "application/json",
247
  # No "accept" header or set to "application/json"
248
  }
249
 
250
  prompt = (
251
  f"Please translate the following text to {target_language} and return "
252
  "ONLY the translated text without any explanations or extra formatting:\n\n"
253
+ f'"{text}"'
254
  )
255
 
256
  payload = {
257
  "max_completion_tokens": 1024,
258
+ "messages": [{"role": "user", "content": prompt}],
259
+ "model": "aisingapore/Gemma-SEA-LION-v3-9B-IT",
 
 
 
 
 
260
  }
261
 
262
  response = requests.post(url, headers=headers, data=json.dumps(payload))
 
269
  translated_text = response_json["choices"][0]["message"]["content"].strip()
270
 
271
  if not translated_text:
272
+ raise HTTPException(
273
+ status_code=500, detail="Empty response from translation model."
274
+ )
275
 
276
  return {"translated_text": translated_text}
277
 
278
  except requests.exceptions.RequestException as e:
279
+ raise HTTPException(
280
+ status_code=502, detail=f"Translation API request failed: {e}"
281
+ )
282
  except Exception as e:
283
  raise HTTPException(status_code=500, detail=f"Unexpected error: {e}")
284
+
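Likewise, a minimal sketch of how a frontend might call `/api/translate_frontend`, which reads a JSON body containing `text` and `target_language`; the base URL and sample values are assumptions.

```python
import requests

BASE_URL = "http://localhost:7860"  # placeholder

resp = requests.post(
    f"{BASE_URL}/api/translate_frontend",
    json={"text": "Overtime must be paid within 14 days.", "target_language": "Tamil"},
)
resp.raise_for_status()
print(resp.json()["translated_text"])
```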
285
 
286
  # --- Model 2: Sea-Lion (The JSON Translator) ---
287
  @app.post("/api/translate")
 
294
  url = "https://api.sea-lion.ai/v1/chat/completions"
295
 
296
  # It's recommended to store API keys securely, e.g., in environment variables
297
+ api_key = os.getenv("SEALION_API_KEY")
298
 
299
  # The headers for the request
300
  headers = {
301
  "accept": "text/plain",
302
  "Authorization": f"Bearer {api_key}",
303
+ "Content-Type": "application/json",
304
  }
305
 
306
  # Create a dynamic prompt for the translation task
307
+ prompt = f'Translate the following text to {target_language}: "{text}"'
308
 
309
  # The JSON data payload for the request
310
  data = {
311
  "max_completion_tokens": 4096, # Increased token limit for longer translations
312
+ "messages": [{"role": "user", "content": prompt}],
313
+ "model": "aisingapore/Llama-SEA-LION-v3-70B-IT",

314
  }
315
 
316
  try:
 
321
  # The response from this specific API is plain text, not JSON.
322
  # We will wrap it in a JSON structure for consistency in our API.
323
  translated_text = response.text
324
+
325
  # It's good practice to check if the response is empty
326
  if not translated_text:
327
+ raise HTTPException(
328
+ status_code=500,
329
+ detail="Received an empty response from the translation model.",
330
+ )
331
 
332
  return {"translated_text": translated_text}
333
 
334
  except requests.exceptions.RequestException as e:
335
  # Handle network-related errors
336
+ raise HTTPException(
337
+ status_code=502,
338
+ detail=f"Failed to communicate with the translation AI model: {e}",
339
+ )
340
  except Exception as e:
341
  # Handle other potential errors
342
+ raise HTTPException(
343
+ status_code=500,
344
+ detail=f"An unexpected error occurred during translation: {e}",
345
+ )
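Note that `/api/translate` declares `text` and `target_language` as plain function parameters, so FastAPI reads them from the query string rather than from a JSON body. A hedged sketch, with the base URL assumed:

```python
import requests

BASE_URL = "http://localhost:7860"  # placeholder

resp = requests.post(
    f"{BASE_URL}/api/translate",
    params={"text": "Annual leave entitlement", "target_language": "Bengali"},
)
resp.raise_for_status()
print(resp.json()["translated_text"])
```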
346
 
347
 
348
  # --- Model 3: Gemini (The HTML Generator) ---
 
357
  raise ValueError("GEMINI_API_KEY not found in environment variables.")
358
 
359
  genai.configure(api_key=api_key)
360
+ model = genai.GenerativeModel(model_name="gemini-2.0-flash")
361
  json_string_for_prompt = json.dumps(translated_json, indent=2)
362
 
363
  prompt = f"""
 
389
  response = model.generate_content(prompt)
390
 
391
  # Extract raw HTML from Gemini markdown code block
392
+ match = re.search(r"```html\n(.*?)\n```", response.text, re.DOTALL)
393
  raw_html = match.group(1).strip() if match else response.text.strip()
394
 
395
  # Wrap each word in clickable spans
 
411
 
412
  # --- API Endpoint Orchestrating the Pipeline ---
413
  @app.post("/api/translate_file", response_class=HTMLResponse)
414
+ async def translate_document_to_raw_html(
415
+ target_language: str = Form(...), file: UploadFile = File(...)
416
+ ):
417
  """
418
  Processes a document using the final, robust pipeline:
419
  1. Nemo extracts content to JSON.
 
430
  file_b64 = base64.b64encode(file_content).decode("utf-8")
431
  nemo_data = {
432
  "model": "nvidia/nemoretriever-parse",
433
+ "messages": [
434
+ {
435
+ "role": "user",
436
+ "content": [
437
+ {
438
+ "type": "image_url",
439
+ "image_url": {
440
+ "url": f"data:{content_type};base64,{file_b64}"
441
+ },
442
+ }
443
+ ],
444
+ }
445
+ ],
446
  "max_tokens": 2048,
447
  }
448
+ headers = {"accept": "application/json", "Content-Type": "application/json"}
449
+ model_response = requests.post(
450
+ "http://localhost:8000/v1/chat/completions",
451
+ headers=headers,
452
+ data=json.dumps(nemo_data),
453
+ )
454
  model_response.raise_for_status()
455
  nemo_response_json = model_response.json()
456
  print(nemo_response_json)
457
  print("*********** Step 1 Done ***********")
 
458
 
459
  print("*********** Step 2 in Progress ***********")
460
  # === STEP 2: Get translated JSON from Sea-Lion (The Translator) ===
 
475
  return HTMLResponse(content=final_html)
476
 
477
  except requests.exceptions.RequestException as e:
478
+ raise HTTPException(
479
+ status_code=502,
480
+ detail=f"Failed to communicate with a downstream AI model: {e}",
481
+ )
482
  except Exception as e:
483
  # This will catch any errors, including the ValueError from the Sea-Lion function
484
+ raise HTTPException(
485
+ status_code=500,
486
+ detail=f"An unexpected error occurred during processing: {e}",
487
+ )
488
+
489
+
490
+ # <<< --- START OF MVP PIPELINE ADDITIONS (Layout-Aware Version) --- >>>
491
+
492
+
493
+ async def extract_text_and_boxes_with_paddle(file_content: bytes) -> list[dict]:
494
+ """
495
+ Extracts text and their bounding boxes from an image using PaddleOCR.
496
+ Returns the full list of dictionary objects from the OCR tool.
497
+ """
498
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
499
+ temp_file.write(file_content)
500
+ temp_filepath = temp_file.name
501
+
502
+ try:
503
+
504
+ def do_ocr() -> list[dict]:
505
+ """Synchronous function to be run in a separate thread."""
506
+ client = Client("kevansoon/PaddleOCR")
507
+ # Returns a list of dictionaries, e.g., [{'text': '...', 'box': [...]}]
508
+ result = client.predict(
509
+ img=handle_file(temp_filepath),
510
+ lang="en",
511
+ api_name="/predict",
512
+ )
513
+ return result
514
+
515
+ loop = asyncio.get_running_loop()
516
+ extracted_data = await loop.run_in_executor(None, do_ocr)
517
+ return extracted_data
518
+ finally:
519
+ os.unlink(temp_filepath)
520
+
521
+
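The downstream steps only rely on the `text` and `box` keys, so the helper above is expected to return data shaped roughly like the sketch below; the sample values, and the assumption of four [x, y] corners per box, come from the Gemini prompt further down rather than from captured output of the kevansoon/PaddleOCR Space.

```python
# Illustrative shape only: each entry carries the recognised text and the
# four [x, y] corners of its bounding box, which is what
# translate_paddle_data_concurrently and generate_html_from_paddle_data
# consume below.
paddle_data = [
    {"text": "Payslip", "box": [[30, 20], [180, 20], [180, 52], [30, 52]]},
    {"text": "Basic salary", "box": [[30, 90], [210, 90], [210, 118], [30, 118]]},
    {"text": "$2,500", "box": [[320, 90], [400, 90], [400, 118], [320, 118]]},
]
```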
522
+ async def translate_paddle_data_concurrently(
523
+ paddle_data: list[dict], target_language: str
524
+ ) -> list[dict]:
525
+ """
526
+ Translates the 'text' field of each item in the paddle_data list concurrently.
527
+ """
528
+
529
+ async def call_sealion_for_translation(text_to_translate: str, lang: str) -> str:
530
+ """Helper function to call the translation API for a single piece of text."""
531
+ url = "https://api.sea-lion.ai/v1/chat/completions"
532
+ api_key = os.getenv("SEALION_API_KEY")
533
+ headers = {
534
+ "Authorization": f"Bearer {api_key}",
535
+ "Content-Type": "application/json",
536
+ }
537
+ prompt = f'Translate the following phrase to {lang} and return ONLY the translated text without explanations or extra formatting:\n\n"{text_to_translate}"'
538
+ payload = {
539
+ "max_completion_tokens": 256, # Tokens for a single phrase, not a whole doc
540
+ "messages": [{"role": "user", "content": prompt}],
541
+ "model": "aisingapore/Gemma-SEA-LION-v3-9B-IT",
542
+ }
543
+ async with httpx.AsyncClient() as client:
544
+ response = await client.post(
545
+ url, headers=headers, json=payload, timeout=30.0
546
+ )
547
+ response.raise_for_status()
548
+ response_json = response.json()
549
+ return response_json["choices"][0]["message"]["content"].strip()
550
+
551
+ # Create a list of translation tasks to run concurrently
552
+ translation_tasks = [
553
+ call_sealion_for_translation(item["text"], target_language)
554
+ for item in paddle_data
555
+ ]
556
+
557
+ # Execute all translation tasks in parallel
558
+ translated_texts = await asyncio.gather(*translation_tasks)
559
+
560
+ # Reconstruct the data structure with translated text and original boxes
561
+ translated_data = []
562
+ for i, item in enumerate(paddle_data):
563
+ translated_data.append({"text": translated_texts[i], "box": item["box"]})
564
+
565
+ return translated_data
566
+
567
+
568
+ async def generate_html_from_paddle_data(translated_data: list[dict]) -> str:
569
+ """
570
+ Receives translated OCR data (text with coordinates) and uses Gemini
571
+ to generate a layout-aware HTML document.
572
+ """
573
+ try:
574
+ api_key = os.getenv("GEMINI_API_KEY")
575
+ if not api_key:
576
+ raise ValueError("GEMINI_API_KEY not found in environment variables.")
577
+
578
+ genai.configure(api_key=api_key)
579
+ model = genai.GenerativeModel(model_name="gemini-2.5-flash")
580
+
581
+ # Convert the list of data to a JSON string for the prompt
582
+ # THE FIX IS HERE: Added ensure_ascii=False
583
+ json_data_for_prompt = json.dumps(translated_data, indent=2, ensure_ascii=False)
584
+
585
+ prompt = f"""
586
+ You are an expert system specializing in converting structured OCR data into a well-formatted HTML document that preserves the original layout.
587
+
588
+ **Your Task:**
589
+ 1. Analyze the following JSON array. Each object contains a `text` field (pre-translated) and a `box` field (four [x, y] coordinates of its bounding box).
590
+ 2. Use the `box` coordinates to understand the document's spatial structure.
591
+ - Elements with similar y-coordinates are likely on the same row.
592
+ - Elements aligned vertically form columns.
593
+ 3. Reconstruct the visual layout using semantic HTML.
594
+ - Use `<table>` for grid-like data (rows and columns). This is critical for payslips.
595
+ - Use `<h1>`, `<h2>`, `<p>` for headings and paragraphs.
596
+ - Do NOT use absolute positioning (e.g., `style="position: absolute; left: ..."`). Create a clean, flowing HTML structure.
597
+ 4. Your final output must ONLY be the raw HTML code. Do not add comments, markdown backticks, or any other explanatory text.
598
+
599
+ **OCR Data to process:**
600
+ ```json
601
+ {json_data_for_prompt}
602
+ ```
603
+ """
604
+
605
+ def do_request():
606
+ """Synchronous function to be run in a separate thread."""
607
+ response = model.generate_content(prompt)
608
+ match = re.search(r"```html\n(.*?)\n```", response.text, re.DOTALL)
609
+ raw_html = match.group(1).strip() if match else response.text.strip()
610
+ # Reuse existing functions to make the HTML interactive
611
+ wrapped_html = wrap_words_with_spans(raw_html)
612
+ final_html = inject_dropdown_script(wrapped_html)
613
+ return final_html
614
+
615
+ return await asyncio.to_thread(do_request)
616
+ except Exception as e:
617
+ error_message = f"An error occurred while generating the HTML structure with Gemini: {str(e)}"
618
+ return f"<html><body><h1>HTML Generation Error</h1><p>{html.escape(error_message)}</p></body></html>"
619
+
620
+
621
+ @app.post("/api/translate_file_mvp", response_class=HTMLResponse)
622
+ async def translate_document_mvp(
623
+ target_language: str = Form(...), file: UploadFile = File(...)
624
+ ):
625
+ """
626
+ Processes a document using the Layout-Aware MVP pipeline:
627
+ 1. PaddleOCR extracts text and coordinates.
628
+ 2. Sea-Lion translates each text block concurrently.
629
+ 3. Gemini uses the translated text and original coordinates to generate layout-aware HTML.
630
+ """
631
+ content_type = file.content_type
632
+ if content_type not in ["image/png", "image/jpeg"]:
633
+ raise HTTPException(
634
+ status_code=400,
635
+ detail="Unsupported file type for MVP pipeline. Please use PNG or JPG.",
636
+ )
637
+
638
+ try:
639
+ file_content = await file.read()
640
+
641
+ # === MVP STEP 1: Extract text and coordinates with PaddleOCR ===
642
+ paddle_data = await extract_text_and_boxes_with_paddle(file_content)
643
+ if not paddle_data:
644
+ raise HTTPException(
645
+ status_code=400,
646
+ detail="PaddleOCR could not extract any text from the image.",
647
+ )
648
+
649
+ # === MVP STEP 2: Translate each text block concurrently ===
650
+ translated_data = await translate_paddle_data_concurrently(
651
+ paddle_data, target_language
652
+ )
653
+
654
+ # === MVP STEP 3: Generate final, layout-aware HTML from Gemini ===
655
+ final_html = await generate_html_from_paddle_data(translated_data)
656
+
657
+ return HTMLResponse(content=final_html)
658
+
659
+ except httpx.HTTPStatusError as e:
660
+ raise HTTPException(
661
+ status_code=e.response.status_code,
662
+ detail=f"Error from a downstream AI service: {e.response.text}",
663
+ )
664
+ except Exception as e:
665
+ raise HTTPException(
666
+ status_code=500,
667
+ detail=f"An unexpected error occurred during MVP processing: {str(e)}",
668
+ )
669
+
670
+
671
+ # <<< --- END OF MVP PIPELINE ADDITIONS (Layout-Aware Version) --- >>>
672
+
673
+
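A hedged end-to-end sketch of calling the new MVP endpoint; the base URL, port, and file names are assumptions, and the request mirrors the PNG/JPG check and `target_language` form field declared above.

```python
import requests

BASE_URL = "http://localhost:7860"  # placeholder

with open("payslip.png", "rb") as f:
    resp = requests.post(
        f"{BASE_URL}/api/translate_file_mvp",
        data={"target_language": "Burmese"},
        files={"file": ("payslip.png", f, "image/png")},
    )

resp.raise_for_status()
# The endpoint returns layout-aware HTML, so save the body as a page.
with open("payslip_translated.html", "w", encoding="utf-8") as out:
    out.write(resp.text)
```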
674
  @app.post("/api/verify_document_keywords")
675
  async def verify_document_keywords(
676
+ file: UploadFile = File(...),
677
+ analysis_type: str = Form("legality"),
678
+ search_context: str = Form("Singapore employment law"),
679
  ):
680
  """
681
+ Receives an HTML file and a configuration via form data, then uses the
682
  agent-to-agent RAG workflow to identify and verify key claims.
683
  """
684
  # Check if the uploaded file is an HTML file
685
  if file.content_type != "text/html":
686
+ raise HTTPException(
687
+ status_code=400, detail="Unsupported file type. Please upload a .html file."
688
+ )
689
 
690
  try:
691
  # Step 1: Read content from the uploaded file and extract text
692
  html_content_bytes = await file.read()
693
+ html_content = html_content_bytes.decode("utf-8")
694
  document_text = extract_text_from_html(html_content)
695
 
696
  if not document_text.strip():
697
  raise HTTPException(
698
  status_code=400,
699
+ detail="Could not extract any meaningful text from the provided HTML content.",
700
  )
701
 
702
  # Step 2: Prepare the configuration and call the new analysis tool
703
+ config = {"analysis_type": analysis_type, "search_context": search_context}
704
  analysis_results = await analyze_keywords_with_web_search(document_text, config)
705
 
706
  # Step 3: Handle potential errors from the tool
707
+ if "error" in analysis_results:
708
+ raise HTTPException(status_code=500, detail=analysis_results["error"])
709
+
710
  # Step 4: Return the successful analysis
711
  return analysis_results
712
 
713
  except Exception as e:
714
  # Catch any other unexpected errors during the process
715
+ raise HTTPException(
716
+ status_code=500, detail=f"An unexpected error occurred: {str(e)}"
717
+ )
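A hedged sketch of posting a document to `/api/verify_document_keywords`; the form values shown are the defaults declared above, while the base URL and file name are assumptions.

```python
import requests

BASE_URL = "http://localhost:7860"  # placeholder

with open("contract.html", "rb") as f:
    resp = requests.post(
        f"{BASE_URL}/api/verify_document_keywords",
        data={
            "analysis_type": "legality",
            "search_context": "Singapore employment law",
        },
        files={"file": ("contract.html", f, "text/html")},
    )

resp.raise_for_status()
print(resp.json())  # claims identified and verified by the RAG workflow
```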
718
 
719
 
720
+ # testing clerk backend authentication
721
  # @app.post("/upload")
722
  # async def upload_file(
723
  # authorization: str = Header(...),
 
735
  # # You can securely store this file, e.g., to Supabase or local
736
  # return {"message": f"File uploaded by Clerk user {user_id}"}
737
 
738
+
739
  @app.post("/upload")
740
+ async def upload_file(authorization: str = Header(...), file: UploadFile = File(...)):
741
  if not authorization.startswith("Bearer "):
742
  raise HTTPException(status_code=401, detail="Missing Bearer token")
743
 
 
755
  "Authorization": f"Bearer {SUPABASE_SERVICE_ROLE_KEY}",
756
  "Content-Type": file.content_type,
757
  },
758
+ content=await file.read(),
759
  )
760
 
761
  if upload_resp.status_code != 200:
762
+ raise HTTPException(
763
+ status_code=500, detail="Failed to upload to Supabase Storage"
764
+ )
765
 
766
  file_url = f"user-documents/{filename}"
767
 
 
773
  "Authorization": f"Bearer {SUPABASE_SERVICE_ROLE_KEY}",
774
  "apikey": SUPABASE_SERVICE_ROLE_KEY,
775
  "Content-Type": "application/json",
776
+ "Prefer": "return=representation",
777
  },
778
  json={
779
  "user_id": user_id,
780
  "filename": filename.split("/")[-1],
781
+ "file_url": file_url,
782
+ },
783
  )
784
 
785
  if insert_resp.status_code >= 300:
786
+ raise HTTPException(
787
+ status_code=500, detail="Failed to insert document metadata"
788
+ )
789
 
790
  return {"message": f"File uploaded as {filename}"}
791
 
792
 
793
  @app.get("/api/documents")
794
+ async def get_user_documents(
795
+ credentials: HTTPAuthorizationCredentials = Depends(security),
796
+ ):
797
  token = credentials.credentials
798
  claims = await verify_clerk_jwt(token)
799
  user_id = claims.get("sub")
 
835
  )
836
 
837
  if signed_url_resp.status_code == 200:
838
+ print(
839
+ f"{SUPABASE_URL}/storage/v1{signed_url_resp.json().get('signedURL')}"
840
+ )
841
+ doc["signed_url"] = (
842
+ f"{SUPABASE_URL}/storage/v1{signed_url_resp.json().get('signedURL')}"
843
+ )
844
+
845
  else:
846
  doc["signed_url"] = None
847
  print(documents)
848
 
849
+ return documents
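Finally, a hedged sketch of listing a user's documents via `/api/documents`; the Clerk session JWT and base URL are placeholders.

```python
import requests

BASE_URL = "http://localhost:7860"   # placeholder
CLERK_JWT = "<clerk-session-jwt>"    # obtained from the signed-in frontend

resp = requests.get(
    f"{BASE_URL}/api/documents",
    headers={"Authorization": f"Bearer {CLERK_JWT}"},
)
resp.raise_for_status()
for doc in resp.json():
    # Each record should carry a short-lived signed_url (or None on failure).
    print(doc.get("filename"), doc.get("signed_url"))
```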
requirements.txt CHANGED
@@ -26,7 +26,6 @@ google-auth==2.40.3
26
  google-auth-httplib2==0.2.0
27
  google-generativeai==0.8.5
28
  googleapis-common-protos==1.70.0
29
- gradio_client==1.11.0
30
  grpcio==1.74.0
31
  grpcio-status==1.71.2
32
  h11==0.16.0
@@ -94,3 +93,5 @@ urllib3==2.5.0
94
  uvicorn==0.35.0
95
  watchfiles==1.1.0
96
  websockets==15.0.1
 
26
  google-auth-httplib2==0.2.0
27
  google-generativeai==0.8.5
28
  googleapis-common-protos==1.70.0
 
29
  grpcio==1.74.0
30
  grpcio-status==1.71.2
31
  h11==0.16.0
 
93
  uvicorn==0.35.0
94
  watchfiles==1.1.0
95
  websockets==15.0.1
96
+ langextract
97
+ gradio_client
tools/__pycache__/tools.cpython-310.pyc CHANGED
Binary files a/tools/__pycache__/tools.cpython-310.pyc and b/tools/__pycache__/tools.cpython-310.pyc differ