KevanSoon committed on
Commit 30f61f6 · 1 Parent(s): b002983

adjusted app.py and tools

app.py CHANGED
@@ -18,8 +18,15 @@ from dotenv import load_dotenv
18
  import google.generativeai as genai
19
  from google.api_core import exceptions as google_exceptions
20
  from pydantic import BaseModel
21
  from auth.clerk import verify_clerk_jwt
22
- from tools.tools import extract_text_from_html, generate_document_insights, analyze_keywords_with_web_search
23
 
24
 
25
  security = HTTPBearer()
@@ -32,7 +39,7 @@ SUPABASE_SERVICE_ROLE_KEY = os.getenv("SUPABASE_SERVICE_ROLE_KEY")
32
  app = FastAPI(
33
  title="Document Translator (Final Architecture)",
34
  description="Pipeline: Nemo (JSON) -> Sea-Lion (Translate JSON) -> Gemini (HTML)",
35
- version="10.0.1", # Final Architecture, patched
36
  )
37
 
38
  # Allow requests from the default React frontend port
@@ -51,21 +58,25 @@ def wrap_words_with_spans(html: str) -> str:
51
  replacer.counter += 1
52
  word = match.group(0)
53
  return f'<span data-clickable="true" data-id="word-{replacer.counter}">{word}</span>'
 
54
  replacer.counter = 0
55
 
56
- pattern = r'\b\w+[.,?!]?\b' # matches words with optional trailing punctuation
57
 
58
- for tag in ['p', 'h1', 'h2', 'td']:
59
  # regex to capture content inside these tags
60
- regex = re.compile(fr'(<{tag}[^>]*>)(.*?)(</{tag}>)', re.DOTALL)
 
61
  def replacer_func(m):
62
  open_tag, inner_text, close_tag = m.groups()
63
  wrapped_text = re.sub(pattern, replacer, inner_text)
64
  return open_tag + wrapped_text + close_tag
 
65
  html = regex.sub(replacer_func, html)
66
 
67
  return html
68
 
 
69
  def inject_dropdown_script(html: str) -> str:
70
  script = """
71
  <script>
@@ -171,25 +182,29 @@ window.addEventListener('DOMContentLoaded', () => {
171
  return html.replace("</body>", script + "\n</body>")
172
  else:
173
  return html + script
174
-
 
175
  # Define a Pydantic model to enforce the structure of the incoming request body
176
  class HtmlAnalysisRequest(BaseModel):
177
  html: str
178
 
 
179
  @app.post("/api/analyze_html")
180
  async def analyze_html_file(file: UploadFile = File(...)):
181
  """
182
- Receives an uploaded HTML file, extracts its text content, and uses the
183
  Gemini tool to generate a summary and key informational points.
184
  """
185
  # Check if the uploaded file is an HTML file
186
  if file.content_type != "text/html":
187
- raise HTTPException(status_code=400, detail="Unsupported file type. Please upload a .html file.")
188
 
189
  try:
190
  # Step 1: Read the content of the uploaded file
191
  html_content_bytes = await file.read()
192
- html_content = html_content_bytes.decode('utf-8')
193
 
194
  # Step 2: Extract text from the HTML using our tool
195
  document_text = extract_text_from_html(html_content)
@@ -198,16 +213,18 @@ async def analyze_html_file(file: UploadFile = File(...)):
198
  analysis_results = await generate_document_insights(document_text)
199
 
200
  # Check if the tool returned a functional error
201
- if 'error' in analysis_results:
202
- raise HTTPException(status_code=500, detail=analysis_results['error'])
203
-
204
  return analysis_results
205
 
206
  except Exception as e:
207
  # Catch any other unexpected errors
208
- raise HTTPException(status_code=500, detail=f"An unexpected error occurred: {e}")
209
-
210
-
211
  @app.post("/api/translate_frontend")
212
  async def translate_text(request: Request):
213
  try:
@@ -216,32 +233,30 @@ async def translate_text(request: Request):
216
  target_language = data.get("target_language")
217
 
218
  if not text or not target_language:
219
- raise HTTPException(status_code=400, detail="Missing 'text' or 'target_language' in request body")
220
 
221
  url = "https://api.sea-lion.ai/v1/chat/completions"
222
  api_key = os.getenv("SEALION_API_KEY")
223
 
224
  headers = {
225
  "Authorization": f"Bearer {api_key}",
226
- "Content-Type": "application/json"
227
  # No "accept" header or set to "application/json"
228
  }
229
 
230
  prompt = (
231
  f"Please translate the following text to {target_language} and return "
232
  "ONLY the translated text without any explanations or extra formatting:\n\n"
233
- f"\"{text}\""
234
  )
235
 
236
  payload = {
237
  "max_completion_tokens": 1024,
238
- "messages": [
239
- {
240
- "role": "user",
241
- "content": prompt
242
- }
243
- ],
244
- "model": "aisingapore/Gemma-SEA-LION-v3-9B-IT"
245
  }
246
 
247
  response = requests.post(url, headers=headers, data=json.dumps(payload))
@@ -254,15 +269,19 @@ async def translate_text(request: Request):
254
  translated_text = response_json["choices"][0]["message"]["content"].strip()
255
 
256
  if not translated_text:
257
- raise HTTPException(status_code=500, detail="Empty response from translation model.")
258
 
259
  return {"translated_text": translated_text}
260
 
261
  except requests.exceptions.RequestException as e:
262
- raise HTTPException(status_code=502, detail=f"Translation API request failed: {e}")
263
  except Exception as e:
264
  raise HTTPException(status_code=500, detail=f"Unexpected error: {e}")
265
-
266
 
267
  # --- Model 2: Sea-Lion (The JSON Translator) ---
268
  @app.post("/api/translate")
@@ -275,28 +294,23 @@ async def translate_text(text: str, target_language: str):
275
  url = "https://api.sea-lion.ai/v1/chat/completions"
276
 
277
  # It's recommended to store API keys securely, e.g., in environment variables
278
- api_key = os.getenv("SEALION_API_KEY")
279
 
280
  # The headers for the request
281
  headers = {
282
  "accept": "text/plain",
283
  "Authorization": f"Bearer {api_key}",
284
- "Content-Type": "application/json"
285
  }
286
 
287
  # Create a dynamic prompt for the translation task
288
- prompt = f"Translate the following text to {text}: \"{target_language}\""
289
 
290
  # The JSON data payload for the request
291
  data = {
292
  "max_completion_tokens": 4096, # Increased token limit for longer translations
293
- "messages": [
294
- {
295
- "role": "user",
296
- "content": prompt
297
- }
298
- ],
299
- "model": "aisingapore/Llama-SEA-LION-v3-70B-IT"
300
  }
301
 
302
  try:
@@ -307,19 +321,28 @@ async def translate_text(text: str, target_language: str):
307
  # The response from this specific API is plain text, not JSON.
308
  # We will wrap it in a JSON structure for consistency in our API.
309
  translated_text = response.text
310
-
311
  # It's good practice to check if the response is empty
312
  if not translated_text:
313
- raise HTTPException(status_code=500, detail="Received an empty response from the translation model.")
314
 
315
  return {"translated_text": translated_text}
316
 
317
  except requests.exceptions.RequestException as e:
318
  # Handle network-related errors
319
- raise HTTPException(status_code=502, detail=f"Failed to communicate with the translation AI model: {e}")
320
  except Exception as e:
321
  # Handle other potential errors
322
- raise HTTPException(status_code=500, detail=f"An unexpected error occurred during translation: {e}")
323
 
324
 
325
  # --- Model 3: Gemini (The HTML Generator) ---
@@ -334,7 +357,7 @@ async def generate_html_from_translated_json(translated_json: dict) -> str:
334
  raise ValueError("GEMINI_API_KEY not found in environment variables.")
335
 
336
  genai.configure(api_key=api_key)
337
- model = genai.GenerativeModel(model_name='gemini-2.0-flash')
338
  json_string_for_prompt = json.dumps(translated_json, indent=2)
339
 
340
  prompt = f"""
@@ -366,7 +389,7 @@ async def generate_html_from_translated_json(translated_json: dict) -> str:
366
  response = model.generate_content(prompt)
367
 
368
  # Extract raw HTML from Gemini markdown code block
369
- match = re.search(r'```html\n(.*?)\n```', response.text, re.DOTALL)
370
  raw_html = match.group(1).strip() if match else response.text.strip()
371
 
372
  # Wrap each word in clickable spans
@@ -388,7 +411,9 @@ async def generate_html_from_translated_json(translated_json: dict) -> str:
388
 
389
  # --- API Endpoint Orchestrating the Pipeline ---
390
  @app.post("/api/translate_file", response_class=HTMLResponse)
391
- async def translate_document_to_raw_html(target_language: str = Form(...), file: UploadFile = File(...)):
392
  """
393
  Processes a document using the final, robust pipeline:
394
  1. Nemo extracts content to JSON.
@@ -405,16 +430,31 @@ async def translate_document_to_raw_html(target_language: str = Form(...), file:
405
  file_b64 = base64.b64encode(file_content).decode("utf-8")
406
  nemo_data = {
407
  "model": "nvidia/nemoretriever-parse",
408
- "messages": [{"role": "user", "content": [{"type": "image_url", "image_url": {"url": f"data:{content_type};base64,{file_b64}"}}]}],
409
  "max_tokens": 2048,
410
  }
411
- headers = {'accept': 'application/json', 'Content-Type': 'application/json'}
412
- model_response = requests.post('http://localhost:8000/v1/chat/completions', headers=headers, data=json.dumps(nemo_data))
413
  model_response.raise_for_status()
414
  nemo_response_json = model_response.json()
415
  print(nemo_response_json)
416
  print("*********** Step 1 Done ***********")
417
-
418
 
419
  print("*********** Step 2 in Progress ***********")
420
  # === STEP 2: Get translated JSON from Sea-Lion (The Translator) ===
@@ -435,58 +475,249 @@ async def translate_document_to_raw_html(target_language: str = Form(...), file:
435
  return HTMLResponse(content=final_html)
436
 
437
  except requests.exceptions.RequestException as e:
438
- raise HTTPException(status_code=502, detail=f"Failed to communicate with a downstream AI model: {e}")
439
  except Exception as e:
440
  # This will catch any errors, including the ValueError from the Sea-Lion function
441
- raise HTTPException(status_code=500, detail=f"An unexpected error occurred during processing: {e}")
442
-
443
  @app.post("/api/verify_document_keywords")
444
  async def verify_document_keywords(
445
- file: UploadFile = File(...),
446
- analysis_type: str = Form("legality"),
447
- search_context: str = Form("Singapore employment law")
448
  ):
449
  """
450
- Receives an HTML file and a configuration via form data, then uses the
451
  agent-to-agent RAG workflow to identify and verify key claims.
452
  """
453
  # Check if the uploaded file is an HTML file
454
  if file.content_type != "text/html":
455
- raise HTTPException(status_code=400, detail="Unsupported file type. Please upload a .html file.")
456
 
457
  try:
458
  # Step 1: Read content from the uploaded file and extract text
459
  html_content_bytes = await file.read()
460
- html_content = html_content_bytes.decode('utf-8')
461
  document_text = extract_text_from_html(html_content)
462
 
463
  if not document_text.strip():
464
  raise HTTPException(
465
  status_code=400,
466
- detail="Could not extract any meaningful text from the provided HTML content."
467
  )
468
 
469
  # Step 2: Prepare the configuration and call the new analysis tool
470
- config = {
471
- "analysis_type": analysis_type,
472
- "search_context": search_context
473
- }
474
  analysis_results = await analyze_keywords_with_web_search(document_text, config)
475
 
476
  # Step 3: Handle potential errors from the tool
477
- if 'error' in analysis_results:
478
- raise HTTPException(status_code=500, detail=analysis_results['error'])
479
-
480
  # Step 4: Return the successful analysis
481
  return analysis_results
482
 
483
  except Exception as e:
484
  # Catch any other unexpected errors during the process
485
- raise HTTPException(status_code=500, detail=f"An unexpected error occurred: {str(e)}")
486
-
 
487
 
488
 
489
- #testing clerk backend authentication
490
  # @app.post("/upload")
491
  # async def upload_file(
492
  # authorization: str = Header(...),
@@ -504,11 +735,9 @@ async def verify_document_keywords(
504
  # # You can securely store this file, e.g., to Supabase or local
505
  # return {"message": f"File uploaded by Clerk user {user_id}"}
506
 
 
507
  @app.post("/upload")
508
- async def upload_file(
509
- authorization: str = Header(...),
510
- file: UploadFile = File(...)
511
- ):
512
  if not authorization.startswith("Bearer "):
513
  raise HTTPException(status_code=401, detail="Missing Bearer token")
514
 
@@ -526,11 +755,13 @@ async def upload_file(
526
  "Authorization": f"Bearer {SUPABASE_SERVICE_ROLE_KEY}",
527
  "Content-Type": file.content_type,
528
  },
529
- content=await file.read()
530
  )
531
 
532
  if upload_resp.status_code != 200:
533
- raise HTTPException(status_code=500, detail="Failed to upload to Supabase Storage")
534
 
535
  file_url = f"user-documents/{filename}"
536
 
@@ -542,23 +773,27 @@ async def upload_file(
542
  "Authorization": f"Bearer {SUPABASE_SERVICE_ROLE_KEY}",
543
  "apikey": SUPABASE_SERVICE_ROLE_KEY,
544
  "Content-Type": "application/json",
545
- "Prefer": "return=representation"
546
  },
547
  json={
548
  "user_id": user_id,
549
  "filename": filename.split("/")[-1],
550
- "file_url": file_url
551
- }
552
  )
553
 
554
  if insert_resp.status_code >= 300:
555
- raise HTTPException(status_code=500, detail="Failed to insert document metadata")
556
 
557
  return {"message": f"File uploaded as {filename}"}
558
 
559
 
560
  @app.get("/api/documents")
561
- async def get_user_documents(credentials: HTTPAuthorizationCredentials = Depends(security)):
562
  token = credentials.credentials
563
  claims = await verify_clerk_jwt(token)
564
  user_id = claims.get("sub")
@@ -600,11 +835,15 @@ async def get_user_documents(credentials: HTTPAuthorizationCredentials = Depends
600
  )
601
 
602
  if signed_url_resp.status_code == 200:
603
- print(f"{SUPABASE_URL}/storage/v1{signed_url_resp.json().get('signedURL')}")
604
- doc["signed_url"] = f"{SUPABASE_URL}/storage/v1{signed_url_resp.json().get('signedURL')}"
605
-
606
  else:
607
  doc["signed_url"] = None
608
  print(documents)
609
 
610
- return documents
 
18
  import google.generativeai as genai
19
  from google.api_core import exceptions as google_exceptions
20
  from pydantic import BaseModel
21
+ from gradio_client import Client, handle_file
22
+ import tempfile
23
+
24
  from auth.clerk import verify_clerk_jwt
25
+ from tools.tools import (
26
+ extract_text_from_html,
27
+ generate_document_insights,
28
+ analyze_keywords_with_web_search,
29
+ )
30
 
31
 
32
  security = HTTPBearer()
 
39
  app = FastAPI(
40
  title="Document Translator (Final Architecture)",
41
  description="Pipeline: Nemo (JSON) -> Sea-Lion (Translate JSON) -> Gemini (HTML)",
42
+ version="10.0.1", # Final Architecture, patched
43
  )
44
 
45
  # Allow requests from the default React frontend port
 
58
  replacer.counter += 1
59
  word = match.group(0)
60
  return f'<span data-clickable="true" data-id="word-{replacer.counter}">{word}</span>'
61
+
62
  replacer.counter = 0
63
 
64
+ pattern = r"\b\w+[.,?!]?\b" # matches words with optional trailing punctuation
65
 
66
+ for tag in ["p", "h1", "h2", "td"]:
67
  # regex to capture content inside these tags
68
+ regex = re.compile(rf"(<{tag}[^>]*>)(.*?)(</{tag}>)", re.DOTALL)
69
+
70
  def replacer_func(m):
71
  open_tag, inner_text, close_tag = m.groups()
72
  wrapped_text = re.sub(pattern, replacer, inner_text)
73
  return open_tag + wrapped_text + close_tag
74
+
75
  html = regex.sub(replacer_func, html)
76
 
77
  return html
78
 
79
+
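For readers skimming the diff, here is a stand-alone sketch of the span-wrapping idea used by `wrap_words_with_spans` above. Only the word pattern and the `<span>` attributes are taken from the commit; the sample text and the `wrap()` helper are illustrative.

```python
import re

# Word pattern as it appears in wrap_words_with_spans above; everything else
# in this sketch (sample text, helper name) is illustrative.
pattern = r"\b\w+[.,?!]?\b"
inner_text = "Basic salary paid monthly."

counter = 0

def wrap(match: re.Match) -> str:
    """Wrap one matched word in the same clickable <span> markup."""
    global counter
    counter += 1
    return f'<span data-clickable="true" data-id="word-{counter}">{match.group(0)}</span>'

print(re.sub(pattern, wrap, inner_text))
# e.g. <span data-clickable="true" data-id="word-1">Basic</span> <span ...>salary</span> ...
```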
80
  def inject_dropdown_script(html: str) -> str:
81
  script = """
82
  <script>
 
182
  return html.replace("</body>", script + "\n</body>")
183
  else:
184
  return html + script
185
+
186
+
187
  # Define a Pydantic model to enforce the structure of the incoming request body
188
  class HtmlAnalysisRequest(BaseModel):
189
  html: str
190
 
191
+
192
  @app.post("/api/analyze_html")
193
  async def analyze_html_file(file: UploadFile = File(...)):
194
  """
195
+ Receives an uploaded HTML file, extracts its text content, and uses the
196
  Gemini tool to generate a summary and key informational points.
197
  """
198
  # Check if the uploaded file is an HTML file
199
  if file.content_type != "text/html":
200
+ raise HTTPException(
201
+ status_code=400, detail="Unsupported file type. Please upload a .html file."
202
+ )
203
 
204
  try:
205
  # Step 1: Read the content of the uploaded file
206
  html_content_bytes = await file.read()
207
+ html_content = html_content_bytes.decode("utf-8")
208
 
209
  # Step 2: Extract text from the HTML using our tool
210
  document_text = extract_text_from_html(html_content)
 
213
  analysis_results = await generate_document_insights(document_text)
214
 
215
  # Check if the tool returned a functional error
216
+ if "error" in analysis_results:
217
+ raise HTTPException(status_code=500, detail=analysis_results["error"])
218
+
219
  return analysis_results
220
 
221
  except Exception as e:
222
  # Catch any other unexpected errors
223
+ raise HTTPException(
224
+ status_code=500, detail=f"An unexpected error occurred: {e}"
225
+ )
226
+
227
+
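A hedged client-side sketch for exercising the `/api/analyze_html` endpoint above; the base URL, port, and sample file name are assumptions, not part of the commit.

```python
import requests

BASE_URL = "http://localhost:7860"  # placeholder; adjust to your deployment

with open("contract.html", "rb") as f:
    resp = requests.post(
        f"{BASE_URL}/api/analyze_html",
        # The endpoint rejects anything other than text/html uploads.
        files={"file": ("contract.html", f, "text/html")},
    )

resp.raise_for_status()
print(resp.json())  # summary and key points returned by generate_document_insights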
228
  @app.post("/api/translate_frontend")
229
  async def translate_text(request: Request):
230
  try:
 
233
  target_language = data.get("target_language")
234
 
235
  if not text or not target_language:
236
+ raise HTTPException(
237
+ status_code=400,
238
+ detail="Missing 'text' or 'target_language' in request body",
239
+ )
240
 
241
  url = "https://api.sea-lion.ai/v1/chat/completions"
242
  api_key = os.getenv("SEALION_API_KEY")
243
 
244
  headers = {
245
  "Authorization": f"Bearer {api_key}",
246
+ "Content-Type": "application/json",
247
  # No "accept" header or set to "application/json"
248
  }
249
 
250
  prompt = (
251
  f"Please translate the following text to {target_language} and return "
252
  "ONLY the translated text without any explanations or extra formatting:\n\n"
253
+ f'"{text}"'
254
  )
255
 
256
  payload = {
257
  "max_completion_tokens": 1024,
258
+ "messages": [{"role": "user", "content": prompt}],
259
+ "model": "aisingapore/Gemma-SEA-LION-v3-9B-IT",
 
 
 
 
 
260
  }
261
 
262
  response = requests.post(url, headers=headers, data=json.dumps(payload))
 
269
  translated_text = response_json["choices"][0]["message"]["content"].strip()
270
 
271
  if not translated_text:
272
+ raise HTTPException(
273
+ status_code=500, detail="Empty response from translation model."
274
+ )
275
 
276
  return {"translated_text": translated_text}
277
 
278
  except requests.exceptions.RequestException as e:
279
+ raise HTTPException(
280
+ status_code=502, detail=f"Translation API request failed: {e}"
281
+ )
282
  except Exception as e:
283
  raise HTTPException(status_code=500, detail=f"Unexpected error: {e}")
284
+
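Likewise, a minimal sketch of how a frontend might call `/api/translate_frontend`, which reads a JSON body containing `text` and `target_language`; the base URL and sample values are assumptions.

```python
import requests

BASE_URL = "http://localhost:7860"  # placeholder

resp = requests.post(
    f"{BASE_URL}/api/translate_frontend",
    json={"text": "Overtime must be paid within 14 days.", "target_language": "Tamil"},
)
resp.raise_for_status()
print(resp.json()["translated_text"])
```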
285
 
286
  # --- Model 2: Sea-Lion (The JSON Translator) ---
287
  @app.post("/api/translate")
 
294
  url = "https://api.sea-lion.ai/v1/chat/completions"
295
 
296
  # It's recommended to store API keys securely, e.g., in environment variables
297
+ api_key = os.getenv("SEALION_API_KEY")
298
 
299
  # The headers for the request
300
  headers = {
301
  "accept": "text/plain",
302
  "Authorization": f"Bearer {api_key}",
303
+ "Content-Type": "application/json",
304
  }
305
 
306
  # Create a dynamic prompt for the translation task
307
+ prompt = f'Translate the following text to {target_language}: "{text}"'
308
 
309
  # The JSON data payload for the request
310
  data = {
311
  "max_completion_tokens": 4096, # Increased token limit for longer translations
312
+ "messages": [{"role": "user", "content": prompt}],
313
+ "model": "aisingapore/Llama-SEA-LION-v3-70B-IT",

314
  }
315
 
316
  try:
 
321
  # The response from this specific API is plain text, not JSON.
322
  # We will wrap it in a JSON structure for consistency in our API.
323
  translated_text = response.text
324
+
325
  # It's good practice to check if the response is empty
326
  if not translated_text:
327
+ raise HTTPException(
328
+ status_code=500,
329
+ detail="Received an empty response from the translation model.",
330
+ )
331
 
332
  return {"translated_text": translated_text}
333
 
334
  except requests.exceptions.RequestException as e:
335
  # Handle network-related errors
336
+ raise HTTPException(
337
+ status_code=502,
338
+ detail=f"Failed to communicate with the translation AI model: {e}",
339
+ )
340
  except Exception as e:
341
  # Handle other potential errors
342
+ raise HTTPException(
343
+ status_code=500,
344
+ detail=f"An unexpected error occurred during translation: {e}",
345
+ )
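Note that `/api/translate` declares `text` and `target_language` as plain function parameters, so FastAPI reads them from the query string rather than from a JSON body. A hedged sketch, with the base URL assumed:

```python
import requests

BASE_URL = "http://localhost:7860"  # placeholder

resp = requests.post(
    f"{BASE_URL}/api/translate",
    params={"text": "Annual leave entitlement", "target_language": "Bengali"},
)
resp.raise_for_status()
print(resp.json()["translated_text"])
```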
346
 
347
 
348
  # --- Model 3: Gemini (The HTML Generator) ---
 
357
  raise ValueError("GEMINI_API_KEY not found in environment variables.")
358
 
359
  genai.configure(api_key=api_key)
360
+ model = genai.GenerativeModel(model_name="gemini-2.0-flash")
361
  json_string_for_prompt = json.dumps(translated_json, indent=2)
362
 
363
  prompt = f"""
 
389
  response = model.generate_content(prompt)
390
 
391
  # Extract raw HTML from Gemini markdown code block
392
+ match = re.search(r"```html\n(.*?)\n```", response.text, re.DOTALL)
393
  raw_html = match.group(1).strip() if match else response.text.strip()
394
 
395
  # Wrap each word in clickable spans
 
411
 
412
  # --- API Endpoint Orchestrating the Pipeline ---
413
  @app.post("/api/translate_file", response_class=HTMLResponse)
414
+ async def translate_document_to_raw_html(
415
+ target_language: str = Form(...), file: UploadFile = File(...)
416
+ ):
417
  """
418
  Processes a document using the final, robust pipeline:
419
  1. Nemo extracts content to JSON.
 
430
  file_b64 = base64.b64encode(file_content).decode("utf-8")
431
  nemo_data = {
432
  "model": "nvidia/nemoretriever-parse",
433
+ "messages": [
434
+ {
435
+ "role": "user",
436
+ "content": [
437
+ {
438
+ "type": "image_url",
439
+ "image_url": {
440
+ "url": f"data:{content_type};base64,{file_b64}"
441
+ },
442
+ }
443
+ ],
444
+ }
445
+ ],
446
  "max_tokens": 2048,
447
  }
448
+ headers = {"accept": "application/json", "Content-Type": "application/json"}
449
+ model_response = requests.post(
450
+ "http://localhost:8000/v1/chat/completions",
451
+ headers=headers,
452
+ data=json.dumps(nemo_data),
453
+ )
454
  model_response.raise_for_status()
455
  nemo_response_json = model_response.json()
456
  print(nemo_response_json)
457
  print("*********** Step 1 Done ***********")
 
458
 
459
  print("*********** Step 2 in Progress ***********")
460
  # === STEP 2: Get translated JSON from Sea-Lion (The Translator) ===
 
475
  return HTMLResponse(content=final_html)
476
 
477
  except requests.exceptions.RequestException as e:
478
+ raise HTTPException(
479
+ status_code=502,
480
+ detail=f"Failed to communicate with a downstream AI model: {e}",
481
+ )
482
  except Exception as e:
483
  # This will catch any errors, including the ValueError from the Sea-Lion function
484
+ raise HTTPException(
485
+ status_code=500,
486
+ detail=f"An unexpected error occurred during processing: {e}",
487
+ )
488
+
489
+
490
+ # <<< --- START OF MVP PIPELINE ADDITIONS (Layout-Aware Version) --- >>>
491
+
492
+
493
+ async def extract_text_and_boxes_with_paddle(file_content: bytes) -> list[dict]:
494
+ """
495
+ Extracts text and their bounding boxes from an image using PaddleOCR.
496
+ Returns the full list of dictionary objects from the OCR tool.
497
+ """
498
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
499
+ temp_file.write(file_content)
500
+ temp_filepath = temp_file.name
501
+
502
+ try:
503
+
504
+ def do_ocr() -> list[dict]:
505
+ """Synchronous function to be run in a separate thread."""
506
+ client = Client("kevansoon/PaddleOCR")
507
+ # Returns a list of dictionaries, e.g., [{'text': '...', 'box': [...]}]
508
+ result = client.predict(
509
+ img=handle_file(temp_filepath),
510
+ lang="en",
511
+ api_name="/predict",
512
+ )
513
+ return result
514
+
515
+ loop = asyncio.get_running_loop()
516
+ extracted_data = await loop.run_in_executor(None, do_ocr)
517
+ return extracted_data
518
+ finally:
519
+ os.unlink(temp_filepath)
520
+
521
+
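The downstream steps only rely on the `text` and `box` keys, so the helper above is expected to return data shaped roughly like the sketch below; the sample values, and the assumption of four [x, y] corners per box, come from the Gemini prompt further down rather than from captured output of the kevansoon/PaddleOCR Space.

```python
# Illustrative shape only: each entry carries the recognised text and the
# four [x, y] corners of its bounding box, which is what
# translate_paddle_data_concurrently and generate_html_from_paddle_data
# consume below.
paddle_data = [
    {"text": "Payslip", "box": [[30, 20], [180, 20], [180, 52], [30, 52]]},
    {"text": "Basic salary", "box": [[30, 90], [210, 90], [210, 118], [30, 118]]},
    {"text": "$2,500", "box": [[320, 90], [400, 90], [400, 118], [320, 118]]},
]
```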
522
+ async def translate_paddle_data_concurrently(
523
+ paddle_data: list[dict], target_language: str
524
+ ) -> list[dict]:
525
+ """
526
+ Translates the 'text' field of each item in the paddle_data list concurrently.
527
+ """
528
+
529
+ async def call_sealion_for_translation(text_to_translate: str, lang: str) -> str:
530
+ """Helper function to call the translation API for a single piece of text."""
531
+ url = "https://api.sea-lion.ai/v1/chat/completions"
532
+ api_key = os.getenv("SEALION_API_KEY")
533
+ headers = {
534
+ "Authorization": f"Bearer {api_key}",
535
+ "Content-Type": "application/json",
536
+ }
537
+ prompt = f'Translate the following phrase to {lang} and return ONLY the translated text without explanations or extra formatting:\n\n"{text_to_translate}"'
538
+ payload = {
539
+ "max_completion_tokens": 256, # Tokens for a single phrase, not a whole doc
540
+ "messages": [{"role": "user", "content": prompt}],
541
+ "model": "aisingapore/Gemma-SEA-LION-v3-9B-IT",
542
+ }
543
+ async with httpx.AsyncClient() as client:
544
+ response = await client.post(
545
+ url, headers=headers, json=payload, timeout=30.0
546
+ )
547
+ response.raise_for_status()
548
+ response_json = response.json()
549
+ return response_json["choices"][0]["message"]["content"].strip()
550
+
551
+ # Create a list of translation tasks to run concurrently
552
+ translation_tasks = [
553
+ call_sealion_for_translation(item["text"], target_language)
554
+ for item in paddle_data
555
+ ]
556
+
557
+ # Execute all translation tasks in parallel
558
+ translated_texts = await asyncio.gather(*translation_tasks)
559
+
560
+ # Reconstruct the data structure with translated text and original boxes
561
+ translated_data = []
562
+ for i, item in enumerate(paddle_data):
563
+ translated_data.append({"text": translated_texts[i], "box": item["box"]})
564
+
565
+ return translated_data
566
+
567
+
568
+ async def generate_html_from_paddle_data(translated_data: list[dict]) -> str:
569
+ """
570
+ Receives translated OCR data (text with coordinates) and uses Gemini
571
+ to generate a layout-aware HTML document.
572
+ """
573
+ try:
574
+ api_key = os.getenv("GEMINI_API_KEY")
575
+ if not api_key:
576
+ raise ValueError("GEMINI_API_KEY not found in environment variables.")
577
+
578
+ genai.configure(api_key=api_key)
579
+ model = genai.GenerativeModel(model_name="gemini-2.5-flash")
580
+
581
+ # Convert the list of data to a JSON string for the prompt
582
+ # THE FIX IS HERE: Added ensure_ascii=False
583
+ json_data_for_prompt = json.dumps(translated_data, indent=2, ensure_ascii=False)
584
+
585
+ prompt = f"""
586
+ You are an expert system specializing in converting structured OCR data into a well-formatted HTML document that preserves the original layout.
587
+
588
+ **Your Task:**
589
+ 1. Analyze the following JSON array. Each object contains a `text` field (pre-translated) and a `box` field (four [x, y] coordinates of its bounding box).
590
+ 2. Use the `box` coordinates to understand the document's spatial structure.
591
+ - Elements with similar y-coordinates are likely on the same row.
592
+ - Elements aligned vertically form columns.
593
+ 3. Reconstruct the visual layout using semantic HTML.
594
+ - Use `<table>` for grid-like data (rows and columns). This is critical for payslips.
595
+ - Use `<h1>`, `<h2>`, `<p>` for headings and paragraphs.
596
+ - Do NOT use absolute positioning (e.g., `style="position: absolute; left: ..."`). Create a clean, flowing HTML structure.
597
+ 4. Your final output must ONLY be the raw HTML code. Do not add comments, markdown backticks, or any other explanatory text.
598
+
599
+ **OCR Data to process:**
600
+ ```json
601
+ {json_data_for_prompt}
602
+ ```
603
+ """
604
+
605
+ def do_request():
606
+ """Synchronous function to be run in a separate thread."""
607
+ response = model.generate_content(prompt)
608
+ match = re.search(r"```html\n(.*?)\n```", response.text, re.DOTALL)
609
+ raw_html = match.group(1).strip() if match else response.text.strip()
610
+ # Reuse existing functions to make the HTML interactive
611
+ wrapped_html = wrap_words_with_spans(raw_html)
612
+ final_html = inject_dropdown_script(wrapped_html)
613
+ return final_html
614
+
615
+ return await asyncio.to_thread(do_request)
616
+ except Exception as e:
617
+ error_message = f"An error occurred while generating the HTML structure with Gemini: {str(e)}"
618
+ return f"<html><body><h1>HTML Generation Error</h1><p>{html.escape(error_message)}</p></body></html>"
619
+
620
+
621
+ @app.post("/api/translate_file_mvp", response_class=HTMLResponse)
622
+ async def translate_document_mvp(
623
+ target_language: str = Form(...), file: UploadFile = File(...)
624
+ ):
625
+ """
626
+ Processes a document using the Layout-Aware MVP pipeline:
627
+ 1. PaddleOCR extracts text and coordinates.
628
+ 2. Sea-Lion translates each text block concurrently.
629
+ 3. Gemini uses the translated text and original coordinates to generate layout-aware HTML.
630
+ """
631
+ content_type = file.content_type
632
+ if content_type not in ["image/png", "image/jpeg"]:
633
+ raise HTTPException(
634
+ status_code=400,
635
+ detail="Unsupported file type for MVP pipeline. Please use PNG or JPG.",
636
+ )
637
+
638
+ try:
639
+ file_content = await file.read()
640
+
641
+ # === MVP STEP 1: Extract text and coordinates with PaddleOCR ===
642
+ paddle_data = await extract_text_and_boxes_with_paddle(file_content)
643
+ if not paddle_data:
644
+ raise HTTPException(
645
+ status_code=400,
646
+ detail="PaddleOCR could not extract any text from the image.",
647
+ )
648
+
649
+ # === MVP STEP 2: Translate each text block concurrently ===
650
+ translated_data = await translate_paddle_data_concurrently(
651
+ paddle_data, target_language
652
+ )
653
+
654
+ # === MVP STEP 3: Generate final, layout-aware HTML from Gemini ===
655
+ final_html = await generate_html_from_paddle_data(translated_data)
656
+
657
+ return HTMLResponse(content=final_html)
658
+
659
+ except httpx.HTTPStatusError as e:
660
+ raise HTTPException(
661
+ status_code=e.response.status_code,
662
+ detail=f"Error from a downstream AI service: {e.response.text}",
663
+ )
664
+ except Exception as e:
665
+ raise HTTPException(
666
+ status_code=500,
667
+ detail=f"An unexpected error occurred during MVP processing: {str(e)}",
668
+ )
669
+
670
+
671
+ # <<< --- END OF MVP PIPELINE ADDITIONS (Layout-Aware Version) --- >>>
672
+
673
+
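A hedged end-to-end sketch of calling the new MVP endpoint; the base URL, port, and file names are assumptions, and the request mirrors the PNG/JPG check and `target_language` form field declared above.

```python
import requests

BASE_URL = "http://localhost:7860"  # placeholder

with open("payslip.png", "rb") as f:
    resp = requests.post(
        f"{BASE_URL}/api/translate_file_mvp",
        data={"target_language": "Burmese"},
        files={"file": ("payslip.png", f, "image/png")},
    )

resp.raise_for_status()
# The endpoint returns layout-aware HTML, so save the body as a page.
with open("payslip_translated.html", "w", encoding="utf-8") as out:
    out.write(resp.text)
```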
674
  @app.post("/api/verify_document_keywords")
675
  async def verify_document_keywords(
676
+ file: UploadFile = File(...),
677
+ analysis_type: str = Form("legality"),
678
+ search_context: str = Form("Singapore employment law"),
679
  ):
680
  """
681
+ Receives an HTML file and a configuration via form data, then uses the
682
  agent-to-agent RAG workflow to identify and verify key claims.
683
  """
684
  # Check if the uploaded file is an HTML file
685
  if file.content_type != "text/html":
686
+ raise HTTPException(
687
+ status_code=400, detail="Unsupported file type. Please upload a .html file."
688
+ )
689
 
690
  try:
691
  # Step 1: Read content from the uploaded file and extract text
692
  html_content_bytes = await file.read()
693
+ html_content = html_content_bytes.decode("utf-8")
694
  document_text = extract_text_from_html(html_content)
695
 
696
  if not document_text.strip():
697
  raise HTTPException(
698
  status_code=400,
699
+ detail="Could not extract any meaningful text from the provided HTML content.",
700
  )
701
 
702
  # Step 2: Prepare the configuration and call the new analysis tool
703
+ config = {"analysis_type": analysis_type, "search_context": search_context}
704
  analysis_results = await analyze_keywords_with_web_search(document_text, config)
705
 
706
  # Step 3: Handle potential errors from the tool
707
+ if "error" in analysis_results:
708
+ raise HTTPException(status_code=500, detail=analysis_results["error"])
709
+
710
  # Step 4: Return the successful analysis
711
  return analysis_results
712
 
713
  except Exception as e:
714
  # Catch any other unexpected errors during the process
715
+ raise HTTPException(
716
+ status_code=500, detail=f"An unexpected error occurred: {str(e)}"
717
+ )
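A hedged sketch of posting a document to `/api/verify_document_keywords`; the form values shown are the defaults declared above, while the base URL and file name are assumptions.

```python
import requests

BASE_URL = "http://localhost:7860"  # placeholder

with open("contract.html", "rb") as f:
    resp = requests.post(
        f"{BASE_URL}/api/verify_document_keywords",
        data={
            "analysis_type": "legality",
            "search_context": "Singapore employment law",
        },
        files={"file": ("contract.html", f, "text/html")},
    )

resp.raise_for_status()
print(resp.json())  # claims identified and verified by the RAG workflow
```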
718
 
719
 
720
+ # testing clerk backend authentication
721
  # @app.post("/upload")
722
  # async def upload_file(
723
  # authorization: str = Header(...),
 
735
  # # You can securely store this file, e.g., to Supabase or local
736
  # return {"message": f"File uploaded by Clerk user {user_id}"}
737
 
738
+
739
  @app.post("/upload")
740
+ async def upload_file(authorization: str = Header(...), file: UploadFile = File(...)):
741
  if not authorization.startswith("Bearer "):
742
  raise HTTPException(status_code=401, detail="Missing Bearer token")
743
 
 
755
  "Authorization": f"Bearer {SUPABASE_SERVICE_ROLE_KEY}",
756
  "Content-Type": file.content_type,
757
  },
758
+ content=await file.read(),
759
  )
760
 
761
  if upload_resp.status_code != 200:
762
+ raise HTTPException(
763
+ status_code=500, detail="Failed to upload to Supabase Storage"
764
+ )
765
 
766
  file_url = f"user-documents/{filename}"
767
 
 
773
  "Authorization": f"Bearer {SUPABASE_SERVICE_ROLE_KEY}",
774
  "apikey": SUPABASE_SERVICE_ROLE_KEY,
775
  "Content-Type": "application/json",
776
+ "Prefer": "return=representation",
777
  },
778
  json={
779
  "user_id": user_id,
780
  "filename": filename.split("/")[-1],
781
+ "file_url": file_url,
782
+ },
783
  )
784
 
785
  if insert_resp.status_code >= 300:
786
+ raise HTTPException(
787
+ status_code=500, detail="Failed to insert document metadata"
788
+ )
789
 
790
  return {"message": f"File uploaded as {filename}"}
791
 
792
 
793
  @app.get("/api/documents")
794
+ async def get_user_documents(
795
+ credentials: HTTPAuthorizationCredentials = Depends(security),
796
+ ):
797
  token = credentials.credentials
798
  claims = await verify_clerk_jwt(token)
799
  user_id = claims.get("sub")
 
835
  )
836
 
837
  if signed_url_resp.status_code == 200:
838
+ print(
839
+ f"{SUPABASE_URL}/storage/v1{signed_url_resp.json().get('signedURL')}"
840
+ )
841
+ doc["signed_url"] = (
842
+ f"{SUPABASE_URL}/storage/v1{signed_url_resp.json().get('signedURL')}"
843
+ )
844
+
845
  else:
846
  doc["signed_url"] = None
847
  print(documents)
848
 
849
+ return documents
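Finally, a hedged sketch of listing a user's documents via `/api/documents`; the Clerk session JWT and base URL are placeholders.

```python
import requests

BASE_URL = "http://localhost:7860"   # placeholder
CLERK_JWT = "<clerk-session-jwt>"    # obtained from the signed-in frontend

resp = requests.get(
    f"{BASE_URL}/api/documents",
    headers={"Authorization": f"Bearer {CLERK_JWT}"},
)
resp.raise_for_status()
for doc in resp.json():
    # Each record should carry a short-lived signed_url (or None on failure).
    print(doc.get("filename"), doc.get("signed_url"))
```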
requirements.txt CHANGED
@@ -26,7 +26,6 @@ google-auth==2.40.3
26
  google-auth-httplib2==0.2.0
27
  google-generativeai==0.8.5
28
  googleapis-common-protos==1.70.0
29
- gradio_client==1.11.0
30
  grpcio==1.74.0
31
  grpcio-status==1.71.2
32
  h11==0.16.0
@@ -94,3 +93,5 @@ urllib3==2.5.0
94
  uvicorn==0.35.0
95
  watchfiles==1.1.0
96
  websockets==15.0.1
 
26
  google-auth-httplib2==0.2.0
27
  google-generativeai==0.8.5
28
  googleapis-common-protos==1.70.0
 
29
  grpcio==1.74.0
30
  grpcio-status==1.71.2
31
  h11==0.16.0
 
93
  uvicorn==0.35.0
94
  watchfiles==1.1.0
95
  websockets==15.0.1
96
+ langextract
97
+ gradio_client
tools/__pycache__/tools.cpython-310.pyc CHANGED
Binary files a/tools/__pycache__/tools.cpython-310.pyc and b/tools/__pycache__/tools.cpython-310.pyc differ