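# app.py: Nougat OCR Gradio Space (multi-page PDF to Markdown/TXT)
#
# Suggested requirements.txt contents for the Space, inferred from the imports
# below; package names are the standard PyPI names, and version pins are left
# to the reader (this list is a hint, not part of the original app):
#
#   gradio
#   transformers
#   torch
#   pillow
#   pymupdf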
import gradio as gr
from transformers import pipeline
import torch
from PIL import Image
import io
import fitz  # PyMuPDF
import tempfile  # Used to create a temporary file for the download output

# --- Model Loading ---
# Nougat is built for document/academic-paper OCR. `facebook/nougat-small` is a
# good starting point; `facebook/nougat-base` is more accurate but needs more
# GPU memory.
try:
    # Pick a GPU if one is available, otherwise fall back to CPU
    device = "cuda:0" if torch.cuda.is_available() else "cpu"

    # Load Nougat through the image-to-text pipeline
    nougat_pipeline = pipeline(
        "image-to-text",
        model="facebook/nougat-small",
        device=device,
        max_new_tokens=1024,  # cap the length of the generated transcription
        trust_remote_code=True,
    )
    print(f"Nougat model loaded successfully on {device}")
except Exception as e:
    print(f"Error loading Nougat model: {e}")
    nougat_pipeline = None


# --- OCR Function (multi-page PDF, aggregated Markdown + TXT download) ---
def nougat_ocr(document):
    """
    Run Nougat OCR on every page of an uploaded PDF, aggregate the results,
    and write them to a temporary .txt file for download.

    Returns a (txt_file_path, markdown_text) tuple matching the two Gradio outputs.
    """
    if nougat_pipeline is None:
        return None, "Error: Nougat model failed to load. Check your Space hardware and dependencies."

    # Gradio may pass either a plain file path (str) or a file-like object with a
    # .name attribute, depending on the Gradio version, so handle both.
    file_path = document if isinstance(document, str) else document.name
    all_markdown_text = []

    # Only PDFs are supported by the multi-page output logic
    if not file_path.lower().endswith(".pdf"):
        return None, "Error: Please upload a PDF file for multi-page processing."

    try:
        doc = fitz.open(file_path)
        if len(doc) == 0:
            return None, "Error: PDF contains no pages."

        # Process the pages one by one
        for i in range(len(doc)):
            page = doc.load_page(i)

            # Render the page at a high DPI for better OCR quality
            pix = page.get_pixmap(dpi=300)

            # Convert the pixmap to a PIL image
            img_data = pix.tobytes("png")
            image = Image.open(io.BytesIO(img_data)).convert("RGB")

            # Run OCR inference on this page
            output = nougat_pipeline(image)
            markdown_text = output[0]["generated_text"] if output else f"[OCR FAILED FOR PAGE {i + 1}]"

            # Add a clear page separator followed by the page content
            all_markdown_text.append(f"\n\n\n# --- PAGE {i + 1} ---\n\n{markdown_text}")

        doc.close()
        aggregated_text = "".join(all_markdown_text)

        # Save the aggregated result to a temporary file for the download component
        with tempfile.NamedTemporaryFile(
            mode="w", delete=False, suffix=".txt", encoding="utf-8"
        ) as tmp_file:
            tmp_file.write(aggregated_text)
            temp_file_path = tmp_file.name

        # Return the file path (for gr.File) and the text (for the Markdown preview)
        return temp_file_path, aggregated_text

    except Exception as e:
        return None, f"An error occurred during processing: {e}"


# --- Gradio Interface ---
title = "🍫 Multi-Page Nougat OCR to TXT"
description = "Upload a PDF. The model will process each page sequentially and output a TXT file for download, along with a Markdown preview."

iface = gr.Interface(
    fn=nougat_ocr,
    inputs=gr.File(
        label="Upload PDF Document",
        file_types=[".pdf"],  # Restrict uploads to PDF
        file_count="single",
    ),
    outputs=[
        gr.File(label="Download OCR Output (.txt)", file_count="single", file_types=[".txt"]),  # Downloadable file
        gr.Markdown(label="Preview (Formatted Markdown)", visible=True),  # Inline preview
    ],
    title=title,
    description=description,
    allow_flagging="auto",
    theme=gr.themes.Soft(),
)

if __name__ == "__main__":
    iface.launch()
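# --- Optional local smoke test (sketch only) ---
# A minimal, hedged example of exercising nougat_ocr() without launching the
# Gradio UI. "sample.pdf" and _smoke_test are hypothetical and not part of the
# app; uncomment and adapt as needed.
#
# def _smoke_test(pdf_path="sample.pdf"):
#     txt_path, preview = nougat_ocr(pdf_path)  # nougat_ocr also accepts a plain path string
#     print("TXT saved to:", txt_path)
#     print((preview or "")[:500])
#
# _smoke_test()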