# pdf_testing / app.py
# Uploaded by AkashKumarave -- commit 00d3b13 ("Update app.py", verified)
import base64
import logging
import math

import fitz  # PyMuPDF
from fastapi import FastAPI, File, UploadFile
from fastapi.middleware.cors import CORSMiddleware
# Set up logging for the Hugging Face console.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI()

# Enable CORS so the Figma plugin can call this API from any origin.
# Credentials are disabled on purpose: wildcard origins/methods/headers must
# not be combined with allow_credentials=True (browsers reject
# "Access-Control-Allow-Origin: *" on credentialed requests, and the FastAPI
# CORS documentation requires explicit lists in that case).
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
def safe_f(val, default=0.0):
    """Convert ``val`` to a finite float, falling back to ``default``.

    Args:
        val: Any value; ``None``, non-numeric values, values too large for a
            float, and NaN/infinity all yield ``default``.
        default: Value returned when ``val`` cannot be used.

    Returns:
        ``float(val)`` when that is a finite number, otherwise ``default``.
    """
    if val is None:
        return default
    try:
        number = float(val)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: e.g. an int too large to be represented as a float.
        return default
    # NaN and +/-inf are not valid JSON numbers, so never hand them back.
    return number if math.isfinite(number) else default
def get_figma_color(color):
    """Convert a PDF colour value to a Figma-style ``{"r", "g", "b"}`` dict.

    Accepted inputs:
        * ``int`` -- a packed sRGB value ``0xRRGGBB`` (the format used for
          text span colours); each channel is scaled to the 0-1 range.
        * 1-component sequence -- grayscale, replicated to all channels.
        * 3-component sequence -- RGB, used as given.
        * 4-component sequence -- CMYK, converted to RGB.
        * longer sequences -- the first three components are used.

    Returns:
        A dict with float ``r``/``g``/``b`` entries, or ``None`` when
        ``color`` is ``None`` or has an unsupported type or length.
    """
    if color is None:
        return None

    # bool is a subclass of int but is never a colour value.
    if isinstance(color, int) and not isinstance(color, bool):
        return {
            "r": ((color >> 16) & 0xFF) / 255.0,
            "g": ((color >> 8) & 0xFF) / 255.0,
            "b": (color & 0xFF) / 255.0,
        }

    if not isinstance(color, (list, tuple)):
        return None

    try:
        channels = [safe_f(component) for component in color]
    except (TypeError, ValueError, OverflowError):
        return None

    count = len(channels)
    if count == 1:
        gray = channels[0]
        return {"r": gray, "g": gray, "b": gray}
    if count == 4:
        # CMYK -> RGB; the first three CMYK channels are not RGB channels.
        cyan, magenta, yellow, black = channels
        return {
            "r": (1.0 - cyan) * (1.0 - black),
            "g": (1.0 - magenta) * (1.0 - black),
            "b": (1.0 - yellow) * (1.0 - black),
        }
    if count >= 3:
        return {"r": channels[0], "g": channels[1], "b": channels[2]}
    return None
@app.get("/")
async def root():
    """Report that the service is up and which PDF engine backs it."""
    health = dict(status="PDF Converter is Online", engine="PyMuPDF")
    return health
def _text_and_image_elements(page):
    """Return the TEXT and IMAGE elements found in ``page.get_text("dict")``."""
    elements = []
    for block in page.get_text("dict").get("blocks", []):
        block_type = block.get("type")
        if block_type == 0:  # text block: one element per span
            for line in block.get("lines", []):
                for span in line.get("spans", []):
                    bbox = span["bbox"]
                    elements.append({
                        "type": "TEXT",
                        "content": span.get("text", ""),
                        "x": safe_f(bbox[0]),
                        "y": safe_f(bbox[1]),
                        "size": safe_f(span.get("size"), 12.0),
                        "color": get_figma_color(span.get("color")),
                    })
        elif block_type == 1:  # image block: raw bytes are base64-encoded
            bbox = block["bbox"]
            elements.append({
                "type": "IMAGE",
                "bytes": base64.b64encode(block["image"]).decode("utf-8"),
                "x": safe_f(bbox[0]),
                "y": safe_f(bbox[1]),
                "width": safe_f(bbox[2] - bbox[0]),
                "height": safe_f(bbox[3] - bbox[1]),
            })
    return elements


def _drawing_to_svg_path(drawing):
    """Translate one ``get_drawings()`` entry into an SVG-style path string."""

    def xy(point):
        return (safe_f(point.x), safe_f(point.y))

    def fmt(coords):
        return f"{coords[0]} {coords[1]}"

    commands = []
    cursor = None  # end of the previous segment; None forces a new "M"
    for item in drawing.get("items", []):
        op = item[0]
        if op in ("l", "c"):
            start, end = xy(item[1]), xy(item[-1])
            # Only move the pen when this segment does not continue the
            # previous one; otherwise connected outlines would be split into
            # separate zero-area subpaths and could not be filled.
            if start != cursor:
                commands.append(f"M {fmt(start)}")
            if op == "l":
                commands.append(f"L {fmt(end)}")
            else:
                commands.append(
                    f"C {fmt(xy(item[2]))} {fmt(xy(item[3]))} {fmt(end)}"
                )
            cursor = end
        elif op == "re":
            rect = item[1]
            x0, y0 = safe_f(rect.x0), safe_f(rect.y0)
            x1, y1 = safe_f(rect.x1), safe_f(rect.y1)
            commands.append(
                f"M {x0} {y0} L {x1} {y0} L {x1} {y1} L {x0} {y1} Z"
            )
            cursor = None
        elif op == "qu":
            quad = item[1]
            # Walk the corners around the perimeter, not in ul/ur/ll/lr order.
            corners = [xy(p) for p in (quad.ul, quad.ur, quad.lr, quad.ll)]
            commands.append(
                f"M {fmt(corners[0])} L {fmt(corners[1])} "
                f"L {fmt(corners[2])} L {fmt(corners[3])} Z"
            )
            cursor = None

    # Honour the drawing's closePath flag for open line/curve outlines.
    if commands and cursor is not None and drawing.get("closePath"):
        commands.append("Z")
    return " ".join(commands)


def _vector_elements(page):
    """Return one VECTOR element per non-empty drawing on ``page``."""
    elements = []
    for drawing in page.get_drawings():
        svg_path = _drawing_to_svg_path(drawing)
        if not svg_path:
            continue
        elements.append({
            "type": "VECTOR",
            "path": svg_path,
            "fill": get_figma_color(drawing.get("fill")),
            "stroke": get_figma_color(drawing.get("color")),
            "strokeWeight": safe_f(drawing.get("width"), 1.0),
        })
    return elements


@app.post("/convert")
async def convert_pdf(file: UploadFile = File(...)):
    """Convert an uploaded PDF into a JSON description of its pages.

    Args:
        file: The uploaded PDF.

    Returns:
        ``{"pages": [...]}`` where each page has ``width``, ``height`` and an
        ``elements`` list of TEXT, IMAGE and VECTOR entries (text and images
        first, then vectors). On any failure the error is logged and
        ``{"error": "<message>"}`` is returned instead of raising.
    """
    try:
        logger.info("Processing: %s", file.filename)
        pdf_bytes = await file.read()
        pages_data = []
        # The context manager closes the document even if extraction fails.
        with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
            for page in doc:
                elements = _text_and_image_elements(page)
                elements.extend(_vector_elements(page))
                pages_data.append({
                    "width": safe_f(page.rect.width),
                    "height": safe_f(page.rect.height),
                    "elements": elements,
                })
        return {"pages": pages_data}
    except Exception as e:
        # Top-level boundary: report the failure to the client as JSON and
        # keep the traceback in the server log.
        logger.exception("Error during conversion: %s", e)
        return {"error": str(e)}
if __name__ == "__main__":
    import uvicorn

    # Important: Hugging Face uses port 7860, so bind there on all interfaces.
    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **server_options)