"""Gradio demo: recognize Khmer handwriting with a pretrained vision-to-text model.

Loads the processor and model named by ``MODEL_REPO`` once at import time,
builds a small Blocks UI with an image editor and a text box, and launches it.
"""

import gradio as gr
import torch
from transformers import AutoModelForVision2Seq, AutoProcessor

MODEL_REPO = "vitouphy/khmer-handwriting-trocr-p1-f1"

# Loaded once at module import so every request reuses the same objects.
processor = AutoProcessor.from_pretrained(MODEL_REPO)
model = AutoModelForVision2Seq.from_pretrained(MODEL_REPO)


def predict(editor_value) -> str:
    """Run OCR on the image held by the editor component.

    Args:
        editor_value: Value delivered by the ``gr.ImageEditor`` input. It is
            read as a mapping with a ``'composite'`` entry; may be ``None``.
            (Presumably a PIL image, since the editor is created with
            ``type="pil"`` -- confirm against the Gradio version in use.)

    Returns:
        The decoded text of the first generated sequence, or the message
        ``"No image provided."`` when there is no editor value or no
        composite image.
    """
    if editor_value is None:
        return "No image provided."

    image = editor_value.get('composite')
    if image is None:
        return "No image provided."

    # NOTE(review): the editor is configured with image_mode="RGB", so this
    # should normally be a no-op; it is a cheap safeguard so that an image in
    # another mode (e.g. with an alpha channel) is not handed to the
    # processor as-is.
    if getattr(image, "mode", "RGB") != "RGB":
        image = image.convert("RGB")

    pixel_values = processor(images=image, return_tensors="pt").pixel_values

    # Inference only: skip gradient tracking while generating.
    with torch.no_grad():
        generated_ids = model.generate(pixel_values)

    # batch_decode returns one string per sequence; a single image was passed,
    # so only the first entry is used.
    decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)
    return decoded[0]


with gr.Blocks() as demo:
    gr.Markdown("# Khmer Handwriting OCR Demo\nDraw or upload a character or word below:")
    with gr.Row():
        editor = gr.ImageEditor(
            type="pil",
            image_mode="RGB",
            label="Draw or upload here",
            width=512,
            height=512,
        )
        output = gr.Textbox(label="Recognized Text")
    recognize_btn = gr.Button("Recognize")
    # Clicking the button sends the editor's value to predict() and shows the
    # returned string in the text box.
    recognize_btn.click(predict, inputs=editor, outputs=output)

demo.launch()