import gradio as gr
import torch
from transformers import AutoProcessor, AutoModelForVision2Seq
# Hugging Face Hub repository id of the checkpoint served by this demo.
MODEL_REPO = "vitouphy/khmer-handwriting-trocr-p1-f1"

# Both objects are created once at import time and shared by every call to
# predict(), so the checkpoint is not reloaded per request.
processor = AutoProcessor.from_pretrained(MODEL_REPO)
model = AutoModelForVision2Seq.from_pretrained(MODEL_REPO)
def predict(editor_value) -> str:
    """Recognise the text in the image held by the editor component.

    Args:
        editor_value: Value delivered by the ``gr.ImageEditor`` input. It is
            read as a mapping whose ``"composite"`` entry holds the image to
            recognise. ``None``, a missing ``"composite"`` key, or a ``None``
            composite are all treated as "no image".

    Returns:
        The first decoded sequence generated by the model, or the literal
        string ``"No image provided."`` when there is no image to process.
    """
    composite = None if editor_value is None else editor_value.get("composite")
    if composite is None:
        return "No image provided."

    # The editor is configured with image_mode="RGB", so this is normally a
    # plain copy; converting here keeps the function independent of that UI
    # setting (assumes the composite is a PIL image, as type="pil" requests).
    image = composite.convert("RGB")
    pixel_values = processor(images=image, return_tensors="pt").pixel_values

    # Inference only: skip autograd bookkeeping while generating token ids.
    with torch.no_grad():
        generated_ids = model.generate(pixel_values)

    # batch_decode returns one string per sequence; a single image was sent.
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
# NOTE(review): the nesting below is reconstructed; confirm that the Textbox
# is meant to sit in the same row as the editor and the button below the row.
with gr.Blocks() as demo:
    gr.Markdown("# Khmer Handwriting OCR Demo\nDraw or upload a character or word below:")
    with gr.Row():
        # type="pil" / image_mode="RGB": predict() receives PIL RGB images.
        editor = gr.ImageEditor(
            type="pil",
            image_mode="RGB",
            label="Draw or upload here",
            width=512,
            height=512,
        )
        output = gr.Textbox(label="Recognized Text")
    recognize_btn = gr.Button("Recognize")
    # Event listeners must be registered while the Blocks context is open.
    recognize_btn.click(predict, inputs=editor, outputs=output)

# Start the web server for the demo.
demo.launch()