import gradio as gr
from ultralytics import YOLO
import numpy as np
import cv2
from PIL import Image
import random
from transformers import pipeline

# ---------------------------
# Load Models
# ---------------------------
# Text model (tiny test LLM; swap in a larger model for useful output)
text_gen = pipeline("text-generation", model="sshleifer/tiny-gpt2")

# YOLOv8 segmentation (nano version for speed)
yolo_model = YOLO("yolov8n-seg.pt")  # change to yolov8s-seg.pt for more accuracy

# ---------------------------
# Image Segmentation
# ---------------------------
def segment_image(image: Image.Image):
    # Pass the PIL image directly: Ultralytics handles the RGB->BGR conversion
    # itself, whereas a raw RGB numpy array would be misread as BGR.
    results = yolo_model.predict(image)[0]
    overlay = np.array(image).copy()
    annotations = []
    if results.masks is not None:
        for mask, cls in zip(results.masks.xy, results.boxes.cls):
            pts = np.array(mask, dtype=np.int32)
            color = [random.randint(0, 255) for _ in range(3)]
            cv2.fillPoly(overlay, [pts], color)
            annotations.append((mask.tolist(), yolo_model.names[int(cls)]))
    return Image.fromarray(overlay), annotations

# ---------------------------
# Video Segmentation
# ---------------------------
def segment_video(video):
    cap = cv2.VideoCapture(video)
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0  # some containers report 0 fps; fall back to 30
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # "mp4v" may not play inline in every browser; re-encode to H.264 if the
    # preview stays blank.
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out_path = "output.mp4"
    out = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        # OpenCV frames are BGR numpy arrays, which YOLO expects for array input
        results = yolo_model.predict(frame)[0]
        overlay = frame.copy()
        if results.masks is not None:
            for mask, cls in zip(results.masks.xy, results.boxes.cls):
                pts = np.array(mask, dtype=np.int32)
                color = [random.randint(0, 255) for _ in range(3)]
                cv2.fillPoly(overlay, [pts], color)
        out.write(overlay)
    cap.release()
    out.release()
    return out_path

# ---------------------------
# Text Generation
# ---------------------------
def generate_text(prompt):
    # max_new_tokens bounds only the generated continuation; the older
    # max_length would also count the prompt tokens.
    result = text_gen(prompt, max_new_tokens=100, num_return_sequences=1)
    return result[0]["generated_text"]

# ---------------------------
# Gradio UI
# ---------------------------
with gr.Blocks() as demo:
    gr.Markdown("# 🔥 Multi-Modal Playground\nTry out **Text + Image + Video Segmentation** in one app!")

    with gr.Tab("💬 Text Generation"):
        inp_text = gr.Textbox(label="Enter your prompt")
        out_text = gr.Textbox(label="Generated text")
        btn_text = gr.Button("Generate")
        btn_text.click(generate_text, inputs=inp_text, outputs=out_text)

    with gr.Tab("🖼️ Image Segmentation"):
        inp_img = gr.Image(type="pil", label="Upload Image")
        out_img = gr.Image(type="pil", label="Segmented Image")
        out_ann = gr.JSON(label="Annotations")
        btn_img = gr.Button("Run Segmentation")
        btn_img.click(segment_image, inputs=inp_img, outputs=[out_img, out_ann])

    with gr.Tab("🎥 Video Segmentation"):
        inp_vid = gr.Video(label="Upload Video")
        out_vid = gr.Video(label="Segmented Video")
        btn_vid = gr.Button("Run Segmentation")
        btn_vid.click(segment_video, inputs=inp_vid, outputs=out_vid)

# ssr_mode is a Gradio 5.x flag; drop it on older versions
demo.launch(server_name="0.0.0.0", server_port=7860, share=False, ssr_mode=False)
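
# ---------------------------
# Quick smoke test (optional)
# ---------------------------
# A minimal sketch for exercising segment_image outside the UI. "sample.jpg"
# is a hypothetical local file, not part of the app; point it at a real image
# before uncommenting. Left commented out because demo.launch() above blocks
# until the server stops.
#
# img = Image.open("sample.jpg")
# seg_img, anns = segment_image(img)
# seg_img.save("segmented_sample.jpg")
# print(f"{len(anns)} instance(s): {[label for _, label in anns]}")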