Spaces:

trojblue
/

annotate-and-concatenate-images

Sleeping

App Files Files Community

trojblue committed 8 days ago

Commit

d8004fd

verified ·

1 Parent(s): 5859393

Update app.py

Browse files

Files changed (1) hide show

app.py +221 -371

app.py CHANGED Viewed

@@ -1,378 +1,156 @@
-import io
-import json
-import struct
-import zlib
-from typing import List, Dict, Any, Optional, Union
-import gradio as gr
-from PIL import Image, PngImagePlugin
-# -------- THEME (similar to your example) --------
-theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="violet", radius_size="lg")
-# =================================================
-# ========== PNG Text Chunk Reader (tab 1) ========
-# =================================================
-PNG_SIGNATURE = b"\x89PNG\r\n\x1a\n"
-def _parse_png_text_chunks(data: bytes) -> List[Dict[str, Any]]:
-    """
-    Parse PNG chunks and extract tEXt, zTXt, and iTXt entries.
-    """
-    if not data.startswith(PNG_SIGNATURE):
-        raise ValueError("Not a PNG file.")
-    pos = len(PNG_SIGNATURE)
-    out = []
-    while pos + 8 <= len(data):
-        # Read chunk length and type
-        length = struct.unpack(">I", data[pos:pos+4])[0]
-        ctype = data[pos+4:pos+8]
-        pos += 8
-        if pos + length + 4 > len(data):
-            break
-        cdata = data[pos:pos+length]
-        pos += length
-        # Skip CRC (4 bytes)
-        pos += 4
-        if ctype == b"tEXt":
-            # Latin-1: key\0value
-            try:
-                null_idx = cdata.index(b"\x00")
-                key = cdata[:null_idx].decode("latin-1", "replace")
-                text = cdata[null_idx+1:].decode("latin-1", "replace")
-                out.append({"type": "tEXt", "keyword": key, "text": text})
-            except Exception:
-                pass
-        elif ctype == b"zTXt":
-            # key\0compression_method(1) + compressed data
-            try:
-                null_idx = cdata.index(b"\x00")
-                key = cdata[:null_idx].decode("latin-1", "replace")
-                method = cdata[null_idx+1:null_idx+2]
-                comp = cdata[null_idx+2:]
-                if method == b"\x00":  # zlib/deflate
-                    text = zlib.decompress(comp).decode("latin-1", "replace")
-                    out.append({"type": "zTXt", "keyword": key, "text": text})
-            except Exception:
-                pass
-        elif ctype == b"iTXt":
-            # UTF-8: key\0flag(1)\0method(1)\0lang\0translated\0text
-            try:
-                i0 = cdata.index(b"\x00")
-                key = cdata[:i0].decode("latin-1", "replace")
-                comp_flag = cdata[i0+1:i0+2]
-                comp_method = cdata[i0+2:i0+3]
-                rest = cdata[i0+3:]
-                i1 = rest.index(b"\x00")
-                language_tag = rest[:i1].decode("ascii", "replace")
-                rest2 = rest[i1+1:]
-                i2 = rest2.index(b"\x00")
-                translated_keyword = rest2[:i2].decode("utf-8", "replace")
-                text_bytes = rest2[i2+1:]
-                if comp_flag == b"\x01" and comp_method == b"\x00":
-                    text = zlib.decompress(text_bytes).decode("utf-8", "replace")
-                else:
-                    text = text_bytes.decode("utf-8", "replace")
-                out.append({
-                    "type": "iTXt",
-                    "keyword": key,
-                    "language_tag": language_tag,
-                    "translated_keyword": translated_keyword,
-                    "text": text,
-                })
-            except Exception:
-                pass
-        if ctype == b"IEND":
-            break
-    return out
-def read_png_info(file_obj) -> Dict[str, Any]:
-    """
-    Given an uploaded file (path or file-like), return structured PNG text info.
-    Also surface Pillow's .info (which often contains 'parameters').
-    """
-    if hasattr(file_obj, "read"):
-        data = file_obj.read()
-    else:
-        with open(file_obj, "rb") as f:
-            data = f.read()
-    chunks = _parse_png_text_chunks(data)
-    try:
-        img = Image.open(io.BytesIO(data))
-        pil_info = dict(img.info)
-        for k, v in list(pil_info.items()):
-            if isinstance(v, (bytes, bytearray)):
-                try:
-                    pil_info[k] = v.decode("utf-8", "replace")
-                except Exception:
-                    pil_info[k] = repr(v)
-            elif isinstance(v, PngImagePlugin.PngInfo):
-                pil_info[k] = "PngInfo(...)"
-    except Exception as e:
-        pil_info = {"_error": f"Pillow failed to open PNG: {e}"}
-    response = {
-        "found_text_chunks": chunks,
-        "pil_info": pil_info,
-        "quick_fields": {
-            "parameters": next((c["text"] for c in chunks if c.get("keyword") == "parameters"), pil_info.get("parameters")),
-            "Software": next((c["text"] for c in chunks if c.get("keyword") == "Software"), pil_info.get("Software")),
-        },
-    }
-    return response
-def infer_png_text(file):
-    if file is None:
-        return {"error": "Please upload a PNG file."}
-    try:
-        return read_png_info(file.name if hasattr(file, "name") else file)
-    except Exception as e:
-        return {"error": str(e)}
-# =================================================
-# ========== NovelAI LSB Reader (tab 2) ===========
-# =================================================
-# (User-provided logic, lightly wrapped for Gradio.)
-import numpy as np
-import gzip
 from pathlib import Path
-from io import BytesIO
-def _pack_lsb_bytes(alpha: np.ndarray) -> np.ndarray:
-    """
-    Pack the least significant bits (LSB) from an image's alpha channel into bytes.
-    """
-    alpha = alpha.T.reshape((-1,))
-    alpha = alpha[:(alpha.shape[0] // 8) * 8]
-    alpha = np.bitwise_and(alpha, 1)
-    alpha = alpha.reshape((-1, 8))
-    alpha = np.packbits(alpha, axis=1)
-    return alpha
-class LSBReader:
-    """
-    Utility class for reading hidden data from an image's alpha channel using LSB encoding.
-    """
-    def __init__(self, data: np.ndarray):
-        self.data = _pack_lsb_bytes(data[..., -1])
-        self.pos = 0
-    def read_bytes(self, n: int) -> bytearray:
-        """Read `n` bytes from the bitstream."""
-        n_bytes = self.data[self.pos:self.pos + n]
-        self.pos += n
-        return bytearray(n_bytes.flatten().tolist())
-    def read_int32(self) -> Optional[int]:
-        """Read a 4-byte big-endian integer from the bitstream."""
-        bytes_list = self.read_bytes(4)
-        return int.from_bytes(bytes_list, 'big') if len(bytes_list) == 4 else None
-def _extract_nai_metadata_from_image(image: Image.Image) -> dict:
-    """
-    Extract embedded metadata from a PNG image generated by NovelAI.
-    """
-    image_array = np.array(image.convert("RGBA"))
-    if image_array.shape[-1] != 4 or len(image_array.shape) != 3:
-        raise ValueError("Image must be in RGBA format")
-    reader = LSBReader(image_array)
-    magic = "stealth_pngcomp"
-    if reader.read_bytes(len(magic)).decode("utf-8", "replace") != magic:
-        raise ValueError("Invalid magic number (not NovelAI stealth payload)")
-    bit_len = reader.read_int32()
-    if bit_len is None or bit_len <= 0:
-        raise ValueError("Invalid payload length")
-    json_len = bit_len // 8
-    compressed_json = reader.read_bytes(json_len)
-    json_data = json.loads(gzip.decompress(bytes(compressed_json)).decode("utf-8"))
-    if "Comment" in json_data and isinstance(json_data["Comment"], str):
-        try:
-            json_data["Comment"] = json.loads(json_data["Comment"])
-        except Exception:
-            # Leave as-is if not valid JSON
-            pass
-    return json_data
-def extract_nai_metadata(image: Union[Image.Image, str, Path]) -> dict:
-    if isinstance(image, (str, Path)):
-        image = Image.open(image)
-    elif not isinstance(image, Image.Image):
-        raise ValueError("Input must be a file path (string/Path) or a PIL Image")
-    return _extract_nai_metadata_from_image(image)
-def extract_nai_caption_from_hf_img(hf_img: dict) -> Optional[str]:
-    image_bytes = hf_img['bytes']
-    pil_image = Image.open(BytesIO(image_bytes))
-    metadata = extract_nai_metadata(pil_image)
-    return metadata.get('Description')
-def infer_nai(image: Optional[Image.Image]):
-    if image is None:
-        return None, {"error": "Please upload a PNG with alpha channel (RGBA)."}
-    try:
-        meta = extract_nai_metadata(image)
-        description = meta.get("Description")
-        return description, meta
-    except Exception as e:
-        return None, {"error": str(e)}
-# =================================================
-# =========== Similarity Metrics (tab 3) ===========
-# =================================================
-def _load_rgb_image(path: Union[str, Path]) -> np.ndarray:
-    """Load an image file as RGB uint8 numpy array."""
-    img = Image.open(path).convert("RGB")
-    return np.array(img, dtype=np.uint8)
-def _pixel_metrics(img_a: np.ndarray, img_b: np.ndarray) -> Dict[str, float]:
-    """Compute basic pixel-wise similarity metrics between two RGB images."""
-    if img_a.shape != img_b.shape:
-        raise ValueError(f"Image size mismatch: {img_a.shape} vs {img_b.shape}")
-    diff = img_a.astype(np.float32) - img_b.astype(np.float32)
-    abs_diff = np.abs(diff)
-    mse = float(np.mean(diff ** 2))
-    mae = float(np.mean(abs_diff))
-    max_abs = float(np.max(abs_diff))
-    pixel_match = float(np.mean(img_a == img_b))
-    pixel_diff_pct = float(100.0 * (1.0 - pixel_match))
-    if mse == 0.0:
-        psnr = float("inf")
-    else:
-        psnr = float(20.0 * np.log10(255.0 / np.sqrt(mse)))
     return {
-        "pixel_diff_pct": pixel_diff_pct,
-        "pixel_match": pixel_match,
-        "mse": mse,
-        "mae": mae,
-        "max_abs": max_abs,
-        "psnr": psnr,
     }
-def compute_similarity_report(files: Optional[List[str]]) -> str:
-    if not files or len(files) < 2:
-        return "Upload at least two images to compare (first file is treated as base)."
-    try:
-        images: Dict[str, np.ndarray] = {}
-        base_name = None
-        base_img = None
-        for idx, file_path in enumerate(files):
-            name = Path(file_path).name
-            images[name] = _load_rgb_image(file_path)
-            if idx == 0:
-                base_name = name
-                base_img = images[name]
-        if base_name is None or base_img is None:
-            return "Failed to load base image."
-        metrics: Dict[str, Dict[str, float]] = {}
-        # Base vs others
-        for name, img in images.items():
-            if name == base_name:
-                continue
-            metrics[f"{base_name}_vs_{name}"] = _pixel_metrics(base_img, img)
-        # Pairwise among non-base images
-        other_keys = [k for k in images.keys() if k != base_name]
-        for i in range(len(other_keys)):
-            for j in range(i + 1, len(other_keys)):
-                k1, k2 = other_keys[i], other_keys[j]
-                metrics[f"{k1}_vs_{k2}"] = _pixel_metrics(images[k1], images[k2])
-        lines = [
-            "=== similarity metrics ===",
-            f"Base image: {base_name}",
-        ]
-        for name, vals in metrics.items():
-            lines.append(
-                (
-                    f"{name}: pixel_diff_pct={vals['pixel_diff_pct']:.6f}%, "
-                    f"pixel_match={vals['pixel_match']:.6f}, mse={vals['mse']:.6e}, "
-                    f"mae={vals['mae']:.6e}, max_abs={vals['max_abs']:.6e}, "
-                    f"psnr={vals['psnr']:.2f}dB"
-                )
             )
-        lines.append("\nMetrics (JSON):")
-        lines.append(json.dumps(metrics, indent=2))
-        return "\n".join(lines)
-    except Exception as exc:  # pragma: no cover - handled for UI
-        return f"Error computing metrics: {exc}"
-# =================================================
-# =============== Gradio App (two tabs) ===========
-# =================================================
-with gr.Blocks(title="PNG Tools — ImageInfo & NovelAI Reader", theme=theme, analytics_enabled=False) as demo:
-    gr.Markdown("# PNG Tools\nTwo utilities: PNG text-chunk metadata and NovelAI LSB metadata.")
-    with gr.Tabs():
-        with gr.Tab("PNG ImageInfo Reader"):
-            with gr.Row():
-                inp_png = gr.File(label="PNG file", file_types=[".png"])
-            out_png = gr.JSON(label="pngImageInfo")
-            inp_png.change(fn=infer_png_text, inputs=inp_png, outputs=out_png)
-            gr.Markdown("Tip: Stable Diffusion ‘parameters’ often appear under a **tEXt** chunk with keyword `parameters`.")
-        with gr.Tab("NovelAI Reader"):
-            with gr.Row():
-                nai_img = gr.Image(label="Upload PNG (RGBA preferred)", type="pil", height=360)
-            with gr.Row():
-                nai_btn = gr.Button("Extract NovelAI Metadata", variant="primary")
-            with gr.Row():
-                nai_desc = gr.Textbox(label="Description (if present)", lines=4)
-            nai_json = gr.JSON(label="Decoded NovelAI JSON")
-            nai_btn.click(fn=infer_nai, inputs=nai_img, outputs=[nai_desc, nai_json])
-        with gr.Tab("Similarity Metrics"):
-            gr.Markdown("Upload multiple images; the first file is treated as the base for comparisons.")
             files_in = gr.Files(
                 label="Image files",
                 # Explicit list ensures WebP acceptance across Gradio builds
@@ -383,10 +161,82 @@ with gr.Blocks(title="PNG Tools — ImageInfo & NovelAI Reader", theme=theme, an
                 type="filepath",
                 interactive=True,
             )
             with gr.Row():
-                metrics_btn = gr.Button("Compute Similarity", variant="primary")
-            metrics_out = gr.Textbox(label="Similarity report", lines=14, show_copy_button=True)
-            metrics_btn.click(fn=compute_similarity_report, inputs=files_in, outputs=metrics_out)
 if __name__ == "__main__":
-    demo.launch()

+# annotate_concat_demo.py
+# pip install -U gradio pillow
+import os
+import time
 from pathlib import Path
+from typing import List, Tuple, Optional
+import gradio as gr
+from PIL import Image, ImageOps
+# Your existing implementations are assumed available:
+from unibox.utils.image_utils import (
+    concatenate_images_horizontally,
+    add_annotation,
+)
+# ------------------------- helpers -------------------------
+def _norm_path(f) -> Optional[str]:
+    if f is None:
+        return None
+    if isinstance(f, (str, Path)):
+        return str(f)
+    if hasattr(f, "name"):
+        return str(getattr(f, "name"))
+    if isinstance(f, dict):
+        return str(f.get("name") or f.get("path") or "")
+    return str(f)
def _load_images(files) -> List[Tuple[Image.Image, str]]:
    """Open every uploaded file as an RGB image.

    Args:
        files: Iterable of file references understood by ``_norm_path``
            (``None`` is treated as an empty list). Entries that do not
            resolve to a path are skipped.

    Returns:
        A list of ``(image, basename)`` tuples in upload order. Each image
        has had its EXIF orientation applied and has been converted to RGB.

    Raises:
        OSError: Propagated from Pillow when a path cannot be opened or
            decoded.
    """
    out: List[Tuple[Image.Image, str]] = []
    for f in files or []:
        p = _norm_path(f)
        if not p:
            continue
        # Image.open() is lazy and can keep the underlying file open (notably
        # for multi-frame formats). convert() yields an independent in-memory
        # image, so the source handle can be closed right away.
        with Image.open(p) as src:
            # auto-orient, ensure RGB; supports PNG/JPEG/WebP/GIF/BMP/TIFF…
            im = ImageOps.exif_transpose(src).convert("RGB")
        out.append((im, os.path.basename(p)))
    return out
+def _parse_descriptions(text: str, n: int):
+    lines = (text or "").splitlines()
+    if len(lines) > n:
+        return None, f"Too many description lines ({len(lines)}) for {n} image(s). Provide ≤ one per image."
+    lines = lines + [""] * (max(0, n - len(lines)))  # pad with blanks
+    return lines[:n], None
def _build_stats(files, desc_text: str) -> dict:
    """Summarize the current inputs for the "Stats" panel.

    Only file names are needed here, so the uploads are resolved with
    ``_norm_path`` instead of being decoded; the summary therefore stays
    cheap and does not raise on a file Pillow cannot read.

    Args:
        files: Uploaded file references (``None`` means no files).
        desc_text: Raw content of the description text box.

    Returns:
        A dict with ``"num_images"``, ``"num_descriptions"`` (raw line count
        of ``desc_text``), ``"mapping"`` (file name -> description, ``""``
        when none applies) and, only when the descriptions do not fit the
        images, ``"error"``.
    """
    names = []
    for f in files or []:
        p = _norm_path(f)
        if p:
            names.append(os.path.basename(p))
    n = len(names)

    raw_lines = (desc_text or "").splitlines()
    if n > 0:
        lines, err = _parse_descriptions(desc_text, n)
    else:
        lines, err = raw_lines, None
    if lines is None:
        # Parsing failed: every file maps to an empty description.
        lines = []

    mapping = {}
    for i, fname in enumerate(names):
        # Uploads can share a basename; suffix repeats so no entry is lost.
        key = fname
        repeat = 2
        while key in mapping:
            key = f"{fname} ({repeat})"
            repeat += 1
        mapping[key] = lines[i] if i < len(lines) else ""

    stats = {
        "num_images": n,
        "num_descriptions": len(raw_lines),
        "mapping": mapping,
    }
    if err:
        stats["error"] = err
    return stats
+# --------------------- core actions ------------------------
def concatenate_with_annotations(
    files,
    desc_text: str,
    max_height: int,
    position: str,
    alignment: str,
    size_adj: str,
):
    """Annotate each uploaded image and join them side by side as a JPEG.

    Args:
        files: Uploaded file references, as accepted by ``_load_images``.
        desc_text: One description per line; a blank line leaves the
            matching image without an annotation.
        max_height: Passed through to ``concatenate_images_horizontally``.
        position: Annotation position passed to ``add_annotation``.
        alignment: Annotation alignment; forced to ``"center"`` when
            ``position`` is ``"left"`` or ``"right"``.
        size_adj: Text size option passed to ``add_annotation`` as ``size``.

    Returns:
        A 4-tuple ``(image, file_path, log_text, stats)``. ``image`` and
        ``file_path`` are ``None`` whenever an ``ERROR:`` line is logged.
    """
    logs = []

    def _result(image=None, path=None):
        # Every exit shares the same log/stats tail.
        return image, path, "\n".join(logs), _build_stats(files, desc_text)

    try:
        pairs = _load_images(files)
    except OSError as exc:
        # Report unreadable uploads through the log channel like every other
        # user-facing problem. The stats are built inline because the stats
        # helper may itself try to open the same file.
        message = f"Could not read an uploaded image: {exc}"
        logs.append(f"ERROR: {message}")
        stats = {
            "num_images": 0,
            "num_descriptions": len((desc_text or "").splitlines()),
            "mapping": {},
            "error": message,
        }
        return None, None, "\n".join(logs), stats

    if not pairs:
        logs.append("ERROR: Please upload at least one image.")
        return _result()

    lines, err = _parse_descriptions(desc_text, len(pairs))
    if err:
        logs.append(f"ERROR: {err}")
        return _result()

    # For left/right, alignment must be center (matches add_annotation behavior)
    if position in ("left", "right"):
        alignment = "center"

    annotated = []
    for (im, fname), line in zip(pairs, lines):
        if line.strip():
            annotated.append(
                add_annotation(
                    pil_image=im,
                    annotation=line,
                    position=position,
                    alignment=alignment,
                    size=size_adj,
                )
            )
            logs.append(f"Annotated: {fname}")
        else:
            annotated.append(im)
            logs.append(f"Skipped (no description): {fname}")

    # Monotonic clock: the reported latency covers concatenation and saving.
    started = time.perf_counter()
    merged = concatenate_images_horizontally(annotated, max_height=max_height)
    if merged is None:
        logs.append("ERROR: Concatenation produced no result.")
        return _result()

    # JPEG cannot store alpha or palette data; convert defensively in case
    # the helpers hand back something other than a JPEG-compatible mode.
    if merged.mode not in ("RGB", "L", "CMYK"):
        merged = merged.convert("RGB")

    # Save JPEG with required name
    out_dir = Path("outputs")
    out_dir.mkdir(parents=True, exist_ok=True)
    stamp = time.strftime("%Y%m%d_%H%M%S")
    out_name = f"concatenate_{stamp}.jpg"
    out_path = out_dir / out_name
    # The stamp has one-second resolution; add a counter only on a clash so
    # an earlier result that is still being downloaded is not overwritten.
    attempt = 1
    while out_path.exists():
        out_name = f"concatenate_{stamp}_{attempt}.jpg"
        out_path = out_dir / out_name
        attempt += 1
    merged.save(str(out_path), format="JPEG", quality=95, optimize=True)

    w, h = merged.size
    size_bytes = out_path.stat().st_size
    latency = time.perf_counter() - started
    logs.append(f"Output: {out_name} — {w}×{h}px — {size_bytes} bytes — {latency:.3f}s")
    return _result(merged, str(out_path))
def check_stats_only(files, desc_text: str, *_):
    """Report input counts without producing an image.

    Extra positional arguments are accepted and ignored so this handler can
    be wired to the same inputs as the concatenate action.

    Returns:
        ``(None, None, log_text, stats)``: no image, no file, a one- or
        two-line summary, and the dict produced by ``_build_stats``.
    """
    stats = _build_stats(files, desc_text)
    image_count = stats.get("num_images", 0)
    line_count = stats.get("num_descriptions", 0)
    report = [f"Images: {image_count}; Description lines: {line_count}"]
    if "error" in stats:
        report.append(f"ERROR: {stats['error']}")
    return None, None, "\n".join(report), stats
+# ----------------------- UI wiring -------------------------
+theme = gr.themes.Monochrome(primary_hue="slate", radius_size="sm")
+with gr.Blocks(
+    title="Annotated Concatenation — Demo",
+    theme=theme,
+    analytics_enabled=False,
+) as demo:
+    gr.Markdown("# Annotate & Concatenate Images")
+    gr.Markdown(
+        "Upload images (PNG/JPEG/WebP…), add one description per line (blank = skip), "
+        "and concatenate horizontally. The output JPEG is named `concatenate_{timestamp}.jpg`."
+    )
+    with gr.Row(variant="panel"):
+        with gr.Column(scale=2):
             files_in = gr.Files(
                 label="Image files",
                 # Explicit list ensures WebP acceptance across Gradio builds
                 type="filepath",
                 interactive=True,
             )
+            desc_in = gr.Textbox(
+                label="Descriptions (one per line; blank lines allowed to skip)",
+                placeholder="e.g.\nLeft image label\n\nRight image label",
+                lines=8,
+            )
+            max_h = gr.Number(
+                label="Max height (px) for concatenated image",
+                value=1024,
+                precision=0,
+                minimum=64,
+                interactive=True,
+            )
+            # Folded by default
+            with gr.Accordion("Annotation settings", open=False):
+                pos = gr.Dropdown(
+                    label="Position",
+                    choices=["top", "bottom", "left", "right"],
+                    value="bottom",
+                )
+                align = gr.Radio(
+                    label="Alignment (applies to top/bottom)",
+                    choices=["left", "center", "right"],
+                    value="center",
+                )
+                size_adj = gr.Radio(
+                    label="Text size",
+                    choices=["default", "larger", "smaller", "smallest", "largest"],
+                    value="default",
+                )
+                def _toggle_align(p):
+                    return gr.update(value="center", interactive=False) if p in ("left", "right") else gr.update(interactive=True)
+                pos.change(_toggle_align, inputs=[pos], outputs=[align])
             with gr.Row():
+                concat_btn = gr.Button("Concatenate image", variant="primary")
+                stats_btn = gr.Button("Check stats")
+        with gr.Column(scale=3):
+            out_img = gr.Image(
+                label="Concatenated image (preview)",
+                interactive=False,
+                format="jpeg",
+                show_download_button=False,
+            )
+            download_file = gr.File(
+                label="Download JPEG (named as saved)",
+                interactive=False,
+                height=72,  # compact
+            )
+            with gr.Accordion("Logs", open=False):
+                logs_out = gr.Textbox(
+                    label="Info / Errors",
+                    lines=10,
+                    interactive=False,
+                )
+            with gr.Accordion("Stats", open=False):
+                stats_out = gr.JSON(label="Counts and current filename→description mapping")
+    concat_btn.click(
+        concatenate_with_annotations,
+        inputs=[files_in, desc_in, max_h, pos, align, size_adj],
+        outputs=[out_img, download_file, logs_out, stats_out],
+        api_name="concatenate",
+    )
+    stats_btn.click(
+        check_stats_only,
+        inputs=[files_in, desc_in, max_h, pos, align, size_adj],
+        outputs=[out_img, download_file, logs_out, stats_out],
+        api_name="check_stats",
+    )
 if __name__ == "__main__":
+    demo.queue(max_size=8).launch()