trojblue committed on
Commit
5859393
·
verified ·
1 Parent(s): 7cb613c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +371 -221
app.py CHANGED
@@ -1,156 +1,378 @@
1
- # annotate_concat_demo.py
2
- # pip install -U gradio pillow
3
-
4
- import os
5
- import time
6
- from pathlib import Path
7
- from typing import List, Tuple, Optional
8
 
9
  import gradio as gr
10
- from PIL import Image, ImageOps
11
-
12
- # Your existing implementations are assumed available:
13
- from unibox.utils.image_utils import (
14
- concatenate_images_horizontally,
15
- add_annotation,
16
- )
17
-
18
- # ------------------------- helpers -------------------------
19
-
20
- def _norm_path(f) -> Optional[str]:
21
- if f is None:
22
- return None
23
- if isinstance(f, (str, Path)):
24
- return str(f)
25
- if hasattr(f, "name"):
26
- return str(getattr(f, "name"))
27
- if isinstance(f, dict):
28
- return str(f.get("name") or f.get("path") or "")
29
- return str(f)
30
-
31
- def _load_images(files) -> List[Tuple[Image.Image, str]]:
32
- out: List[Tuple[Image.Image, str]] = []
33
- for f in (files or []):
34
- p = _norm_path(f)
35
- if not p:
36
- continue
37
- im = Image.open(p)
38
- # auto-orient, ensure RGB; supports PNG/JPEG/WebP/GIF/BMP/TIFF…
39
- im = ImageOps.exif_transpose(im).convert("RGB")
40
- out.append((im, os.path.basename(p)))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  return out
42
 
43
- def _parse_descriptions(text: str, n: int):
44
- lines = (text or "").splitlines()
45
- if len(lines) > n:
46
- return None, f"Too many description lines ({len(lines)}) for {n} image(s). Provide one per image."
47
- lines = lines + [""] * (max(0, n - len(lines))) # pad with blanks
48
- return lines[:n], None
49
-
50
- def _build_stats(files, desc_text: str) -> dict:
51
- pairs = _load_images(files)
52
- n = len(pairs)
53
- lines, err = _parse_descriptions(desc_text, n) if n > 0 else ((desc_text or "").splitlines(), None)
54
- mapping = {}
55
- for i, (_, fname) in enumerate(pairs):
56
- mapping[fname] = (lines[i] if isinstance(lines, list) and i < len(lines) else "")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  return {
58
- "num_images": n,
59
- "num_descriptions": len((desc_text or "").splitlines()),
60
- "mapping": mapping,
61
- **({"error": err} if err else {}),
 
 
62
  }
63
 
64
- # --------------------- core actions ------------------------
65
-
66
- def concatenate_with_annotations(
67
- files,
68
- desc_text: str,
69
- max_height: int,
70
- position: str,
71
- alignment: str,
72
- size_adj: str,
73
- ):
74
- logs = []
75
- out_img = None
76
- out_file = None
77
-
78
- pairs = _load_images(files)
79
- if not pairs:
80
- logs.append("ERROR: Please upload at least one image.")
81
- return out_img, out_file, "\n".join(logs), _build_stats(files, desc_text)
82
-
83
- lines, err = _parse_descriptions(desc_text, len(pairs))
84
- if err:
85
- logs.append(f"ERROR: {err}")
86
- return out_img, out_file, "\n".join(logs), _build_stats(files, desc_text)
87
-
88
- # For left/right, alignment must be center (matches add_annotation behavior)
89
- if position in ("left", "right"):
90
- alignment = "center"
91
-
92
- annotated = []
93
- for (im, fname), line in zip(pairs, lines):
94
- if line.strip():
95
- im2 = add_annotation(
96
- pil_image=im,
97
- annotation=line,
98
- position=position,
99
- alignment=alignment,
100
- size=size_adj,
 
 
 
 
 
 
 
 
 
 
101
  )
102
- annotated.append(im2)
103
- logs.append(f"Annotated: {fname}")
104
- else:
105
- annotated.append(im)
106
- logs.append(f"Skipped (no description): {fname}")
107
-
108
- started = time.time()
109
- merged = concatenate_images_horizontally(annotated, max_height=max_height)
110
- if merged is None:
111
- logs.append("ERROR: Concatenation produced no result.")
112
- return None, None, "\n".join(logs), _build_stats(files, desc_text)
113
-
114
- # Save JPEG with required name
115
- out_dir = Path("outputs")
116
- out_dir.mkdir(parents=True, exist_ok=True)
117
- stamp = time.strftime("%Y%m%d_%H%M%S")
118
- out_name = f"concatenate_{stamp}.jpg"
119
- out_path = out_dir / out_name
120
- merged.save(str(out_path), format="JPEG", quality=95, optimize=True)
121
-
122
- w, h = merged.size
123
- size_bytes = out_path.stat().st_size
124
- latency = time.time() - started
125
- logs.append(f"Output: {out_name} — {w}×{h}px — {size_bytes} bytes — {latency:.3f}s")
126
-
127
- return merged, str(out_path), "\n".join(logs), _build_stats(files, desc_text)
128
-
129
- def check_stats_only(files, desc_text: str, *_):
130
- stats = _build_stats(files, desc_text)
131
- log = f"Images: {stats.get('num_images', 0)}; Description lines: {stats.get('num_descriptions', 0)}"
132
- if "error" in stats:
133
- log += f"\nERROR: {stats['error']}"
134
- return None, None, log, stats
135
-
136
- # ----------------------- UI wiring -------------------------
137
-
138
- theme = gr.themes.Monochrome(primary_hue="slate", radius_size="sm")
139
-
140
- with gr.Blocks(
141
- title="Annotated Concatenation — Demo",
142
- theme=theme,
143
- analytics_enabled=False,
144
- ) as demo:
145
-
146
- gr.Markdown("# Annotate & Concatenate Images")
147
- gr.Markdown(
148
- "Upload images (PNG/JPEG/WebP…), add one description per line (blank = skip), "
149
- "and concatenate horizontally. The output JPEG is named `concatenate_{timestamp}.jpg`."
150
- )
151
-
152
- with gr.Row(variant="panel"):
153
- with gr.Column(scale=2):
154
  files_in = gr.Files(
155
  label="Image files",
156
  # Explicit list ensures WebP acceptance across Gradio builds
@@ -161,82 +383,10 @@ with gr.Blocks(
161
  type="filepath",
162
  interactive=True,
163
  )
164
- desc_in = gr.Textbox(
165
- label="Descriptions (one per line; blank lines allowed to skip)",
166
- placeholder="e.g.\nLeft image label\n\nRight image label",
167
- lines=8,
168
- )
169
- max_h = gr.Number(
170
- label="Max height (px) for concatenated image",
171
- value=1024,
172
- precision=0,
173
- minimum=64,
174
- interactive=True,
175
- )
176
-
177
- # Folded by default
178
- with gr.Accordion("Annotation settings", open=False):
179
- pos = gr.Dropdown(
180
- label="Position",
181
- choices=["top", "bottom", "left", "right"],
182
- value="bottom",
183
- )
184
- align = gr.Radio(
185
- label="Alignment (applies to top/bottom)",
186
- choices=["left", "center", "right"],
187
- value="center",
188
- )
189
- size_adj = gr.Radio(
190
- label="Text size",
191
- choices=["default", "larger", "smaller", "smallest", "largest"],
192
- value="default",
193
- )
194
-
195
- def _toggle_align(p):
196
- return gr.update(value="center", interactive=False) if p in ("left", "right") else gr.update(interactive=True)
197
-
198
- pos.change(_toggle_align, inputs=[pos], outputs=[align])
199
-
200
  with gr.Row():
201
- concat_btn = gr.Button("Concatenate image", variant="primary")
202
- stats_btn = gr.Button("Check stats")
203
-
204
- with gr.Column(scale=3):
205
- out_img = gr.Image(
206
- label="Concatenated image (preview)",
207
- interactive=False,
208
- format="jpeg",
209
- show_download_button=False,
210
- )
211
- download_file = gr.File(
212
- label="Download JPEG (named as saved)",
213
- interactive=False,
214
- height=72, # compact
215
- )
216
-
217
- with gr.Accordion("Logs", open=False):
218
- logs_out = gr.Textbox(
219
- label="Info / Errors",
220
- lines=10,
221
- interactive=False,
222
- )
223
-
224
- with gr.Accordion("Stats", open=False):
225
- stats_out = gr.JSON(label="Counts and current filename→description mapping")
226
-
227
- concat_btn.click(
228
- concatenate_with_annotations,
229
- inputs=[files_in, desc_in, max_h, pos, align, size_adj],
230
- outputs=[out_img, download_file, logs_out, stats_out],
231
- api_name="concatenate",
232
- )
233
-
234
- stats_btn.click(
235
- check_stats_only,
236
- inputs=[files_in, desc_in, max_h, pos, align, size_adj],
237
- outputs=[out_img, download_file, logs_out, stats_out],
238
- api_name="check_stats",
239
- )
240
 
241
  if __name__ == "__main__":
242
- demo.queue(max_size=8).launch()
 
1
+ import io
2
+ import json
3
+ import struct
4
+ import zlib
5
+ from typing import List, Dict, Any, Optional, Union
 
 
6
 
7
  import gradio as gr
8
+ from PIL import Image, PngImagePlugin
9
+
10
+ # -------- THEME (similar to your example) --------
11
+ theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="violet", radius_size="lg")
12
+
13
+ # =================================================
14
+ # ========== PNG Text Chunk Reader (tab 1) ========
15
+ # =================================================
16
+
17
+ PNG_SIGNATURE = b"\x89PNG\r\n\x1a\n"
18
+
19
+
20
+ def _parse_png_text_chunks(data: bytes) -> List[Dict[str, Any]]:
21
+ """
22
+ Parse PNG chunks and extract tEXt, zTXt, and iTXt entries.
23
+ """
24
+ if not data.startswith(PNG_SIGNATURE):
25
+ raise ValueError("Not a PNG file.")
26
+
27
+ pos = len(PNG_SIGNATURE)
28
+ out = []
29
+
30
+ while pos + 8 <= len(data):
31
+ # Read chunk length and type
32
+ length = struct.unpack(">I", data[pos:pos+4])[0]
33
+ ctype = data[pos+4:pos+8]
34
+ pos += 8
35
+
36
+ if pos + length + 4 > len(data):
37
+ break
38
+
39
+ cdata = data[pos:pos+length]
40
+ pos += length
41
+
42
+ # Skip CRC (4 bytes)
43
+ pos += 4
44
+
45
+ if ctype == b"tEXt":
46
+ # Latin-1: key\0value
47
+ try:
48
+ null_idx = cdata.index(b"\x00")
49
+ key = cdata[:null_idx].decode("latin-1", "replace")
50
+ text = cdata[null_idx+1:].decode("latin-1", "replace")
51
+ out.append({"type": "tEXt", "keyword": key, "text": text})
52
+ except Exception:
53
+ pass
54
+
55
+ elif ctype == b"zTXt":
56
+ # key\0compression_method(1) + compressed data
57
+ try:
58
+ null_idx = cdata.index(b"\x00")
59
+ key = cdata[:null_idx].decode("latin-1", "replace")
60
+ method = cdata[null_idx+1:null_idx+2]
61
+ comp = cdata[null_idx+2:]
62
+ if method == b"\x00": # zlib/deflate
63
+ text = zlib.decompress(comp).decode("latin-1", "replace")
64
+ out.append({"type": "zTXt", "keyword": key, "text": text})
65
+ except Exception:
66
+ pass
67
+
68
+ elif ctype == b"iTXt":
69
+ # UTF-8: key\0flag(1)\0method(1)\0lang\0translated\0text
70
+ try:
71
+ i0 = cdata.index(b"\x00")
72
+ key = cdata[:i0].decode("latin-1", "replace")
73
+ comp_flag = cdata[i0+1:i0+2]
74
+ comp_method = cdata[i0+2:i0+3]
75
+ rest = cdata[i0+3:]
76
+
77
+ i1 = rest.index(b"\x00")
78
+ language_tag = rest[:i1].decode("ascii", "replace")
79
+ rest2 = rest[i1+1:]
80
+
81
+ i2 = rest2.index(b"\x00")
82
+ translated_keyword = rest2[:i2].decode("utf-8", "replace")
83
+ text_bytes = rest2[i2+1:]
84
+
85
+ if comp_flag == b"\x01" and comp_method == b"\x00":
86
+ text = zlib.decompress(text_bytes).decode("utf-8", "replace")
87
+ else:
88
+ text = text_bytes.decode("utf-8", "replace")
89
+
90
+ out.append({
91
+ "type": "iTXt",
92
+ "keyword": key,
93
+ "language_tag": language_tag,
94
+ "translated_keyword": translated_keyword,
95
+ "text": text,
96
+ })
97
+ except Exception:
98
+ pass
99
+
100
+ if ctype == b"IEND":
101
+ break
102
+
103
  return out
104
 
105
+
106
def read_png_info(file_obj) -> Dict[str, Any]:
    """Return structured PNG text metadata for an uploaded file.

    Args:
        file_obj: Either a file-like object with ``.read()`` or a filesystem
            path to a PNG file.

    Returns:
        Dict with:
          - ``found_text_chunks``: raw tEXt/zTXt/iTXt entries from the chunk
            stream;
          - ``pil_info``: Pillow's ``Image.info`` mapping, with bytes values
            decoded so the result is JSON-serializable;
          - ``quick_fields``: convenience lookups for the common
            ``parameters`` and ``Software`` keywords.

    Raises:
        ValueError: If the data is not a PNG (propagated from the parser).
    """
    # Accept both file-like objects and plain paths.
    if hasattr(file_obj, "read"):
        data = file_obj.read()
    else:
        with open(file_obj, "rb") as f:
            data = f.read()

    chunks = _parse_png_text_chunks(data)

    try:
        img = Image.open(io.BytesIO(data))
        pil_info = dict(img.info)
        # Make values JSON-friendly: decode bytes, stub out PngInfo objects.
        for k, v in list(pil_info.items()):
            if isinstance(v, (bytes, bytearray)):
                try:
                    pil_info[k] = v.decode("utf-8", "replace")
                except Exception:
                    pil_info[k] = repr(v)
            elif isinstance(v, PngImagePlugin.PngInfo):
                pil_info[k] = "PngInfo(...)"
    except Exception as e:
        # A Pillow failure should not hide the raw chunk results.
        pil_info = {"_error": f"Pillow failed to open PNG: {e}"}

    return {
        "found_text_chunks": chunks,
        "pil_info": pil_info,
        "quick_fields": {
            # Prefer the raw chunk text; fall back to Pillow's info mapping.
            "parameters": next((c["text"] for c in chunks if c.get("keyword") == "parameters"), pil_info.get("parameters")),
            "Software": next((c["text"] for c in chunks if c.get("keyword") == "Software"), pil_info.get("Software")),
        },
    }
142
+
143
+
144
def infer_png_text(file):
    """Gradio handler: read PNG text metadata from an uploaded file.

    Returns an error dict (instead of raising) so the JSON component can
    always render something.
    """
    if file is None:
        return {"error": "Please upload a PNG file."}
    try:
        # gr.File may hand us a tempfile wrapper (with .name) or a plain path.
        return read_png_info(file.name if hasattr(file, "name") else file)
    except Exception as e:
        return {"error": str(e)}
151
+
152
+
153
+ # =================================================
154
+ # ========== NovelAI LSB Reader (tab 2) ===========
155
+ # =================================================
156
+
157
+ # (User-provided logic, lightly wrapped for Gradio.)
158
+ import numpy as np
159
+ import gzip
160
+ from pathlib import Path
161
+ from io import BytesIO
162
+
163
+ def _pack_lsb_bytes(alpha: np.ndarray) -> np.ndarray:
164
+ """
165
+ Pack the least significant bits (LSB) from an image's alpha channel into bytes.
166
+ """
167
+ alpha = alpha.T.reshape((-1,))
168
+ alpha = alpha[:(alpha.shape[0] // 8) * 8]
169
+ alpha = np.bitwise_and(alpha, 1)
170
+ alpha = alpha.reshape((-1, 8))
171
+ alpha = np.packbits(alpha, axis=1)
172
+ return alpha
173
+
174
+
175
+ class LSBReader:
176
+ """
177
+ Utility class for reading hidden data from an image's alpha channel using LSB encoding.
178
+ """
179
+ def __init__(self, data: np.ndarray):
180
+ self.data = _pack_lsb_bytes(data[..., -1])
181
+ self.pos = 0
182
+
183
+ def read_bytes(self, n: int) -> bytearray:
184
+ """Read `n` bytes from the bitstream."""
185
+ n_bytes = self.data[self.pos:self.pos + n]
186
+ self.pos += n
187
+ return bytearray(n_bytes.flatten().tolist())
188
+
189
+ def read_int32(self) -> Optional[int]:
190
+ """Read a 4-byte big-endian integer from the bitstream."""
191
+ bytes_list = self.read_bytes(4)
192
+ return int.from_bytes(bytes_list, 'big') if len(bytes_list) == 4 else None
193
+
194
+
195
def _extract_nai_metadata_from_image(image: Image.Image) -> dict:
    """Decode the NovelAI "stealth" metadata hidden in a PNG's alpha LSBs.

    Payload layout, read from alpha-channel least-significant bits:
    magic string ``stealth_pngcomp`` · int32 payload length in *bits* ·
    gzip-compressed JSON document.

    Raises:
        ValueError: If the image is not RGBA or carries no stealth payload.
    """
    rgba = np.array(image.convert("RGBA"))
    if rgba.shape[-1] != 4 or len(rgba.shape) != 3:
        raise ValueError("Image must be in RGBA format")

    reader = LSBReader(rgba)
    magic = "stealth_pngcomp"
    if reader.read_bytes(len(magic)).decode("utf-8", "replace") != magic:
        raise ValueError("Invalid magic number (not NovelAI stealth payload)")

    bit_len = reader.read_int32()
    if bit_len is None or bit_len <= 0:
        raise ValueError("Invalid payload length")

    # Length is stored in bits; the payload itself is gzip-compressed JSON.
    compressed = reader.read_bytes(bit_len // 8)
    json_data = json.loads(gzip.decompress(bytes(compressed)).decode("utf-8"))

    # NovelAI nests a JSON string under "Comment"; unwrap it when possible.
    if "Comment" in json_data and isinstance(json_data["Comment"], str):
        try:
            json_data["Comment"] = json.loads(json_data["Comment"])
        except Exception:
            pass  # leave as a plain string if it is not valid JSON

    return json_data
224
+
225
+
226
def extract_nai_metadata(image: Union[Image.Image, str, Path]) -> dict:
    """Extract NovelAI stealth metadata from a PIL image or an image path.

    Raises:
        ValueError: If the input type is unsupported or no payload is found.
    """
    if isinstance(image, (str, Path)):
        image = Image.open(image)
    elif not isinstance(image, Image.Image):
        raise ValueError("Input must be a file path (string/Path) or a PIL Image")
    return _extract_nai_metadata_from_image(image)
232
+
233
+
234
def extract_nai_caption_from_hf_img(hf_img: dict) -> Optional[str]:
    """Pull the NovelAI 'Description' caption out of a HuggingFace image dict.

    Assumes ``hf_img`` carries raw PNG bytes under the 'bytes' key (datasets
    image format) — TODO confirm against the caller.
    """
    pil_image = Image.open(BytesIO(hf_img['bytes']))
    return extract_nai_metadata(pil_image).get('Description')
239
+
240
+
241
def infer_nai(image: Optional[Image.Image]):
    """Gradio handler: return (description, full metadata) for an upload.

    Errors are reported through the JSON output instead of raising so the UI
    stays responsive.
    """
    if image is None:
        return None, {"error": "Please upload a PNG with alpha channel (RGBA)."}
    try:
        meta = extract_nai_metadata(image)
        return meta.get("Description"), meta
    except Exception as e:
        return None, {"error": str(e)}
250
+
251
+
252
+ # =================================================
253
+ # =========== Similarity Metrics (tab 3) ===========
254
+ # =================================================
255
+
256
def _load_rgb_image(path: Union[str, Path]) -> np.ndarray:
    """Load an image file as an RGB uint8 array of shape (H, W, 3).

    Uses ``Image.open`` as a context manager so the underlying file handle is
    closed promptly (Pillow opens files lazily and otherwise leaks the fd).
    """
    with Image.open(path) as img:
        return np.array(img.convert("RGB"), dtype=np.uint8)
260
+
261
+
262
+ def _pixel_metrics(img_a: np.ndarray, img_b: np.ndarray) -> Dict[str, float]:
263
+ """Compute basic pixel-wise similarity metrics between two RGB images."""
264
+ if img_a.shape != img_b.shape:
265
+ raise ValueError(f"Image size mismatch: {img_a.shape} vs {img_b.shape}")
266
+
267
+ diff = img_a.astype(np.float32) - img_b.astype(np.float32)
268
+ abs_diff = np.abs(diff)
269
+
270
+ mse = float(np.mean(diff ** 2))
271
+ mae = float(np.mean(abs_diff))
272
+ max_abs = float(np.max(abs_diff))
273
+
274
+ pixel_match = float(np.mean(img_a == img_b))
275
+ pixel_diff_pct = float(100.0 * (1.0 - pixel_match))
276
+
277
+ if mse == 0.0:
278
+ psnr = float("inf")
279
+ else:
280
+ psnr = float(20.0 * np.log10(255.0 / np.sqrt(mse)))
281
+
282
  return {
283
+ "pixel_diff_pct": pixel_diff_pct,
284
+ "pixel_match": pixel_match,
285
+ "mse": mse,
286
+ "mae": mae,
287
+ "max_abs": max_abs,
288
+ "psnr": psnr,
289
  }
290
 
291
+
292
def compute_similarity_report(files: Optional[List[str]]) -> str:
    """Build a human-readable similarity report for a set of image files.

    The first file is the base: it is compared against every other file, and
    all non-base files are additionally compared pairwise.

    Args:
        files: List of image file paths; at least two are required.

    Returns:
        A multi-line text report (including a JSON dump of the raw metrics),
        or an error/usage message.
    """
    if not files or len(files) < 2:
        return "Upload at least two images to compare (first file is treated as base)."

    try:
        images: Dict[str, np.ndarray] = {}
        for idx, file_path in enumerate(files):
            name = Path(file_path).name
            # Disambiguate duplicate basenames: previously two uploads named
            # e.g. "x.png" silently overwrote each other in this dict.
            if name in images:
                name = f"{name} ({idx})"
            images[name] = _load_rgb_image(file_path)
            if idx == 0:
                base_name = name

        base_img = images[base_name]

        metrics: Dict[str, Dict[str, float]] = {}

        # Base vs every other image.
        for name, img in images.items():
            if name != base_name:
                metrics[f"{base_name}_vs_{name}"] = _pixel_metrics(base_img, img)

        # Pairwise among the non-base images.
        others = [k for k in images if k != base_name]
        for i, k1 in enumerate(others):
            for k2 in others[i + 1:]:
                metrics[f"{k1}_vs_{k2}"] = _pixel_metrics(images[k1], images[k2])

        lines = [
            "=== similarity metrics ===",
            f"Base image: {base_name}",
        ]
        for name, vals in metrics.items():
            lines.append(
                f"{name}: pixel_diff_pct={vals['pixel_diff_pct']:.6f}%, "
                f"pixel_match={vals['pixel_match']:.6f}, mse={vals['mse']:.6e}, "
                f"mae={vals['mae']:.6e}, max_abs={vals['max_abs']:.6e}, "
                f"psnr={vals['psnr']:.2f}dB"
            )

        lines.append("\nMetrics (JSON):")
        lines.append(json.dumps(metrics, indent=2))

        return "\n".join(lines)
    except Exception as exc:  # surfaced in the UI textbox rather than raised
        return f"Error computing metrics: {exc}"
346
+
347
+
348
+ # =================================================
349
+ # =============== Gradio App (two tabs) ===========
350
+ # =================================================
351
+
352
+ with gr.Blocks(title="PNG Tools — ImageInfo & NovelAI Reader", theme=theme, analytics_enabled=False) as demo:
353
+ gr.Markdown("# PNG Tools\nTwo utilities: PNG text-chunk metadata and NovelAI LSB metadata.")
354
+
355
+ with gr.Tabs():
356
+ with gr.Tab("PNG ImageInfo Reader"):
357
+ with gr.Row():
358
+ inp_png = gr.File(label="PNG file", file_types=[".png"])
359
+ out_png = gr.JSON(label="pngImageInfo")
360
+ inp_png.change(fn=infer_png_text, inputs=inp_png, outputs=out_png)
361
+ gr.Markdown("Tip: Stable Diffusion ‘parameters’ often appear under a **tEXt** chunk with keyword `parameters`.")
362
+
363
+ with gr.Tab("NovelAI Reader"):
364
+ with gr.Row():
365
+ nai_img = gr.Image(label="Upload PNG (RGBA preferred)", type="pil", height=360)
366
+ with gr.Row():
367
+ nai_btn = gr.Button("Extract NovelAI Metadata", variant="primary")
368
+ with gr.Row():
369
+ nai_desc = gr.Textbox(label="Description (if present)", lines=4)
370
+ nai_json = gr.JSON(label="Decoded NovelAI JSON")
371
+
372
+ nai_btn.click(fn=infer_nai, inputs=nai_img, outputs=[nai_desc, nai_json])
373
+
374
+ with gr.Tab("Similarity Metrics"):
375
+ gr.Markdown("Upload multiple images; the first file is treated as the base for comparisons.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
376
  files_in = gr.Files(
377
  label="Image files",
378
  # Explicit list ensures WebP acceptance across Gradio builds
 
383
  type="filepath",
384
  interactive=True,
385
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
386
  with gr.Row():
387
+ metrics_btn = gr.Button("Compute Similarity", variant="primary")
388
+ metrics_out = gr.Textbox(label="Similarity report", lines=14, show_copy_button=True)
389
+ metrics_btn.click(fn=compute_similarity_report, inputs=files_in, outputs=metrics_out)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
390
 
391
  if __name__ == "__main__":
392
+ demo.launch()