import io import json import struct import zlib from typing import List, Dict, Any, Optional, Union import gradio as gr from PIL import Image, PngImagePlugin # -------- THEME (similar to your example) -------- theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="violet", radius_size="lg") # ================================================= # ========== PNG Text Chunk Reader (tab 1) ======== # ================================================= PNG_SIGNATURE = b"\x89PNG\r\n\x1a\n" def _parse_png_text_chunks(data: bytes) -> List[Dict[str, Any]]: """ Parse PNG chunks and extract tEXt, zTXt, and iTXt entries. """ if not data.startswith(PNG_SIGNATURE): raise ValueError("Not a PNG file.") pos = len(PNG_SIGNATURE) out = [] while pos + 8 <= len(data): # Read chunk length and type length = struct.unpack(">I", data[pos:pos+4])[0] ctype = data[pos+4:pos+8] pos += 8 if pos + length + 4 > len(data): break cdata = data[pos:pos+length] pos += length # Skip CRC (4 bytes) pos += 4 if ctype == b"tEXt": # Latin-1: key\0value try: null_idx = cdata.index(b"\x00") key = cdata[:null_idx].decode("latin-1", "replace") text = cdata[null_idx+1:].decode("latin-1", "replace") out.append({"type": "tEXt", "keyword": key, "text": text}) except Exception: pass elif ctype == b"zTXt": # key\0compression_method(1) + compressed data try: null_idx = cdata.index(b"\x00") key = cdata[:null_idx].decode("latin-1", "replace") method = cdata[null_idx+1:null_idx+2] comp = cdata[null_idx+2:] if method == b"\x00": # zlib/deflate text = zlib.decompress(comp).decode("latin-1", "replace") out.append({"type": "zTXt", "keyword": key, "text": text}) except Exception: pass elif ctype == b"iTXt": # UTF-8: key\0flag(1)\0method(1)\0lang\0translated\0text try: i0 = cdata.index(b"\x00") key = cdata[:i0].decode("latin-1", "replace") comp_flag = cdata[i0+1:i0+2] comp_method = cdata[i0+2:i0+3] rest = cdata[i0+3:] i1 = rest.index(b"\x00") language_tag = rest[:i1].decode("ascii", "replace") rest2 = rest[i1+1:] i2 = rest2.index(b"\x00") translated_keyword = rest2[:i2].decode("utf-8", "replace") text_bytes = rest2[i2+1:] if comp_flag == b"\x01" and comp_method == b"\x00": text = zlib.decompress(text_bytes).decode("utf-8", "replace") else: text = text_bytes.decode("utf-8", "replace") out.append({ "type": "iTXt", "keyword": key, "language_tag": language_tag, "translated_keyword": translated_keyword, "text": text, }) except Exception: pass if ctype == b"IEND": break return out def read_png_info(file_obj) -> Dict[str, Any]: """ Given an uploaded file (path or file-like), return structured PNG text info. Also surface Pillow's .info (which often contains 'parameters'). """ if hasattr(file_obj, "read"): data = file_obj.read() else: with open(file_obj, "rb") as f: data = f.read() chunks = _parse_png_text_chunks(data) try: img = Image.open(io.BytesIO(data)) pil_info = dict(img.info) for k, v in list(pil_info.items()): if isinstance(v, (bytes, bytearray)): try: pil_info[k] = v.decode("utf-8", "replace") except Exception: pil_info[k] = repr(v) elif isinstance(v, PngImagePlugin.PngInfo): pil_info[k] = "PngInfo(...)" except Exception as e: pil_info = {"_error": f"Pillow failed to open PNG: {e}"} response = { "found_text_chunks": chunks, "pil_info": pil_info, "quick_fields": { "parameters": next((c["text"] for c in chunks if c.get("keyword") == "parameters"), pil_info.get("parameters")), "Software": next((c["text"] for c in chunks if c.get("keyword") == "Software"), pil_info.get("Software")), }, } return response def infer_png_text(file): if file is None: return {"error": "Please upload a PNG file."} try: return read_png_info(file.name if hasattr(file, "name") else file) except Exception as e: return {"error": str(e)} # ================================================= # ========== NovelAI LSB Reader (tab 2) =========== # ================================================= # (User-provided logic, lightly wrapped for Gradio.) import numpy as np import gzip from pathlib import Path from io import BytesIO def _pack_lsb_bytes(alpha: np.ndarray) -> np.ndarray: """ Pack the least significant bits (LSB) from an image's alpha channel into bytes. """ alpha = alpha.T.reshape((-1,)) alpha = alpha[:(alpha.shape[0] // 8) * 8] alpha = np.bitwise_and(alpha, 1) alpha = alpha.reshape((-1, 8)) alpha = np.packbits(alpha, axis=1) return alpha class LSBReader: """ Utility class for reading hidden data from an image's alpha channel using LSB encoding. """ def __init__(self, data: np.ndarray): self.data = _pack_lsb_bytes(data[..., -1]) self.pos = 0 def read_bytes(self, n: int) -> bytearray: """Read `n` bytes from the bitstream.""" n_bytes = self.data[self.pos:self.pos + n] self.pos += n return bytearray(n_bytes.flatten().tolist()) def read_int32(self) -> Optional[int]: """Read a 4-byte big-endian integer from the bitstream.""" bytes_list = self.read_bytes(4) return int.from_bytes(bytes_list, 'big') if len(bytes_list) == 4 else None def _extract_nai_metadata_from_image(image: Image.Image) -> dict: """ Extract embedded metadata from a PNG image generated by NovelAI. """ image_array = np.array(image.convert("RGBA")) if image_array.shape[-1] != 4 or len(image_array.shape) != 3: raise ValueError("Image must be in RGBA format") reader = LSBReader(image_array) magic = "stealth_pngcomp" if reader.read_bytes(len(magic)).decode("utf-8", "replace") != magic: raise ValueError("Invalid magic number (not NovelAI stealth payload)") bit_len = reader.read_int32() if bit_len is None or bit_len <= 0: raise ValueError("Invalid payload length") json_len = bit_len // 8 compressed_json = reader.read_bytes(json_len) json_data = json.loads(gzip.decompress(bytes(compressed_json)).decode("utf-8")) if "Comment" in json_data and isinstance(json_data["Comment"], str): try: json_data["Comment"] = json.loads(json_data["Comment"]) except Exception: # Leave as-is if not valid JSON pass return json_data def extract_nai_metadata(image: Union[Image.Image, str, Path]) -> dict: if isinstance(image, (str, Path)): image = Image.open(image) elif not isinstance(image, Image.Image): raise ValueError("Input must be a file path (string/Path) or a PIL Image") return _extract_nai_metadata_from_image(image) def extract_nai_caption_from_hf_img(hf_img: dict) -> Optional[str]: image_bytes = hf_img['bytes'] pil_image = Image.open(BytesIO(image_bytes)) metadata = extract_nai_metadata(pil_image) return metadata.get('Description') def infer_nai(image: Optional[Image.Image]): if image is None: return None, {"error": "Please upload a PNG with alpha channel (RGBA)."} try: meta = extract_nai_metadata(image) description = meta.get("Description") return description, meta except Exception as e: return None, {"error": str(e)} # ================================================= # =========== Similarity Metrics (tab 3) =========== # ================================================= def _load_rgb_image(path: Union[str, Path]) -> np.ndarray: """Load an image file as RGB uint8 numpy array.""" img = Image.open(path).convert("RGB") return np.array(img, dtype=np.uint8) def _pixel_metrics(img_a: np.ndarray, img_b: np.ndarray) -> Dict[str, float]: """Compute basic pixel-wise similarity metrics between two RGB images.""" if img_a.shape != img_b.shape: raise ValueError(f"Image size mismatch: {img_a.shape} vs {img_b.shape}") diff = img_a.astype(np.float32) - img_b.astype(np.float32) abs_diff = np.abs(diff) mse = float(np.mean(diff ** 2)) mae = float(np.mean(abs_diff)) max_abs = float(np.max(abs_diff)) pixel_match = float(np.mean(img_a == img_b)) pixel_diff_pct = float(100.0 * (1.0 - pixel_match)) if mse == 0.0: psnr = float("inf") else: psnr = float(20.0 * np.log10(255.0 / np.sqrt(mse))) return { "pixel_diff_pct": pixel_diff_pct, "pixel_match": pixel_match, "mse": mse, "mae": mae, "max_abs": max_abs, "psnr": psnr, } def compute_similarity_report(files: Optional[List[str]]) -> str: if not files or len(files) < 2: return "Upload at least two images to compare (first file is treated as base)." try: images: Dict[str, np.ndarray] = {} base_name = None base_img = None for idx, file_path in enumerate(files): name = Path(file_path).name images[name] = _load_rgb_image(file_path) if idx == 0: base_name = name base_img = images[name] if base_name is None or base_img is None: return "Failed to load base image." metrics: Dict[str, Dict[str, float]] = {} # Base vs others for name, img in images.items(): if name == base_name: continue metrics[f"{base_name}_vs_{name}"] = _pixel_metrics(base_img, img) # Pairwise among non-base images other_keys = [k for k in images.keys() if k != base_name] for i in range(len(other_keys)): for j in range(i + 1, len(other_keys)): k1, k2 = other_keys[i], other_keys[j] metrics[f"{k1}_vs_{k2}"] = _pixel_metrics(images[k1], images[k2]) lines = [ "=== similarity metrics ===", f"Base image: {base_name}", ] for name, vals in metrics.items(): lines.append( ( f"{name}: pixel_diff_pct={vals['pixel_diff_pct']:.6f}%, " f"pixel_match={vals['pixel_match']:.6f}, mse={vals['mse']:.6e}, " f"mae={vals['mae']:.6e}, max_abs={vals['max_abs']:.6e}, " f"psnr={vals['psnr']:.2f}dB" ) ) lines.append("\nMetrics (JSON):") lines.append(json.dumps(metrics, indent=2)) return "\n".join(lines) except Exception as exc: # pragma: no cover - handled for UI return f"Error computing metrics: {exc}" # ================================================= # =============== Gradio App (two tabs) =========== # ================================================= with gr.Blocks(title="PNG Tools — ImageInfo & NovelAI Reader", theme=theme, analytics_enabled=False) as demo: gr.Markdown("# PNG Tools\nTwo utilities: PNG text-chunk metadata and NovelAI LSB metadata.") with gr.Tabs(): with gr.Tab("PNG ImageInfo Reader"): with gr.Row(): inp_png = gr.File(label="PNG file", file_types=[".png"]) out_png = gr.JSON(label="pngImageInfo") inp_png.change(fn=infer_png_text, inputs=inp_png, outputs=out_png) gr.Markdown("Tip: Stable Diffusion ‘parameters’ often appear under a **tEXt** chunk with keyword `parameters`.") with gr.Tab("NovelAI Reader"): with gr.Row(): nai_img = gr.Image(label="Upload PNG (RGBA preferred)", type="pil", height=360) with gr.Row(): nai_btn = gr.Button("Extract NovelAI Metadata", variant="primary") with gr.Row(): nai_desc = gr.Textbox(label="Description (if present)", lines=4) nai_json = gr.JSON(label="Decoded NovelAI JSON") nai_btn.click(fn=infer_nai, inputs=nai_img, outputs=[nai_desc, nai_json]) with gr.Tab("Similarity Metrics"): gr.Markdown("Upload multiple images; the first file is treated as the base for comparisons.") files_in = gr.Files( label="Image files", # Explicit list ensures WebP acceptance across Gradio builds file_types=[ ".png", ".jpg", ".jpeg", ".webp", ".gif", ".bmp", ".tif", ".tiff", ".jfif" ], type="filepath", interactive=True, ) with gr.Row(): metrics_btn = gr.Button("Compute Similarity", variant="primary") metrics_out = gr.Textbox(label="Similarity report", lines=14, show_copy_button=True) metrics_btn.click(fn=compute_similarity_report, inputs=files_in, outputs=metrics_out) if __name__ == "__main__": demo.launch()