|
|
import io |
|
|
import json |
|
|
import struct |
|
|
import zlib |
|
|
from typing import List, Dict, Any, Optional, Union |
|
|
|
|
|
import gradio as gr |
|
|
from PIL import Image, PngImagePlugin |
|
|
|
|
|
|
|
|
# Theme object handed to the Blocks app defined further down in this file.
theme = gr.themes.Soft(
    primary_hue="indigo",
    secondary_hue="violet",
    radius_size="lg",
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
PNG_SIGNATURE = b"\x89PNG\r\n\x1a\n" |
|
|
|
|
|
|
|
|
def _parse_png_text_chunks(data: bytes) -> List[Dict[str, Any]]: |
|
|
""" |
|
|
Parse PNG chunks and extract tEXt, zTXt, and iTXt entries. |
|
|
""" |
|
|
if not data.startswith(PNG_SIGNATURE): |
|
|
raise ValueError("Not a PNG file.") |
|
|
|
|
|
pos = len(PNG_SIGNATURE) |
|
|
out = [] |
|
|
|
|
|
while pos + 8 <= len(data): |
|
|
|
|
|
length = struct.unpack(">I", data[pos:pos+4])[0] |
|
|
ctype = data[pos+4:pos+8] |
|
|
pos += 8 |
|
|
|
|
|
if pos + length + 4 > len(data): |
|
|
break |
|
|
|
|
|
cdata = data[pos:pos+length] |
|
|
pos += length |
|
|
|
|
|
|
|
|
pos += 4 |
|
|
|
|
|
if ctype == b"tEXt": |
|
|
|
|
|
try: |
|
|
null_idx = cdata.index(b"\x00") |
|
|
key = cdata[:null_idx].decode("latin-1", "replace") |
|
|
text = cdata[null_idx+1:].decode("latin-1", "replace") |
|
|
out.append({"type": "tEXt", "keyword": key, "text": text}) |
|
|
except Exception: |
|
|
pass |
|
|
|
|
|
elif ctype == b"zTXt": |
|
|
|
|
|
try: |
|
|
null_idx = cdata.index(b"\x00") |
|
|
key = cdata[:null_idx].decode("latin-1", "replace") |
|
|
method = cdata[null_idx+1:null_idx+2] |
|
|
comp = cdata[null_idx+2:] |
|
|
if method == b"\x00": |
|
|
text = zlib.decompress(comp).decode("latin-1", "replace") |
|
|
out.append({"type": "zTXt", "keyword": key, "text": text}) |
|
|
except Exception: |
|
|
pass |
|
|
|
|
|
elif ctype == b"iTXt": |
|
|
|
|
|
try: |
|
|
i0 = cdata.index(b"\x00") |
|
|
key = cdata[:i0].decode("latin-1", "replace") |
|
|
comp_flag = cdata[i0+1:i0+2] |
|
|
comp_method = cdata[i0+2:i0+3] |
|
|
rest = cdata[i0+3:] |
|
|
|
|
|
i1 = rest.index(b"\x00") |
|
|
language_tag = rest[:i1].decode("ascii", "replace") |
|
|
rest2 = rest[i1+1:] |
|
|
|
|
|
i2 = rest2.index(b"\x00") |
|
|
translated_keyword = rest2[:i2].decode("utf-8", "replace") |
|
|
text_bytes = rest2[i2+1:] |
|
|
|
|
|
if comp_flag == b"\x01" and comp_method == b"\x00": |
|
|
text = zlib.decompress(text_bytes).decode("utf-8", "replace") |
|
|
else: |
|
|
text = text_bytes.decode("utf-8", "replace") |
|
|
|
|
|
out.append({ |
|
|
"type": "iTXt", |
|
|
"keyword": key, |
|
|
"language_tag": language_tag, |
|
|
"translated_keyword": translated_keyword, |
|
|
"text": text, |
|
|
}) |
|
|
except Exception: |
|
|
pass |
|
|
|
|
|
if ctype == b"IEND": |
|
|
break |
|
|
|
|
|
return out |
|
|
|
|
|
|
|
|
def read_png_info(file_obj) -> Dict[str, Any]:
    """Return structured PNG text info for an uploaded file.

    Combines the text chunks found by ``_parse_png_text_chunks`` with
    Pillow's ``.info`` mapping (which often contains 'parameters').

    Args:
        file_obj: Either an object with a ``read()`` method or something
            ``open()`` accepts as a path.

    Returns:
        A dict with three keys:
        ``found_text_chunks`` - the parsed text chunks;
        ``pil_info`` - Pillow's info mapping with byte values decoded to
        text, or ``{"_error": ...}`` if Pillow could not open the data;
        ``quick_fields`` - the ``parameters`` and ``Software`` values, taken
        from the parsed chunks first and from ``pil_info`` otherwise.

    Raises:
        ValueError: propagated from the chunk parser when the data is not
            a PNG.
    """
    if hasattr(file_obj, "read"):
        data = file_obj.read()
    else:
        with open(file_obj, "rb") as f:
            data = f.read()

    chunks = _parse_png_text_chunks(data)

    def _displayable(value):
        # Bytes are shown as text; ``"replace"`` means decoding cannot fail.
        if isinstance(value, (bytes, bytearray)):
            return value.decode("utf-8", "replace")
        if isinstance(value, PngImagePlugin.PngInfo):
            return "PngInfo(...)"
        return value

    try:
        # The context manager closes the image once its info has been copied.
        with Image.open(io.BytesIO(data)) as img:
            raw_info = dict(img.info)
        pil_info = {key: _displayable(value) for key, value in raw_info.items()}
    except Exception as e:
        # Boundary with Pillow: report the failure instead of aborting, so
        # the parsed chunks are still returned.
        pil_info = {"_error": f"Pillow failed to open PNG: {e}"}

    def _quick_field(keyword):
        # Prefer the raw chunk text; fall back to whatever Pillow exposed.
        for chunk in chunks:
            if chunk.get("keyword") == keyword:
                return chunk["text"]
        return pil_info.get(keyword)

    return {
        "found_text_chunks": chunks,
        "pil_info": pil_info,
        "quick_fields": {
            "parameters": _quick_field("parameters"),
            "Software": _quick_field("Software"),
        },
    }
|
|
|
|
|
|
|
|
def infer_png_text(file):
    """Callback for the PNG ImageInfo tab.

    Returns the result of ``read_png_info`` for the upload, or a dict with a
    single ``error`` key when nothing was uploaded or reading failed.
    """
    if file is not None:
        try:
            # Objects exposing ``.name`` are resolved to that value; anything
            # else is passed through unchanged.
            target = getattr(file, "name", file)
            return read_png_info(target)
        except Exception as exc:
            return {"error": str(exc)}

    return {"error": "Please upload a PNG file."}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import numpy as np |
|
|
import gzip |
|
|
from pathlib import Path |
|
|
from io import BytesIO |
|
|
|
|
|
def _pack_lsb_bytes(alpha: np.ndarray) -> np.ndarray: |
|
|
""" |
|
|
Pack the least significant bits (LSB) from an image's alpha channel into bytes. |
|
|
""" |
|
|
alpha = alpha.T.reshape((-1,)) |
|
|
alpha = alpha[:(alpha.shape[0] // 8) * 8] |
|
|
alpha = np.bitwise_and(alpha, 1) |
|
|
alpha = alpha.reshape((-1, 8)) |
|
|
alpha = np.packbits(alpha, axis=1) |
|
|
return alpha |
|
|
|
|
|
|
|
|
class LSBReader:
    """Sequential byte reader over the LSB stream of an image's last channel.

    The constructor packs the least significant bits of ``data[..., -1]``
    into bytes; the read methods then consume that byte stream from a moving
    cursor.
    """

    def __init__(self, data: np.ndarray):
        self.data = _pack_lsb_bytes(data[..., -1])
        self.pos = 0

    def read_bytes(self, n: int) -> bytearray:
        """Return the next `n` bytes and advance the cursor by `n`.

        Near the end of the stream fewer than `n` bytes may come back; the
        cursor still moves forward by the full `n`.
        """
        start, stop = self.pos, self.pos + n
        self.pos = stop
        window = self.data[start:stop]
        return bytearray(window.ravel().tolist())

    def read_int32(self) -> Optional[int]:
        """Read a 4-byte big-endian integer, or ``None`` if fewer than 4 bytes remain."""
        raw = self.read_bytes(4)
        if len(raw) != 4:
            return None
        return int.from_bytes(raw, 'big')
|
|
|
|
|
|
|
|
def _extract_nai_metadata_from_image(image: Image.Image) -> dict:
    """Extract the metadata hidden in the alpha-channel LSBs of an image.

    The stream read here is laid out as: the ASCII magic
    ``stealth_pngcomp``, a 4-byte big-endian payload length in bits, then a
    gzip-compressed UTF-8 JSON document. If the document has a string
    ``Comment`` field that is itself JSON, it is replaced by the parsed
    value.

    Args:
        image: A PIL image; it is converted to RGBA before reading.

    Returns:
        The decoded JSON object.

    Raises:
        ValueError: on a wrong magic, an invalid or truncated payload, or a
            payload that is not a JSON object.
        OSError / EOFError: from ``gzip`` when the payload is not valid gzip
            data.
    """
    image_array = np.array(image.convert("RGBA"))
    if image_array.ndim != 3 or image_array.shape[-1] != 4:
        raise ValueError("Image must be in RGBA format")

    reader = LSBReader(image_array)
    magic = "stealth_pngcomp"
    if reader.read_bytes(len(magic)).decode("utf-8", "replace") != magic:
        raise ValueError("Invalid magic number (not NovelAI stealth payload)")

    bit_len = reader.read_int32()
    if bit_len is None or bit_len <= 0:
        raise ValueError("Invalid payload length")

    json_len = bit_len // 8
    compressed_json = reader.read_bytes(json_len)
    # The reader returns a short buffer at the end of the stream; catch that
    # here rather than letting gzip fail with a less helpful error.
    if len(compressed_json) != json_len:
        raise ValueError("Invalid payload length")

    json_data = json.loads(gzip.decompress(bytes(compressed_json)).decode("utf-8"))
    # Callers use ``.get`` on the result, so anything but an object is unusable.
    if not isinstance(json_data, dict):
        raise ValueError("Invalid payload (expected a JSON object)")

    comment = json_data.get("Comment")
    if isinstance(comment, str):
        try:
            json_data["Comment"] = json.loads(comment)
        except (ValueError, RecursionError):
            # Best effort: a Comment that is not JSON stays a plain string.
            pass

    return json_data
|
|
|
|
|
|
|
|
def extract_nai_metadata(image: Union[Image.Image, str, Path]) -> dict:
    """Extract hidden metadata from a PIL image or from an image file path.

    Args:
        image: A PIL image, or a ``str``/``Path`` pointing at an image file.

    Returns:
        The decoded metadata dict.

    Raises:
        ValueError: if *image* is neither a path nor a PIL image, or if the
            image carries no valid payload.
    """
    if isinstance(image, (str, Path)):
        # Images opened here are also closed here, so the underlying file
        # handle is released once extraction finishes.
        with Image.open(image) as opened:
            return _extract_nai_metadata_from_image(opened)

    if not isinstance(image, Image.Image):
        raise ValueError("Input must be a file path (string/Path) or a PIL Image")

    return _extract_nai_metadata_from_image(image)
|
|
|
|
|
|
|
|
def extract_nai_caption_from_hf_img(hf_img: dict) -> Optional[str]:
    """Return the ``Description`` field of the metadata hidden in an image.

    Args:
        hf_img: Mapping whose ``'bytes'`` entry holds the encoded image data
            (presumably a Hugging Face datasets image record - confirm
            against callers).

    Returns:
        The ``Description`` value, or ``None`` when the metadata has none.
    """
    encoded = hf_img['bytes']
    decoded = extract_nai_metadata(Image.open(BytesIO(encoded)))
    return decoded.get('Description')
|
|
|
|
|
|
|
|
def infer_nai(image: Optional[Image.Image]):
    """Callback for the NovelAI tab.

    Returns a ``(description, metadata)`` pair. When nothing was uploaded or
    extraction fails, the description is ``None`` and the second element is
    a dict with a single ``error`` key.
    """
    if image is not None:
        try:
            payload = extract_nai_metadata(image)
            return payload.get("Description"), payload
        except Exception as exc:
            return None, {"error": str(exc)}

    return None, {"error": "Please upload a PNG with alpha channel (RGBA)."}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _load_rgb_image(path: Union[str, Path]) -> np.ndarray:
    """Load an image file as an RGB uint8 numpy array.

    The file is opened in a ``with`` block so its handle is closed as soon
    as the pixel data has been copied into the array.
    """
    with Image.open(path) as img:
        return np.array(img.convert("RGB"), dtype=np.uint8)
|
|
|
|
|
|
|
|
def _pixel_metrics(img_a: np.ndarray, img_b: np.ndarray) -> Dict[str, float]: |
|
|
"""Compute basic pixel-wise similarity metrics between two RGB images.""" |
|
|
if img_a.shape != img_b.shape: |
|
|
raise ValueError(f"Image size mismatch: {img_a.shape} vs {img_b.shape}") |
|
|
|
|
|
diff = img_a.astype(np.float32) - img_b.astype(np.float32) |
|
|
abs_diff = np.abs(diff) |
|
|
|
|
|
mse = float(np.mean(diff ** 2)) |
|
|
mae = float(np.mean(abs_diff)) |
|
|
max_abs = float(np.max(abs_diff)) |
|
|
|
|
|
pixel_match = float(np.mean(img_a == img_b)) |
|
|
pixel_diff_pct = float(100.0 * (1.0 - pixel_match)) |
|
|
|
|
|
if mse == 0.0: |
|
|
psnr = float("inf") |
|
|
else: |
|
|
psnr = float(20.0 * np.log10(255.0 / np.sqrt(mse))) |
|
|
|
|
|
return { |
|
|
"pixel_diff_pct": pixel_diff_pct, |
|
|
"pixel_match": pixel_match, |
|
|
"mse": mse, |
|
|
"mae": mae, |
|
|
"max_abs": max_abs, |
|
|
"psnr": psnr, |
|
|
} |
|
|
|
|
|
|
|
|
def compute_similarity_report(files: Optional[List[str]]) -> str:
    """Build a text report of pixel metrics between the uploaded images.

    The first file is the base: it is compared with every other file, and
    then every pair of the remaining files is compared as well. Files that
    share a basename are kept apart by appending ``" (2)"``, ``" (3)"``, ...
    to the later ones.

    Args:
        files: Paths of the images to compare.

    Returns:
        A multi-line report (one line per pair followed by the same metrics
        as JSON). If fewer than two files are given, a hint is returned
        instead. Failures are reported as an ``"Error computing metrics:
        ..."`` string rather than raised.
    """
    if not files or len(files) < 2:
        return "Upload at least two images to compare (first file is treated as base)."

    try:
        images: Dict[str, np.ndarray] = {}
        for file_path in files:
            name = Path(file_path).name
            # Uploads from different folders can share a basename; without a
            # distinct label the later image would overwrite the earlier one.
            label, counter = name, 2
            while label in images:
                label = f"{name} ({counter})"
                counter += 1
            images[label] = _load_rgb_image(file_path)

        labels = list(images)
        base_name, others = labels[0], labels[1:]
        base_img = images[base_name]

        metrics: Dict[str, Dict[str, float]] = {}

        # Base image against each of the other images.
        for name in others:
            metrics[f"{base_name}_vs_{name}"] = _pixel_metrics(base_img, images[name])

        # Every unordered pair among the non-base images.
        for idx, first in enumerate(others):
            for second in others[idx + 1:]:
                metrics[f"{first}_vs_{second}"] = _pixel_metrics(images[first], images[second])

        lines = [
            "=== similarity metrics ===",
            f"Base image: {base_name}",
        ]
        for name, vals in metrics.items():
            lines.append(
                f"{name}: pixel_diff_pct={vals['pixel_diff_pct']:.6f}%, "
                f"pixel_match={vals['pixel_match']:.6f}, mse={vals['mse']:.6e}, "
                f"mae={vals['mae']:.6e}, max_abs={vals['max_abs']:.6e}, "
                f"psnr={vals['psnr']:.2f}dB"
            )

        # PSNR is infinite for identical images, and ``json.dumps`` would
        # emit the non-standard token ``Infinity``; use a string instead so
        # the JSON section stays parseable.
        json_ready = {}
        for name, vals in metrics.items():
            json_ready[name] = {
                key: (value if np.isfinite(value) else str(value))
                for key, value in vals.items()
            }

        lines.append("\nMetrics (JSON):")
        lines.append(json.dumps(json_ready, indent=2))

        return "\n".join(lines)
    except Exception as exc:
        # UI boundary: show the problem in the output box instead of raising.
        return f"Error computing metrics: {exc}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Application layout: one Blocks app with three tabs, each wired to one of
# the callbacks defined above.
with gr.Blocks(
    title="PNG Tools — ImageInfo & NovelAI Reader",
    theme=theme,
    analytics_enabled=False,
) as demo:
    gr.Markdown("# PNG Tools\nTwo utilities: PNG text-chunk metadata and NovelAI LSB metadata.")

    with gr.Tabs():
        # Tab 1: raw PNG text chunks; re-runs whenever the file changes.
        with gr.Tab("PNG ImageInfo Reader"):
            with gr.Row():
                inp_png = gr.File(label="PNG file", file_types=[".png"])
                out_png = gr.JSON(label="pngImageInfo")
            inp_png.change(fn=infer_png_text, inputs=inp_png, outputs=out_png)
            gr.Markdown("Tip: Stable Diffusion ‘parameters’ often appear under a **tEXt** chunk with keyword `parameters`.")

        # Tab 2: metadata hidden in the alpha channel; runs on button click.
        with gr.Tab("NovelAI Reader"):
            with gr.Row():
                nai_img = gr.Image(
                    label="Upload PNG (RGBA preferred)",
                    type="pil",
                    height=360,
                )
            with gr.Row():
                nai_btn = gr.Button("Extract NovelAI Metadata", variant="primary")
            with gr.Row():
                nai_desc = gr.Textbox(label="Description (if present)", lines=4)
                nai_json = gr.JSON(label="Decoded NovelAI JSON")

            nai_btn.click(
                fn=infer_nai,
                inputs=nai_img,
                outputs=[nai_desc, nai_json],
            )

        # Tab 3: pixel-level comparison of several uploaded images.
        with gr.Tab("Similarity Metrics"):
            gr.Markdown("Upload multiple images; the first file is treated as the base for comparisons.")
            files_in = gr.Files(
                label="Image files",
                file_types=[
                    ".png", ".jpg", ".jpeg", ".webp", ".gif",
                    ".bmp", ".tif", ".tiff", ".jfif"
                ],
                type="filepath",
                interactive=True,
            )
            with gr.Row():
                metrics_btn = gr.Button("Compute Similarity", variant="primary")
            metrics_out = gr.Textbox(
                label="Similarity report",
                lines=14,
                show_copy_button=True,
            )
            metrics_btn.click(
                fn=compute_similarity_report,
                inputs=files_in,
                outputs=metrics_out,
            )


# Start the server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|
|
|