Spaces:

aipicasso
/

emi-2-demo

Running on Zero

File size: 6,674 Bytes

# app.py - Hugging Face Spaces (Gradio) for SDXL + CompelForSDXL (deprecation & length fix)
import os
import re
import spaces
import gradio as gr
import torch

from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

# --- Compel imports (prefer SDXL wrapper) ---
from compel import CompelForSDXL, ReturnedEmbeddingsType, DiffusersTextualInversionManager

# Hugging Face repository the scheduler and pipeline are loaded from.
MODEL_ID = "aipicasso/emi-2-5"
# Hub access token taken from the TOKEN environment variable (None if unset).
HF_TOKEN = os.environ.get("TOKEN")

# Prompt substituted when the user's prompt is empty after sanitizing.
DEFAULT_PROMPT = ", ".join(
    (
        "1girl",
        "(upper body)++",
        "black long hair",
        "hime cut",
        "black eyes",
        "looking at viewer",
        "blue and purple hydrangea",
    )
)

# Negative terms that are always prepended to the user's negative prompt.
NEGATIVE_BASE = ", ".join(
    (
        "(unaestheticXLv31)++",
        "(unaestheticXL_Alb2)++",
        "bad hands",
        "bad anatomy",
        "low quality",
        "3d",
        "photo",
        "text",
    )
)
# Words that are stripped (case-insensitively, as substrings) from prompts.
BANNED_WORDS = {"pokemon", "pikachu", "picachu", "mario", "sonic", "genshin"}

# Output image size in pixels.
WIDTH = 768
HEIGHT = 1344

# Single escaped, case-insensitive alternation over BANNED_WORDS. The words
# are sorted (longest first, then alphabetically) so the pattern does not
# depend on set iteration order.
_BANNED_RE = re.compile(
    "|".join(
        re.escape(word)
        for word in sorted(BANNED_WORDS, key=lambda w: (-len(w), w))
    ),
    flags=re.IGNORECASE,
)

def sanitize_prompt(s: str) -> str:
    """Return *s* with every banned word removed and whitespace tidied.

    Matching is case-insensitive and substring-based. Removal is repeated
    until the text stops changing, because deleting one match can splice the
    surrounding characters into a new banned word (``"mamariorio"`` would
    otherwise come out as ``"mario"``).

    Args:
        s: Raw prompt text; ``None`` or an empty string is accepted.

    Returns:
        The cleaned prompt with runs of whitespace collapsed to one space and
        leading/trailing whitespace stripped; ``""`` for empty input.
    """
    if not s:
        return ""
    out = s
    while True:
        stripped = _BANNED_RE.sub("", out)
        if stripped == out:
            # Fixed point reached: no banned word is left anywhere.
            break
        out = stripped
    return re.sub(r"\s{2,}", " ", out).strip()

def pick_dtype() -> torch.dtype:
    """Choose the tensor dtype for the current hardware.

    Returns:
        ``torch.float32`` when CUDA is unavailable (half precision is not a
        safe choice for CPU inference), ``torch.bfloat16`` when the CUDA
        device reports bf16 support, and ``torch.float16`` otherwise.
    """
    if not torch.cuda.is_available():
        return torch.float32
    if torch.cuda.is_bf16_supported():
        return torch.bfloat16
    return torch.float16

# Weight dtype chosen by pick_dtype(); passed as torch_dtype when the pipeline loads.
dtype = pick_dtype()

# ---------------- Pipeline ----------------
# Scheduler config comes from the model repo's "scheduler" subfolder.
scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
    MODEL_ID, subfolder="scheduler", token=HF_TOKEN
)
# SDXL pipeline loaded from the same repo, using the scheduler built above.
pipe = StableDiffusionXLPipeline.from_pretrained(
    MODEL_ID,
    scheduler=scheduler,
    torch_dtype=dtype,
    use_safetensors=True,
    token=HF_TOKEN,
)

# Move the pipeline to the GPU when one is available, otherwise keep it on CPU.
pipe.to("cuda" if torch.cuda.is_available() else "cpu")
# Turn off the pipeline's own progress bar.
pipe.set_progress_bar_config(disable=True)

# --------------- Compel ---------------
textual_inversion_manager = DiffusersTextualInversionManager(pipe)
compel = CompelForSDXL(pipe, textual_inversion_manager=textual_inversion_manager)


def _load_sdxl_ti(weights, token):
    """Register one textual-inversion token with both text encoders of `pipe`.

    `weights` is a mapping holding the "clip_g" and "clip_l" entries; the
    "clip_g" entry is loaded into text_encoder_2/tokenizer_2 first, then the
    "clip_l" entry into text_encoder/tokenizer.
    """
    pipe.load_textual_inversion(
        weights["clip_g"],
        token=token,
        text_encoder=pipe.text_encoder_2,
        tokenizer=pipe.tokenizer_2,
    )
    pipe.load_textual_inversion(
        weights["clip_l"],
        token=token,
        text_encoder=pipe.text_encoder,
        tokenizer=pipe.tokenizer,
    )


# ---- Negative TI: unaestheticXL v3.1 ----
neg_ti_path = hf_hub_download(
    repo_id="Aikimi/unaestheticXL_Negative_TI",
    filename="unaestheticXLv31.safetensors",
    token=HF_TOKEN,
)
state_dict = load_file(neg_ti_path)
_load_sdxl_ti(state_dict, "unaestheticXLv31")

# ---- Optional local TI ----
# Only loaded when the file sits next to the app.
if os.path.exists("unaestheticXL_Alb2.safetensors"):
    state_dict2 = load_file("unaestheticXL_Alb2.safetensors")
    _load_sdxl_ti(state_dict2, "unaestheticXL_Alb2")

# ---- Optional LoRA (a UNet-only LoRA is fine too) ----
# Applied only when fix_hands.pt exists in the working directory.
if os.path.exists("fix_hands.pt"):
    try:
        pipe.load_lora_weights(".", weight_name="fix_hands.pt")
        # Fuse the LoRA into the pipeline weights at full strength.
        pipe.fuse_lora(lora_scale=1.0, safe_fusing=True)
    except Exception as e:
        # Best effort: a broken LoRA file must not stop the app from starting.
        print(f"[WARN] LoRA load/fuse skipped: {e}")

# Optional FreeU
# Best effort as well: any failure is logged and startup continues.
try:
    pipe.enable_freeu(s1=1.2, s2=0.7, b1=1.1, b2=1.3)
except Exception as e:
    print(f"[WARN] FreeU not enabled: {e}")

# --------------- Inference ---------------
@spaces.GPU
def run_normal(prompt: str,
               negative_prompt: str = "",
               guidance_scale: float = 7.5,
               progress=gr.Progress(track_tqdm=True)):
    """Generate one image for the given prompt.

    Args:
        prompt: Positive prompt; after sanitizing, an empty prompt is
            replaced by DEFAULT_PROMPT.
        negative_prompt: Extra negative terms appended to NEGATIVE_BASE
            after sanitizing.
        guidance_scale: Classifier-free guidance scale. ``None`` (what a
            cleared numeric input delivers) falls back to 7.5.
        progress: Gradio progress tracker; not referenced in the body.

    Returns:
        The first image produced by the pipeline.
    """
    prompt = sanitize_prompt(prompt) or DEFAULT_PROMPT
    neg_full = f"{NEGATIVE_BASE}, {sanitize_prompt(negative_prompt)}".strip().strip(",")

    # A cleared Number field arrives as None; float(None) would raise
    # TypeError, so use the signature's default instead.
    scale = 7.5 if guidance_scale is None else float(guidance_scale)

    with torch.inference_mode():
        print(prompt, neg_full)
        # Compel turns the weighted prompt syntax ("(...)++") into embeddings.
        conditioning = compel(prompt, negative_prompt=neg_full)

        out = pipe(
            prompt_embeds=conditioning.embeds,
            pooled_prompt_embeds=conditioning.pooled_embeds,
            negative_prompt_embeds=conditioning.negative_embeds,
            negative_pooled_prompt_embeds=conditioning.negative_pooled_embeds,
            num_inference_steps=25,
            guidance_scale=scale,
            # Round both sides down to a multiple of 8.
            width=(WIDTH // 8) * 8,
            height=(HEIGHT // 8) * 8,
        )
    return out.images[0]

# --------------- UI ---------------
css = """
.gradio-container{
  max-width: 768px !important;
  margin: 0 auto;
}
"""

normal_examples = [
    "1girl, (upper body)++, black long hair, hime cut, black eyes, looking at viewer, blue and purple hydrangea",
    "1girl, (full body)++, black long hair, hime cut, black eyes, looking at viewer, school uniform, blue and purple hydrangea",
    "no humans, manga, black and white, monochrome, Mt. fuji, 4k, highly detailed",
    "no humans, manga, black and white, monochrome, Shibuya street, 4k, highly detailed",
    "anime, 1boy++, (upper body)++, silver very short hair, blue eyes, looking at viewer, white background",
    "anime, 1boy++, (full body)++, silver very short hair, blue eyes, looking at viewer, white background",
]

# Build the page: header text, prompt row, result image, advanced settings,
# example prompts, and the event wiring that calls run_normal.
with gr.Blocks(css=css) as demo:
    gr.Markdown(
        """
# Emi 2.5
Official demo for [Emi 2.5](https://huggingface.co/aipicasso/emi-2-5). Click the generate button!  
本モデルの生成物は各種法令に従って取り扱って下さい。
"""
    )
    gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
    with gr.Group():
        with gr.Row():
            # Prompt input and the Generate button share one row.
            prompt_normal = gr.Textbox(
                show_label=False,
                scale=4,
                placeholder=(
                    "Your prompt, e.g.: 1girl, (upper body)++, brown bob short hair, "
                    "brown eyes, looking at viewer, cherry blossom"
                ),
            )
            button_normal = gr.Button("Generate", min_width=120)
        output_normal = gr.Image(label="Your result image", interactive=False)
        with gr.Accordion("Advanced Settings", open=False):
            negative_prompt_normal = gr.Textbox(label="Negative Prompt")
            guidance_scale_normal = gr.Number(label="Guidance Scale", value=7.5)

    # Examples supply only the prompt, so run_normal's other parameters keep
    # their defaults; example outputs are not cached.
    gr.Examples(
        examples=normal_examples,
        fn=run_normal,
        inputs=[prompt_normal],
        outputs=[output_normal],
        cache_examples=False,
    )

    # Clicking Generate and submitting the prompt textbox both run
    # generation with the same inputs.
    gr.on(
        triggers=[button_normal.click, prompt_normal.submit],
        fn=run_normal,
        inputs=[prompt_normal, negative_prompt_normal, guidance_scale_normal],
        outputs=[output_normal],
    )

# If the experimental SSR message is bothersome, pass ssr_mode=False.
demo.launch()  # .launch(ssr_mode=False) works as well