# app.py - Hugging Face Spaces (Gradio) for SDXL + CompelForSDXL (deprecation & length fix)
import os
import re
import spaces
import gradio as gr
import torch
from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

# --- Compel imports (prefer SDXL wrapper) ---
from compel import CompelForSDXL, ReturnedEmbeddingsType, DiffusersTextualInversionManager

MODEL_ID = "aipicasso/emi-2-5"
HF_TOKEN = os.getenv("TOKEN", None)

DEFAULT_PROMPT = (
    "1girl, (upper body)++, black long hair, hime cut, black eyes, "
    "looking at viewer, blue and purple hydrangea"
)
# NOTE(review): this references the unaestheticXL_Alb2 token, which is only
# loaded further down when the local .safetensors file exists — confirm the
# negative prompt is acceptable when that embedding is absent.
NEGATIVE_BASE = "(unaestheticXLv31)++, (unaestheticXL_Alb2)++, bad hands, bad anatomy, low quality, 3d, photo, text"
BANNED_WORDS = {"pokemon", "pikachu", "picachu", "mario", "sonic", "genshin"}

WIDTH = 768
HEIGHT = 1344
DEFAULT_GUIDANCE_SCALE = 7.5

# One escaped, case-insensitive alternation built once at import time.
# Sorting (longest first, then alphabetical) makes the match order independent
# of set iteration order, which varies between interpreter runs.
_BANNED_PATTERN = re.compile(
    "|".join(re.escape(w) for w in sorted(BANNED_WORDS, key=lambda w: (-len(w), w))),
    flags=re.IGNORECASE,
)


def sanitize_prompt(s: str) -> str:
    """Remove every banned word from *s* and normalise whitespace.

    Banned words are removed as case-insensitive substrings. Removal is
    repeated until the text stops changing, so a banned word that only
    appears after another one has been cut out (e.g. "mapokemonrio") is
    removed as well.

    Args:
        s: Raw prompt text; ``None`` or an empty string is accepted.

    Returns:
        The cleaned prompt with runs of whitespace collapsed to one space and
        leading/trailing whitespace stripped; ``""`` for falsy input.
    """
    if not s:
        return ""
    out = s
    while True:
        # Each pass that changes the text strictly shortens it, so this ends.
        cleaned = _BANNED_PATTERN.sub("", out)
        if cleaned == out:
            break
        out = cleaned
    return re.sub(r"\s{2,}", " ", out).strip()


def pick_dtype() -> torch.dtype:
    """Choose the torch dtype used to load the pipeline.

    Returns:
        ``torch.float32`` when CUDA is unavailable (the pipeline is then moved
        to the CPU, where half precision is poorly supported),
        ``torch.bfloat16`` when CUDA reports bf16 support, otherwise
        ``torch.float16``.
    """
    if not torch.cuda.is_available():
        return torch.float32
    if torch.cuda.is_bf16_supported():
        return torch.bfloat16
    return torch.float16


dtype = pick_dtype()

# ---------------- Pipeline ----------------
scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
    MODEL_ID, subfolder="scheduler", token=HF_TOKEN
)
pipe = StableDiffusionXLPipeline.from_pretrained(
    MODEL_ID,
    scheduler=scheduler,
    torch_dtype=dtype,
    use_safetensors=True,
    token=HF_TOKEN,
)
pipe.to("cuda" if torch.cuda.is_available() else "cpu")
pipe.set_progress_bar_config(disable=True)

# --------------- Compel ---------------
textual_inversion_manager = DiffusersTextualInversionManager(pipe)
compel = CompelForSDXL(pipe, textual_inversion_manager=textual_inversion_manager)

# ---- Negative TI: unaestheticXL v3.1 ----
neg_ti_path = hf_hub_download(
    repo_id="Aikimi/unaestheticXL_Negative_TI",
    filename="unaestheticXLv31.safetensors",
    token=HF_TOKEN,
)
state_dict = load_file(neg_ti_path)
# The file carries one embedding per SDXL text encoder: "clip_g" is loaded
# into text_encoder_2 / tokenizer_2 and "clip_l" into text_encoder / tokenizer.
pipe.load_textual_inversion(
    state_dict["clip_g"],
    token="unaestheticXLv31",
    text_encoder=pipe.text_encoder_2,
    tokenizer=pipe.tokenizer_2,
)
pipe.load_textual_inversion(
    state_dict["clip_l"],
    token="unaestheticXLv31",
    text_encoder=pipe.text_encoder,
    tokenizer=pipe.tokenizer,
)

# ---- Optional local TI ----
if os.path.exists("unaestheticXL_Alb2.safetensors"):
    state_dict2 = load_file("unaestheticXL_Alb2.safetensors")
    pipe.load_textual_inversion(
        state_dict2["clip_g"],
        token="unaestheticXL_Alb2",
        text_encoder=pipe.text_encoder_2,
        tokenizer=pipe.tokenizer_2,
    )
    pipe.load_textual_inversion(
        state_dict2["clip_l"],
        token="unaestheticXL_Alb2",
        text_encoder=pipe.text_encoder,
        tokenizer=pipe.tokenizer,
    )

# ---- Optional LoRA (UNet-only is fine) ----
if os.path.exists("fix_hands.pt"):
    # Best effort: a LoRA that fails to load or fuse must not stop the app.
    try:
        pipe.load_lora_weights(".", weight_name="fix_hands.pt")
        pipe.fuse_lora(lora_scale=1.0, safe_fusing=True)
    except Exception as e:
        print(f"[WARN] LoRA load/fuse skipped: {e}")

# Optional FreeU (best effort as well)
try:
    pipe.enable_freeu(s1=1.2, s2=0.7, b1=1.1, b2=1.3)
except Exception as e:
    print(f"[WARN] FreeU not enabled: {e}")


# --------------- Inference ---------------
@spaces.GPU
def run_normal(
    prompt: str,
    negative_prompt: str = "",
    guidance_scale: float = DEFAULT_GUIDANCE_SCALE,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate one image for *prompt* with the module-level SDXL pipeline.

    Args:
        prompt: User prompt. It is sanitized first; if nothing remains,
            ``DEFAULT_PROMPT`` is used instead.
        negative_prompt: Extra negative prompt, sanitized and appended to
            ``NEGATIVE_BASE``.
        guidance_scale: Guidance scale passed to the pipeline. ``None`` (for
            example from a cleared number field) falls back to
            ``DEFAULT_GUIDANCE_SCALE``.
        progress: Gradio progress object; not used directly in the body.
            Presumably it is declared so Gradio shows progress in the UI —
            TODO confirm.

    Returns:
        The first image of the pipeline output.
    """
    prompt = sanitize_prompt(prompt) or DEFAULT_PROMPT
    # The trailing strip calls drop the dangling ", " left behind when the
    # user supplied no negative prompt.
    neg_full = f"{NEGATIVE_BASE}, {sanitize_prompt(negative_prompt)}".strip().strip(",")
    # float(None) would raise TypeError, so substitute the default first.
    scale = DEFAULT_GUIDANCE_SCALE if guidance_scale is None else float(guidance_scale)

    with torch.inference_mode():
        print(prompt, neg_full)
        conditioning = compel(prompt, negative_prompt=neg_full)
        out = pipe(
            prompt_embeds=conditioning.embeds,
            pooled_prompt_embeds=conditioning.pooled_embeds,
            negative_prompt_embeds=conditioning.negative_embeds,
            negative_pooled_prompt_embeds=conditioning.negative_pooled_embeds,
            num_inference_steps=25,
            guidance_scale=scale,
            # Round both dimensions down to a multiple of 8.
            width=(WIDTH // 8) * 8,
            height=(HEIGHT // 8) * 8,
        )
    return out.images[0]


# --------------- UI ---------------
css = """
.gradio-container{
  max-width: 768px !important;
  margin: 0 auto;
}
"""

normal_examples = [
    "1girl, (upper body)++, black long hair, hime cut, black eyes, looking at viewer, blue and purple hydrangea",
    "1girl, (full body)++, black long hair, hime cut, black eyes, looking at viewer, school uniform, blue and purple hydrangea",
    "no humans, manga, black and white, monochrome, Mt. fuji, 4k, highly detailed",
    "no humans, manga, black and white, monochrome, Shibuya street, 4k, highly detailed",
    "anime, 1boy++, (upper body)++, silver very short hair, blue eyes, looking at viewer, white background",
    "anime, 1boy++, (full body)++, silver very short hair, blue eyes, looking at viewer, white background",
]

with gr.Blocks(css=css) as demo:
    gr.Markdown(
        """
# Emi 2.5
Official demo for [Emi 2.5](https://huggingface.co/aipicasso/emi-2-5). Click the generate button!

本モデルの生成物は各種法令に従って取り扱って下さい。
"""
    )
    gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")

    with gr.Group():
        with gr.Row():
            prompt_normal = gr.Textbox(
                show_label=False,
                scale=4,
                placeholder=(
                    "Your prompt, e.g.: 1girl, (upper body)++, brown bob short hair, "
                    "brown eyes, looking at viewer, cherry blossom"
                ),
            )
            button_normal = gr.Button("Generate", min_width=120)
        output_normal = gr.Image(label="Your result image", interactive=False)
        with gr.Accordion("Advanced Settings", open=False):
            negative_prompt_normal = gr.Textbox(label="Negative Prompt")
            guidance_scale_normal = gr.Number(label="Guidance Scale", value=DEFAULT_GUIDANCE_SCALE)
        # Examples only fill the prompt; the other arguments of run_normal
        # keep their defaults.
        gr.Examples(
            examples=normal_examples,
            fn=run_normal,
            inputs=[prompt_normal],
            outputs=[output_normal],
            cache_examples=False,
        )

    # Both the button click and pressing Enter in the prompt box generate.
    gr.on(
        triggers=[button_normal.click, prompt_normal.submit],
        fn=run_normal,
        inputs=[prompt_normal, negative_prompt_normal, guidance_scale_normal],
        outputs=[output_normal],
    )

# If the experimental SSR message is bothersome, switch to ssr_mode=False.
demo.launch()  # .launch(ssr_mode=False) also works