File size: 6,674 Bytes
0186224
a91402f
 
76a8c47
4af9c2b
a91402f
 
4af9c2b
a91402f
 
4af9c2b
0186224
3f61ad1
2bd6b89
a91402f
 
 
 
 
 
 
3f61ad1
5146ff1
a91402f
4af9c2b
a91402f
 
c8d73ef
a91402f
 
 
 
 
 
426eb90
a91402f
 
 
 
 
 
 
 
0186224
a91402f
 
 
 
 
 
 
 
 
 
 
56e3624
 
 
3f61ad1
013ce92
3f61ad1
 
0186224
a91402f
 
 
 
 
 
 
0186224
 
a91402f
 
0186224
 
a91402f
 
0186224
a91402f
 
 
0186224
 
a91402f
 
0186224
 
a91402f
c8d73ef
0186224
a91402f
 
 
0186224
a91402f
 
a6b996a
426eb90
a91402f
 
 
 
8c49122
0186224
4af9c2b
a91402f
 
 
 
0186224
426eb90
a91402f
 
 
83f3590
 
a91402f
426eb90
3e093a9
 
 
 
a91402f
 
 
 
426eb90
 
a91402f
0186224
a91402f
114766c
a91402f
 
114766c
a91402f
114766c
c9a9082
23fe301
b895768
c517e31
 
02eec09
 
c9a9082
056fb20
4af9c2b
a91402f
 
 
 
 
 
 
a5ad6db
4af9c2b
 
a91402f
 
 
 
 
 
 
 
 
4af9c2b
 
a91402f
 
 
 
 
 
 
 
426eb90
a91402f
 
4af9c2b
a91402f
4af9c2b
 
 
 
a91402f
0186224
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# app.py - Hugging Face Spaces (Gradio) for SDXL + CompelForSDXL (deprecation & length fix)
import os
import re
import spaces
import gradio as gr
import torch

from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

# --- Compel imports (prefer SDXL wrapper) ---
from compel import CompelForSDXL, ReturnedEmbeddingsType, DiffusersTextualInversionManager

# Hugging Face Hub repository that the SDXL pipeline and its scheduler config
# are loaded from.
MODEL_ID = "aipicasso/emi-2-5"
# Hub access token, read from the TOKEN environment variable (None when unset).
HF_TOKEN = os.getenv("TOKEN", None)

# Prompt that run_normal falls back to when the user's prompt is empty after
# sanitizing.
DEFAULT_PROMPT = (
    "1girl, (upper body)++, black long hair, hime cut, black eyes, "
    "looking at viewer, blue and purple hydrangea"
)

# Always placed in front of the user's negative prompt by run_normal. The two
# "unaestheticXL..." names are the textual-inversion tokens this file registers
# on the pipeline (the "_Alb2" one only when its local file exists).
NEGATIVE_BASE = "(unaestheticXLv31)++, (unaestheticXL_Alb2)++, bad hands, bad anatomy, low quality, 3d, photo, text"
# Names that are stripped (case-insensitively, as substrings) from user prompts.
BANNED_WORDS = {"pokemon", "pikachu", "picachu", "mario", "sonic", "genshin"}

# Requested output size; run_normal rounds both down to a multiple of 8.
WIDTH = 768
HEIGHT = 1344

def sanitize_prompt(s: str) -> str:
    """Remove every banned word from *s* and tidy up the whitespace.

    Matching is case-insensitive and substring-based. Removal is repeated
    until the text stops changing, so a banned word cannot be smuggled in by
    nesting it inside itself (e.g. "pikpikachuachu" would otherwise collapse
    to "pikachu" after a single pass).

    Args:
        s: Raw prompt text. ``None`` or an empty string is accepted.

    Returns:
        The cleaned prompt with runs of whitespace collapsed to one space and
        leading/trailing whitespace removed; "" when *s* is empty or None.
    """
    if not s:
        return ""
    out = s
    while True:
        before = out
        for word in BANNED_WORDS:
            # re.escape keeps the entry literal even if a future banned word
            # contains regex metacharacters.
            out = re.sub(re.escape(word), "", out, flags=re.IGNORECASE)
        # Each successful removal shortens the string, so this terminates.
        if out == before:
            break
    return re.sub(r"\s{2,}", " ", out).strip()

def pick_dtype() -> torch.dtype:
    """Choose the weight dtype for the pipeline from the available hardware.

    Returns:
        ``torch.bfloat16`` on a CUDA device that supports it,
        ``torch.float16`` on any other CUDA device, and ``torch.float32``
        when CUDA is not available.
    """
    if not torch.cuda.is_available():
        # The pipeline is moved to "cpu" in this case; half-precision
        # pipelines are not expected to run on CPU, so keep full precision.
        return torch.float32
    if torch.cuda.is_bf16_supported():
        return torch.bfloat16
    return torch.float16

# Weight dtype, decided once at import time and reused for the pipeline below.
dtype = pick_dtype()

# ---------------- Pipeline ----------------
# The scheduler configuration is read from the model repo's "scheduler" subfolder.
scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
    MODEL_ID,
    token=HF_TOKEN,
    subfolder="scheduler",
)

# Build the SDXL pipeline around that scheduler.
pipe = StableDiffusionXLPipeline.from_pretrained(
    MODEL_ID,
    token=HF_TOKEN,
    use_safetensors=True,
    torch_dtype=dtype,
    scheduler=scheduler,
)

# Run on the GPU when CUDA is reported as available, otherwise on the CPU.
if torch.cuda.is_available():
    pipe.to("cuda")
else:
    pipe.to("cpu")

# Turn off the pipeline's own progress bar.
pipe.set_progress_bar_config(disable=True)

# --------------- Compel ---------------
# `compel` is called by run_normal to turn the prompt / negative prompt text
# into the embedding tensors handed to the pipeline. The manager is given the
# pipeline so textual-inversion tokens can be handled
# (NOTE(review): exact expansion behavior is Compel's - confirm in its docs).
textual_inversion_manager = DiffusersTextualInversionManager(pipe)
compel = CompelForSDXL(
    pipe,
    textual_inversion_manager=textual_inversion_manager,
)

# ---- Negative TI: unaestheticXL v3.1 ----
# Download the embedding file from the Hub and read its tensors.
neg_ti_path = hf_hub_download(
    token=HF_TOKEN,
    filename="unaestheticXLv31.safetensors",
    repo_id="Aikimi/unaestheticXL_Negative_TI",
)
state_dict = load_file(neg_ti_path)

# The file carries one embedding per text encoder. Register both under the
# same token: "clip_g" on the second encoder first, then "clip_l" on the first.
for _ti_key, _enc_attr, _tok_attr in (
    ("clip_g", "text_encoder_2", "tokenizer_2"),
    ("clip_l", "text_encoder", "tokenizer"),
):
    pipe.load_textual_inversion(
        state_dict[_ti_key],
        token="unaestheticXLv31",
        text_encoder=getattr(pipe, _enc_attr),
        tokenizer=getattr(pipe, _tok_attr),
    )

# ---- Optional local TI ----
# Only applied when the embedding file sits next to the app; otherwise skipped.
if os.path.exists("unaestheticXL_Alb2.safetensors"):
    state_dict2 = load_file("unaestheticXL_Alb2.safetensors")
    # Same two-encoder registration order as above: "clip_g" then "clip_l".
    for _ti_key, _enc_attr, _tok_attr in (
        ("clip_g", "text_encoder_2", "tokenizer_2"),
        ("clip_l", "text_encoder", "tokenizer"),
    ):
        pipe.load_textual_inversion(
            state_dict2[_ti_key],
            token="unaestheticXL_Alb2",
            text_encoder=getattr(pipe, _enc_attr),
            tokenizer=getattr(pipe, _tok_attr),
        )

# ---- Optional LoRA (a UNet-only LoRA is fine too) ----
# Best effort: when the local weights file exists, load it and fuse it into the
# pipeline at full strength; any failure is reported and the app carries on.
_lora_present = os.path.exists("fix_hands.pt")
if _lora_present:
    try:
        pipe.load_lora_weights(".", weight_name="fix_hands.pt")
        pipe.fuse_lora(safe_fusing=True, lora_scale=1.0)
    except Exception as exc:
        print(f"[WARN] LoRA load/fuse skipped: {exc}")

# Optional FreeU
# Best effort: if the pipeline rejects the call, log a warning and continue
# without FreeU.
_freeu_params = {"s1": 1.2, "s2": 0.7, "b1": 1.1, "b2": 1.3}
try:
    pipe.enable_freeu(**_freeu_params)
except Exception as exc:
    print(f"[WARN] FreeU not enabled: {exc}")

# --------------- Inference ---------------
@spaces.GPU
def run_normal(prompt: str,
               negative_prompt: str = "",
               guidance_scale: float = 7.5,
               progress=gr.Progress(track_tqdm=True)):
    """Generate one image for *prompt* with the module-level SDXL pipeline.

    Args:
        prompt: User prompt. Banned words are stripped; if nothing is left,
            DEFAULT_PROMPT is used instead.
        negative_prompt: Extra negative prompt text, sanitized and appended
            after NEGATIVE_BASE.
        guidance_scale: Guidance scale passed to the pipeline. ``None``
            (sent by the UI when the number box is left empty) falls back to
            the default of 7.5 instead of raising ``TypeError``.
        progress: Gradio progress tracker; not referenced in the body.

    Returns:
        The first image of the pipeline output.
    """
    if guidance_scale is None:
        # A cleared gr.Number field arrives as None; float(None) would raise.
        guidance_scale = 7.5

    prompt = sanitize_prompt(prompt) or DEFAULT_PROMPT
    # The trailing strip calls drop the dangling ", " left behind when the
    # user supplied no negative prompt.
    neg_full = f"{NEGATIVE_BASE}, {sanitize_prompt(negative_prompt)}".strip().strip(",")

    with torch.inference_mode():
        # Log the effective prompts for debugging.
        print(prompt, neg_full)
        # Compel converts the weighted prompt text into embedding tensors.
        conditioning = compel(prompt, negative_prompt=neg_full)

        out = pipe(
            prompt_embeds=conditioning.embeds,
            pooled_prompt_embeds=conditioning.pooled_embeds,
            negative_prompt_embeds=conditioning.negative_embeds,
            negative_pooled_prompt_embeds=conditioning.negative_pooled_embeds,
            num_inference_steps=25,
            guidance_scale=float(guidance_scale),
            # Round both dimensions down to a multiple of 8.
            width=(WIDTH // 8) * 8,
            height=(HEIGHT // 8) * 8,
        )
    return out.images[0]

# --------------- UI ---------------
# Page-level CSS handed to gr.Blocks: caps the container width and centers it.
css = """
.gradio-container{
  max-width: 768px !important;
  margin: 0 auto;
}
"""

# Example prompts offered through gr.Examples; each entry is a prompt string.
normal_examples = [
    "1girl, (upper body)++, black long hair, hime cut, black eyes, looking at viewer, blue and purple hydrangea",
    "1girl, (full body)++, black long hair, hime cut, black eyes, looking at viewer, school uniform, blue and purple hydrangea",
    "no humans, manga, black and white, monochrome, Mt. fuji, 4k, highly detailed",
    "no humans, manga, black and white, monochrome, Shibuya street, 4k, highly detailed",
    "anime, 1boy++, (upper body)++, silver very short hair, blue eyes, looking at viewer, white background",
    "anime, 1boy++, (full body)++, silver very short hair, blue eyes, looking at viewer, white background",
]

# Build the Gradio page: header text, prompt row, result image, advanced
# settings, example prompts, and the event wiring that calls run_normal.
with gr.Blocks(css=css) as demo:
    # Header / description shown at the top of the page.
    gr.Markdown(
        """
# Emi 2.5
Official demo for [Emi 2.5](https://huggingface.co/aipicasso/emi-2-5). Click the generate button!  
本モデルの生成物は各種法令に従って取り扱って下さい。
"""
    )
    gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
    with gr.Group():
        # Prompt text box and the Generate button share one row.
        with gr.Row():
            prompt_normal = gr.Textbox(
                show_label=False,
                scale=4,
                placeholder=(
                    "Your prompt, e.g.: 1girl, (upper body)++, brown bob short hair, "
                    "brown eyes, looking at viewer, cherry blossom"
                ),
            )
            button_normal = gr.Button("Generate", min_width=120)
        # Read-only image component that receives the generated picture.
        output_normal = gr.Image(label="Your result image", interactive=False)
        # Optional inputs, collapsed by default.
        with gr.Accordion("Advanced Settings", open=False):
            negative_prompt_normal = gr.Textbox(label="Negative Prompt")
            guidance_scale_normal = gr.Number(label="Guidance Scale", value=7.5)

    # Examples supply only the prompt input, so run_normal's other parameters
    # keep their defaults here; example outputs are not cached.
    gr.Examples(
        examples=normal_examples,
        fn=run_normal,
        inputs=[prompt_normal],
        outputs=[output_normal],
        cache_examples=False,
    )

    # Both clicking the button and submitting the prompt box run run_normal
    # with the prompt, negative prompt and guidance scale inputs.
    gr.on(
        triggers=[button_normal.click, prompt_normal.submit],
        fn=run_normal,
        inputs=[prompt_normal, negative_prompt_normal, guidance_scale_normal],
        outputs=[output_normal],
    )

# If the experimental SSR message is a nuisance, pass ssr_mode=False instead.
demo.launch()  # .launch(ssr_mode=False) also works