Qwen-Image-2509-One

Paused

File size: 16,086 Bytes

import gradio as gr
import numpy as np
import random
import torch
import spaces
from PIL import Image
from diffusers import FlowMatchEulerDiscreteScheduler
from optimization import optimize_pipeline_
from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3
import math

# --- Model Loading ---
# Everything in this section runs at import time: it builds the scheduler,
# loads the Qwen image-edit pipeline, applies a LoRA and hands the pipeline
# to the project's optimisation helper.
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

# Configuration passed to FlowMatchEulerDiscreteScheduler.from_config below.
# Dynamic shifting is enabled and base_shift == max_shift == log(3).
# NOTE(review): presumably this keeps the shift the same for every image
# sequence length — confirm against the scheduler documentation.
scheduler_config = {
    "base_image_seq_len": 256,
    "base_shift": math.log(3),
    "invert_sigmas": False,
    "max_image_seq_len": 8192,
    "max_shift": math.log(3),
    "num_train_timesteps": 1000,
    "shift": 1.0,
    "shift_terminal": None,
    "stochastic_sampling": False,
    "time_shift_type": "exponential",
    "use_beta_sigmas": False,
    "use_dynamic_shifting": True,
    "use_exponential_sigmas": False,
    "use_karras_sigmas": False,
}
scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)
# Load the base editing pipeline with the custom scheduler and move it to `device`.
pipe = QwenImageEditPlusPipeline.from_pretrained(

    "Qwen/Qwen-Image-Edit-2509",
    scheduler=scheduler,
    torch_dtype=dtype
).to(device)

# The only LoRA that is actually loaded. Its file name says "Lightning-4steps",
# which matches DEFAULT_NUM_INFERENCE_STEPS = 4 defined later in this file.
# NOTE(review): presumably a few-step distillation LoRA — confirm.
pipe.load_lora_weights(
    "rahul7star/qwen-char-lora",
    weight_name="qwen_lora/Qwen-Image-Edit-2509-Lightning-4steps-V1.0-bf16_dim1.safetensors"
)

# Disabled alternative LoRA (kept for reference, not loaded).
# pipe.load_lora_weights(
#     "yty9/GGUF-V5-NSFW-Qwen-Image-Edit-Rapid-AIO",
#     weight_name="Qwen-Rapid-AIO-NSFW-v5.safetensors"
# )



# Fuse the LoRA loaded above into the pipeline weights at full strength.
pipe.fuse_lora(lora_scale=1.0)


# Disabled alternative LoRAs (kept for reference, none of them is loaded).
# pipe.load_lora_weights(
#     "rahul7star/qwen-char-lora",
#     weight_name="qwen_lora/qwen-edit-skin_1.1_000002750.safetensors",
# )


# pipe.load_lora_weights(
#     "rahul7star/qwen-char-lora",
#     weight_name="qwen_lora/qwen-multiple-angle.safetensors",
# )


# pipe.load_lora_weights(
#     "rahul7star/qwen-char-lora",
#     weight_name="qwen_lora/jib_qwen_fix_000002750.safetensors",
# )



# pipe.load_lora_weights(
#     "rahul7star/qwen-char-lora",
#     weight_name="qwen_lora/removeclothing_qwen-edit.safetensors",
# )


# NOTE(review): second fuse_lora call; every load_lora_weights between the
# first call and this one is commented out, so nothing new has been loaded.
# It looks redundant — confirm before removing.
pipe.fuse_lora(lora_scale=1.0)

# Rebind the already-loaded transformer instance to the locally imported
# QwenImageTransformer2DModel class (the assignment itself reloads nothing),
# then install the project's attention processor on it.
# NOTE(review): "FA3" presumably stands for FlashAttention-3 — confirm.
pipe.transformer.__class__ = QwenImageTransformer2DModel
pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
# Called with two blank 1024x1024 RGB images and a placeholder prompt.
# NOTE(review): presumably example inputs used to compile / warm up the
# pipeline — confirm in optimization.py.
optimize_pipeline_(pipe, image=[Image.new("RGB", (1024, 1024)), Image.new("RGB", (1024, 1024))], prompt="prompt")

# --- Constants ---
# Largest signed 32-bit integer; used as the upper bound for seeds.
MAX_SEED = np.iinfo(np.int32).max

# Internal defaults (also used as the initial values of the advanced-settings widgets).
DEFAULT_SEED = 0
DEFAULT_RANDOMIZE = True
DEFAULT_TRUE_GUIDANCE_SCALE = 1.0
DEFAULT_NUM_INFERENCE_STEPS = 4

# Camera presets. Each entry maps "cn" (the Chinese prompt, which is always the
# value submitted to the model) plus "ja" / "en" display labels.
CAMERA_OPTIONS = [
    dict(zip(("cn", "ja", "en"), row))
    for row in (
        ("镜头方向左回转45度", "左に45度回転", "Rotate camera 45° left"),
        ("镜头向右回转45度", "右に45度回転", "Rotate camera 45° right"),
        ("镜头方向左回转90度", "左に90度回転", "Rotate camera 90° left"),
        ("镜头向右回转90度", "右に90度回転", "Rotate camera 90° right"),
        ("将镜头转为俯视", "上から見下ろす", "Switch to top-down view"),
        ("将镜头转为仰视", "下から見上げる", "Switch to low-angle view"),
        ("将镜头转为特写镜头", "クローズアップ", "Switch to close-up lens"),
        ("将镜头转为中近景镜头", "ややクローズアップ", "Switch to medium close-up lens"),
        ("将镜头转为拉远镜头", "ズームアウト", "Switch to zoom out lens"),
    )
]

# Free-text ("custom") dropdown entry: a sentinel value plus its label per UI language.
CUSTOM_OPTION_VALUE = "__custom__"
CUSTOM_LABELS = dict(
    en="Custom (enter Chinese or English prompt)",
    ja="自由入力（中国語、英語で入力）",
    zh="自定义（中文或英文输入）",
)

# i18n dictionary: UI string key -> {language code -> text}.
# Every entry provides "en", "ja" and "zh"; the UI shows one language at a time.
I18N = {
    # Page title and the warning banner shown above the form.
    "title": {
        "en": "Camera Work",
        "ja": "カメラワーク",
        "zh": "镜头控制",
    },
    "notice": {
        "en": "Note: Please avoid uploading images created by others. There may be rights infringements.",
        "ja": "注意：他者が作成した画像のアップロードはご遠慮ください。権利侵害の可能性があります。",
        "zh": "注意：请勿上传他人创作的图片，可能涉及权利侵害。",
    },
    # Labels and placeholders of the input widgets.
    "input_image": {"en": "Input image", "ja": "入力画像", "zh": "输入图像"},
    "dropdown_label": {
        "en": "Camera work",
        "ja": "カメラワーク",
        "zh": "镜头操作",
    },
    "custom_cn_label": {
        "en": "Custom prompt(English and Chinese recommended)",
        "ja": "自由入力のプロンプト(英語、中国語がおすすめ)",
        "zh": "自定义提示词(推荐英文和中文)",
    },
    "custom_cn_ph": {
        "en": "e.g., 将镜头转为斜俯视 并 拉远镜头",
        "ja": "例: 将镜头转为斜俯视 并 拉远镜头",
        "zh": "例如：将镜头转为斜俯视 并 拉远镜头",
    },
    "extra_label": {
        "en": "Extra prompt (optional, appended at end)(English and Chinese recommended)",
        "ja": "追加プロンプト（任意・末尾に付加）(英語、中国語がおすすめ)",
        "zh": "附加提示词（可选，追加在末尾）(推荐英文和中文)",
    },
    "extra_ph": {
        "en": "e.g., Subject is a girl",
        "ja": "例: 被摄体是一名女孩子",
        "zh": "例如：被摄体是一名女孩子",
    },
    # Advanced-settings accordion and the widgets inside it.
    "accordion": {"en": "Advanced settings", "ja": "詳細設定", "zh": "高级设置"},
    "seed": {"en": "Seed", "ja": "Seed", "zh": "Seed"},
    "rand": {"en": "Randomize seed", "ja": "ランダムシード", "zh": "随机种子"},
    "tgs": {"en": "True guidance scale", "ja": "True guidance scale", "zh": "True guidance scale"},
    "steps": {"en": "Steps", "ja": "生成ステップ数", "zh": "生成步数"},
    # Run button, output widgets and status / error messages.
    "run": {"en": "Generate", "ja": "生成", "zh": "生成"},
    "output": {"en": "Output image", "ja": "出力画像", "zh": "输出图像"},
    "status": {"en": "Status", "ja": "ステータス", "zh": "状态"},
    "status_ok": {
        "en": "Generated 1 image (PNG).",
        "ja": "1枚生成しました（PNG）。",
        "zh": "已生成 1 张图片（PNG）。",
    },
    "err_no_img": {
        "en": "Error: Please upload an input image.",
        "ja": "エラー: 入力画像をアップロードしてください",
        "zh": "错误：请先上传输入图像。",
    },
    "err_no_custom": {
        "en": "Error: Please enter a custom prompt.",
        "ja": "エラー: 自由入力のプロンプトを入力してください",
        "zh": "错误：请输入自定义提示词。",
    },
    # Label of the language selector itself.
    "lang_label": {"en": "UI Language", "ja": "UI言語", "zh": "界面语言"},
}

def t(key, lang):
    """Return the UI string registered under *key* for language *lang*.

    Unknown language codes fall back to the English text, mirroring the
    fallback used when the dropdown choices are built. An unknown *key*
    still raises ``KeyError`` because that indicates a programming error.
    """
    translations = I18N[key]
    return translations.get(lang, translations["en"])

def build_dropdown_choices(lang):
    """Build the ``(label, value)`` pairs for the camera-work dropdown.

    Labels are shown in the selected UI language only; the value of every
    preset is its Chinese prompt (the ``"cn"`` field), which is what gets
    sent to the model. The last pair is the free-text ("custom") entry.

    Args:
        lang: UI language code. Anything other than ``"en"``, ``"ja"`` or
            ``"zh"`` is treated as ``"en"``.

    Returns:
        A list of ``(label, value)`` tuples.
    """
    if lang not in ("en", "ja", "zh"):
        lang = "en"
    # CAMERA_OPTIONS stores its Chinese text under "cn" and has no "zh" key,
    # so for the Chinese UI the prompt itself doubles as the label. Indexing
    # item[lang] directly raised KeyError when the UI was switched to "zh".
    choices = [(item.get(lang, item["cn"]), item["cn"]) for item in CAMERA_OPTIONS]
    choices.append((CUSTOM_LABELS[lang], CUSTOM_OPTION_VALUE))
    return choices

def _append_prompt(base: str, extra: str) -> str:
    extra = (extra or "").strip()
    return (base if not extra else f"{base} {extra}").strip()

def generate_single_view(input_images, prompt, seed, num_inference_steps, true_guidance_scale):
    """Run the module-level pipeline once and return the first generated image.

    Args:
        input_images: Images passed to the pipeline; a falsy value (for
            example an empty list) is forwarded as ``None``.
        prompt: Text prompt for the edit.
        seed: Seed for the torch generator created on the module-level device.
        num_inference_steps: Number of inference steps passed to the pipeline.
        true_guidance_scale: Forwarded as ``true_cfg_scale``.

    Returns:
        The first element of the pipeline result's ``images``.
    """
    rng = torch.Generator(device=device)
    rng.manual_seed(seed)

    output = pipe(
        image=input_images or None,
        prompt=prompt,
        negative_prompt=" ",
        num_inference_steps=num_inference_steps,
        generator=rng,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=1,
    )
    return output.images[0]

@spaces.GPU(duration=46)
def generate_from_dropdown(
    image,
    dropdown_value_cn,
    custom_cn,
    extra_prompt="",
    seed=DEFAULT_SEED,
    randomize_seed=DEFAULT_RANDOMIZE,
    true_guidance_scale=DEFAULT_TRUE_GUIDANCE_SCALE,
    num_inference_steps=DEFAULT_NUM_INFERENCE_STEPS,
    lang="en",
    progress=gr.Progress(track_tqdm=True),
):
    """Handle a click on the run button: validate the inputs and generate one image.

    Args:
        image: The uploaded image, either a PIL image or something
            ``Image.open`` accepts; ``None`` when nothing was uploaded.
        dropdown_value_cn: Selected dropdown value, i.e. a Chinese camera
            prompt or the custom-option sentinel.
        custom_cn: Free-text prompt, used only when the custom option is selected.
        extra_prompt: Optional text appended to the base prompt.
        seed: Seed to use when ``randomize_seed`` is false.
        randomize_seed: When true, a random seed in ``[0, MAX_SEED]`` replaces ``seed``.
        true_guidance_scale: Forwarded to the generation call.
        num_inference_steps: Forwarded to the generation call.
        lang: UI language code used for status and progress messages.
        progress: Gradio progress tracker.

    Returns:
        ``(image, status_message)``; the image is ``None`` when validation fails.
    """
    # The seed is drawn before validation, exactly as the inputs arrive.
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    if image is None:
        return None, t("err_no_img", lang)

    # Accept both PIL images and anything Image.open can read; always work in RGB.
    source = image if isinstance(image, Image.Image) else Image.open(image)
    pil_images = [source.convert("RGB")]

    if dropdown_value_cn != CUSTOM_OPTION_VALUE:
        # A preset was chosen; an empty selection falls back to the first preset.
        base_cn = dropdown_value_cn or CAMERA_OPTIONS[0]["cn"]
    else:
        base_cn = (custom_cn or "").strip()
        if not base_cn:
            return None, t("err_no_custom", lang)

    final_prompt = _append_prompt(base_cn, extra_prompt)

    # Progress captions: English, Japanese, and everything else (Chinese).
    if lang == "en":
        running_desc, done_desc = "Generating...", "Done"
    elif lang == "ja":
        running_desc, done_desc = "生成中...", "完了"
    else:
        running_desc, done_desc = "生成中...", "完成"

    progress(0.6, desc=running_desc)
    out = generate_single_view(pil_images, final_prompt, seed, num_inference_steps, true_guidance_scale)
    progress(1.0, desc=done_desc)

    return out, t("status_ok", lang)

# --- UI ---
# Custom stylesheet passed to gr.Blocks(css=...) below.
#   #app-wrap : matches the main column created with elem_id="app-wrap".
#   .notice   : the warning banner rendered through gr.HTML.
#   .card     : columns created with elem_classes=["card"].
#   .small / .preview : not referenced by any component in this file.
css = """
#app-wrap {margin: 0 auto; max-width: 1200px;}
.notice {
  background: #fff8e1;
  border: 1px solid #facc15;
  color: #713f12;
  padding: 12px 14px;
  border-radius: 12px;
  font-weight: 600;
  line-height: 1.5;
  margin-bottom: 10px;
}
.card {
  background: white;
  border: 1px solid #e5e7eb;
  border-radius: 14px;
  padding: 14px;
  box-shadow: 0 1px 2px rgba(0,0,0,0.04);
}
.small { font-size: 12px; color: #6b7280; }
.preview {
  background: #f9fafb;
  border: 1px dashed #cbd5e1;
  border-radius: 10px;
  padding: 8px 10px;
  font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace;
  white-space: pre-wrap;
}
"""

with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
    # Language selector (English by default). All labels below start in
    # English and are re-labelled by _switch_lang when the selection changes.
    lang_selector = gr.Radio(
        label=I18N["lang_label"]["en"],
        choices=[("English", "en"), ("日本語", "ja"), ("中文", "zh")],
        value="en",
        interactive=True,
    )

    title_md = gr.Markdown(I18N["title"]["en"])

    with gr.Column(elem_id="app-wrap"):
        notice_html = gr.HTML(f"<div class='notice'>{I18N['notice']['en']}</div>")

        with gr.Row():
            with gr.Column(scale=1):
                input_image = gr.Image(label=I18N["input_image"]["en"], type="pil", height=420)

            with gr.Column(scale=1, elem_classes=["card"]):
                dropdown = gr.Dropdown(
                    label=I18N["dropdown_label"]["en"],
                    choices=build_dropdown_choices("en"),
                    # The value is the Chinese prompt; only the label is localized.
                    value=CAMERA_OPTIONS[0]["cn"],
                    allow_custom_value=False,
                    interactive=True,
                )

                # Free-text prompt; only shown while the custom option is selected.
                custom_cn = gr.Textbox(
                    label=I18N["custom_cn_label"]["en"],
                    placeholder=I18N["custom_cn_ph"]["en"],
                    visible=False,
                    lines=2
                )

                extra_prompt = gr.Textbox(
                    label=I18N["extra_label"]["en"],
                    placeholder=I18N["extra_ph"]["en"],
                    lines=2
                )

                # Advanced-settings accordion.
                with gr.Accordion(I18N["accordion"]["en"], open=False) as adv_acc:
                    seed = gr.Slider(label=I18N["seed"]["en"], minimum=0, maximum=MAX_SEED, step=1, value=DEFAULT_SEED)
                    randomize_seed = gr.Checkbox(label=I18N["rand"]["en"], value=DEFAULT_RANDOMIZE)
                    true_guidance_scale = gr.Slider(label=I18N["tgs"]["en"], minimum=1.0, maximum=10.0, step=0.1, value=DEFAULT_TRUE_GUIDANCE_SCALE)
                    num_inference_steps = gr.Slider(label=I18N["steps"]["en"], minimum=1, maximum=40, step=1, value=DEFAULT_NUM_INFERENCE_STEPS)

                run_button = gr.Button(I18N["run"]["en"], variant="primary")

        with gr.Row():
            with gr.Column(scale=1, elem_classes=["card"]):
                result_image = gr.Image(label=I18N["output"]["en"], type="pil", format="png", height=520, show_download_button=True)
                status_text = gr.Textbox(label=I18N["status"]["en"], interactive=False)

    def _toggle_custom(v_cn):
        """Show the free-text box only while the custom option is selected."""
        return gr.update(visible=(v_cn == CUSTOM_OPTION_VALUE))

    # Visibility depends on the dropdown alone, so only the dropdown triggers
    # the toggle. Listening to the two textboxes as well fired a server
    # round-trip on every keystroke without ever changing the outcome.
    dropdown.change(
        fn=_toggle_custom,
        inputs=[dropdown],
        outputs=[custom_cn],
    )

    def _switch_lang(lang, current_dropdown_value):
        """Return label/value updates for every localized component.

        The tuple order must match the ``outputs`` list of
        ``lang_selector.change`` below.
        """
        return (
            gr.update(label=I18N["lang_label"][lang]),                                # lang_selector label
            I18N["title"][lang],                                                      # title_md value
            gr.update(value=f"<div class='notice'>{I18N['notice'][lang]}</div>"),     # notice_html
            gr.update(label=I18N["input_image"][lang]),                               # input_image label
            gr.update(label=I18N["dropdown_label"][lang],
                      choices=build_dropdown_choices(lang),
                      value=current_dropdown_value if current_dropdown_value else CAMERA_OPTIONS[0]["cn"]),  # dropdown
            gr.update(label=I18N["custom_cn_label"][lang], placeholder=I18N["custom_cn_ph"][lang]),          # custom_cn
            gr.update(label=I18N["extra_label"][lang], placeholder=I18N["extra_ph"][lang]),                  # extra_prompt
            gr.update(label=I18N["accordion"][lang]),                                 # adv_acc label
            gr.update(label=I18N["seed"][lang]),                                      # seed
            gr.update(label=I18N["rand"][lang]),                                      # randomize_seed
            gr.update(label=I18N["tgs"][lang]),                                       # true_guidance_scale
            gr.update(label=I18N["steps"][lang]),                                     # num_inference_steps
            gr.update(value=I18N["run"][lang]),                                       # run_button text
            gr.update(label=I18N["output"][lang]),                                    # result_image
            gr.update(label=I18N["status"][lang]),                                    # status_text
        )

    # Language switch: re-label every component in the selected language.
    lang_selector.change(
        fn=_switch_lang,
        inputs=[lang_selector, dropdown],
        outputs=[
            lang_selector,         # label update
            title_md,              # markdown title
            notice_html,           # notice
            input_image,           # image label
            dropdown,              # dropdown (choices/label/value)
            custom_cn,             # custom label/placeholder
            extra_prompt,          # extra label/placeholder
            adv_acc,               # accordion label (previously stayed English)
            seed,                  # seed label
            randomize_seed,        # randomize label
            true_guidance_scale,   # tgs label
            num_inference_steps,   # steps label
            run_button,            # button text
            result_image,          # label
            status_text,           # label
        ],
    )

    # Run: the UI is shown in one language, but the dropdown submits the Chinese value.
    run_button.click(
        fn=generate_from_dropdown,
        inputs=[input_image, dropdown, custom_cn, extra_prompt, seed, randomize_seed, true_guidance_scale, num_inference_steps, lang_selector],
        outputs=[result_image, status_text],
    )

# Start the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()