rahul7star committed
Commit 8996337 · verified · 1 Parent(s): 766561b

Update app.py

Files changed (1):
  1. app.py +143 -80
app.py CHANGED
@@ -11,53 +11,37 @@ from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
 from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3
 import math
 import os
+
+import os
+import spaces
+import torch
+from diffusers import AutoencoderKLWan, WanPipeline, WanImageToVideoPipeline, UniPCMultistepScheduler
+from diffusers.utils import export_to_video
+import gradio as gr
 import tempfile
 from huggingface_hub import hf_hub_download
+import numpy as np
+from PIL import Image
+import random
 
-# --- Model & Repo ---
-HF_MODEL = os.environ.get("HF_UPLOAD_REPO", "rahul7star/qwen-edit-img-repo")
-dtype = torch.bfloat16
-device = "cuda" if torch.cuda.is_available() else "cpu"
-
-# --- Camera prompts ---
-BASE_PROMPTS = {
-    "front": "Move the camera to a front-facing position showing the full character. Background is plain white.",
-    "back": "Move the camera to a back-facing position showing the full character. Background is plain white.",
-    "left": "Move the camera to a side (left) profile view. Background is plain white.",
-    "right": "Move the camera to a side (right) profile view. Background is plain white.",
-    "45_left": "Rotate camera 45° left",
-    "45_right": "Rotate camera 45° right",
-    "90_left": "Rotate camera 90° left",
-    "90_right": "Rotate camera 90° right",
-    "top_down": "Switch to top-down view",
-    "low_angle": "Switch to low-angle view",
-    "close_up": "Switch to close-up lens",
-    "medium_close_up": "Switch to medium close-up lens",
-    "zoom_out": "Switch to zoom out lens",
-}
-
-# --- Resolution presets ---
-RESOLUTIONS = {
-    "1:4": (512, 2048),
-    "1:3": (576, 1728),
-    "nealy 9:16": (768, 1344),
-    "nealy 2:3": (832, 1216),
-    "3:4": (896, 1152),
-}
 
-MAX_SEED = np.iinfo(np.int32).max
+HF_MODEL = os.environ.get("HF_UPLOAD_REPO", "rahul7star/qwen-edit-img-repo")
 
 # --- CPU-only upload function ---
 def upload_image_and_prompt_cpu(input_image, prompt_text) -> str:
     from datetime import datetime
-    import uuid, shutil
+    import tempfile, os, uuid, shutil
     from huggingface_hub import HfApi
 
+    # Instantiate the HfApi class
     api = HfApi()
+    print(prompt_text)
+
     today_str = datetime.now().strftime("%Y-%m-%d")
     unique_subfolder = f"Upload-Image-{uuid.uuid4().hex[:8]}"
     hf_folder = f"{today_str}/{unique_subfolder}"
 
+    # Save image temporarily
     with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_img:
         if isinstance(input_image, str):
             shutil.copy(input_image, tmp_img.name)
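Note: the added import block pulls in several names that nothing in this file uses (AutoencoderKLWan, WanPipeline, WanImageToVideoPipeline, UniPCMultistepScheduler, export_to_video) and duplicates the `import os` already present a few lines up. A trimmed header is sketched below; it keeps only the symbols referenced later in this diff and is an editorial suggestion, not part of the commit.

    # Editorial sketch: minimal module-level imports for this app.
    import math
    import os
    import random
    import tempfile

    import gradio as gr
    import numpy as np
    import spaces
    import torch
    from huggingface_hub import hf_hub_download
    from PIL import Image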
@@ -65,6 +49,7 @@ def upload_image_and_prompt_cpu(input_image, prompt_text) -> str:
             input_image.save(tmp_img.name, format="PNG")
         tmp_img_path = tmp_img.name
 
+    # Upload image using HfApi instance
     api.upload_file(
         path_or_fileobj=tmp_img_path,
         path_in_repo=f"{hf_folder}/input_image.png",
@@ -73,6 +58,7 @@ def upload_image_and_prompt_cpu(input_image, prompt_text) -> str:
         token=os.environ.get("HUGGINGFACE_HUB_TOKEN")
     )
 
+    # Save prompt as summary.txt
     summary_file = tempfile.NamedTemporaryFile(delete=False, suffix=".txt").name
     with open(summary_file, "w", encoding="utf-8") as f:
         f.write(prompt_text)
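Note: a minimal sketch of calling the CPU-side helper on its own, assuming HUGGINGFACE_HUB_TOKEN is set and the destination repo (its id and type sit in lines this diff does not show) is writable. The helper accepts either a PIL image or a file path and returns the dated subfolder it created.

    from PIL import Image

    test_img = Image.new("RGB", (512, 512), "white")
    folder = upload_image_and_prompt_cpu(test_img, "front view, plain white background")
    print(folder)  # e.g. "2025-11-30/Upload-Image-1a2b3c4d"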
@@ -85,11 +71,17 @@ def upload_image_and_prompt_cpu(input_image, prompt_text) -> str:
         token=os.environ.get("HUGGINGFACE_HUB_TOKEN")
     )
 
+    # Cleanup
     os.remove(tmp_img_path)
     os.remove(summary_file)
+
     return hf_folder
 
-# --- Scheduler & model load ---
+# --- Model Loading ---
+dtype = torch.bfloat16
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+# Scheduler configuration for Lightning
 scheduler_config = {
     "base_image_seq_len": 256,
     "base_shift": math.log(3),
@@ -106,38 +98,66 @@ scheduler_config = {
     "use_exponential_sigmas": False,
     "use_karras_sigmas": False,
 }
+
+# Initialize scheduler
 scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)
 
+# Load model
 pipe = QwenImageEditPlusPipeline.from_pretrained(
     "Qwen/Qwen-Image-Edit-2509",
     scheduler=scheduler,
     torch_dtype=dtype
 ).to(device)
 
-# Load LoRA weights
 pipe.load_lora_weights(
     "rahul7star/qwen-char-lora",
     weight_name="qwen_lora/Qwen-Image-Edit-2509-Lightning-4steps-V1.0-bf16_dim1.safetensors"
 )
 pipe.fuse_lora(lora_scale=1.0)
 
+# pipe.load_lora_weights(
+#     "rahul7star/qwen-char-lora",
+#     weight_name="qwen_lora/qwen-multiple-angle.safetensors",
+# )
+# pipe.fuse_lora(lora_scale=1.0)
+
 pipe.load_lora_weights(
     "rahul7star/qwen-char-lora",
     weight_name="qwen_lora/qwen-multiple-char.safetensors",
 )
 pipe.fuse_lora(lora_scale=1.0)
 
+
 pipe.transformer.__class__ = QwenImageTransformer2DModel
 pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
 optimize_pipeline_(pipe, image=[Image.new("RGB", (1024, 1024)), Image.new("RGB", (1024, 1024))], prompt="prompt")
 
-# --- Utilities ---
+# --- Constants ---
+MAX_SEED = np.iinfo(np.int32).max
+PROMPTS = {
+    "front": "Move the camera to a front-facing position so the full body of the character is visible. The character stands with both arms extended slightly downward and close to the thighs, keeping the body evenly balanced on both sides. The legs are positioned symmetrically with a narrow stance. The background is plain white.",
+    "back": "Move the camera to a back-facing position so the full body of the character is visible. Background is plain white.",
+    "left": "Move the camera to a side view (profile) from the left so the full body of the character is visible. Background is plain white.",
+    "right": "Move the camera to a side view (profile) from the right so the full body of the character is visible. Background is plain white."
+}
+
+# NEW: output resolution presets
+RESOLUTIONS = {
+    "1:4": (512, 2048),
+    "1:3": (576, 1728),
+    "nealy 9:16": (768, 1344),
+    "nealy 2:3": (832, 1216),
+    "3:4": (896, 1152),
+}
+
 def _append_prompt(base: str, extra: str) -> str:
     extra = (extra or "").strip()
     return (base if not extra else f"{base} {extra}").strip()
 
 def generate_single_view(input_images, prompt, seed, num_inference_steps, true_guidance_scale):
     generator = torch.Generator(device=device).manual_seed(seed)
+    print(prompt)
+
     result = pipe(
         image=input_images if input_images else None,
         prompt=prompt,
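Note: a minimal sketch of a single-view call using the Lightning defaults the app uses elsewhere (4 steps, guidance 1.0). It assumes, as the resize/concat code below implies, that generate_single_view returns a PIL image; the helper also attempts its own upload internally, which fails with only a printed message when no token is configured.

    ref = Image.open("character.png").convert("RGB")  # hypothetical local file
    view = generate_single_view(
        input_images=[ref],
        prompt=PROMPTS["front"],
        seed=42,
        num_inference_steps=4,
        true_guidance_scale=1.0,
    )
    view.save("front_view.png")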
@@ -153,10 +173,6 @@ def generate_single_view(input_images, prompt, seed, num_inference_steps, true_guidance_scale):
         print("Upload failed:", e)
     return result[0]
 
-def resize_to_preset(img: Image.Image, preset_key: str) -> Image.Image:
-    w, h = RESOLUTIONS[preset_key]
-    return img.resize((w, h), Image.LANCZOS)
-
 def concat_images_horizontally(images, bg_color=(255, 255, 255)):
     images = [img.convert("RGB") for img in images if img is not None]
     if not images:
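Note: resize_to_preset, removed here and re-added further down, resizes to the exact preset dimensions, so the aspect ratio of the generated views is not preserved. If stretching is unwanted, a padded fit is one alternative; the variant below is an editorial sketch, not part of the commit (PIL's thumbnail only downscales).

    def resize_to_preset_padded(img: Image.Image, preset_key: str) -> Image.Image:
        w, h = RESOLUTIONS[preset_key]
        fitted = img.copy()
        fitted.thumbnail((w, h), Image.LANCZOS)  # keeps aspect ratio, shrink-only
        canvas = Image.new("RGB", (w, h), (255, 255, 255))
        canvas.paste(fitted, ((w - fitted.width) // 2, (h - fitted.height) // 2))
        return canvas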
@@ -176,84 +192,131 @@ def concat_images_horizontally(images, bg_color=(255, 255, 255)):
         x += img.width
     return canvas
 
-# --- Main generation function ---
+# NEW: resize utility
+def resize_to_preset(img: Image.Image, preset_key: str) -> Image.Image:
+    w, h = RESOLUTIONS[preset_key]
+    return img.resize((w, h), Image.LANCZOS)
+
 @spaces.GPU()
 def generate_turnaround(
     image,
-    selected_angles,
     extra_prompt="",
-    preset_key="nealy 9:16",
+    preset_key="nealy 9:16",  # NEW: default
     seed=42,
     randomize_seed=False,
     true_guidance_scale=1.0,
     num_inference_steps=4,
     progress=gr.Progress(track_tqdm=True),
 ):
+    print(extra_prompt)
+    try:
+        upload_image_and_prompt_cpu(image, extra_prompt)
+    except Exception as e:
+        print("Upload failed:", e)
+
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     if image is None:
-        return {}, seed, " 入力画像をアップロードしてください"
+        return None, None, None, None, None, seed, "エラー: 入力画像をアップロードしてください"
+
+    if isinstance(image, Image.Image):
+        input_image = image.convert("RGB")
+    else:
+        input_image = Image.open(image).convert("RGB")
+
+    pil_images = [input_image]
+
+    # Append the extra prompt to the end of each view prompt
+    p_front = _append_prompt(PROMPTS["front"], extra_prompt)
+    p_back = _append_prompt(PROMPTS["back"], extra_prompt)
+    p_left = _append_prompt(PROMPTS["left"], extra_prompt)
+    p_right = _append_prompt(PROMPTS["right"], extra_prompt)
+
+    progress(0.25, desc="正面生成中...")
+    front = generate_single_view(pil_images, p_front, seed, num_inference_steps, true_guidance_scale)
 
-    input_image = image.convert("RGB") if isinstance(image, Image.Image) else Image.open(image).convert("RGB")
+    progress(0.5, desc="背面生成中...")
+    back = generate_single_view([front], p_back, seed+1, num_inference_steps, true_guidance_scale)
 
-    results = {}
-    current_seed = seed
-    for i, angle in enumerate(selected_angles):
-        progress((i+1)/len(selected_angles), desc=f"{angle} 生成中...")
-        prompt = _append_prompt(BASE_PROMPTS[angle], extra_prompt)
-        img = generate_single_view([input_image], prompt, current_seed, num_inference_steps, true_guidance_scale)
-        img = resize_to_preset(img, preset_key)
-        results[angle] = img
-        current_seed += 1
+    progress(0.75, desc="左側面生成中...")
+    left = generate_single_view([front], p_left, seed+2, num_inference_steps, true_guidance_scale)
 
-    # Concatenate all selected images in order
-    concat_img = concat_images_horizontally(list(results.values()))
-    results["concat"] = concat_img
+    progress(1.0, desc="右側面生成中...")
+    right = generate_single_view([front], p_right, seed+3, num_inference_steps, true_guidance_scale)
 
-    return results, seed, f"✅ {preset_key} にリサイズして {len(selected_angles)} 視点+連結画像を生成しました"
+    # NEW: resize to the selected preset here
+    front_r = resize_to_preset(front, preset_key)
+    back_r = resize_to_preset(back, preset_key)
+    left_r = resize_to_preset(left, preset_key)
+    right_r = resize_to_preset(right, preset_key)
+
+    # NEW: concatenate the resized views (horizontal: front → right → back → left)
+    concat = concat_images_horizontally([front_r, right_r, back_r, left_r])
+
+    return front_r, back_r, left_r, right_r, concat, seed, f"✅ {preset_key} にリサイズして4視点+連結画像を生成しました"
 
 # --- UI ---
 css = """
 #col-container {margin: 0 auto; max-width: 1400px;}
 .image-container img {object-fit: contain !important; max-width: 100%; max-height: 100%;}
+/* Added: notice box style */
+.notice {
+    background: #fff5f5;
+    border: 1px solid #fca5a5;
+    color: #7f1d1d;
+    padding: 12px 14px;
+    border-radius: 10px;
+    font-weight: 600;
+    line-height: 1.5;
+    margin-bottom: 10px;
+}
 """
 
 with gr.Blocks(css=css) as demo:
+
     with gr.Column(elem_id="col-container"):
+
+
         input_image = gr.Image(label="入力画像", type="pil", height=500)
-        extra_prompt = gr.Textbox(label="追加プロンプト", placeholder="high detail, anime style, soft lighting, 4k", lines=2)
-        preset_dropdown = gr.Dropdown(label="出力解像度プリセット", choices=list(RESOLUTIONS.keys()), value="nealy 9:16")
-        seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
-        randomize_seed = gr.Checkbox(label="ランダムシード", value=True)
-
-        # --- Checklist for angles ---
-        select_all_checkbox = gr.Checkbox(label="Select All", value=True)
-        angles_checklist = gr.CheckboxGroup(
-            label="生成するカメラ視点を選択",
-            choices=list(BASE_PROMPTS.keys()),
-            value=list(BASE_PROMPTS.keys())
+
+        # Extra prompt field
+        extra_prompt = gr.Textbox(
+            label="追加プロンプト(各視点プロンプトの末尾に追記)",
+            placeholder="例: high detail, anime style, soft lighting, 4k, pastel colors",
+            lines=2
         )
 
-        # JS: update checklist when select_all changes
-        select_all_checkbox.change(
-            lambda select_all: list(BASE_PROMPTS.keys()) if select_all else [],
-            inputs=select_all_checkbox,
-            outputs=angles_checklist
+        # NEW: output resolution preset dropdown
+        preset_dropdown = gr.Dropdown(
+            label="出力解像度プリセット",
+            choices=list(RESOLUTIONS.keys()),
+            value="nealy 9:16"
         )
 
         run_button = gr.Button("🎨 生成開始", variant="primary")
         status_text = gr.Textbox(label="ステータス", interactive=False)
 
-        # Dynamic image outputs
-        image_outputs = {angle: gr.Image(label=angle, type="pil", format="png", height=400, show_download_button=True)
-                         for angle in BASE_PROMPTS.keys()}
-        image_outputs["concat"] = gr.Image(label="連結画像", type="pil", format="png", height=400, show_download_button=True)
+        with gr.Row():
+            result_front = gr.Image(label="正面", type="pil", format="png", height=400, show_download_button=True)
+            result_back = gr.Image(label="背面", type="pil", format="png", height=400, show_download_button=True)
+        with gr.Row():
+            result_left = gr.Image(label="左側面", type="pil", format="png", height=400, show_download_button=True)
+            result_right = gr.Image(label="右側面", type="pil", format="png", height=400, show_download_button=True)
+
+        # Concatenated PNG output
+        result_concat = gr.Image(label="連結画像(正面→右→背面→左)", type="pil", format="png", height=400, show_download_button=True)
+
+        with gr.Accordion("⚙️ 詳細設定", open=False):
+            seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
+            randomize_seed = gr.Checkbox(label="ランダムシード", value=True)
+            true_guidance_scale = gr.Slider(label="True guidance scale", minimum=1.0, maximum=10.0, step=0.1, value=1.0)
+            num_inference_steps = gr.Slider(label="生成ステップ数", minimum=1, maximum=40, step=1, value=4)
 
-        # Button click
+        # NEW: pass preset_dropdown as an argument on click
        run_button.click(
             fn=generate_turnaround,
-            inputs=[input_image, angles_checklist, extra_prompt, preset_dropdown, seed, randomize_seed, seed, 4],
-            outputs=[image_outputs, seed, status_text],
+            inputs=[input_image, extra_prompt, preset_dropdown, seed, randomize_seed, true_guidance_scale, num_inference_steps],
+            outputs=[result_front, result_back, result_left, result_right, result_concat, seed, status_text],
         )
 
 if __name__ == "__main__":
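Note: the click wiring is the substantive bug fix in this commit. The old handler passed the seed slider twice (the second time where true_guidance_scale belongs), a bare literal 4 where Gradio expects a component, and a dict of image components inside the outputs list; inputs and outputs must be flat lists of components matching the function's positional parameters and its returned tuple. The corrected wiring from the diff, with editorial annotations:

    run_button.click(
        fn=generate_turnaround,
        inputs=[
            input_image,          # -> image
            extra_prompt,         # -> extra_prompt
            preset_dropdown,      # -> preset_key
            seed,                 # -> seed
            randomize_seed,       # -> randomize_seed
            true_guidance_scale,  # -> true_guidance_scale
            num_inference_steps,  # -> num_inference_steps
        ],
        outputs=[result_front, result_back, result_left, result_right,
                 result_concat, seed, status_text],  # the 7-tuple returned by generate_turnaround
    )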