Spaces:

aptol
/

genshin

Running on Zero

App Files Files Community

aptol committed on Aug 18, 2025

Commit

f72c130

verified ·

1 Parent(s): bb6302e

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -61

app.py CHANGED Viewed

@@ -552,12 +552,12 @@ def step1_cpu(img, keep_rembg, do_weaponless, weapon_terms):
     return [preview], out_path, "\n".join(logs)
 def _resize_to_multiple(img: Image.Image, multiple: int = 8, max_side: int = 768) -> Image.Image:
-    """Aspect 유지 + 8의 배수 크기로 리사이즈 (최대 변은 max_side로 제한)"""
     w, h = img.size
-    # 1) 최대 변 제한
     scale = min(1.0, float(max_side) / float(max(w, h)))
     w = int(w * scale); h = int(h * scale)
-    # 2) 8 배수로 내림
     w = max(multiple, (w // multiple) * multiple)
     h = max(multiple, (h // multiple) * multiple)
     if (w, h) != img.size:
@@ -565,18 +565,18 @@ def _resize_to_multiple(img: Image.Image, multiple: int = 8, max_side: int = 768
     return img
 def _make_tpose_canvas_like(img: Image.Image) -> Image.Image:
-    """입력 이미지와 같은 해상도의 T-포즈 캔버스 생성"""
     w, h = img.size
     size = min(w, h)
     base = Image.new("RGB", (w, h), "black")
-    # T-포즈 가이드는 정사각 영역에 그린 후 중앙 정렬
     square = Image.new("RGB", (size, size), "black")
     d = ImageDraw.Draw(square)
     cx, cy = size//2, int(size*0.58)
     arm = int(size*0.36); leg = int(size*0.36); head = int(size*0.06)
     # spine
     d.line([(cx, cy-int(size*0.28)), (cx, cy+int(size*0.04))], fill="white", width=10)
-    # arms
     yA = cy-int(size*0.22)
     d.line([(cx-arm, yA), (cx+arm, yA)], fill="white", width=10)
     # legs
@@ -586,101 +586,111 @@ def _make_tpose_canvas_like(img: Image.Image) -> Image.Image:
     d.ellipse([(cx-head, yA-int(size*0.18)-head), (cx+head, yA-int(size*0.18)+head)], outline="white", width=10)
     # joints
     for pt in [(cx,yA), (cx-arm,yA), (cx+arm,yA), (cx,cy), (cx,cy+int(size*0.04))]:
-        d.ellipse([(pt[0]-8,pt[1]-8),(pt[0]+8,pt[1]+8)], fill="white")
-    # 중앙 배치
     offx = (w - size)//2; offy = (h - size)//2
     base.paste(square, (offx, offy))
     return base
-@spaces.GPU(duration=600)  # ← ZeroGPU 환경: 여기서만 CUDA/모델 로딩 허용
 def step1_gpu_refine(
-    s1_path,
-    enforce_tpose, tpose_strength, tpose_steps, tpose_guidance,
-    do_redraw_flag, redraw_strength, redraw_steps, redraw_guidance
 ):
-    # 안정화 파라미터 클램프
-    tpose_strength = max(0.35, min(0.65, float(tpose_strength)))
-    tpose_steps    = int(max(12,  min(28,  int(tpose_steps))))
-    tpose_guidance = max(5.5,  min(9.0,  float(tpose_guidance)))
-    redraw_strength = max(0.25, min(0.5,  float(redraw_strength)))
-    redraw_steps    = int(max(12,  min(28,  int(redraw_steps))))
-    redraw_guidance = max(5.0,  min(9.0,  float(redraw_guidance)))
-    # 입력 이미지와 포즈 캔버스를 같은 해상도(8의 배수)로 맞추기
-    img_rgb = img.convert("RGB")
-    img_rgb = _resize_to_multiple(img_rgb, multiple=8, max_side=768)
-    pose_canvas = _make_tpose_canvas_like(img_rgb)   # 입력과 동일 해상도
-    # (선택) 시드 고정 원하면:
-    # generator = None
-    # try:
-    #     import torch
-    #     generator = torch.Generator(device="cuda" if torch.cuda.is_available() else "cpu").manual_seed(0)
-    # except Exception:
-    #     pass
-    """GPU 단계: ControlNet(OpenPose)로 T-포즈 강제 + img2img 리드로우"""
     logs = []
     if not s1_path or not Path(s1_path).exists():
         raise gr.Error("STEP1 이미지가 없습니다. 먼저 STEP1(CPU)을 실행하세요.")
-    img = Image.open(s1_path).convert("RGBA")
-    # ---- T-포즈 (ControlNet/OpenPose)
-    # ---- T-포즈 (ControlNet/OpenPose)
     if enforce_tpose:
         try:
             from diffusers import ControlNetModel, StableDiffusionControlNetImg2ImgPipeline
-            import torch
-            dev = "cuda" if torch.cuda.is_available() else "cpu"
             controlnet = ControlNetModel.from_pretrained(
                 "lllyasviel/control_v11p_sd15_openpose",
-                torch_dtype=torch.float16 if dev == "cuda" else torch.float32
             )
-            pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
                 "runwayml/stable-diffusion-v1-5",
                 controlnet=controlnet,
-                torch_dtype=torch.float16 if dev == "cuda" else torch.float32
-            ).to(dev)
-            # ✅ 여기서 리사이즈 & 포즈 캔버스 생성
             img_rgb = _resize_to_multiple(img.convert("RGB"), multiple=8, max_side=768)
             pose_canvas = _make_tpose_canvas_like(img_rgb)
-            img = pipe(
                 prompt="T-pose, full body, clean anime lines",
                 image=img_rgb,
                 control_image=pose_canvas,
                 strength=float(tpose_strength),
                 guidance_scale=float(tpose_guidance),
                 num_inference_steps=int(tpose_steps),
-                # generator=generator,
-            ).images[0].convert("RGBA")
         except Exception as e:
-            logs.append(f"T-포즈 실패: {e}")
-    # ---- img2img 리드로우 (옵션)
-    # ---- 리드로우
     if do_redraw_flag:
         try:
             img_for_redraw = _resize_to_multiple(img.convert("RGB"), multiple=8, max_side=768)
-            img = pipe(
                 prompt="clean anime illustration, sharp lines, simple solid background",
                 image=img_for_redraw,
                 strength=float(redraw_strength),
                 guidance_scale=float(redraw_guidance),
                 num_inference_steps=int(redraw_steps),
-                # generator=generator,
-            ).images[0].convert("RGBA")
         except Exception as e:
-            logs.append(f"리드로우 실패: {e}")
     out_path = _save_png(img, OUT / "step1" / "input_preprocessed.png")
-    return [(out_path, "refined")], out_path, "\n".join(logs)
 # ---------------------------------
 # STEP2: Spaces call (model + texture)

     return [preview], out_path, "\n".join(logs)
 def _resize_to_multiple(img: Image.Image, multiple: int = 8, max_side: int = 768) -> Image.Image:
+    """Aspect 유지 + 8의 배수 리사이즈 (최대 변은 max_side)"""
     w, h = img.size
+    # 최대 변 제한
     scale = min(1.0, float(max_side) / float(max(w, h)))
     w = int(w * scale); h = int(h * scale)
+    # 8 배수로 내림
     w = max(multiple, (w // multiple) * multiple)
     h = max(multiple, (h // multiple) * multiple)
     if (w, h) != img.size:
     return img
 def _make_tpose_canvas_like(img: Image.Image) -> Image.Image:
+    """입력과 동일 해상도의 T-포즈 가이드 캔버스 생성"""
+    from PIL import ImageDraw
     w, h = img.size
     size = min(w, h)
     base = Image.new("RGB", (w, h), "black")
     square = Image.new("RGB", (size, size), "black")
     d = ImageDraw.Draw(square)
     cx, cy = size//2, int(size*0.58)
     arm = int(size*0.36); leg = int(size*0.36); head = int(size*0.06)
     # spine
     d.line([(cx, cy-int(size*0.28)), (cx, cy+int(size*0.04))], fill="white", width=10)
+    # arms (T)
     yA = cy-int(size*0.22)
     d.line([(cx-arm, yA), (cx+arm, yA)], fill="white", width=10)
     # legs
     d.ellipse([(cx-head, yA-int(size*0.18)-head), (cx+head, yA-int(size*0.18)+head)], outline="white", width=10)
     # joints
     for pt in [(cx,yA), (cx-arm,yA), (cx+arm,yA), (cx,cy), (cx,cy+int(size*0.04))]:
+        d.ellipse([(pt[0]-8,pt[1]-8), (pt[0]+8,pt[1]+8)], fill="white")
     offx = (w - size)//2; offy = (h - size)//2
     base.paste(square, (offx, offy))
     return base
+@spaces.GPU(duration=600)
 def step1_gpu_refine(
+    s1_path: str,
+    enforce_tpose: bool, tpose_strength: float, tpose_steps: int, tpose_guidance: float,
+    do_redraw_flag: bool, redraw_strength: float, redraw_steps: int, redraw_guidance: float
 ):
+    """
+    GPU 단계: ControlNet(OpenPose)로 T-포즈 강제 → (선택) img2img 리드로우.
+    - ZeroGPU 규칙: torch/diffusers 로드는 이 함수 내부에서만!
+    - image/control_image 해상도 동일 + 8의 배수로 강제.
+    """
     logs = []
+    # ====== 입력 확인 & 기본 이미지 로드 ======
     if not s1_path or not Path(s1_path).exists():
         raise gr.Error("STEP1 이미지가 없습니다. 먼저 STEP1(CPU)을 실행하세요.")
+    # 항상 먼저 img 초기화 (UnboundLocal 방지)
+    img: Image.Image = Image.open(s1_path).convert("RGBA")
+    # ====== 안전 파라미터 클램프 (형태 붕괴 방지) ======
+    tpose_strength  = max(0.35, min(0.65, float(tpose_strength)))
+    tpose_steps     = int(max(12,   min(28,  int(tpose_steps))))
+    tpose_guidance  = max(5.5,  min(9.0,  float(tpose_guidance)))
+    redraw_strength = max(0.25, min(0.5,  float(redraw_strength)))
+    redraw_steps    = int(max(12,   min(28,  int(redraw_steps))))
+    redraw_guidance = max(5.0,  min(9.0,  float(redraw_guidance)))
+    # ====== 디바이스 결정 (ZeroGPU: 이 함수 내부에서만) ======
+    try:
+        import torch
+        dev = "cuda" if torch.cuda.is_available() else "cpu"
+    except Exception:
+        dev = "cpu"
+    # ====== T-포즈 강제 (ControlNet/OpenPose) ======
     if enforce_tpose:
         try:
             from diffusers import ControlNetModel, StableDiffusionControlNetImg2ImgPipeline
+            # 모델 로드 (함수 내부)
             controlnet = ControlNetModel.from_pretrained(
                 "lllyasviel/control_v11p_sd15_openpose",
+                torch_dtype=(torch.float16 if dev == "cuda" else torch.float32)
             )
+            pipe_pose = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
                 "runwayml/stable-diffusion-v1-5",
                 controlnet=controlnet,
+                torch_dtype=(torch.float16 if dev == "cuda" else torch.float32)
+            )
+            if dev == "cuda":
+                pipe_pose.to("cuda")
+            # 입력/컨트롤 이미지 해상도 정규화 (동일 + 8배수)
             img_rgb = _resize_to_multiple(img.convert("RGB"), multiple=8, max_side=768)
             pose_canvas = _make_tpose_canvas_like(img_rgb)
+            out = pipe_pose(
                 prompt="T-pose, full body, clean anime lines",
                 image=img_rgb,
                 control_image=pose_canvas,
                 strength=float(tpose_strength),
                 guidance_scale=float(tpose_guidance),
                 num_inference_steps=int(tpose_steps),
+            ).images[0]
+            img = out.convert("RGBA")
+            logs.append("ControlNet(OpenPose) T-포즈 적용")
         except Exception as e:
+            logs.append(f"T-포즈 ControlNet 실패: {e}")
+    # ====== (옵션) img2img 리드로우 ======
     if do_redraw_flag:
         try:
+            from diffusers import StableDiffusionImg2ImgPipeline
+            pipe_redraw = StableDiffusionImg2ImgPipeline.from_pretrained(
+                "runwayml/stable-diffusion-v1-5",
+                torch_dtype=(torch.float16 if dev == "cuda" else torch.float32)
+            )
+            if dev == "cuda":
+                pipe_redraw.to("cuda")
             img_for_redraw = _resize_to_multiple(img.convert("RGB"), multiple=8, max_side=768)
+            out = pipe_redraw(
                 prompt="clean anime illustration, sharp lines, simple solid background",
                 image=img_for_redraw,
                 strength=float(redraw_strength),
                 guidance_scale=float(redraw_guidance),
                 num_inference_steps=int(redraw_steps),
+            ).images[0]
+            img = out.convert("RGBA")
+            logs.append("img2img 리드로우 적용")
         except Exception as e:
+            logs.append(f"img2img 리드로우 실패: {e}")
+    # ====== 저장 & 갤러리 미리보기 ======
     out_path = _save_png(img, OUT / "step1" / "input_preprocessed.png")
+    try:
+        preview = _to_preview(img)  # 있으면 사용
+    except Exception:
+        preview = img.convert("RGB")
+    return [preview], str(out_path), "\n".join(logs)
 # ---------------------------------
 # STEP2: Spaces call (model + texture)