Spaces:

aptol
/

genshin

Running on Zero

App Files Files Community

aptol committed on Aug 18

Commit

3153d6a

verified ·

1 Parent(s): 70e511b

Update app.py

Browse files

Files changed (1) hide show

app.py +102 -85

app.py CHANGED Viewed

@@ -689,68 +689,117 @@ def step1_gpu_refine(
     except Exception:
         dev = "cpu"; dtype = None  # type: ignore
-    # ---- T-포즈 (ControlNet/OpenPose)
     if enforce_tpose:
         try:
             from diffusers import (
                 ControlNetModel,
                 StableDiffusionControlNetImg2ImgPipeline,
                 DPMSolverMultistepScheduler
             )
-            import torch, math
             dev  = "cuda" if torch.cuda.is_available() else "cpu"
             dtype = torch.float16 if dev == "cuda" else torch.float32
-            # 1) ControlNet 로드
-            controlnet = ControlNetModel.from_pretrained(
-                "lllyasviel/control_v11p_sd15_openpose",
-                torch_dtype=dtype
             )
-            pipe_pose = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
                 "runwayml/stable-diffusion-v1-5",
                 controlnet=controlnet,
                 torch_dtype=dtype,
-                safety_checker=None,
-                feature_extractor=None,
             )
-            # 세이프티 완전 비활성 (우리가 만든 유틸)
-            pipe_pose = _disable_safety(pipe_pose)
-            # ★ 더 안정적인 Karras DPM-Solver
             try:
-                pipe_pose.scheduler = DPMSolverMultistepScheduler.from_config(
-                    pipe_pose.scheduler.config,
-                    use_karras_sigmas=True
                 )
             except Exception:
                 pass
-            if dev == "cuda": pipe_pose.to("cuda")
             try:
-                pipe_pose.enable_vae_slicing()
             except Exception:
                 pass
-            # 2) 입력/포즈 준비 (해상도 통일 + 8배수)
-            base_rgb   = _resize_to_multiple(img.convert("RGB"), multiple=8, max_side=512)
-            # 원본 포즈를 추출해서 T포즈와 '약하게' 블렌드(0.2)
-            pose_orig  = _openpose_canvas_from_image(base_rgb)  # 원본 포즈(스켈레톤)
-            pose_t     = _make_tpose_canvas_like(base_rgb)      # 우리가 그린 T포즈 캔버스
-            pose_canvas= _blend_pose_canvases(pose_orig, pose_t, alpha=0.20).resize(base_rgb.size)
-            # 선이 너무 얇으면 수렴이 흔들려서 두껍게 강화
-            try:
-                from PIL import ImageDraw
-                pc = pose_canvas.copy()
-                draw = ImageDraw.Draw(pc)
-                # 테두리 강화(하얀 프레임을 더함)
-                w,h = pc.size
-                draw.rectangle([2,2,w-3,h-3], outline=(255,255,255), width=2)
-                pose_canvas = pc
-            except Exception:
-                pass
             # 3) 프롬프트 (밝고 단순한 배경 + NSFW 방지 단어)
             POS = (
@@ -798,31 +847,7 @@ def step1_gpu_refine(
             out = out_b
-            # 5) 얼굴 보호(원본에서 얼굴만 복원): 간단한 밝기/색 기반 박스 추정
-            try:
-                # 얼굴 박스를 MediaPipe 없이 추정(대충 상단 중앙 30~35%)
-                W,H = out.size
-                cx, cy = W//2, int(H*0.30)
-                bw, bh = int(W*0.36), int(H*0.28)
-                x1, y1 = max(0, cx-bw//2), max(0, cy-bh//2)
-                x2, y2 = min(W, cx+bw//2), min(H, cy+bh//2)
-                face_new = out.crop((x1,y1,x2,y2))
-                face_old = Image.open(s1_path).convert("RGBA").resize((W,H), Image.LANCZOS).crop((x1,y1,x2,y2))
-                # 소프트 마스크로 자연스럽게 덮어씌우기
-                import numpy as np
-                m = Image.new("L", (x2-x1, y2-y1), 0)
-                from PIL import ImageFilter
-                # 타원형 마스크
-                mm = Image.new("L", m.size, 0)
-                draw = ImageDraw.Draw(mm)
-                draw.ellipse([4,4,mm.size[0]-5,mm.size[1]-5], fill=255)
-                mm = mm.filter(ImageFilter.GaussianBlur(6))
-                face_mix = Image.composite(face_old, face_new, mm)
-                out.paste(face_mix, (x1,y1), mm)
-            except Exception:
-                pass
             # 6) 너무 어두우면 밝기 리프트 + 실패 시 원본 롤백
             if _mean_brightness(out) < 16:
@@ -840,34 +865,26 @@ def step1_gpu_refine(
-    # ---- (옵션) 리드로우(img2img)
-    if do_redraw_flag:
-        try:
             from diffusers import StableDiffusionImg2ImgPipeline
-            pipe_redraw = StableDiffusionImg2ImgPipeline.from_pretrained(
-                "runwayml/stable-diffusion-v1-5",
-                torch_dtype=dtype,
-                safety_checker=None,
-                feature_extractor=None,
             )
-            pipe_redraw = _disable_safety(pipe_redraw)
-            if dev == "cuda":
-                pipe_redraw.to("cuda")
-            img_for_redraw = _resize_to_multiple(img.convert("RGB"), multiple=8, max_side=768)
-            Image.fromarray(img_for_redraw).save(OUT/"step1"/"dbg_04_before_redraw.png")
-            out = pipe_redraw(
-                prompt="clean anime illustration, sharp lines, simple solid background, same outfit and colors",
-                negative_prompt="deformed, extra limbs, bad anatomy, watermark, text, noisy",
-                image=img_for_redraw,
-                strength=float(redraw_strength),
-                guidance_scale=float(redraw_guidance),
-                num_inference_steps=int(redraw_steps),
-            ).images[0]
             img = out.convert("RGBA")
             img.save(OUT/"step1"/"dbg_05_after_redraw.png")
             logs.append("img2img 리드로우 적용")

     except Exception:
         dev = "cpu"; dtype = None  # type: ignore
     if enforce_tpose:
         try:
+            import math, torch
+            from PIL import Image
             from diffusers import (
                 ControlNetModel,
                 StableDiffusionControlNetImg2ImgPipeline,
                 DPMSolverMultistepScheduler
             )
+            from diffusers.utils import load_image
+            from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
             dev  = "cuda" if torch.cuda.is_available() else "cpu"
             dtype = torch.float16 if dev == "cuda" else torch.float32
+            # --- 입력/캔버스 준비 ---
+            base_rgb   = _resize_to_multiple(img.convert("RGB"), multiple=8, max_side=512)
+            # 원본 포즈 캔버스 + T포즈 캔버스 → 약블렌드(원본 80% : T 20%)
+            pose_orig  = _openpose_canvas_from_image(base_rgb)
+            pose_t     = _make_tpose_canvas_like(base_rgb)
+            pose_canvas= _blend_pose_canvases(pose_orig, pose_t, alpha=0.20).resize(base_rgb.size)
+            # --- Dual ControlNet: OpenPose + Reference-Only ---
+            cn_pose = ControlNetModel.from_pretrained(
+                "lllyasviel/control_v11p_sd15_openpose", torch_dtype=dtype
             )
+            cn_ref  = ControlNetModel.from_pretrained(
+                "lllyasviel/control_v11f1e_sd15_tile", torch_dtype=dtype
+            )
+            # 참고: reference-only는 tile 모델에서 ref 모드로 동작합니다.
+            controlnet = MultiControlNetModel([cn_pose, cn_ref])
+            pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
                 "runwayml/stable-diffusion-v1-5",
                 controlnet=controlnet,
                 torch_dtype=dtype,
+                safety_checker=None, feature_extractor=None,
             )
+            pipe = _disable_safety(pipe)
             try:
+                pipe.scheduler = DPMSolverMultistepScheduler.from_config(
+                    pipe.scheduler.config, use_karras_sigmas=True
                 )
             except Exception:
                 pass
+            if dev == "cuda":
+                pipe.to("cuda")
             try:
+                pipe.enable_vae_slicing()
             except Exception:
                 pass
+            # --- 컨디션 이미지 2개 준비 ---
+            control_images = [
+                pose_canvas,    # 0: 포즈
+                base_rgb        # 1: 레퍼런스(색/텍스처)
+            ]
+            # --- 프롬프트 ---
+            POS = (
+                "clean anime illustration, full body, sharp lines, same outfit and colors as reference, "
+                "T-pose tendency, white studio background, bright, high-key lighting"
+            )
+            NEG = (
+                "glitch, collage, cutout, fragments, abstract shapes, mosaic, compression artifacts, "
+                "extra limbs, extra fingers, deformed, melted, noisy, text, watermark, black background"
+            )
+            # --- 파라미터 (안정값) ---
+            steps     = int(max(16, min(28, int(tpose_steps))))
+            strength  = float(max(0.50, min(0.65, float(tpose_strength))))
+            guidance  = float(max(7.0,  min(9.5,  float(tpose_guidance))))
+            # controlnet 영향: [OpenPose, Reference]
+            cond_scales = [0.22, 0.70]           # 포즈는 약하게, 레퍼런스는 강하게
+            start_list  = [0.05, 0.00]           # 포즈는 초반부터, 레퍼런스는 전 구간
+            end_list    = [0.35, 0.80]           # 포즈는 중반까지만, 레퍼런스는 오래
+            # --- 실행 ---
+            out = pipe(
+                prompt=POS, negative_prompt=NEG,
+                image=base_rgb,
+                control_image=control_images,
+                num_inference_steps=steps,
+                strength=strength,
+                guidance_scale=guidance,
+                controlnet_conditioning_scale=cond_scales,
+                control_guidance_start=start_list,
+                control_guidance_end=end_list,
+                guess_mode=True,
+                # reference-only 모드: tile controlnet에 적용되는 힌트 토글
+                # (diffusers 0.29 기준, tile은 ref-like 역할로 충분)
+            ).images[0].convert("RGBA")
+            # --- 어두움 가드 + 밝기 리프트 ---
+            if _mean_brightness(out) < 16:
+                out = _lift_brightness(out, gain=1.18, gamma=0.90)
+            if _mean_brightness(out) < 12:
+                logs.append("T-포즈(DualCN) 결과가 어두워 원본 유지")
+            else:
+                img = out
+                try:
+                    pose_canvas.save(OUT/"step1"/"dbg_pose_blend.png")
+                    img.save(OUT/"step1"/"dbg_03_after_dualcn.png")
+                except Exception:
+                    pass
+                logs.append("T-포즈(Dual-ControlNet) 적용: Pose 0.22 + Reference 0.70")
+        except Exception as e:
+            logs.append(f"T-포즈 Dual-ControlNet 실패: {e}")
             # 3) 프롬프트 (밝고 단순한 배경 + NSFW 방지 단어)
             POS = (
             out = out_b
             # 6) 너무 어두우면 밝기 리프트 + 실패 시 원본 롤백
             if _mean_brightness(out) < 16:
+        # ---- (옵션) 리드로우(img2img)
+        if do_redraw_flag:
             from diffusers import StableDiffusionImg2ImgPipeline
+            pr = StableDiffusionImg2ImgPipeline.from_pretrained(
+                "runwayml/stable-diffusion-v1-5", torch_dtype=dtype,
+                safety_checker=None, feature_extractor=None
             )
+            pr = _disable_safety(pr)
+            if dev == "cuda": pr.to("cuda")
+            img_for = _resize_to_multiple(img.convert("RGB"), 8, 640)
+            img = pr(
+                prompt="clean anime illustration, sharp lines, flat colors, plain white background",
+                negative_prompt="glitch, mosaic, text, watermark, noisy",
+                image=img_for,
+                strength=float(max(0.30, min(0.45, float(redraw_strength)))),
+                guidance_scale=float(max(6.5, min(9.0, float(redraw_guidance)))),
+                num_inference_steps=int(max(14, min(28, int(redraw_steps)))),
+            ).images[0].convert("RGBA")
             img = out.convert("RGBA")
             img.save(OUT/"step1"/"dbg_05_after_redraw.png")
             logs.append("img2img 리드로우 적용")