Spaces:

LogicGoInfotechSpaces
/

object_remover

Running on T4

App Files Community

LogicGoInfotechSpaces committed on Oct 31

Commit

20727d7

1 Parent(s): e7611db

fix(mask): ensure proper mask interpretation - selected areas are removed; add detailed logging

Browse files

Files changed (2) hide show

api/main.py +16 -5
src/core.py +17 -5

api/main.py CHANGED Viewed

@@ -125,29 +125,40 @@ def _load_rgba_image(path: str) -> Image.Image:
 def _load_rgba_mask_from_image(img: Image.Image) -> np.ndarray:
-    # Standard convention: white=remove (255), black=keep (0)
-    # Convert to RGBA where alpha=0 means "to remove", alpha=255 means "keep"
     if img.mode != "RGBA":
         # For RGB/Grayscale masks: white (value>128) = remove, black (value<=128) = keep
         gray = img.convert("L")
         arr = np.array(gray)
-        # White pixels (>128) should have alpha=0 (to remove), black pixels (<=128) alpha=255 (keep)
         alpha = np.where(arr > 128, 0, 255).astype(np.uint8)
         rgba = np.zeros((img.height, img.width, 4), dtype=np.uint8)
         rgba[:, :, 3] = alpha
         return rgba
-    # For RGBA: check if alpha channel is used or RGB channels
     arr = np.array(img)
     alpha = arr[:, :, 3]
-    # If alpha is mostly opaque (mean > 200), treat RGB channels as mask values
     if alpha.mean() > 200:
         # Use RGB to determine mask: white in RGB = remove
         gray = cv2.cvtColor(arr[:, :, :3], cv2.COLOR_RGB2GRAY)
         alpha = np.where(gray > 128, 0, 255).astype(np.uint8)
         rgba = arr.copy()
         rgba[:, :, 3] = alpha
         return rgba
     # Alpha channel already encodes the mask
     return arr

 def _load_rgba_mask_from_image(img: Image.Image) -> np.ndarray:
+    """
+    Convert mask image to RGBA format.
+    Standard convention: white (255) = area to remove, black (0) = area to keep
+    Returns RGBA where alpha=0 means "to remove", alpha=255 means "keep"
+    (This will be inverted in process_inpaint if invert_mask=True)
+    """
     if img.mode != "RGBA":
         # For RGB/Grayscale masks: white (value>128) = remove, black (value<=128) = keep
         gray = img.convert("L")
         arr = np.array(gray)
+        # White pixels (>128) should have alpha=0 (to remove after inversion)
+        # Black pixels (<=128) should have alpha=255 (to keep after inversion)
         alpha = np.where(arr > 128, 0, 255).astype(np.uint8)
         rgba = np.zeros((img.height, img.width, 4), dtype=np.uint8)
         rgba[:, :, 3] = alpha
+        log.info(f"Loaded {img.mode} mask: {int((alpha == 0).sum())} pixels marked for removal (alpha=0)")
         return rgba
+    # For RGBA: check if alpha channel is meaningful
     arr = np.array(img)
     alpha = arr[:, :, 3]
+    # If alpha is mostly opaque everywhere (mean > 200), treat RGB channels as mask values
     if alpha.mean() > 200:
         # Use RGB to determine mask: white in RGB = remove
         gray = cv2.cvtColor(arr[:, :, :3], cv2.COLOR_RGB2GRAY)
         alpha = np.where(gray > 128, 0, 255).astype(np.uint8)
         rgba = arr.copy()
         rgba[:, :, 3] = alpha
+        log.info(f"Loaded RGBA mask (RGB-based): {int((alpha == 0).sum())} pixels marked for removal (alpha=0)")
         return rgba
     # Alpha channel already encodes the mask
+    log.info(f"Loaded RGBA mask (alpha-based): {int((alpha < 128).sum())} pixels marked for removal (alpha<128)")
     return arr

src/core.py CHANGED Viewed

@@ -459,16 +459,28 @@ def process_inpaint(image, mask, invert_mask=True):
     image = norm_img(image)
     # Convert RGBA mask to single-channel mask.
-    # Standard: white=remove (255), black=keep (0)
-    # When invert_mask=True (default): alpha=0 (transparent/painted) → 255 (remove), alpha=255 → 0 (keep)
     alpha_channel = mask[:,:,3]
-    mask = (255 - alpha_channel) if invert_mask else alpha_channel
     mask = resize_max_size(mask, size_limit=size_limit, interpolation=interpolation)
-    # Debug: log mask statistics
     mask_nonzero = int((mask > 128).sum())
-    print(f"Mask shape: {mask.shape}, non-zero pixels (>128): {mask_nonzero}")
     mask = norm_img(mask)
     res_np_img = run(image, mask)

     image = norm_img(image)
     # Convert RGBA mask to single-channel mask.
+    # Standard LaMa convention: 1 = remove, 0 = keep
+    # User draws with alpha=0 (transparent), we want those to become 1 (remove)
     alpha_channel = mask[:,:,3]
+    # When invert_mask=True: alpha=0 (painted/transparent) → 255 → 1 (remove)
+    # When invert_mask=False: alpha=255 (opaque) → 255 → 1 (remove)
+    if invert_mask:
+        # Inverted: transparent (0) means remove, opaque (255) means keep
+        mask = 255 - alpha_channel
+    else:
+        # Normal: opaque (255) means remove, transparent (0) means keep
+        mask = alpha_channel
     mask = resize_max_size(mask, size_limit=size_limit, interpolation=interpolation)
+    # Debug: log mask statistics BEFORE normalization
     mask_nonzero = int((mask > 128).sum())
+    mask_total = mask.shape[0] * mask.shape[1]
+    print(f"Mask shape: {mask.shape}, pixels to remove (>128): {mask_nonzero}/{mask_total} ({100*mask_nonzero/mask_total:.1f}%)")
+    # Normalize: values > 0 become 1.0, 0 stays 0
+    # After this, 1.0 = remove, 0.0 = keep (LaMa expects this)
     mask = norm_img(mask)
     res_np_img = run(image, mask)