refactor, add thresh, save fix
Browse files
app.py
CHANGED
|
@@ -36,8 +36,9 @@ app_version = 'ddn1'
|
|
| 36 |
|
| 37 |
device = torch.device("cpu")
|
| 38 |
labels = ['Live', 'Spoof']
|
| 39 |
-
|
| 40 |
-
|
|
|
|
| 41 |
examples = [
|
| 42 |
['examples/1_1_21_2_33_scene_fake.jpg'],
|
| 43 |
['examples/frame150_real.jpg'],
|
|
@@ -78,7 +79,7 @@ class Normaliztion_valtest(object):
|
|
| 78 |
return image_x
|
| 79 |
|
| 80 |
|
| 81 |
-
def
|
| 82 |
transform = transforms.Compose([Normaliztion_valtest()])
|
| 83 |
files_total = 1
|
| 84 |
image_x = np.zeros((files_total, 256, 256, 3))
|
|
@@ -86,10 +87,10 @@ def prepare_data(images, boxes, depths):
|
|
| 86 |
|
| 87 |
for i, (image, bbox, depth_img) in enumerate(
|
| 88 |
zip(images, boxes, depths)):
|
| 89 |
-
x, y,
|
| 90 |
depth_img = cv.cvtColor(depth_img, cv.COLOR_RGB2GRAY)
|
| 91 |
-
image = image[y:
|
| 92 |
-
depth_img = depth_img[y:
|
| 93 |
|
| 94 |
image_x[i, :, :, :] = cv.resize(image, (256, 256))
|
| 95 |
# transform to binary mask --> threshold = 0
|
|
@@ -100,89 +101,110 @@ def prepare_data(images, boxes, depths):
|
|
| 100 |
depth_x = torch.from_numpy(depth_x.astype(float)).float()
|
| 101 |
return image_x, depth_x
|
| 102 |
|
| 103 |
-
|
| 104 |
def find_largest_face(faces):
|
|
|
|
| 105 |
largest_face = None
|
| 106 |
largest_area = 0
|
| 107 |
-
|
| 108 |
-
|
| 109 |
area = w * h
|
| 110 |
if area > largest_area:
|
| 111 |
largest_area = area
|
| 112 |
-
largest_face =
|
| 113 |
return largest_face
|
| 114 |
|
| 115 |
|
| 116 |
-
def
|
|
|
|
| 117 |
if img is None:
|
| 118 |
-
return
|
| 119 |
grey = cv.cvtColor(img, cv.COLOR_RGB2GRAY)
|
| 120 |
faces = faceClassifier.detectMultiScale(
|
| 121 |
grey, scaleFactor=1.1, minNeighbors=4)
|
| 122 |
-
|
| 123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
if face is not None:
|
| 125 |
x, y, w, h = face
|
| 126 |
x2 = x + w
|
| 127 |
y2 = y + h
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
# if model_name == 'DeePixBiS':
|
| 133 |
-
mask, binary = deepix_model.forward(faceRegion)
|
| 134 |
-
res_deepix = torch.mean(mask).item()
|
| 135 |
-
cls_deepix = 'Real' if res_deepix >= pix_threshhold else 'Spoof'
|
| 136 |
-
|
| 137 |
-
confidences_deepix = {'Real confidence': res_deepix}
|
| 138 |
-
color_deepix = (0, 255, 0) if cls_deepix == 'Real' else (255, 0, 0)
|
| 139 |
-
img_deepix = cv.rectangle(img.copy(), (x, y), (x2, y2), color_deepix, 2)
|
| 140 |
-
cv.putText(img_deepix, cls_deepix, (x, y2 + 30),
|
| 141 |
-
cv.FONT_HERSHEY_COMPLEX, 1, color_deepix)
|
| 142 |
-
|
| 143 |
-
# else:
|
| 144 |
-
dense_flag = True
|
| 145 |
-
box = [x, y, x2, y2, 1]
|
| 146 |
-
param_lst, roi_box_lst = tddfa(img, [box])
|
| 147 |
-
|
| 148 |
-
ver_lst = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)
|
| 149 |
-
depth_img = depth(img, ver_lst, tddfa.tri, with_bg_flag=False)
|
| 150 |
-
with torch.no_grad():
|
| 151 |
-
map_score_list = []
|
| 152 |
-
image_x, map_x = prepare_data([img], [list(face)], [depth_img])
|
| 153 |
-
# get the inputs
|
| 154 |
-
image_x = image_x.unsqueeze(0)
|
| 155 |
-
map_x = map_x.unsqueeze(0)
|
| 156 |
-
inputs = image_x.to(device)
|
| 157 |
-
test_maps = map_x.to(device)
|
| 158 |
-
optimizer.zero_grad()
|
| 159 |
-
|
| 160 |
-
map_score = 0.0
|
| 161 |
-
for frame_t in range(inputs.shape[1]):
|
| 162 |
-
mu, logvar, map_x, x_concat, x_Block1, x_Block2, x_Block3, x_input = cdcn_model(inputs[:, frame_t, :, :, :])
|
| 163 |
-
|
| 164 |
-
score_norm = torch.sum(mu) / torch.sum(test_maps[:, frame_t, :, :])
|
| 165 |
-
map_score += score_norm
|
| 166 |
-
map_score = map_score / inputs.shape[1]
|
| 167 |
-
map_score_list.append(map_score)
|
| 168 |
-
|
| 169 |
-
res_dsdg = map_score_list[0].item()
|
| 170 |
-
if res_dsdg > 10:
|
| 171 |
-
res_dsdg = 0.0
|
| 172 |
-
cls_dsdg = 'Real' if res_dsdg >= dsdg_threshold else 'Spoof'
|
| 173 |
-
res_dsdg = res_dsdg * 300
|
| 174 |
-
|
| 175 |
-
confidences_dsdg = {'Real confidence': res_dsdg}
|
| 176 |
-
color_dsdg = (0, 255, 0) if cls_dsdg == 'Real' else (255, 0, 0)
|
| 177 |
-
img_dsdg = cv.rectangle(img.copy(), (x, y), (x2, y2), color_dsdg, 2)
|
| 178 |
-
cv.putText(img_dsdg, cls_dsdg, (x, y2 + 30),
|
| 179 |
-
cv.FONT_HERSHEY_COMPLEX, 1, color_dsdg)
|
| 180 |
-
|
| 181 |
-
cls_deepix, cls_dsdg = [1 if cls_ == 'Real' else 0 for cls_ in [cls_deepix, cls_dsdg]]
|
| 182 |
-
|
| 183 |
-
return img_deepix, confidences_deepix, img_dsdg, confidences_dsdg, cls_deepix, cls_dsdg
|
| 184 |
else:
|
| 185 |
-
return img, {}, img, {}, None
|
| 186 |
|
| 187 |
|
| 188 |
def upload_to_s3(image_array, app_version, *labels):
|
|
@@ -199,12 +221,12 @@ def upload_to_s3(image_array, app_version, *labels):
|
|
| 199 |
s3 = boto3.client('s3')
|
| 200 |
|
| 201 |
# Encode labels and app version in image file name
|
| 202 |
-
encoded_labels = '_'.join([str(label) for label in labels])
|
| 203 |
random_string = str(uuid.uuid4()).split('-')[-1]
|
| 204 |
image_name = f"{folder}/{app_version}/{encoded_labels}_{random_string}.jpg"
|
| 205 |
|
| 206 |
# Save image as JPEG
|
| 207 |
-
image = Image.fromarray(
|
| 208 |
image_bytes = io.BytesIO()
|
| 209 |
image.save(image_bytes, format='JPEG')
|
| 210 |
image_bytes.seek(0)
|
|
@@ -222,25 +244,30 @@ demo = gr.Blocks()
|
|
| 222 |
with demo:
|
| 223 |
with gr.Row():
|
| 224 |
with gr.Column():
|
| 225 |
-
input_img = gr.Image(source='webcam', shape=None, type='numpy')
|
|
|
|
| 226 |
btn_run = gr.Button(value="Run")
|
| 227 |
with gr.Column():
|
| 228 |
outputs=[
|
| 229 |
gr.Image(label='DeePixBiS', type='numpy'),
|
| 230 |
gr.Label(num_top_classes=2, label='DeePixBiS'),
|
|
|
|
| 231 |
gr.Image(label='DSDG', type='numpy'),
|
| 232 |
-
gr.Label(num_top_classes=2, label='DSDG')
|
|
|
|
| 233 |
with gr.Column():
|
| 234 |
radio = gr.Radio(
|
| 235 |
-
["
|
| 236 |
flag = gr.Button(value="Flag")
|
| 237 |
status = gr.Textbox()
|
| 238 |
-
example_block = gr.Examples(examples, [input_img], outputs
|
| 239 |
|
| 240 |
-
|
| 241 |
-
btn_run.click(inference, [input_img], outputs+labels)
|
| 242 |
app_version_block = gr.Textbox(value=app_version, visible=False)
|
| 243 |
-
flag.click(
|
|
|
|
|
|
|
|
|
|
| 244 |
|
| 245 |
|
| 246 |
if __name__ == '__main__':
|
|
|
|
| 36 |
|
| 37 |
device = torch.device("cpu")
|
| 38 |
labels = ['Live', 'Spoof']
|
| 39 |
+
PIX_THRESHOLD = 0.45
|
| 40 |
+
DSDG_THRESHOLD = 80
|
| 41 |
+
MIN_FACE_WIDTH_THRESHOLD = 210
|
| 42 |
examples = [
|
| 43 |
['examples/1_1_21_2_33_scene_fake.jpg'],
|
| 44 |
['examples/frame150_real.jpg'],
|
|
|
|
| 79 |
return image_x
|
| 80 |
|
| 81 |
|
| 82 |
+
def prepare_data_dsdg(images, boxes, depths):
|
| 83 |
transform = transforms.Compose([Normaliztion_valtest()])
|
| 84 |
files_total = 1
|
| 85 |
image_x = np.zeros((files_total, 256, 256, 3))
|
|
|
|
| 87 |
|
| 88 |
for i, (image, bbox, depth_img) in enumerate(
|
| 89 |
zip(images, boxes, depths)):
|
| 90 |
+
x, y, x2, y2 = bbox
|
| 91 |
depth_img = cv.cvtColor(depth_img, cv.COLOR_RGB2GRAY)
|
| 92 |
+
image = image[y:y2, x:x2]
|
| 93 |
+
depth_img = depth_img[y:y2, x:x2]
|
| 94 |
|
| 95 |
image_x[i, :, :, :] = cv.resize(image, (256, 256))
|
| 96 |
# transform to binary mask --> threshold = 0
|
|
|
|
| 101 |
depth_x = torch.from_numpy(depth_x.astype(float)).float()
|
| 102 |
return image_x, depth_x
|
| 103 |
|
|
|
|
| 104 |
def find_largest_face(faces):
    """Pick the detection with the greatest bounding-box area.

    Args:
        faces: Iterable of ``(x, y, w, h)`` boxes.

    Returns:
        The box with the largest ``w * h``; on a tie the earliest such box
        wins. ``None`` is returned when `faces` is empty or no box has a
        positive area.
    """
    best_box, best_area = None, 0
    for candidate in faces:
        _x, _y, width, height = candidate
        candidate_area = width * height
        # Strict comparison: a later box of equal area never replaces an
        # earlier one, and zero-area boxes are never selected.
        if candidate_area > best_area:
            best_box, best_area = candidate, candidate_area
    return best_box
|
| 115 |
|
| 116 |
|
| 117 |
+
def extract_face(img):
    """Detect faces in `img` and return the largest detection.

    Args:
        img: Image array that is converted with ``cv.COLOR_RGB2GRAY`` before
            detection, or ``None``.

    Returns:
        The box selected by ``find_largest_face`` from the detector output,
        or ``None`` when `img` is ``None`` or the detector returns nothing.
    """
    if img is None:
        return None
    gray_img = cv.cvtColor(img, cv.COLOR_RGB2GRAY)
    detections = faceClassifier.detectMultiScale(
        gray_img, scaleFactor=1.1, minNeighbors=4)
    if len(detections) == 0:
        return None
    return find_largest_face(detections)
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
def deepix_model_inference(img, bbox):
    """Classify the face inside `bbox` with the DeePixBiS model.

    Args:
        img: Image array; the face is cropped with ``img[y:y2, x:x2]`` and a
            copy of the full image is used for drawing.
        bbox: ``(x, y, x2, y2)`` corner coordinates of the face.

    Returns:
        Tuple ``(img_deepix, confidences_deepix, cls_deepix)``:
        the annotated copy of `img`, ``{'Real confidence': score}`` where
        ``score`` is the mean of the model's output mask, and ``1`` when the
        score reaches ``PIX_THRESHOLD`` ('Real') or ``0`` otherwise ('Spoof').
    """
    x, y, x2, y2 = bbox
    face_tensor = tfms(img[y:y2, x:x2]).unsqueeze(0)
    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        mask, _binary = deepix_model.forward(face_tensor)
    res_deepix = torch.mean(mask).item()
    is_real = res_deepix >= PIX_THRESHOLD
    label_text = 'Real' if is_real else 'Spoof'
    confidences_deepix = {'Real confidence': res_deepix}
    color_deepix = (0, 255, 0) if is_real else (255, 0, 0)
    # Draw on a copy so the caller's image stays untouched.
    img_deepix = cv.rectangle(img.copy(), (x, y), (x2, y2), color_deepix, 2)
    cv.putText(img_deepix, label_text, (x, y2 + 30),
               cv.FONT_HERSHEY_COMPLEX, 1, color_deepix)
    cls_deepix = 1 if is_real else 0
    return img_deepix, confidences_deepix, cls_deepix
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
def dsdg_model_inference(img, bbox, dsdg_thresh):
    """Classify the face inside `bbox` with the DSDG (CDCN) model.

    Args:
        img: Image array; a copy is annotated and returned.
        bbox: ``(x, y, x2, y2)`` corner coordinates of the face.
        dsdg_thresh: Threshold as supplied by the caller; it is divided by
            30000 before being compared with the raw model score.

    Returns:
        Tuple ``(img_dsdg, confidences_dsdg, cls_dsdg)``:
        the annotated copy of `img`, ``{'Real confidence': raw_score * 300}``
        (an empty dict when the face is too narrow), and the class code:
        ``1`` for 'Real', ``0`` for 'Spoof', ``2`` when the face width is
        below ``MIN_FACE_WIDTH_THRESHOLD`` and no model is run.
    """
    import math  # local import: only used for the finite-score check below

    # The raw score is multiplied by 300 for display, so dividing the
    # threshold by 30000 (= 300 * 100) puts it on the raw-score scale.
    raw_thresh = dsdg_thresh / 30000
    x, y, x2, y2 = bbox
    w = x2 - x
    h = y2 - y

    if w < MIN_FACE_WIDTH_THRESHOLD:
        # Face too narrow: annotate in black and skip the model entirely.
        color_dsdg = (0, 0, 0)
        text = f'Small res ({w}*{h})'
        img_dsdg = cv.rectangle(img.copy(), (x, y), (x2, y2), color_dsdg, 2)
        cv.putText(img_dsdg, text, (x, y2 + 30),
                   cv.FONT_HERSHEY_COMPLEX, 1, color_dsdg)
        cls_dsdg = 2
        return img_dsdg, {}, cls_dsdg

    # tddfa receives the box with a trailing 1 appended as a fifth element.
    bbox_conf = list(bbox) + [1]
    param_lst, roi_box_lst = tddfa(img, [bbox_conf])
    ver_lst = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=True)
    depth_img = depth(img, ver_lst, tddfa.tri, with_bg_flag=False)

    with torch.no_grad():
        image_x, map_x = prepare_data_dsdg([img], [list(bbox)], [depth_img])
        inputs = image_x.unsqueeze(0).to(device)
        test_maps = map_x.unsqueeze(0).to(device)
        # NOTE(review): no backward pass is visible in this function, so this
        # call looks like a leftover from training code - confirm before
        # removing it.
        optimizer.zero_grad()
        n_frames = inputs.shape[1]
        map_score = 0.0
        for frame_t in range(n_frames):
            # Only the first model output (mu) is used for scoring.
            mu, *_ = cdcn_model(inputs[:, frame_t, :, :, :])
            map_score += torch.sum(mu) / torch.sum(test_maps[:, frame_t, :, :])
        map_score = map_score / n_frames

    res_dsdg = map_score.item()
    # The score is a ratio whose denominator can be zero. Treat NaN / +-inf
    # the same way as the existing "implausibly large" case instead of
    # letting them leak into the confidence dict.
    if not math.isfinite(res_dsdg) or res_dsdg > 10:
        res_dsdg = 0.0

    is_real = res_dsdg >= raw_thresh
    label_text = 'Real' if is_real else 'Spoof'
    text = f'{label_text} {w}*{h}'
    confidences_dsdg = {'Real confidence': res_dsdg * 300}
    color_dsdg = (0, 255, 0) if is_real else (255, 0, 0)
    img_dsdg = cv.rectangle(img.copy(), (x, y), (x2, y2), color_dsdg, 2)
    cv.putText(img_dsdg, text, (x, y2 + 30),
               cv.FONT_HERSHEY_COMPLEX, 1, color_dsdg)
    cls_dsdg = 1 if is_real else 0
    return img_dsdg, confidences_dsdg, cls_dsdg
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
def inference(img, dsdg_thresh):
    """Run both anti-spoofing models on the largest face found in `img`.

    Args:
        img: Input image array, or ``None``.
        dsdg_thresh: Threshold forwarded unchanged to ``dsdg_model_inference``.

    Returns:
        A 6-tuple ``(img_deepix, confidences_deepix, cls_deepix,
        img_dsdg, confidences_dsdg, cls_dsdg)``. When no face is found the
        input image is returned for both image slots, both confidence dicts
        are empty and both class codes are ``-1``.
    """
    face = extract_face(img)
    if face is None:
        # -1 is the value the hidden gr.Number outputs start with and, unlike
        # None, it survives the int(label) conversion in upload_to_s3.
        no_face_cls = -1
        return img, {}, no_face_cls, img, {}, no_face_cls

    # The detector yields (x, y, w, h); the model helpers take corner points.
    x, y, w, h = face
    bbox = (x, y, x + w, y + h)
    img_deepix, confidences_deepix, cls_deepix = deepix_model_inference(img, bbox)
    img_dsdg, confidences_dsdg, cls_dsdg = dsdg_model_inference(img, bbox, dsdg_thresh)
    return (img_deepix, confidences_deepix, cls_deepix,
            img_dsdg, confidences_dsdg, cls_dsdg)
|
| 208 |
|
| 209 |
|
| 210 |
def upload_to_s3(image_array, app_version, *labels):
|
|
|
|
| 221 |
s3 = boto3.client('s3')
|
| 222 |
|
| 223 |
# Encode labels and app version in image file name
|
| 224 |
+
encoded_labels = '_'.join([str(int(label)) for label in labels])
|
| 225 |
random_string = str(uuid.uuid4()).split('-')[-1]
|
| 226 |
image_name = f"{folder}/{app_version}/{encoded_labels}_{random_string}.jpg"
|
| 227 |
|
| 228 |
# Save image as JPEG
|
| 229 |
+
image = Image.fromarray(image_array)
|
| 230 |
image_bytes = io.BytesIO()
|
| 231 |
image.save(image_bytes, format='JPEG')
|
| 232 |
image_bytes.seek(0)
|
|
|
|
| 244 |
with demo:
|
| 245 |
with gr.Row():
|
| 246 |
with gr.Column():
|
| 247 |
+
input_img = gr.Image(source='webcam', shape=None, type='numpy', streaming=False)
|
| 248 |
+
dsdg_thresh = gr.Slider(value=DSDG_THRESHOLD, label='DSDG threshold')
|
| 249 |
btn_run = gr.Button(value="Run")
|
| 250 |
with gr.Column():
|
| 251 |
outputs=[
|
| 252 |
gr.Image(label='DeePixBiS', type='numpy'),
|
| 253 |
gr.Label(num_top_classes=2, label='DeePixBiS'),
|
| 254 |
+
gr.Number(visible=False, value=-1),
|
| 255 |
gr.Image(label='DSDG', type='numpy'),
|
| 256 |
+
gr.Label(num_top_classes=2, label='DSDG'),
|
| 257 |
+
gr.Number(visible=False, value=-1)]
|
| 258 |
with gr.Column():
|
| 259 |
radio = gr.Radio(
|
| 260 |
+
["Spoof", "Real", "None"], label="True label", type='index')
|
| 261 |
flag = gr.Button(value="Flag")
|
| 262 |
status = gr.Textbox()
|
| 263 |
+
example_block = gr.Examples(examples, [input_img], outputs)
|
| 264 |
|
| 265 |
+
btn_run.click(inference, [input_img, dsdg_thresh], outputs)
|
|
|
|
| 266 |
app_version_block = gr.Textbox(value=app_version, visible=False)
|
| 267 |
+
flag.click(
|
| 268 |
+
upload_to_s3,
|
| 269 |
+
[input_img, app_version_block, radio]+[outputs[2], outputs[5]],
|
| 270 |
+
[status], show_progress=True)
|
| 271 |
|
| 272 |
|
| 273 |
if __name__ == '__main__':
|