Spaces:

OpenCVUniversity
/

Blink-Detection-Using-OpenCV

Sleeping

App Files Files Community

Ankan Ghosh committed on Dec 17, 2024

Commit

a42db59

verified ·

1 Parent(s): 9acb682

Upload 4 files

Browse files

Files changed (5) hide show

.gitattributes +1 -0
app.py +305 -0
click.wav +0 -0
input-video.mp4 +3 -0
requirements.txt +4 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+input-video.mp4 filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

	@@ -0,0 +1,305 @@

+import cv2
+import numpy as np
+import time
+import os
+import matplotlib.pyplot as plt
+import gradio as gr
+try:
+    from pygame import mixer
+    mixer_init = True
+except ModuleNotFoundError:
+    mixer = None
+    mixer_init = False
+# ------------------------------------------------------------------------------
+# 1. Initializations.
+# ------------------------------------------------------------------------------
+# Module-level blink counter. process_video() keeps its own local counter, so
+# this value is not updated while a video is being processed.
+BLINK = 0
+# Model file paths, relative to the working directory the app is started from.
+MODEL_PATH = "./model/res10_300x300_ssd_iter_140000.caffemodel"
+CONFIG_PATH = "./model/deploy.prototxt"
+LBF_MODEL = "./model/lbfmodel.yaml"
+# Create a face detector network instance (the Caffe model is loaded once, at
+# import time, and shared by every call to detect_faces()).
+net = cv2.dnn.readNetFromCaffe(CONFIG_PATH, MODEL_PATH)
+# Create the landmark detector instance (LBF facemark) and load its trained
+# model; this also happens once at import time.
+landmarkDetector = cv2.face.createFacemarkLBF()
+landmarkDetector.loadModel(LBF_MODEL)
+# ------------------------------------------------------------------------------
+# 2. Function definitions.
+# ------------------------------------------------------------------------------
+def detect_faces(image, detection_threshold=0.70):
+    blob = cv2.dnn.blobFromImage(image, 1.0, (300, 300), [104, 117, 123])
+    net.setInput(blob)
+    detections = net.forward()
+    faces = []
+    img_h = image.shape[0]
+    img_w = image.shape[1]
+    for detection in detections[0][0]:
+        if detection[2] >= detection_threshold:
+            left = detection[3] * img_w
+            top = detection[4] * img_h
+            right = detection[5] * img_w
+            bottom = detection[6] * img_h
+            face_w = right - left
+            face_h = bottom - top
+            face_roi = (left, top, face_w, face_h)
+            faces.append(face_roi)
+    return np.array(faces).astype(int)
+def get_primary_face(faces, frame_h, frame_w):
+    primary_face_index = None
+    face_height_max = 0
+    for idx in range(len(faces)):
+        face = faces[idx]
+        x1 = face[0]
+        y1 = face[1]
+        x2 = x1 + face[2]
+        y2 = y1 + face[3]
+        if x1 > frame_w or y1 > frame_h or x2 > frame_w or y2 > frame_h:
+            continue
+        if x1 < 0 or y1 < 0 or x2 < 0 or y2 < 0:
+            continue
+        # Prioritize the face with the maximum height.
+        if face[3] > face_height_max:
+            primary_face_index = idx
+            face_height_max = face[3]
+    if primary_face_index is not None:
+        primary_face = faces[primary_face_index]
+    else:
+        primary_face = None
+    return primary_face
+def visualize_eyes(landmarks, frame):
+    for i in range(36, 48):
+        cv2.circle(frame, tuple(landmarks[i].astype("int")), 2, (0, 255, 0), -1)
+def get_eye_aspect_ratio(landmarks):
+    vert_dist_1right = calculate_distance(landmarks[37], landmarks[41])
+    vert_dist_2right = calculate_distance(landmarks[38], landmarks[40])
+    vert_dist_1left = calculate_distance(landmarks[43], landmarks[47])
+    vert_dist_2left = calculate_distance(landmarks[44], landmarks[46])
+    horz_dist_right = calculate_distance(landmarks[36], landmarks[39])
+    horz_dist_left = calculate_distance(landmarks[42], landmarks[45])
+    EAR_left = (vert_dist_1left + vert_dist_2left) / (2.0 * horz_dist_left)
+    EAR_right = (vert_dist_1right + vert_dist_2right) / (2.0 * horz_dist_right)
+    ear = (EAR_left + EAR_right) / 2
+    return ear
+def calculate_distance(A, B):
+    distance = ((A[0] - B[0]) ** 2 + (A[1] - B[1]) ** 2) ** 0.5
+    return distance
+def play(file):
+    if mixer_init:
+        mixer.init()
+        sound = mixer.Sound(file)
+        sound.play()
+# ------------------------------------------------------------------------------
+# 3. Processing function (to be used in Gradio).
+# ------------------------------------------------------------------------------
+def process_video(input_video):
+    # Generate unique filenames for the outputs
+    out_video_filename = "processed_video.mp4"
+    out_plot_filename = "ear_plot.png"
+    cap = cv2.VideoCapture(input_video)
+    ret, frame = cap.read()
+    if not ret:
+        print("Cannot read the input video.")
+        return None, None
+    frame_h = frame.shape[0]
+    frame_w = frame.shape[1]
+    # Initialize writer for processed video
+    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+    fps = cap.get(cv2.CAP_PROP_FPS) if cap.get(cv2.CAP_PROP_FPS) > 0 else 30
+    out_writer = cv2.VideoWriter(out_video_filename, fourcc, fps, (frame_w, frame_h))
+    # Calibration
+    frame_count = 0
+    frame_calib = 30  # Number of frames to use for threshold calibration.
+    sum_ear = 0
+    BLINK = 0
+    state_prev = state_curr = "open"
+    ear_values = []
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            break
+        # Detect Face.
+        faces = detect_faces(frame, detection_threshold=0.90)
+        if len(faces) > 0:
+            # Use primary face
+            primary_face = get_primary_face(faces, frame_h, frame_w)
+            if primary_face is not None:
+                cv2.rectangle(
+                    frame,
+                    (primary_face[0], primary_face[1]),
+                    (primary_face[0] + primary_face[2], primary_face[1] + primary_face[3]),
+                    (0, 255, 0),
+                    3,
+                )
+                # Detect Landmarks
+                retval, landmarksList = landmarkDetector.fit(frame, np.expand_dims(primary_face, 0))
+                if retval:
+                    landmarks = landmarksList[0][0]
+                    # Display detections.
+                    visualize_eyes(landmarks, frame)
+                    # Get EAR
+                    ear = get_eye_aspect_ratio(landmarks)
+                    ear_values.append(ear)
+                    if frame_count < frame_calib:
+                        frame_count += 1
+                        sum_ear += ear
+                    elif frame_count == frame_calib:
+                        frame_count += 1
+                        avg_ear = sum_ear / frame_count
+                        HIGHER_TH = 0.90 * avg_ear
+                        LOWER_TH = 0.80 * HIGHER_TH
+                        print("SET EAR HIGH: ", HIGHER_TH)
+                        print("SET EAR LOW: ", LOWER_TH)
+                    else:
+                        if ear < LOWER_TH:
+                            state_curr = "closed"
+                        elif ear > HIGHER_TH:
+                            state_curr = "open"
+                        if state_prev == "closed" and state_curr == "open":
+                            BLINK += 1
+                            if mixer_init:
+                                play("./click.wav")
+                        state_prev = state_curr
+                        cv2.putText(
+                            frame,
+                            f"Blink Counter: {BLINK}",
+                            (10, 80),
+                            cv2.FONT_HERSHEY_SIMPLEX,
+                            1.5,
+                            (0, 0, 255),
+                            4,
+                            cv2.LINE_AA,
+                        )
+            else:
+                # No valid face detected
+                pass
+        else:
+            # No faces
+            pass
+        frame_out_final = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        out_writer.write(frame)
+        yield frame_out_final, None, None
+    cap.release()
+    out_writer.release()
+    # Plot EAR values if collected
+    if ear_values:
+        plt.figure(figsize=(10, 5.625))
+        plt.plot(ear_values, label="EAR")
+        plt.title("Eye Aspect Ratio (EAR) over time")
+        plt.xlabel("Frame Index")
+        plt.ylabel("EAR")
+        plt.legend()
+        plt.grid(True)
+        plt.savefig(out_plot_filename)
+        plt.close()
+    else:
+        out_plot_filename = None
+    yield None, out_video_filename, out_plot_filename
+# ------------------------------------------------------------------------------
+# 4. Gradio UI
+# ------------------------------------------------------------------------------
+def process_gradio(video_file):
+    if video_file is None:
+        return None, None, None
+    video_path = video_file
+    output_frames = None
+    processed_video = None
+    plot_img = None
+    # Process video using generator
+    for frame_out, processed_video_path, plot_path in process_video(video_path):
+        if frame_out is not None:
+            output_frames = frame_out  # Update frames dynamically
+            yield output_frames, None, None  # Gradio updates frames step-by-step
+        else:
+            processed_video = processed_video_path
+            plot_img = plot_path
+    # Final yield with processed video and EAR plot
+    yield None, processed_video, plot_img
+# Build the page. Components are created in the order they appear below, so
+# the statement order inside the context managers defines the layout.
+with gr.Blocks() as demo:
+    gr.Markdown("# Blink Detection with OpenCV")
+    gr.Markdown("Upload a video to detect blinks and view the EAR plot after processing.")
+    video_input = gr.Video(label="Input Video")
+    process_btn = gr.Button("Process")
+    # Receives the annotated frames that process_gradio yields while it runs.
+    output_frames = gr.Image(label="Output Frames")
+    # Final results, shown side by side.
+    with gr.Row():
+        processed_video = gr.Video(label="Processed Video")
+        ear_plot = gr.Image(label="EAR Plot")
+    # The three outputs match, in order, the 3-tuples yielded by process_gradio.
+    process_btn.click(process_gradio, inputs=video_input, outputs=[output_frames, processed_video, ear_plot])
+    # Example video; the path is relative to the working directory.
+    examples = [
+        ["./input-video.mp4"],
+    ]
+    with gr.Row():
+        gr.Examples(
+            examples=examples,
+            inputs=[video_input],
+            label="Load Example Video",
+        )
+# Start the Gradio server only when the file is run as a script.
+if __name__ == "__main__":
+    demo.launch()

click.wav ADDED Viewed

Binary file (195 kB). View file

input-video.mp4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7c1bdb3d8302bbb63bc5fb8137e2b532182bb3126261bebd5f1d6cd48d52dfab
+size 38229628

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+opencv-contrib-python
+gradio
+matplotlib
+pygame