mediapipe-hgr-task-webcam.py
import streamlit as st
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import cv2
import numpy as np
import time
from mediapipe.framework.formats import landmark_pb2
# Local Modules
import content
import utils
import plotting
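# To run this app (assumed invocation; the gesture recognizer .task model path
# is supplied through the sidebar built in utils.mediapipe_sidebar_options()):
#   streamlit run mediapipe-hgr-task-webcam.py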
st.error("!! This Demo is currently not working !!", icon="🚨")
BaseOptions = mp.tasks.BaseOptions
GestureRecognizer = mp.tasks.vision.GestureRecognizer
GestureRecognizerOptions = mp.tasks.vision.GestureRecognizerOptions
VisionRunningMode = mp.tasks.vision.RunningMode
# Set up MediaPipe
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
# Global variables
COUNTER, FPS = 0, 0
START_TIME = time.time()
recognition_result = None
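# LIVE_STREAM mode delivers results asynchronously via this callback; the
# latest result is kept in a module-level variable so the frame loop can
# draw it on the next pass.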
def save_result(
    result: vision.GestureRecognizerResult,
    unused_output_image: mp.Image,
    timestamp_ms: int,
):
    global recognition_result
    recognition_result = result
@st.cache_resource
def load_model(model_path, num_hands=2):
    base_options = python.BaseOptions(model_asset_path=model_path)
    options = vision.GestureRecognizerOptions(
        base_options=base_options,
        running_mode=VisionRunningMode.LIVE_STREAM,
        num_hands=num_hands,
        min_hand_detection_confidence=0.5,
        min_hand_presence_confidence=0.5,
        min_tracking_confidence=0.5,
        result_callback=save_result,
    )
    return vision.GestureRecognizer.create_from_options(options)
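# Feed each frame to the recognizer, refresh the FPS estimate every 10 frames,
# and overlay the landmarks/gestures from the most recent callback result.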
def process_frame(frame, recognizer):
    global COUNTER, FPS, START_TIME, recognition_result
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)
    recognizer.recognize_async(mp_image, time.time_ns() // 1_000_000)
    COUNTER += 1
    if COUNTER % 10 == 0:
        FPS = 10 / (time.time() - START_TIME)
        START_TIME = time.time()
    if recognition_result:
        frame = draw_landmarks_and_gestures(frame, recognition_result)
    cv2.putText(
        frame,
        f"FPS = {FPS:.1f}",
        (24, 50),
        cv2.FONT_HERSHEY_DUPLEX,
        1,
        (0, 0, 0),
        1,
        cv2.LINE_AA,
    )
    return frame
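# Draw the hand skeleton for each detected hand and print the top gesture
# class and score just above the hand's top-left landmark.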
def draw_landmarks_and_gestures(frame, result):
    if result.hand_landmarks:
        for hand_index, hand_landmarks in enumerate(result.hand_landmarks):
            hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
            hand_landmarks_proto.landmark.extend(
                [
                    landmark_pb2.NormalizedLandmark(
                        x=landmark.x, y=landmark.y, z=landmark.z
                    )
                    for landmark in hand_landmarks
                ]
            )
            mp_drawing.draw_landmarks(
                frame,
                hand_landmarks_proto,
                mp_hands.HAND_CONNECTIONS,
                mp_drawing_styles.get_default_hand_landmarks_style(),
                mp_drawing_styles.get_default_hand_connections_style(),
            )
            if result.gestures:
                gesture = result.gestures[hand_index]
                category_name = gesture[0].category_name
                score = round(gesture[0].score, 2)
                result_text = f"{category_name} ({score})"
                frame_height, frame_width = frame.shape[:2]
                x_min = min([landmark.x for landmark in hand_landmarks])
                y_min = min([landmark.y for landmark in hand_landmarks])
                x_min_px = int(x_min * frame_width)
                y_min_px = int(y_min * frame_height)
                cv2.putText(
                    frame,
                    result_text,
                    (x_min_px, y_min_px - 10),
                    cv2.FONT_HERSHEY_DUPLEX,
                    1,
                    (255, 255, 255),
                    2,
                    cv2.LINE_AA,
                )
    return frame
st.title("Hand Gesture Recognition")
# Sidebar
model_path, num_hands = utils.mediapipe_sidebar_options()
# Load the model
recognizer = load_model(model_path, num_hands)
# Content
content.content_mediapipe_hgr_task_webcam()
# Create columns
col1, col2 = st.columns(2)
# Placeholder for video feed
with col1:
    st.markdown("## 🎥 Video Feed:")
    video_placeholder = st.empty()
    col11, col12 = st.columns([1, 1])
    with col11:
        run_button = st.button(
            ":green[Run]", type="secondary", use_container_width=True
        )
    with col12:
        stop_button = st.button("Stop", type="primary", use_container_width=True)
# Placeholder for gesture recognition results
with col2:
    st.markdown("## 🗂️ Model Prediction:")
    st.markdown("### Hand Gestures:")
    gesture_text = st.empty()
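# Main capture loop: read, mirror, and process frames until the webcam stops
# delivering frames or the Stop button interrupts the loop.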
if run_button:
    with st.spinner("Opening webcam..."):
        cap = cv2.VideoCapture(0)  # Use 0 for default webcam
        if not cap.isOpened():
            st.error("Could not open webcam.")
    while True:
        success, frame = cap.read()
        if not success:
            st.warning("Failed to read frame from webcam.", icon="⚠️")
            break
        frame = cv2.flip(frame, 1)
        processed_frame = process_frame(frame, recognizer)
        video_placeholder.image(processed_frame, channels="BGR", use_column_width=True)
        if recognition_result and recognition_result.gestures:
            gesture_info = []
            for hand_index, gestures in enumerate(recognition_result.gestures):
                for gesture in gestures:
                    gesture_info.append(
                        f"✋ Hand {hand_index + 1}: Model predicts the Class/Gesture **:red[{gesture.category_name}]** "
                        f"with a Probability/Score of **:red[{gesture.score:.2f}]**"
                    )
            all_gestures = "\n\n".join(gesture_info)
            gesture_text.markdown(all_gestures)
        # Check for stop button
        if stop_button:
            break
    # Release resources
    cap.release()
    recognizer.close()