-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
74 lines (64 loc) · 2.68 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import cv2
import mediapipe as mp
import numpy as np
import pyaudio
import time
import yaml
config = None
with open("./config.yml", "r") as f:
config = yaml.safe_load(f)
vid = cv2.VideoCapture(0)
mph = mp.solutions.hands
hands = mph.Hands()
mp_draw = mp.solutions.drawing_utils
p = pyaudio.PyAudio()
fs = 44100 # sampling rate, Hz, must be integer
duration = 0.1 # in seconds, may be float
stream = p.open(format=pyaudio.paFloat32,
channels=1,
rate=fs,
output=True)
while 1:
success, image = vid.read()
if config["mirror_camera"]: # only need this if camera is mirrored
image = cv2.flip(image, 1)
img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
results = hands.process(img_rgb)
dimensions = image.shape # (height, width, channels)
if results.multi_hand_landmarks:
mhp = []
for hand_land_mark in results.multi_hand_landmarks:
mhpx, mhpy = 0, 0
for id, lm in enumerate(hand_land_mark.landmark):
h, w, c = image.shape
cx, cy = int(lm.x * w), int(lm.y * h)
if cx > mhpx:
mhpx = cx
if cy > mhpy:
mhpy = cy
# TODO: draw a theremin on the screen, probably add instructions too
cv2.circle(image, (cx, cy), 8, (255, 255, 0), cv2.FILLED)
mp_draw.draw_landmarks(image, hand_land_mark, mph.HAND_CONNECTIONS)
mhp.append((mhpx, mhpy))
if len(mhp) == 2:
# figure out which is pitch and which is volume
# TODO: pitch perception to frequency is actually logarithmic, not linear, make some linearization function
if mhp[0][0] > mhp[1][0]:
pitch = ((mhp[0][0] - (dimensions[1]/2))/(dimensions[1]/2)) * (config["max_pitch"] - config["min_pitch"]) + config["min_pitch"] # normalize pitch
volume = (dimensions[0] - mhp[1][1]) / dimensions[0]
else:
pitch = ((mhp[1][0] - (dimensions[1]/2))/(dimensions[1]/2)) * (config["max_pitch"] - config["min_pitch"]) + config["min_pitch"] # normalize pitch
volume = (dimensions[0] - mhp[0][1]) / dimensions[0]
if pitch > 0:
samples = (np.sin(2 * np.pi * np.arange(fs * duration) * pitch / fs)).astype(np.float32)
output_bytes = (volume * samples).tobytes()
start_time = time.time()
stream.write(output_bytes)
cv2.imshow("Theremin", image)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
vid.release()
cv2.destroyAllWindows()
stream.stop_stream()
stream.close()
p.terminate()