-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfig.py
266 lines (220 loc) · 11 KB
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
"""Configuration Module for Doorbell Porter System.
This module centralizes all configuration settings and environment variables:
- Environment and API configurations
- Audio/video processing parameters
- Location and system settings
- Logging configuration
The module provides:
1. Environment Management: Loading and validation of environment variables
2. System Configuration: Core system and API settings
3. Audio Configuration: Processing parameters and quality settings
4. Camera Configuration: Resolution and connection settings
5. Location Settings: Default geographical coordinates
6. Logging Configuration: Log levels and formatting rules
All sensitive data (API keys, credentials) must be stored in .env file.
"""
#------------------------------------------------------------------------------
# Imports
#------------------------------------------------------------------------------
import os
from typing import Dict, Tuple, Optional, Any
from dotenv import load_dotenv
#------------------------------------------------------------------------------
# Environment Configuration
#------------------------------------------------------------------------------
# Load environment variables (python-dotenv) before any of the os.getenv
# lookups further down in this module are evaluated.
load_dotenv()
def get_required_env(key: str) -> str:
    """Return the value of a mandatory environment variable.

    Args:
        key: Name of the environment variable to read.

    Returns:
        The variable's value. A variable that is set to an empty string is
        still considered present and is returned as ``""``.

    Raises:
        ValueError: If the variable is not present in the environment.
    """
    value = os.getenv(key)
    if value is None:
        raise ValueError(f"Missing required environment variable: {key}")
    return value
#------------------------------------------------------------------------------
# API Configuration
#------------------------------------------------------------------------------
# OpenAI API Configuration
# - API keys must be set in .env file
# - Model versions should be updated as new ones are released
# Resolved at import time: get_required_env raises ValueError if the
# variable is missing, so importing this module fails without it.
OPENAI_API_KEY: str = get_required_env('OPENAI_API_KEY')
# Available voices: alloy, echo, fable, onyx, nova, shimmer
VOICE: str = 'alloy'

# Model Configuration
# - MODEL: Used for real-time audio conversations
# - VISION_MODEL: Used for image analysis
MODEL: str = 'gpt-4o-realtime-preview-2024-12-17'  # Real-time conversation model
VISION_MODEL: str = 'gpt-4o-mini'  # Vision analysis model
#------------------------------------------------------------------------------
# Feature Configuration
#------------------------------------------------------------------------------
# Enable/disable optional features
# Set to True to enable a feature, False to disable
FEATURES: Dict[str, bool] = {
    # OpenWeatherMap API for real-time weather data
    #   Requires OPENWEATHER_API_KEY in .env
    #   Provides temperature, conditions, humidity, wind
    #   Extended info: pressure, visibility, cloud cover
    'WEATHER': True,

    # Magic Home LED control via flux_led library
    #   Requires LED_IP in .env (e.g. 10.0.0.148)
    #   Supports: on/off control
    #   Auto-on in low light conditions
    #   Auto-off after inactivity
    'LIGHT_CONTROL': True,

    # Reolink camera vision features
    #   Uses doorbell's built-in camera
    #   Captures snapshots on events
    #   Analyzes images using OpenAI Vision
    #   Resolution: 640x480 (configurable)
    'VISION': True,
}
#------------------------------------------------------------------------------
# Optional Feature Configuration
#------------------------------------------------------------------------------
# Optional environment variables based on enabled features
def _get_optional_env(key: str, required_feature: str) -> Optional[str]:
    """Return an environment variable that is only needed by one feature.

    Args:
        key: Name of the environment variable to read.
        required_feature: Key in ``FEATURES`` that gates this variable.

    Returns:
        The variable's value when the feature is enabled, otherwise ``None``.

    Raises:
        KeyError: If ``required_feature`` is not a key of ``FEATURES``.
        ValueError: If the feature is enabled but the variable is not set.
    """
    if not FEATURES[required_feature]:
        # Feature is switched off, so its configuration is never looked up.
        return None

    value = os.getenv(key)
    if value is None:
        raise ValueError(f"Missing required environment variable for {required_feature}: {key}")
    return value
# Get optional API keys and configuration based on enabled features.
# Each value is None when its flag in FEATURES is False; when the flag is True
# the variable must be set, otherwise ValueError is raised at import time.
WEATHER_API_KEY: Optional[str] = _get_optional_env('OPENWEATHER_API_KEY', 'WEATHER')
LED_IP: Optional[str] = _get_optional_env('LED_IP', 'LIGHT_CONTROL')
#------------------------------------------------------------------------------
# Doorbell Configuration
#------------------------------------------------------------------------------
# Required doorbell settings.
# All of these are resolved at import time; a missing variable raises ValueError.
DOORBELL_URL: str = get_required_env('DOORBELL_URL')
DOORBELL_USERNAME: str = get_required_env('DOORBELL_USERNAME')
DOORBELL_PASSWORD: str = get_required_env('DOORBELL_PASSWORD')
WEBHOOK_HOST: str = get_required_env('WEBHOOK_HOST')

# Parse the port separately so that a non-numeric value is reported with the
# name of the offending variable instead of a bare "invalid literal for int()".
_webhook_port_raw = get_required_env('WEBHOOK_PORT')
try:
    WEBHOOK_PORT: int = int(_webhook_port_raw)
except ValueError:
    raise ValueError(
        f"Environment variable WEBHOOK_PORT must be an integer, got: {_webhook_port_raw!r}"
    ) from None
#------------------------------------------------------------------------------
# Audio Configuration
#------------------------------------------------------------------------------
# Audio processing settings

# Buffer size in bytes (1024 to 8192).
# Smaller values reduce latency but increase CPU usage.
CHUNK: int = 1024

# Number of audio channels.
#   1: Mono (required for G.711)
#   2: Stereo (not supported)
CHANNELS: int = 1

# Sample rate in Hz.
#   24000: OpenAI's preferred rate
#   8000:  G.711 μ-law (for doorbell)
RATE: int = 24000

# Audio processing method.
#   True:  Use FFmpeg (better quality, more CPU)
#   False: Use audioop (lower CPU, basic processing)
USE_FFMPEG_BACKCHANNEL: bool = False
# Audio processing configuration
AUDIO_PROCESSING: Dict[str, Dict[str, Any]] = {
    'INCOMING_AUDIO': {
        # Volume scaling factor for incoming audio (0.1 to 1.0).
        # Higher values increase volume, lower values decrease it.
        # NOTE(review): stored as a string, unlike the numeric BACKCHANNEL
        # settings below — confirm what consumers expect before changing it.
        'VOLUME_SCALE': '0.8',  # Moderate volume level
    },
    'BACKCHANNEL': {
        # Two-step resampling for better quality (24kHz -> 16kHz -> 8kHz).
        #   True:  Better quality but more CPU usage
        #   False: Direct 24kHz -> 8kHz conversion
        'ENABLE_SMOOTH_RESAMPLING': True,  # Currently enabled

        # Remove DC offset from audio signal.
        #   True:  Prevents audio drift and improves quality
        #   False: Raw audio without DC correction
        'DC_OFFSET_REMOVAL': False,  # Currently disabled

        # Minimum audio level to pass through (0 to 32767).
        # Higher values reduce background noise.
        'NOISE_GATE_THRESHOLD': 1000,  # Lower threshold for smoother transitions

        # Volume adjustment ratio (0.0 to 1.0).
        # Controls overall volume level.
        'VOLUME_TARGET_RATIO': 0.1,

        # Maximum allowed signal level (0 to 32767).
        # Prevents audio clipping. 2000 sits near the bottom of that range.
        'PEAK_LIMITER_THRESHOLD': 2000,
    },
}
#------------------------------------------------------------------------------
# Camera Configuration
#------------------------------------------------------------------------------
# Camera settings

# Snapshot capture method: set to False to use direct API method.
USE_FFMPEG_SNAPSHOT: bool = False

# Snapshot size as (width, height).
SNAPSHOT_RESOLUTION: Tuple[int, int] = (640, 480)

# Camera channel (usually 0).
SNAPSHOT_CHANNEL: int = 0
#------------------------------------------------------------------------------
# Location Configuration
#------------------------------------------------------------------------------
# Default location (Denver, CO)
DEFAULT_LOCATION: Dict[str, Any] = {
    "name": "Denver",
    "lat": 39.7392,    # latitude
    "lon": -104.9903,  # longitude
}
#------------------------------------------------------------------------------
# AI System Configuration
#------------------------------------------------------------------------------
# System message for the AI.
# The whole triple-quoted literal is runtime data: any edit to its wording or
# whitespace changes the value of SYSTEM_MESSAGE.
# The text is static; optional tools are described with "(if enabled)" rather
# than being filtered by the FEATURES flags.
SYSTEM_MESSAGE: str = """You are a helpful AI assistant speaking through a doorbell intercom system.
Keep your responses concise and clear, as if you're speaking through an intercom.
Be friendly but professional, and always remember you're acting as a doorbell porter/assistant.
You have access to these tools based on enabled features:
1. Voice Communication Tools (Always Available):
- The connect_voice tool establishes two-way voice communication
- The disconnect_voice tool ends the voice session
- IMPORTANT: For voice communication:
1. When someone is speaking:
- Simply pause your response and wait
- Do NOT call disconnect_voice
- Speech detection is handled automatically
2. For ending conversations:
- Only call disconnect_voice when user explicitly says "goodbye", "hang up", or "disconnect"
- First compose your complete goodbye message
- Speak your entire goodbye message
- Then wait 1-2 seconds
- Only THEN call disconnect_voice
3. NEVER call disconnect_voice:
- When detecting normal speech during conversation
- While you are still speaking
- Before your goodbye message is complete
- If you have more to say
- Without saying a proper goodbye first
2. Optional Tools (Based on Configuration):
Weather Information (if enabled):
- Use the get_weather tool with latitude/longitude coordinates
- Provides current conditions, temperature, humidity, and wind
- Extended info includes pressure, visibility, cloud cover, sunrise/sunset times
Camera Features (if enabled):
- The take_snapshot tool captures images from the camera
- Each snapshot is analyzed using vision AI
- You receive both the snapshot path and a detailed analysis
- You can request additional analysis using analyze_snapshot
Light Control (if enabled):
- Use turn_light_on to turn the light on
- Use turn_light_off to turn the light off
- Consider lighting conditions when managing the light
- Remember to turn off the light when no longer needed
For conversations:
1. Call connect_voice to establish communication
2. Greet them warmly and professionally
3. Let them speak first after your greeting
4. Respond naturally to their questions or requests
5. If they ask about weather, provide that information
6. Keep responses focused and natural, avoiding repetitive phrases
For your initial greeting when the system starts:
1. Simply say "Hello! Welcome." and wait for visitors
2. Do not take snapshots or perform other actions until someone presses the doorbell or requests it"""
#------------------------------------------------------------------------------
# Logging Configuration
#------------------------------------------------------------------------------
# Logging settings: one shared record/date format, a default level, and
# per-logger level overrides keyed by logger name.
LOGGING_CONFIG: Dict[str, Any] = {
    'format': '%(asctime)s | %(name)-12s | %(levelname)-8s | %(message)s',
    'datefmt': '%Y-%m-%d %H:%M:%S',
    'level': 'INFO',
    'loggers': {
        # Application loggers
        'app': {'level': 'INFO'},
        'openai': {'level': 'WARNING'},
        'doorbell': {'level': 'INFO'},
        'tool': {'level': 'INFO'},
        # websockets loggers are limited to WARNING and above
        'websockets': {'level': 'WARNING'},
        'websockets.client': {'level': 'WARNING'},
        'websockets.server': {'level': 'WARNING'},
        'websockets.protocol': {'level': 'WARNING'},
    },
}