# docker-compose.yml
services:
  # LiteLLM can be enabled in Open WebUI if you want direct access to models
  litellm:
    build: litellm/
    container_name: litellm
    ports:
      - 4000:4000
    environment:
      - LITELLM_LOG=WARN
      - LITELLM_MASTER_KEY=$LITELLM_MASTER_KEY
      - GEMINI_API_KEY=$GEMINI_API_KEY
      #- ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY
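    # LiteLLM reads its model routing from a config.yaml in the litellm/ build
    # context. A minimal sketch of such a config (the model entry below is an
    # illustrative assumption, not this repo's actual file):
    #
    #   model_list:
    #     - model_name: gemini-2.0-flash
    #       litellm_params:
    #         model: gemini/gemini-2.0-flash
    #         api_key: os.environ/GEMINI_API_KEY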
  # Hayhooks is the tool server; it also provisions Letta & Open WebUI
  hayhooks:
    build: hayhooks/
    container_name: hayhooks
    ports:
      - 1416:1416
    volumes:
      - ./hayhooks/pipelines:/pipelines
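    # Each pipeline under ./hayhooks/pipelines is exposed by Hayhooks as an HTTP
    # endpoint. A minimal pipeline_wrapper.py sketch (illustrative only; the
    # actual pipelines in this repo will differ):
    #
    #   from haystack import Pipeline
    #   from hayhooks import BasePipelineWrapper
    #
    #   class PipelineWrapper(BasePipelineWrapper):
    #       def setup(self) -> None:
    #           self.pipeline = Pipeline()  # build or load the pipeline here
    #
    #       def run_api(self, question: str) -> str:
    #           result = self.pipeline.run({})  # route `question` to your components
    #           return str(result)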
    environment:
      - OPENAI_API_BASE=http://litellm:4000
      - OPENAI_API_KEY=$LITELLM_MASTER_KEY
      # Could probably use gemini-2.0-flash-lite here
      - SEARCH_MODEL=gemini-2.0-flash
      - EXTRACT_MODEL=gemini-2.0-flash
      - HAYHOOKS_PIPELINES_DIR=/pipelines
      - HAYHOOKS_DISABLE_SSL=true
      - HAYHOOKS_HOST=0.0.0.0
      - HAYHOOKS_PORT=1416
      - LOG_LEVEL=INFO
      - HAYSTACK_LOG_LEVEL=INFO
      - HAYHOOKS_SHOW_TRACEBACKS=true
      # Needed for the search pipeline
      - TAVILY_API_KEY=$TAVILY_API_KEY
      - OPENWEBUI_BASE_URL=http://open-webui:8080
      - LETTA_BASE_URL=http://letta:8283
    healthcheck:
      test: ["CMD", "curl", "-f", "http://127.0.0.1:1416/status"]
      interval: 10s
      timeout: 5s
      retries: 18
      start_period: 5s
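    # To spot-check Hayhooks from the host once it's healthy (port 1416 is
    # published above):
    #   curl -f http://localhost:1416/status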
  # Letta is an agent-building framework with built-in memory/vectordb support.
  # https://docs.letta.com/quickstart/docker
  letta:
    image: letta/letta:0.7.3
    container_name: letta
    ports:
      - 8283:8283
    volumes:
      # Mount the MCP configuration file (this points to Hayhooks)
      - ./letta_mcp_config.json:/root/.letta/mcp_config.json
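    # A sketch of what letta_mcp_config.json might contain, assuming Hayhooks is
    # registered as an SSE-style MCP server (the key layout and URL path are
    # assumptions; check the Letta MCP docs for the exact schema):
    #
    #   {
    #     "mcpServers": {
    #       "hayhooks": { "url": "http://hayhooks:1416/sse" }
    #     }
    #   }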
    environment:
      # https://docs.letta.com/guides/server/docker#setting-environment-variables
      ANTHROPIC_API_KEY: $ANTHROPIC_API_KEY
      GEMINI_API_KEY: $GEMINI_API_KEY
      # Adding Ollama just in case it's running
      OLLAMA_BASE_URL: $OLLAMA_BASE_URL
    restart: on-failure
    healthcheck:
      test: ["CMD", "curl", "-f", "http://127.0.0.1:8283/v1/health/"]
      interval: 5s
      timeout: 5s
      retries: 18
      start_period: 1s
  # Open WebUI is the front-end UI for Letta
  open-webui:
    image: ghcr.io/open-webui/open-webui:0.6.5
    container_name: open-webui
    volumes:
      - open-webui:/app/backend/data
    ports:
      - ${OPEN_WEBUI_PORT-3000}:8080
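    # ${OPEN_WEBUI_PORT-3000} is compose interpolation: it uses OPEN_WEBUI_PORT
    # from the environment/.env when set, and falls back to 3000 otherwise,
    # e.g. set OPEN_WEBUI_PORT=8081 in .env to move the UI.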
    environment:
      # https://docs.openwebui.com/getting-started/env-configuration/
      - GLOBAL_LOG_LEVEL=WARNING
      # Disable authentication (no login page)
      - WEBUI_AUTH=false
      # Enable the /docs endpoint for OpenAPI viewing
      #- ENV=dev
      # Prevent a langchain warning
      - USER_AGENT=openwebui
      # Disable background LLM calls (tags, titles, etc.) wherever possible
      # (point them at Ollama or Flash-Lite instead if desired)
      - ENABLE_TAGS_GENERATION=false
      - ENABLE_TITLE_GENERATION=false
      - ENABLE_EVALUATION_ARENA_MODELS=false
      - ENABLE_AUTOCOMPLETE_GENERATION=false
      - ENABLE_RETRIEVAL_QUERY_GENERATION=false
      # Enable Ollama (it may need tweaking to work with Letta)
      - ENABLE_OLLAMA_API=true
      - OLLAMA_BASE_URL=$OLLAMA_BASE_URL
      # Point RAG embeddings at Gemini (via LiteLLM) to avoid downloading a
      # large local embedding model
      - RAG_EMBEDDING_ENGINE=openai
      - RAG_OPENAI_API_BASE_URL=http://litellm:4000
      - RAG_OPENAI_API_KEY=$LITELLM_MASTER_KEY
      - RAG_EMBEDDING_MODEL=text-embedding-005
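      # To spot-check the embedding route from the host (a sketch; assumes the
      # LiteLLM config actually exposes text-embedding-005):
      #   curl http://localhost:4000/v1/embeddings \
      #     -H "Authorization: Bearer $LITELLM_MASTER_KEY" \
      #     -H "Content-Type: application/json" \
      #     -d '{"model": "text-embedding-005", "input": "hello"}'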
      # Enable Tavily Web Search in Open WebUI (commented out since Letta does a better job)
      #- ENABLE_RAG_WEB_SEARCH=true
      #- RAG_WEB_SEARCH_ENGINE=tavily
      #- TAVILY_API_KEY=$TAVILY_API_KEY
      # Add models from LiteLLM (commented out to keep attention on Letta)
      #- OPENAI_API_BASE_URLS=http://litellm:4000
      #- OPENAI_API_KEY=$LITELLM_MASTER_KEY
      # Leave the STT engine unset to avoid downloading a large local speech-to-text model
      - AUDIO_STT_ENGINE=
    restart: unless-stopped
    # https://docs.openwebui.com/getting-started/advanced-topics/monitoring/#basic-health-check-endpoint
    healthcheck:
      test: ["CMD", "curl", "-f", "http://127.0.0.1:8080/health"]
      interval: 10s
      timeout: 5s
      retries: 18
      start_period: 5s
  # This service runs after open-webui and letta have come up and are healthy;
  # it adds an agent and tools to Letta and changes settings in Open WebUI.
  initializer:
    build: ./initializer
    container_name: initializer
    depends_on:
      open-webui:
        condition: service_healthy
      letta:
        condition: service_healthy
    environment:
      - OPEN_WEBUI_URL=http://open-webui:8080
      - LETTA_BASE_URL=http://letta:8283
      - HAYHOOKS_BASE_URL=http://hayhooks:1416
      # If you prefer using Anthropic, be warned that it has *very* low rate limits
      #- CHAT_MODEL=anthropic/claude-3-7-sonnet-20250219
      #
      # If you don't have a Google API key at all, you can always use letta/letta-free
      #- CHAT_MODEL=letta/letta-free
      #
      # Priced competitively, this gives developers access to increased rate limits
      # to use with 2.5 Pro. The experimental version of Gemini 2.5 Pro remains
      # available for free with lower rate limits.
      # -- https://blog.google/products/gemini/gemini-preview-model-billing-update/
      #
      # Note: 'gemini-2.5-pro-preview-03-25' is a billed model;
      # you can continue to use 'gemini-2.5-pro-exp-03-25' on the free tier.
      # -- https://ai.google.dev/gemini-api/docs/thinking
      #
      #- CHAT_MODEL=google_ai/gemini-2.5-pro-preview-03-25
      - CHAT_MODEL=google_ai/gemini-2.5-pro-exp-03-25
      # This embedding model is likewise free (for now); you can also use letta/letta-free
      #- EMBEDDING_MODEL=letta/letta-free
      - EMBEDDING_MODEL=google_ai/gemini-embedding-exp-03-07
volumes:
  open-webui:
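# A sketch of the .env file this compose file expects; the variable names are
# taken from the interpolations above, the values are placeholders:
#
#   LITELLM_MASTER_KEY=sk-anything-you-like
#   GEMINI_API_KEY=<your Gemini API key>
#   TAVILY_API_KEY=<your Tavily API key>
#   ANTHROPIC_API_KEY=<optional>
#   OLLAMA_BASE_URL=http://host.docker.internal:11434
#   OPEN_WEBUI_PORT=3000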