# docker-compose.yml
services:
  # LiteLLM can be enabled in Open WebUI if you want direct access to models
  litellm:
    build: litellm/
    container_name: litellm
    ports:
      - 4000:4000
    environment:
      - LITELLM_LOG=WARN
      - LITELLM_MASTER_KEY=$LITELLM_MASTER_KEY
      - GEMINI_API_KEY=$GEMINI_API_KEY
      #- ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY
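    # LiteLLM reads its model routing from a config.yaml in the litellm/ build
    # context. A minimal sketch of such a config (the model entry below is an
    # illustrative assumption, not this repo's actual file):
    #
    #   model_list:
    #     - model_name: gemini-2.0-flash
    #       litellm_params:
    #         model: gemini/gemini-2.0-flash
    #         api_key: os.environ/GEMINI_API_KEY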
  # Hayhooks is the tool server; it also provisions Letta & Open WebUI
  hayhooks:
    build: hayhooks/
    container_name: hayhooks
    ports:
      - 1416:1416
    volumes:
      - ./hayhooks/pipelines:/pipelines
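    # Each pipeline under ./hayhooks/pipelines is exposed by Hayhooks as an HTTP
    # endpoint. A minimal pipeline_wrapper.py sketch (illustrative only; the
    # actual pipelines in this repo will differ):
    #
    #   from haystack import Pipeline
    #   from hayhooks import BasePipelineWrapper
    #
    #   class PipelineWrapper(BasePipelineWrapper):
    #       def setup(self) -> None:
    #           self.pipeline = Pipeline()  # build or load the pipeline here
    #
    #       def run_api(self, question: str) -> str:
    #           result = self.pipeline.run({})  # route `question` to your components
    #           return str(result)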
    environment:
      - OPENAI_API_BASE=http://litellm:4000
      - OPENAI_API_KEY=$LITELLM_MASTER_KEY
      # Could probably use gemini-2.0-flash-lite here
      - SEARCH_MODEL=gemini-2.0-flash
      - EXTRACT_MODEL=gemini-2.0-flash
      - HAYHOOKS_PIPELINES_DIR=/pipelines
      - HAYHOOKS_DISABLE_SSL=true
      - HAYHOOKS_HOST=0.0.0.0
      - HAYHOOKS_PORT=1416
      - LOG_LEVEL=INFO
      - HAYSTACK_LOG_LEVEL=INFO
      - HAYHOOKS_SHOW_TRACEBACKS=true
      # Needed for the search pipeline
      - TAVILY_API_KEY=$TAVILY_API_KEY
      - OPENWEBUI_BASE_URL=http://open-webui:8080
      - LETTA_BASE_URL=http://letta:8283
    healthcheck:
      test: ["CMD", "curl", "-f", "http://127.0.0.1:1416/status"]
      interval: 10s
      timeout: 5s
      retries: 18
      start_period: 5s
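    # To spot-check Hayhooks from the host once it's healthy (port 1416 is
    # published above):
    #   curl -f http://localhost:1416/status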
  # Letta is an agent-building framework with built-in memory/vectordb support.
  # https://docs.letta.com/quickstart/docker
  letta:
    image: letta/letta:0.7.3
    container_name: letta
    ports:
      - 8283:8283
    volumes:
      # Mount the MCP configuration file (this points to Hayhooks)
      - ./letta_mcp_config.json:/root/.letta/mcp_config.json
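    # A sketch of what letta_mcp_config.json might contain, assuming Hayhooks is
    # registered as an SSE-style MCP server (the key layout and URL path are
    # assumptions; check the Letta MCP docs for the exact schema):
    #
    #   {
    #     "mcpServers": {
    #       "hayhooks": { "url": "http://hayhooks:1416/sse" }
    #     }
    #   }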
    environment:
      # https://docs.letta.com/guides/server/docker#setting-environment-variables
      ANTHROPIC_API_KEY: $ANTHROPIC_API_KEY
      GEMINI_API_KEY: $GEMINI_API_KEY
      # Adding Ollama just in case it's running
      OLLAMA_BASE_URL: $OLLAMA_BASE_URL
    restart: on-failure
    healthcheck:
      test: ["CMD", "curl", "-f", "http://127.0.0.1:8283/v1/health/"]
      interval: 5s
      timeout: 5s
      retries: 18
      start_period: 1s
  # Open WebUI is the front-end UI for Letta
  open-webui:
    image: ghcr.io/open-webui/open-webui:0.6.5
    container_name: open-webui
    volumes:
      - open-webui:/app/backend/data
    ports:
      - ${OPEN_WEBUI_PORT-3000}:8080
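    # ${OPEN_WEBUI_PORT-3000} is compose interpolation: it uses OPEN_WEBUI_PORT
    # from the environment/.env when set, and falls back to 3000 otherwise,
    # e.g. set OPEN_WEBUI_PORT=8081 in .env to move the UI.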
    environment:
      # https://docs.openwebui.com/getting-started/env-configuration/
      - GLOBAL_LOG_LEVEL=WARNING
      # Disable authentication (no login page)
      - WEBUI_AUTH=false
      # Enable the /docs endpoint for OpenAPI viewing
      #- ENV=dev
      # Prevent a langchain warning
      - USER_AGENT=openwebui
      # Disable background LLM calls (tags, titles, etc.) wherever possible
      # (point them at Ollama or Flash-Lite instead if desired)
      - ENABLE_TAGS_GENERATION=false
      - ENABLE_TITLE_GENERATION=false
      - ENABLE_EVALUATION_ARENA_MODELS=false
      - ENABLE_AUTOCOMPLETE_GENERATION=false
      - ENABLE_RETRIEVAL_QUERY_GENERATION=false
      # Enable Ollama (it may need tweaking to work with Letta)
      - ENABLE_OLLAMA_API=true
      - OLLAMA_BASE_URL=$OLLAMA_BASE_URL
      # Point RAG embeddings at Gemini (via LiteLLM) to avoid downloading a
      # large local embedding model
      - RAG_EMBEDDING_ENGINE=openai
      - RAG_OPENAI_API_BASE_URL=http://litellm:4000
      - RAG_OPENAI_API_KEY=$LITELLM_MASTER_KEY
      - RAG_EMBEDDING_MODEL=text-embedding-005
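      # To spot-check the embedding route from the host (a sketch; assumes the
      # LiteLLM config actually exposes text-embedding-005):
      #   curl http://localhost:4000/v1/embeddings \
      #     -H "Authorization: Bearer $LITELLM_MASTER_KEY" \
      #     -H "Content-Type: application/json" \
      #     -d '{"model": "text-embedding-005", "input": "hello"}'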
      # Enable Tavily Web Search in Open WebUI (commented out since Letta does a better job)
      #- ENABLE_RAG_WEB_SEARCH=true
      #- RAG_WEB_SEARCH_ENGINE=tavily
      #- TAVILY_API_KEY=$TAVILY_API_KEY
      # Add models from LiteLLM (commented out to keep attention on Letta)
      #- OPENAI_API_BASE_URLS=http://litellm:4000
      #- OPENAI_API_KEY=$LITELLM_MASTER_KEY
      # Leave the STT engine unset to avoid downloading a large local speech-to-text model
      - AUDIO_STT_ENGINE=
    restart: unless-stopped
    # https://docs.openwebui.com/getting-started/advanced-topics/monitoring/#basic-health-check-endpoint
    healthcheck:
      test: ["CMD", "curl", "-f", "http://127.0.0.1:8080/health"]
      interval: 10s
      timeout: 5s
      retries: 18
      start_period: 5s
  # This service runs after open-webui and letta have come up and are healthy;
  # it adds an agent and tools to Letta and changes settings in Open WebUI.
  initializer:
    build: ./initializer
    container_name: initializer
    depends_on:
      open-webui:
        condition: service_healthy
      letta:
        condition: service_healthy
    environment:
      - OPEN_WEBUI_URL=http://open-webui:8080
      - LETTA_BASE_URL=http://letta:8283
      - HAYHOOKS_BASE_URL=http://hayhooks:1416
      # If you prefer using Anthropic, be warned that it has *very* low rate limits
      #- CHAT_MODEL=anthropic/claude-3-7-sonnet-20250219
      #
      # If you don't have a Google API key at all, you can always use letta/letta-free
      #- CHAT_MODEL=letta/letta-free
      #
      # Priced competitively, this gives developers access to increased rate limits
      # to use with 2.5 Pro. The experimental version of Gemini 2.5 Pro remains
      # available for free with lower rate limits.
      # -- https://blog.google/products/gemini/gemini-preview-model-billing-update/
      #
      # Note: 'gemini-2.5-pro-preview-03-25' is a billed model;
      # you can continue to use 'gemini-2.5-pro-exp-03-25' on the free tier.
      # -- https://ai.google.dev/gemini-api/docs/thinking
      #
      #- CHAT_MODEL=google_ai/gemini-2.5-pro-preview-03-25
      - CHAT_MODEL=google_ai/gemini-2.5-pro-exp-03-25
      # This embedding model is likewise free (for now); you can also use letta/letta-free
      #- EMBEDDING_MODEL=letta/letta-free
      - EMBEDDING_MODEL=google_ai/gemini-embedding-exp-03-07
volumes:
  open-webui:
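# A sketch of the .env file this compose file expects; the variable names are
# taken from the interpolations above, the values are placeholders:
#
#   LITELLM_MASTER_KEY=sk-anything-you-like
#   GEMINI_API_KEY=<your Gemini API key>
#   TAVILY_API_KEY=<your Tavily API key>
#   ANTHROPIC_API_KEY=<optional>
#   OLLAMA_BASE_URL=http://host.docker.internal:11434
#   OPEN_WEBUI_PORT=3000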