Proper dev & deployment images. Converts front-end to SPA. (#84)
* begin work on dev environment

* more work on dev image

* working dev + prod images with SPA front-end

* reworked dockerfile

* make CI point to the right action

* Improvements to github actions (#79)

* Improvements to github actions

* Change username to repo owner username

* Add fix for login into ghcr (#81)

* Update bug_report.yml

* added dev instructions to readme

* reduced number of steps in dockerfile

---------

Co-authored-by: Juan Calderon-Perez <835733+gaby@users.noreply.github.com>
nsarrazin and gaby authored Mar 27, 2023
1 parent 75de7fa commit 293a23e
Showing 20 changed files with 423 additions and 123 deletions.
1 change: 1 addition & 0 deletions .github/workflows/docker-image.yml
@@ -49,6 +49,7 @@ jobs:
uses: docker/build-push-action@v4
with:
context: .
target: release
push: ${{ github.event_name != 'pull_request' }}
cache-from: type=gha
cache-to: type=gha,mode=max
78 changes: 49 additions & 29 deletions Dockerfile
@@ -1,4 +1,5 @@
FROM gcc:11 as llama_builder
# Compile llama
FROM gcc:11 as llama_builder

WORKDIR /tmp

@@ -8,54 +9,73 @@ RUN cd llama.cpp && \
make && \
mv main llama

# Copy over rest of the project files
# Base image for node
FROM node:19 as node_base

FROM ubuntu:22.04 as deployment
WORKDIR /usr/src/app
# Install pip and requirements

# Base image for runtime
FROM ubuntu:22.04 as base

ARG DEBIAN_FRONTEND=noninteractive
ENV TZ=Europe/Amsterdam

WORKDIR /usr/src/app

# install pip
RUN apt update
RUN apt-get install -y python3-pip curl wget
RUN pip install --upgrade pip
# Install MongoDB and necessary tools
RUN apt update && \
apt install -y curl wget gnupg python3-pip && \
wget -qO - https://www.mongodb.org/static/pgp/server-6.0.asc | apt-key add - && \
echo "deb [ arch=amd64,arm64 ] https://repo.mongodb.org/apt/ubuntu jammy/mongodb-org/6.0 multiverse" | tee /etc/apt/sources.list.d/mongodb-org-6.0.list && \
apt-get update && \
apt-get install -y mongodb-org

# install nodejs
RUN curl -sL https://deb.nodesource.com/setup_19.x | bash
RUN apt-get install nodejs
# copy & install python reqs
COPY ./api/requirements.txt api/requirements.txt
RUN pip install --upgrade pip && \
pip install --no-cache-dir -r ./api/requirements.txt

# Copy files
COPY --from=llama_builder /tmp/llama.cpp/llama /usr/bin/llama

# MongoDB
RUN wget -qO - https://www.mongodb.org/static/pgp/server-6.0.asc | apt-key add -
RUN echo "deb [ arch=amd64,arm64 ] https://repo.mongodb.org/apt/ubuntu jammy/mongodb-org/6.0 multiverse" | tee /etc/apt/sources.list.d/mongodb-org-6.0.list
# Dev environment
FROM base as dev
ENV NODE_ENV='development'

# Install Node.js and npm packages
COPY --from=node_base /usr/local /usr/local
COPY ./web/package*.json ./
RUN npm ci

RUN apt-get update
RUN apt-get install -y mongodb-org
# Copy the rest of the project files
COPY web /usr/src/app/web
COPY ./api /usr/src/app/api

COPY dev.sh /usr/src/app/dev.sh
RUN chmod +x /usr/src/app/dev.sh
CMD ./dev.sh

# install requirements
COPY ./api/requirements.txt api/requirements.txt
RUN pip install -r ./api/requirements.txt
# Build frontend
FROM node_base as frontend_builder

# copy llama binary from llama_builder
COPY --from=llama_builder /tmp/llama.cpp/llama /usr/bin/llama

# copy built webserver from web_builder
COPY ./web/package.json web/
COPY ./web/package-lock.json web/
# Install Node modules
RUN cd web && npm install
COPY ./web/package*.json ./
RUN npm ci

COPY ./web /usr/src/app/web/
WORKDIR /usr/src/app/web/
RUN npm run build

# Runtime environment
FROM base as release

ENV NODE_ENV='production'
WORKDIR /usr/src/app

#copy api folder
COPY --from=frontend_builder /usr/src/app/web/build /usr/src/app/api/static/
COPY ./api /usr/src/app/api

# get the deploy script with the right permissions
COPY deploy.sh /usr/src/app/deploy.sh
RUN chmod +x /usr/src/app/deploy.sh

CMD /usr/src/app/deploy.sh
CMD ./deploy.sh
16 changes: 13 additions & 3 deletions README.md
@@ -14,21 +14,21 @@ A chat interface based on `llama.cpp` for running Alpaca models. Entirely self-h
## Getting started

Setting up Serge is very easy. TLDR for running it with Alpaca 7B:

```
git clone https://github.com/nsarrazin/serge.git
cd serge
docker compose up -d
docker compose up --build -d
docker compose exec serge python3 /usr/src/app/api/utils/download.py tokenizer 7B
```

#### Windows

:warning: For cloning on windows, use `git clone https://github.com/nsarrazin/serge.git --config core.autocrlf=input`.
:warning: For cloning on windows, use `git clone https://github.com/nsarrazin/serge.git --config core.autocrlf=input`.

Make sure you have docker desktop installed, WSL2 configured and enough free RAM to run models. (see below)


#### Kubernetes

Setting up Serge on Kubernetes can be found in the wiki: https://github.com/nsarrazin/serge/wiki/Integrating-Serge-in-your-orchestration#kubernetes-example
@@ -59,6 +59,16 @@ llama will just crash if you don't have enough available memory for your model.

Feel free to join the discord if you need help with the setup: https://discord.gg/62Hc6FEYQH

## Contributing

Serge is always open for contributions! If you catch a bug or have a feature idea, feel free to open an issue or a PR.

If you want to run Serge in development mode (with hot-module reloading for svelte & autoreload for FastAPI) you can do so like this:

```
docker compose -f docker-compose.dev.yml up -d --build
```

## What's next

- [x] Front-end to interface with the API
21 changes: 0 additions & 21 deletions api/Dockerfile.api

This file was deleted.

48 changes: 39 additions & 9 deletions api/main.py
@@ -9,10 +9,12 @@
from beanie.odm.enums import SortDirection

from sse_starlette.sse import EventSourceResponse
from utils.initiate_database import initiate_database
from utils.initiate_database import initiate_database, Settings
from utils.generate import generate, get_full_prompt_from_chat
from utils.convert import convert_all
from models import Question, Chat, ChatParameters
from fastapi.staticfiles import StaticFiles
from starlette.responses import FileResponse


# Configure logging settings
@@ -27,6 +29,8 @@
# Define a logger for the current module
logger = logging.getLogger(__name__)

settings = Settings()

tags_metadata = [
{
"name": "misc.",
@@ -46,6 +50,32 @@
title="Serge", version="0.0.1", description=description, tags_metadata=tags_metadata
)

api_app = FastAPI(title="Serge API")
app.mount('/api', api_app)

if settings.NODE_ENV == "production":
@app.middleware("http")
async def add_custom_header(request, call_next):
response = await call_next(request)
if response.status_code == 404:
return FileResponse('static/200.html')
return response

@app.exception_handler(404)
def not_found(request, exc):
return FileResponse('static/200.html')

async def homepage(request):
return FileResponse('static/200.html')

app.route('/', homepage)
app.mount('/', StaticFiles(directory='static'))

if settings.NODE_ENV == "development":
start_app = api_app
else:
start_app = app

origins = [
"http://localhost",
"http://api:9124",
@@ -90,7 +120,7 @@ async def convert_model_files():
logger.info("models are ready")


@app.on_event("startup")
@start_app.on_event("startup")
async def start_database():
logger.info("initializing database connection")
await initiate_database()
@@ -99,14 +129,14 @@ async def start_database():
asyncio.create_task(convert_model_files())


@app.get("/models", tags=["misc."])
@api_app.get("/models", tags=["misc."])
def list_of_installed_models(
models: Annotated[list[str], Depends(dep_models_ready)]
):
return models


@app.post("/chat", tags=["chats"])
@api_app.post("/chat", tags=["chats"])
async def create_new_chat(
model: str = "ggml-alpaca-7B-q4_0.bin",
temperature: float = 0.1,
@@ -136,14 +166,14 @@ async def create_new_chat(
return chat.id


@app.get("/chat/{chat_id}", tags=["chats"])
@api_app.get("/chat/{chat_id}", tags=["chats"])
async def get_specific_chat(chat_id: str):
chat = await Chat.get(chat_id)
await chat.fetch_all_links()

return chat

@app.delete("/chat/{chat_id}", tags=["chats"])
@api_app.delete("/chat/{chat_id}", tags=["chats"])
async def delete_chat(chat_id: str):
chat = await Chat.get(chat_id)
deleted_chat = await chat.delete()
@@ -175,7 +205,7 @@ def remove_matching_end(a, b):

return b

@app.get("/chat/{chat_id}/question", dependencies=[Depends(dep_models_ready)])
@api_app.get("/chat/{chat_id}/question", dependencies=[Depends(dep_models_ready)])
async def stream_ask_a_question(chat_id: str, prompt: str):

chat = await Chat.get(chat_id)
@@ -216,7 +246,7 @@ async def event_generator():

return EventSourceResponse(event_generator())

@app.post("/chat/{chat_id}/question", dependencies=[Depends(dep_models_ready)])
@api_app.post("/chat/{chat_id}/question", dependencies=[Depends(dep_models_ready)])
async def ask_a_question(chat_id: str, prompt: str):
chat = await Chat.get(chat_id)
await chat.fetch_link(Chat.parameters)
@@ -240,7 +270,7 @@ async def ask_a_question(chat_id: str, prompt: str):

return {"question" : prompt, "answer" : answer[len(full_prompt)+1:]}

@app.get("/chats", tags=["chats"])
@api_app.get("/chats", tags=["chats"])
async def get_all_chats():
res = []

2 changes: 1 addition & 1 deletion api/utils/generate.py
@@ -53,7 +53,7 @@ async def generate(
if return_code != 0:
error_output = await procLlama.stderr.read()
logger.error(error_output.decode("utf-8"))
raise ValueError(error_output.decode("utf-8"))
raise ValueError(f"RETURN CODE {return_code}\n\n"+error_output.decode("utf-8"))
else:
return

1 change: 1 addition & 0 deletions api/utils/initiate_database.py
@@ -8,6 +8,7 @@


class Settings(BaseSettings):
NODE_ENV: str = "development"
class Config:
orm_mode = True

7 changes: 2 additions & 5 deletions deploy.sh
@@ -2,14 +2,11 @@

mongod &

# Start the web server
cd web && npm run dev -- --host 0.0.0.0 --port 8008 &

# Start the API
cd api && uvicorn main:app --host 0.0.0.0 --port 9124 --root-path /api/ &
cd api && uvicorn main:app --host 0.0.0.0 --port 8008 &

# Wait for any process to exit
wait -n

# Exit with status of process that exited first
exit $?
exit $?
14 changes: 14 additions & 0 deletions dev.sh
@@ -0,0 +1,14 @@
#!/bin/bash

mongod &

# Start the web server
cd web && npm run dev -- --host 0.0.0.0 --port 8008 &

# Start the API
cd api && uvicorn main:api_app --reload --host 0.0.0.0 --port 9124 --root-path /api/ &

# Wait for any process to exit
wait -n
# Exit with status of process that exited first
exit $?
20 changes: 20 additions & 0 deletions docker-compose.dev.yml
@@ -0,0 +1,20 @@
version: "3.9"
services:
serge:
restart: on-failure
build:
context: .
dockerfile: Dockerfile
target: dev
volumes:
- ./web:/usr/src/app/web/
- ./api:/usr/src/app/api/
- datadb:/data/db
- weights:/usr/src/app/weights/
- /etc/localtime:/etc/localtime:ro
ports:
- "8008:8008"

volumes:
datadb:
weights:
1 change: 1 addition & 0 deletions docker-compose.yml
@@ -5,6 +5,7 @@ services:
build:
context: .
dockerfile: Dockerfile
target: release
volumes:
- datadb:/data/db
- weights:/usr/src/app/weights/
