From 0fcb6601d3252a78032b3064b19a55430bb8e1de Mon Sep 17 00:00:00 2001 From: Test User Date: Wed, 11 May 2022 09:31:32 +0000 Subject: [PATCH 1/4] =?UTF-8?q?refactor:=20=F0=9F=92=A1=20use=20a=20tempor?= =?UTF-8?q?al=20name=20for=20the=20directory?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../charts/datasets-server/templates/_initContainerAssets.tpl | 4 ++-- .../charts/datasets-server/templates/_initContainerCache.tpl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/infra/charts/datasets-server/templates/_initContainerAssets.tpl b/infra/charts/datasets-server/templates/_initContainerAssets.tpl index b0852c05b6..71c046c605 100644 --- a/infra/charts/datasets-server/templates/_initContainerAssets.tpl +++ b/infra/charts/datasets-server/templates/_initContainerAssets.tpl @@ -4,9 +4,9 @@ imagePullPolicy: IfNotPresent command: ["/bin/sh", "-c"] args: - - chown {{ .Values.uid }}:{{ .Values.gid }} {{ .Values.storage.assetsDirectory | quote }}; + - chown {{ .Values.uid }}:{{ .Values.gid }} /mounted-path; volumeMounts: - - mountPath: {{ .Values.storage.assetsDirectory | quote }} + - mountPath: /mounted-path mountPropagation: None name: nfs subPath: "{{ include "assets.subpath" . }}" diff --git a/infra/charts/datasets-server/templates/_initContainerCache.tpl b/infra/charts/datasets-server/templates/_initContainerCache.tpl index b3f8d87be4..baeae66a74 100644 --- a/infra/charts/datasets-server/templates/_initContainerCache.tpl +++ b/infra/charts/datasets-server/templates/_initContainerCache.tpl @@ -4,9 +4,9 @@ imagePullPolicy: IfNotPresent command: ["/bin/sh", "-c"] args: - - chown {{ .Values.uid }}:{{ .Values.gid }} {{ .Values.storage.cacheDirectory | quote }}; + - chown {{ .Values.uid }}:{{ .Values.gid }} /mounted-path; volumeMounts: - - mountPath: {{ .Values.storage.cacheDirectory | quote }} + - mountPath: /mounted-path mountPropagation: None name: nfs subPath: "{{ include "cache.subpath" . 
}}" From 4a03d88f26a90bf8bc6ab7d8ecba00ac6fa5e509 Mon Sep 17 00:00:00 2001 From: Test User Date: Wed, 11 May 2022 09:36:09 +0000 Subject: [PATCH 2/4] =?UTF-8?q?refactor:=20=F0=9F=92=A1=20the=20mount=20pa?= =?UTF-8?q?th=20is=20relevant=20only=20for=20the=20container?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../datasets-server/templates/api/_container.tpl | 4 ++-- .../templates/datasets-worker/_container.tpl | 10 +++++----- .../templates/splits-worker/_container.tpl | 10 +++++----- infra/charts/datasets-server/values.yaml | 14 ++++++++++---- 4 files changed, 22 insertions(+), 16 deletions(-) diff --git a/infra/charts/datasets-server/templates/api/_container.tpl b/infra/charts/datasets-server/templates/api/_container.tpl index 119a293fa3..1e0714aba2 100644 --- a/infra/charts/datasets-server/templates/api/_container.tpl +++ b/infra/charts/datasets-server/templates/api/_container.tpl @@ -6,7 +6,7 @@ - name: APP_PORT value: {{ .Values.api.appPort | quote }} - name: ASSETS_DIRECTORY - value: {{ .Values.storage.assetsDirectory | quote }} + value: {{ .Values.api.assetsDirectory | quote }} - name: LOG_LEVEL value: {{ .Values.api.logLevel | quote }} - name: MAX_AGE_LONG_SECONDS @@ -26,7 +26,7 @@ image: "{{ .Values.api.image.repository }}/{{ .Values.api.image.name }}:{{ .Values.api.image.tag }}" imagePullPolicy: {{ .Values.api.image.pullPolicy }} volumeMounts: - - mountPath: {{ .Values.storage.assetsDirectory | quote }} + - mountPath: {{ .Values.api.assetsDirectory | quote }} mountPropagation: None name: nfs subPath: "{{ include "assets.subpath" . 
}}" diff --git a/infra/charts/datasets-server/templates/datasets-worker/_container.tpl b/infra/charts/datasets-server/templates/datasets-worker/_container.tpl index ed547beb92..4a4c43152c 100644 --- a/infra/charts/datasets-server/templates/datasets-worker/_container.tpl +++ b/infra/charts/datasets-server/templates/datasets-worker/_container.tpl @@ -2,15 +2,15 @@ - name: "{{ include "name" . }}-datasets-worker" env: - name: ASSETS_DIRECTORY - value: {{ .Values.storage.assetsDirectory | quote }} + value: {{ .Values.datasetsWorker.assetsDirectory | quote }} - name: DATASETS_BLOCKLIST value: {{ .Values.datasetsWorker.datasetsBlocklist | quote }} - name: DATASETS_REVISION value: {{ .Values.datasetsWorker.datasetsRevision | quote }} - name: HF_DATASETS_CACHE - value: "{{ .Values.storage.cacheDirectory }}/datasets" + value: "{{ .Values.datasetsWorker.cacheDirectory }}/datasets" - name: HF_MODULES_CACHE - value: "{{ .Values.storage.cacheDirectory }}/modules" + value: "{{ .Values.datasetsWorker.cacheDirectory }}/modules" - name: HF_TOKEN # see https://kubernetes.io/docs/concepts/configuration/secret/#creating-a-secret # and https://kubernetes.io/docs/concepts/configuration/secret/#using-secrets-as-environment-variables @@ -53,12 +53,12 @@ image: "{{ .Values.datasetsWorker.image.repository }}/{{ .Values.datasetsWorker.image.name }}:{{ .Values.datasetsWorker.image.tag }}" imagePullPolicy: {{ .Values.datasetsWorker.image.pullPolicy }} volumeMounts: - - mountPath: {{ .Values.storage.assetsDirectory | quote }} + - mountPath: {{ .Values.datasetsWorker.assetsDirectory | quote }} mountPropagation: None name: nfs subPath: "{{ include "assets.subpath" . }}" readOnly: false - - mountPath: {{ .Values.storage.cacheDirectory | quote }} + - mountPath: {{ .Values.datasetsWorker.cacheDirectory | quote }} mountPropagation: None name: nfs subPath: "{{ include "cache.subpath" . 
}}" diff --git a/infra/charts/datasets-server/templates/splits-worker/_container.tpl b/infra/charts/datasets-server/templates/splits-worker/_container.tpl index 01867d188e..bf0b188f7f 100644 --- a/infra/charts/datasets-server/templates/splits-worker/_container.tpl +++ b/infra/charts/datasets-server/templates/splits-worker/_container.tpl @@ -2,15 +2,15 @@ - name: "{{ include "name" . }}-splits-worker" env: - name: ASSETS_DIRECTORY - value: {{ .Values.storage.assetsDirectory | quote }} + value: {{ .Values.splitsWorker.assetsDirectory | quote }} - name: DATASETS_BLOCKLIST value: {{ .Values.splitsWorker.datasetsBlocklist | quote }} - name: DATASETS_REVISION value: {{ .Values.splitsWorker.datasetsRevision | quote }} - name: HF_DATASETS_CACHE - value: "{{ .Values.storage.cacheDirectory }}/datasets" + value: "{{ .Values.splitsWorker.cacheDirectory }}/datasets" - name: HF_MODULES_CACHE - value: "{{ .Values.storage.cacheDirectory }}/modules" + value: "{{ .Values.splitsWorker.cacheDirectory }}/modules" - name: HF_TOKEN # see https://kubernetes.io/docs/concepts/configuration/secret/#creating-a-secret # and https://kubernetes.io/docs/concepts/configuration/secret/#using-secrets-as-environment-variables @@ -53,12 +53,12 @@ image: "{{ .Values.splitsWorker.image.repository }}/{{ .Values.splitsWorker.image.name }}:{{ .Values.splitsWorker.image.tag }}" imagePullPolicy: {{ .Values.splitsWorker.image.pullPolicy }} volumeMounts: - - mountPath: {{ .Values.storage.assetsDirectory | quote }} + - mountPath: {{ .Values.splitsWorker.assetsDirectory | quote }} mountPropagation: None name: nfs subPath: "{{ include "assets.subpath" . }}" readOnly: false - - mountPath: {{ .Values.storage.cacheDirectory | quote }} + - mountPath: {{ .Values.splitsWorker.cacheDirectory | quote }} mountPropagation: None name: nfs subPath: "{{ include "cache.subpath" . 
}}" diff --git a/infra/charts/datasets-server/values.yaml b/infra/charts/datasets-server/values.yaml index 6cb9171834..09068a29e9 100644 --- a/infra/charts/datasets-server/values.yaml +++ b/infra/charts/datasets-server/values.yaml @@ -11,10 +11,6 @@ mongodb: queueDatabase: "datasets_server_queue" storage: - # Directory of assets (audio files and images that will be served for the web) - assetsDirectory: "/assets" - # Directory of the "datasets" library cache (both for modules and datasets) - cacheDirectory: "/cache" nfs: {} domain: "datasets-server.us.dev.moon.huggingface.tech" @@ -45,6 +41,8 @@ api: nodeSelector: {} tolerations: [] + # Directory of assets (audio files and images that will be served for the web) + assetsDirectory: "/assets" readinessPort: 80 # Application hostname - it must not be set to localhost to work in Kube! appHostname: "0.0.0.0" @@ -77,6 +75,10 @@ datasetsWorker: nodeSelector: {} tolerations: [] + # Directory of assets (audio files and images that will be served for the web) + assetsDirectory: "/assets" + # Directory of the "datasets" library cache (both for modules and datasets) + cacheDirectory: "/cache" # Datasets blocklist datasetsBlocklist: "" # Git reference for the canonical datasets on https://github.com/huggingface/datasets @@ -120,6 +122,10 @@ splitsWorker: nodeSelector: {} tolerations: [] + # Directory of assets (audio files and images that will be served for the web) + assetsDirectory: "/assets" + # Directory of the "datasets" library cache (both for modules and datasets) + cacheDirectory: "/cache" # Datasets blocklist datasetsBlocklist: "" # Git reference for the canonical datasets on https://github.com/huggingface/datasets From d5d85a88dfa8eb47a40a274ef779b84a0e1bea06 Mon Sep 17 00:00:00 2001 From: Test User Date: Wed, 11 May 2022 12:41:10 +0000 Subject: [PATCH 3/4] =?UTF-8?q?feat:=20=F0=9F=8E=B8=20add=20nginx=20as=20a?= =?UTF-8?q?=20reverse=20proxy?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- .vscode/monorepo.code-workspace | 4 ++ docker-compose.yml | 25 +++++++++- infra/charts/datasets-server/env/dev.yaml | 11 ++++- .../nginx-templates/default.conf.template | 31 ++++++++++++ .../datasets-server/templates/_helpers.tpl | 28 +++++++++++ .../datasets-server/templates/ingress.yaml | 7 ++- .../templates/reverse-proxy/_container.tpl | 47 +++++++++++++++++++ .../templates/reverse-proxy/configMap.yaml | 10 ++++ .../templates/reverse-proxy/deployment.yaml | 45 ++++++++++++++++++ .../templates/reverse-proxy/service.yaml | 18 +++++++ infra/charts/datasets-server/values.yaml | 36 ++++++++++++++ services/reverse-proxy/README.md | 31 ++++++++++++ 12 files changed, 286 insertions(+), 7 deletions(-) create mode 100644 infra/charts/datasets-server/nginx-templates/default.conf.template create mode 100644 infra/charts/datasets-server/templates/reverse-proxy/_container.tpl create mode 100644 infra/charts/datasets-server/templates/reverse-proxy/configMap.yaml create mode 100644 infra/charts/datasets-server/templates/reverse-proxy/deployment.yaml create mode 100644 infra/charts/datasets-server/templates/reverse-proxy/service.yaml create mode 100644 services/reverse-proxy/README.md diff --git a/.vscode/monorepo.code-workspace b/.vscode/monorepo.code-workspace index 4b1dc75635..f1d9cb3265 100644 --- a/.vscode/monorepo.code-workspace +++ b/.vscode/monorepo.code-workspace @@ -20,6 +20,10 @@ "name": "services/api", "path": "../services/api" }, + { + "name": "services/reverse-proxy", + "path": "../services/reverse-proxy" + }, { "name": "services/worker", "path": "../services/worker" diff --git a/docker-compose.yml b/docker-compose.yml index 371e748285..9d4a923dd3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,25 @@ version: "3.9" services: + reverse-proxy: + image: nginx:1.20 + volumes: + - ./infra/charts/datasets-server/nginx-templates/:/etc/nginx/templates:ro + - reverse-proxy-cache:/nginx-cache + - assets:/assets:ro + 
ports: + - "8000:80" + environment: + - ASSETS_DIRECTORY=/assets + - CACHE_DIRECTORY=/nginx-cache + - CACHE_INACTIVE=24h + - CACHE_MAX_SIZE=1g + - CACHE_ZONE_SIZE=50m + - HOST=localhost + - PORT=80 + - TARGET_URL=http://api:8080 + depends_on: + api: + condition: service_started api: build: context: . @@ -7,13 +27,13 @@ services: volumes: - assets:/assets:ro environment: - APP_PORT: 8000 + APP_PORT: 8080 APP_HOSTNAME: 0.0.0.0 ASSETS_DIRECTORY: "/assets" MONGO_URL: "mongodb://mongodb" WEB_CONCURRENCY: 1 ports: - - 8000:8000 + - 8080:8080 depends_on: mongodb: condition: service_started @@ -54,3 +74,4 @@ services: volumes: assets: mongo: + reverse-proxy-cache: diff --git a/infra/charts/datasets-server/env/dev.yaml b/infra/charts/datasets-server/env/dev.yaml index 5d22ae7e23..3aa8e761b3 100644 --- a/infra/charts/datasets-server/env/dev.yaml +++ b/infra/charts/datasets-server/env/dev.yaml @@ -8,7 +8,7 @@ storage: domain: "datasets-server.us.dev.moon.huggingface.tech" -api: +reverseProxy: replicas: 1 ingress: @@ -28,6 +28,15 @@ api: limits: cpu: 1 +api: + replicas: 1 + + resources: + requests: + cpu: 0.01 + limits: + cpu: 1 + datasetsWorker: replicas: 2 diff --git a/infra/charts/datasets-server/nginx-templates/default.conf.template b/infra/charts/datasets-server/nginx-templates/default.conf.template new file mode 100644 index 0000000000..e78bb753e8 --- /dev/null +++ b/infra/charts/datasets-server/nginx-templates/default.conf.template @@ -0,0 +1,31 @@ +proxy_cache_path ${CACHE_DIRECTORY}/ levels=1:2 keys_zone=STATIC:${CACHE_ZONE_SIZE} inactive=${CACHE_INACTIVE} max_size=${CACHE_MAX_SIZE}; + +server { + listen ${PORT}; + listen [::]:${PORT}; + server_name ${HOST}; + + add_header 'Access-Control-Allow-Origin' '*' always; + + location /assets/ { + alias ${ASSETS_DIRECTORY}/; + } + + location / { + proxy_pass ${TARGET_URL}; + proxy_set_header Host $proxy_host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + 
proxy_set_header X-Forwarded-Proto $scheme; + proxy_http_version 1.1; + # cache all the HEAD+GET requests (without Set-Cookie) + # Cache-Control is used to determine the cache duration + # see https://www.nginx.com/blog/nginx-caching-guide/ + proxy_buffering on; + proxy_cache STATIC; + proxy_cache_use_stale error timeout invalid_header updating http_500 http_502 http_503 http_504; + proxy_cache_background_update on; + proxy_cache_lock on; + add_header X-Cache-Status $upstream_cache_status; + } +} diff --git a/infra/charts/datasets-server/templates/_helpers.tpl b/infra/charts/datasets-server/templates/_helpers.tpl index bc3c28dce6..82c9e9fe14 100644 --- a/infra/charts/datasets-server/templates/_helpers.tpl +++ b/infra/charts/datasets-server/templates/_helpers.tpl @@ -12,6 +12,13 @@ Expand the name of the release. {{- default .Release.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} {{- end }} +{{/* +The name of the release in uppercase and with underscores. +*/}} +{{- define "upper-release" -}} +{{- include "release" . | replace "-" "_" | upper }} +{{- end }} + {{/* Create chart name and version as used by the chart label. */}} @@ -42,6 +49,11 @@ heritage: {{ $.Release.Service | quote }} chart: "{{ include "name" . }}" {{- end }} +{{- define "labels.reverseProxy" -}} +{{ include "labels" . }} +app: "{{ .Release.Name }}-reverse-proxy" +{{- end -}} + {{- define "labels.api" -}} {{ include "labels" . }} app: "{{ include "release" . 
}}-api" @@ -75,6 +87,14 @@ The cache/ subpath in the NFS {{- printf "%s/%s/%s/" .Chart.Name .Release.Name "cache" }} {{- end }} +{{/* +The nginx-cache/ subpath in the NFS +- in a subdirectory named as the chart (datasets-server/), and below it, +- in a subdirectory named as the Release, so that Releases will not share the same nginx-cache/ dir +*/}} +{{- define "nginx.cache.subpath" -}} +{{- printf "%s/%s/%s/" .Chart.Name .Release.Name "nginx-cache" }} +{{- end }} {{/* The URL to access the mongodb instance created if mongodb.enable is true @@ -83,3 +103,11 @@ It's named using the Release name {{- define "mongodb.url" -}} {{- printf "mongodb://%s-mongodb" .Release.Name }} {{- end }} + +{{/* +The URL to access the API service from another container +See https://kubernetes.io/docs/concepts/services-networking/connect-applications-service/#environment-variables +*/}} +{{- define "api.url" -}} +{{- printf "http://${%s_API_SERVICE_HOST}:${%s_API_SERVICE_PORT}" ( include "upper-release" . ) ( include "upper-release" . ) }} +{{- end }} diff --git a/infra/charts/datasets-server/templates/ingress.yaml b/infra/charts/datasets-server/templates/ingress.yaml index bd8e35fd1f..9d7dc8a6d4 100644 --- a/infra/charts/datasets-server/templates/ingress.yaml +++ b/infra/charts/datasets-server/templates/ingress.yaml @@ -3,9 +3,9 @@ kind: Ingress metadata: annotations: # to communicate with AWS - {{ toYaml .Values.api.ingress.annotations | nindent 4 }} + {{ toYaml .Values.reverseProxy.ingress.annotations | nindent 4 }} labels: - {{ include "labels.api" . | nindent 4 }} + {{ include "labels.reverseProxy" . | nindent 4 }} name: {{ include "release" . }} namespace: {{ .Release.Namespace }} spec: @@ -15,8 +15,7 @@ spec: paths: - backend: service: - name: "{{ include "release" . }}-api" + name: "{{ include "release" . 
}}-reverse-proxy" port: name: http pathType: ImplementationSpecific - diff --git a/infra/charts/datasets-server/templates/reverse-proxy/_container.tpl b/infra/charts/datasets-server/templates/reverse-proxy/_container.tpl new file mode 100644 index 0000000000..9ebe06249a --- /dev/null +++ b/infra/charts/datasets-server/templates/reverse-proxy/_container.tpl @@ -0,0 +1,47 @@ +{{- define "containerReverseProxy" -}} +- name: "{{ include "name" . }}-reverse-proxy" + image: "{{ .Values.reverseProxy.image.repository }}/{{ .Values.reverseProxy.image.name }}:{{ .Values.reverseProxy.image.tag }}" + imagePullPolicy: {{ .Values.reverseProxy.image.pullPolicy }} + env: + - name: ASSETS_DIRECTORY + value: {{ .Values.reverseProxy.assetsDirectory | quote }} + - name: CACHE_DIRECTORY + value: {{ .Values.reverseProxy.cacheDirectory | quote }} + - name: CACHE_INACTIVE + value: {{ .Values.reverseProxy.cacheInactive | quote }} + - name: CACHE_MAX_SIZE + value: {{ .Values.reverseProxy.cacheMaxSize | quote }} + - name: CACHE_ZONE_SIZE + value: {{ .Values.reverseProxy.cacheZoneSize | quote }} + - name: HOST + value: {{ .Values.reverseProxy.host | quote }} + - name: PORT + value: {{ .Values.reverseProxy.port | quote }} + volumeMounts: + - name: nginx-templates + mountPath: /etc/nginx/templates + mountPropagation: None + readOnly: true + - mountPath: {{ .Values.reverseProxy.assetsDirectory | quote }} + mountPropagation: None + name: nfs + subPath: "{{ include "assets.subpath" . }}" + readOnly: true + - mountPath: {{ .Values.reverseProxy.cacheDirectory | quote }} + mountPropagation: None + name: nfs + subPath: "{{ include "nginx.cache.subpath" . 
}}" + readOnly: false + readinessProbe: + tcpSocket: + port: {{ .Values.reverseProxy.readinessPort }} + livenessProbe: + tcpSocket: + port: {{ .Values.reverseProxy.readinessPort }} + ports: + - containerPort: {{ .Values.reverseProxy.port }} + name: http + protocol: TCP + resources: + {{ toYaml .Values.reverseProxy.resources | nindent 4 }} +{{- end -}} diff --git a/infra/charts/datasets-server/templates/reverse-proxy/configMap.yaml b/infra/charts/datasets-server/templates/reverse-proxy/configMap.yaml new file mode 100644 index 0000000000..7cf0526f32 --- /dev/null +++ b/infra/charts/datasets-server/templates/reverse-proxy/configMap.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + {{ include "labels.reverseProxy" . | nindent 4 }} + name: "{{ include "release" . }}-reverse-proxy" + namespace: {{ .Release.Namespace }} +data: + default.conf.template: |- + {{ .Files.Get .Values.reverseProxy.nginxTemplateFile | replace "${TARGET_URL}" (include "api.url" . | quote ) | nindent 4 }} diff --git a/infra/charts/datasets-server/templates/reverse-proxy/deployment.yaml b/infra/charts/datasets-server/templates/reverse-proxy/deployment.yaml new file mode 100644 index 0000000000..7b4f7461d1 --- /dev/null +++ b/infra/charts/datasets-server/templates/reverse-proxy/deployment.yaml @@ -0,0 +1,45 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + {{ include "labels.reverseProxy" . | nindent 4 }} + name: "{{ include "release" . }}-reverse-proxy" + namespace: {{ .Release.Namespace }} +spec: + progressDeadlineSeconds: 600 + replicas: {{ .Values.reverseProxy.replicas }} + revisionHistoryLimit: 10 + selector: + matchLabels: + {{ include "labels.reverseProxy" . | nindent 6 }} + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + labels: + {{ include "labels.reverseProxy" . | nindent 8 }} + spec: + initContainers: + {{ include "initContainerAssets" . 
| nindent 8 }} + containers: + {{ include "containerReverseProxy" . | nindent 8 }} + nodeSelector: + {{ toYaml .Values.reverseProxy.nodeSelector | nindent 8 }} + tolerations: + {{ toYaml .Values.reverseProxy.tolerations | nindent 8 }} + volumes: + - name: nfs + nfs: + server: {{ .Values.storage.nfs.server }} + path: {{ .Values.storage.nfs.path }} + - name: nginx-templates + configMap: + name: "{{ include "release" . }}-reverse-proxy" + defaultMode: 420 + optional: false + items: + - key: "default.conf.template" + path: "default.conf.template" diff --git a/infra/charts/datasets-server/templates/reverse-proxy/service.yaml b/infra/charts/datasets-server/templates/reverse-proxy/service.yaml new file mode 100644 index 0000000000..93eef209cc --- /dev/null +++ b/infra/charts/datasets-server/templates/reverse-proxy/service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: "{{ include "release" . }}-reverse-proxy" + annotations: + {{ toYaml .Values.reverseProxy.service.annotations | nindent 4 }} + namespace: {{ .Release.Namespace }} + labels: + {{ include "labels.reverseProxy" . | nindent 4 }} +spec: + ports: + - name: http + port: 80 + protocol: TCP + targetPort: {{ .Values.reverseProxy.port }} + selector: + {{ include "labels.reverseProxy" . 
| nindent 4 }} + type: {{ .Values.reverseProxy.service.type }} diff --git a/infra/charts/datasets-server/values.yaml b/infra/charts/datasets-server/values.yaml index 09068a29e9..cebf2a3597 100644 --- a/infra/charts/datasets-server/values.yaml +++ b/infra/charts/datasets-server/values.yaml @@ -17,6 +17,42 @@ domain: "datasets-server.us.dev.moon.huggingface.tech" uid: 1000 gid: 3000 +reverseProxy: + image: + repository: docker.io + name: nginx + tag: "1.20" + pullPolicy: IfNotPresent + + replicas: 1 + + service: + type: NodePort + annotations: { } + + ingress: + annotations: { } + + resources: + requests: + cpu: 1 + limits: + cpu: 1 + nodeSelector: {} + tolerations: [] + + # Directory of assets (audio files and images that will be served for the web) + assetsDirectory: "/assets" + # Directory of the nginx cache + cacheDirectory: "/nginx-cache" + readinessPort: 80 + cacheInactive: 24h + cacheMaxSize: 1g + cacheZoneSize: 50m + host: localhost + nginxTemplateFile: "nginx-templates/default.conf.template" + port: 80 + api: image: repository: 707930574880.dkr.ecr.us-east-1.amazonaws.com diff --git a/services/reverse-proxy/README.md b/services/reverse-proxy/README.md new file mode 100644 index 0000000000..d8e83bf926 --- /dev/null +++ b/services/reverse-proxy/README.md @@ -0,0 +1,31 @@ +# Datasets server - reverse proxy + +> Reverse-proxy in front of the API + +See [docker-compose.yml](../../docker-compose.yml) for usage. + +Note that the template configuration is located in [infra/charts/datasets-server/nginx-templates/](../../infra/charts/datasets-server/nginx-templates/) in order to be reachable by the Helm chart to deploy on Kubernetes. 
+ +The reverse proxy uses nginx: + +- it serves the static assets directly (the API also serves them if required, but it's unnecessary to go through starlette for this, and it generates errors in Safari, see [1](https://github.com/encode/starlette/issues/950) and [2](https://developer.apple.com/library/archive/documentation/AppleApplications/Reference/SafariWebContent/CreatingVideoforSafarioniPhone/CreatingVideoforSafarioniPhone.html#//apple_ref/doc/uid/TP40006514-SW6)) +- it proxies the other requests to the API +- it caches all the API responses, depending on their `cache-control` header +- it sets the `Access-Control-Allow-Origin` header to `*` to allow cross-origin requests + +It takes various environment variables, all of which are mandatory: + +- `ASSETS_DIRECTORY`: the directory that contains the static assets, eg `/assets` +- `CACHE_INACTIVE`: maximum duration a cached response can stay unaccessed before being removed from the cache, eg `24h` (see [proxy_cache_path](https://nginx.org/en/docs/http/ngx_http_proxy_module.html#proxy_cache_path)) +- `CACHE_MAX_SIZE`: maximum size of the cache, eg `1g` (see [proxy_cache_path](https://nginx.org/en/docs/http/ngx_http_proxy_module.html#proxy_cache_path)) +- `CACHE_DIRECTORY`: the directory that contains the nginx cache, eg `/nginx-cache` +- `CACHE_ZONE_SIZE`: size of the cache index, eg `50m` (see [proxy_cache_path](https://nginx.org/en/docs/http/ngx_http_proxy_module.html#proxy_cache_path)) +- `HOST`: domain of the reverse proxy, eg `localhost` +- `PORT`: port of the reverse proxy, eg `80` +- `TARGET_URL`: URL of the API, eg `http://api:8080` + +The image requires three directories to be mounted (from volumes): + +- `$ASSETS_DIRECTORY` (read-only): the directory that contains the static assets. 
+- `$CACHE_DIRECTORY` (read/write): the directory that contains the nginx cache +- `/etc/nginx/templates` (read-only): the directory that contains the nginx configuration template ([nginx-templates](../../infra/charts/datasets-server/nginx-templates/)) From 4febb557f6679682e56124c961dc7530df373642 Mon Sep 17 00:00:00 2001 From: Test User Date: Wed, 11 May 2022 14:12:19 +0000 Subject: [PATCH 4/4] =?UTF-8?q?refactor:=20=F0=9F=92=A1=20use=20the=20doma?= =?UTF-8?q?in=20service=20instead=20of=20environment=20vars?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/huggingface/datasets-server/pull/245#discussion_r870320484. --- infra/charts/datasets-server/templates/_helpers.tpl | 11 ++--------- .../templates/reverse-proxy/_container.tpl | 2 ++ .../templates/reverse-proxy/configMap.yaml | 2 +- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/infra/charts/datasets-server/templates/_helpers.tpl b/infra/charts/datasets-server/templates/_helpers.tpl index 82c9e9fe14..114c421213 100644 --- a/infra/charts/datasets-server/templates/_helpers.tpl +++ b/infra/charts/datasets-server/templates/_helpers.tpl @@ -12,13 +12,6 @@ Expand the name of the release. {{- default .Release.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} {{- end }} -{{/* -The name of the release in uppercase and with underscores. -*/}} -{{- define "upper-release" -}} -{{- include "release" . | replace "-" "_" | upper }} -{{- end }} - {{/* Create chart name and version as used by the chart label. */}} @@ -106,8 +99,8 @@ It's named using the Release name {{/* The URL to access the API service from another container -See https://kubernetes.io/docs/concepts/services-networking/connect-applications-service/#environment-variables +See https://kubernetes.io/docs/concepts/services-networking/dns-pod-service/#a-aaaa-records */}} {{- define "api.url" -}} -{{- printf "http://${%s_API_SERVICE_HOST}:${%s_API_SERVICE_PORT}" ( include "upper-release" . ) ( include "upper-release" . 
) }} +{{- printf "http://%s-api.%s.svc.cluster.local:80" ( include "release" . ) ( .Release.Namespace ) }} {{- end }} diff --git a/infra/charts/datasets-server/templates/reverse-proxy/_container.tpl b/infra/charts/datasets-server/templates/reverse-proxy/_container.tpl index 9ebe06249a..4c8ad4c219 100644 --- a/infra/charts/datasets-server/templates/reverse-proxy/_container.tpl +++ b/infra/charts/datasets-server/templates/reverse-proxy/_container.tpl @@ -17,6 +17,8 @@ value: {{ .Values.reverseProxy.host | quote }} - name: PORT value: {{ .Values.reverseProxy.port | quote }} + - name: TARGET_URL + value: {{ include "api.url" . | quote }} volumeMounts: - name: nginx-templates mountPath: /etc/nginx/templates diff --git a/infra/charts/datasets-server/templates/reverse-proxy/configMap.yaml b/infra/charts/datasets-server/templates/reverse-proxy/configMap.yaml index 7cf0526f32..a8f5da2274 100644 --- a/infra/charts/datasets-server/templates/reverse-proxy/configMap.yaml +++ b/infra/charts/datasets-server/templates/reverse-proxy/configMap.yaml @@ -7,4 +7,4 @@ metadata: namespace: {{ .Release.Namespace }} data: default.conf.template: |- - {{ .Files.Get .Values.reverseProxy.nginxTemplateFile | replace "${TARGET_URL}" (include "api.url" . | quote ) | nindent 4 }} + {{ .Files.Get .Values.reverseProxy.nginxTemplateFile | nindent 4 }}