Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace grpc_health_probe with the built-in gRPC container probe feature #2189

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions cmd/db-manager/v1beta1/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
FROM golang:alpine AS build-env

ARG TARGETARCH
ENV GRPC_HEALTH_PROBE_VERSION v0.4.15
andreyvelich marked this conversation as resolved.
Show resolved Hide resolved

WORKDIR /go/src/github.com/kubeflow/katib

Expand All @@ -18,13 +17,8 @@ COPY pkg/ pkg/
# Build the binary.
RUN CGO_ENABLED=0 GOOS=linux GOARCH="${TARGETARCH}" go build -a -o katib-db-manager ./cmd/db-manager/v1beta1

# Add GRPC health probe.
RUN wget -qO /bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-${TARGETARCH} \
&& chmod +x /bin/grpc_health_probe

# Copy the db-manager into a thin image.
FROM alpine:3.15
WORKDIR /app
COPY --from=build-env /bin/grpc_health_probe /bin/
COPY --from=build-env /go/src/github.com/kubeflow/katib/katib-db-manager /app/
ENTRYPOINT ["./katib-db-manager"]
6 changes: 0 additions & 6 deletions cmd/suggestion/goptuna/v1beta1/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
FROM golang:alpine AS build-env

ARG TARGETARCH
ENV GRPC_HEALTH_PROBE_VERSION v0.4.15

WORKDIR /go/src/github.com/kubeflow/katib

Expand All @@ -18,18 +17,13 @@ COPY pkg/ pkg/
# Build the binary.
RUN CGO_ENABLED=0 GOOS=linux GOARCH=${TARGETARCH} go build -a -o goptuna-suggestion ./cmd/suggestion/goptuna/v1beta1

# Add GRPC health probe.
RUN wget -qO/bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-${TARGETARCH} \
&& chmod +x /bin/grpc_health_probe

# Copy the Goptuna suggestion into a thin image.
FROM alpine:3.15

ENV TARGET_DIR /opt/katib

WORKDIR ${TARGET_DIR}

COPY --from=build-env /bin/grpc_health_probe /bin/
COPY --from=build-env /go/src/github.com/kubeflow/katib/goptuna-suggestion ${TARGET_DIR}/

RUN chgrp -R 0 ${TARGET_DIR} \
Expand Down
9 changes: 0 additions & 9 deletions cmd/suggestion/hyperband/v1beta1/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,3 @@
FROM alpine:3.15 AS downloader

ARG TARGETARCH
ENV GRPC_HEALTH_PROBE_VERSION v0.4.15

RUN wget -qO/bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-${TARGETARCH} \
&& chmod +x /bin/grpc_health_probe

FROM python:3.10-slim

ARG TARGETARCH
Expand All @@ -22,7 +14,6 @@ RUN if [ "${TARGETARCH}" = "ppc64le" ] || [ "${TARGETARCH}" = "arm64" ]; then \

ADD ./pkg/ ${TARGET_DIR}/pkg/
ADD ./${SUGGESTION_DIR}/ ${TARGET_DIR}/${SUGGESTION_DIR}/
COPY --from=downloader /bin/grpc_health_probe /bin/grpc_health_probe

WORKDIR ${TARGET_DIR}/${SUGGESTION_DIR}

Expand Down
9 changes: 0 additions & 9 deletions cmd/suggestion/hyperopt/v1beta1/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,3 @@
FROM alpine:3.15 AS downloader

ARG TARGETARCH
ENV GRPC_HEALTH_PROBE_VERSION v0.4.15

RUN wget -qO/bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-${TARGETARCH} \
&& chmod +x /bin/grpc_health_probe

FROM python:3.10-slim

ARG TARGETARCH
Expand All @@ -22,7 +14,6 @@ RUN if [ "${TARGETARCH}" = "ppc64le" ]; then \

ADD ./pkg/ ${TARGET_DIR}/pkg/
ADD ./${SUGGESTION_DIR}/ ${TARGET_DIR}/${SUGGESTION_DIR}/
COPY --from=downloader /bin/grpc_health_probe /bin/grpc_health_probe

WORKDIR ${TARGET_DIR}/${SUGGESTION_DIR}

Expand Down
9 changes: 0 additions & 9 deletions cmd/suggestion/nas/darts/v1beta1/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,3 @@
FROM alpine:3.15 as downloader

ARG TARGETARCH
ENV GRPC_HEALTH_PROBE_VERSION v0.4.15

RUN wget -qO/bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-${TARGETARCH} \
&& chmod +x /bin/grpc_health_probe

FROM python:3.10-slim

ARG TARGETARCH
Expand All @@ -22,7 +14,6 @@ RUN if [ "${TARGETARCH}" = "ppc64le" ]; then \

ADD ./pkg/ ${TARGET_DIR}/pkg/
ADD ./${SUGGESTION_DIR}/ ${TARGET_DIR}/${SUGGESTION_DIR}/
COPY --from=downloader /bin/grpc_health_probe /bin/grpc_health_probe

WORKDIR ${TARGET_DIR}/${SUGGESTION_DIR}

Expand Down
10 changes: 0 additions & 10 deletions cmd/suggestion/nas/enas/v1beta1/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,17 +1,8 @@
FROM alpine:3.15 AS downloader

ARG TARGETARCH
ENV GRPC_HEALTH_PROBE_VERSION v0.4.15

RUN wget -qO/bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-${TARGETARCH} \
&& chmod +x /bin/grpc_health_probe

FROM python:3.10-slim

ARG TARGETARCH
ENV TARGET_DIR /opt/katib
ENV SUGGESTION_DIR cmd/suggestion/nas/enas/v1beta1
ENV GRPC_HEALTH_PROBE_VERSION v0.4.15
ENV PYTHONPATH ${TARGET_DIR}:${TARGET_DIR}/pkg/apis/manager/v1beta1/python:${TARGET_DIR}/pkg/apis/manager/health/python

RUN if [ "${TARGETARCH}" = "ppc64le" ]; then \
Expand All @@ -23,7 +14,6 @@ RUN if [ "${TARGETARCH}" = "ppc64le" ]; then \

ADD ./pkg/ ${TARGET_DIR}/pkg/
ADD ./${SUGGESTION_DIR}/ ${TARGET_DIR}/${SUGGESTION_DIR}/
COPY --from=downloader /bin/grpc_health_probe /bin/grpc_health_probe

WORKDIR ${TARGET_DIR}/${SUGGESTION_DIR}

Expand Down
9 changes: 0 additions & 9 deletions cmd/suggestion/optuna/v1beta1/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,3 @@
FROM alpine:3.15 AS downloader

ARG TARGETARCH
ENV GRPC_HEALTH_PROBE_VERSION v0.4.15

RUN wget -qO /bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-${TARGETARCH} \
&& chmod +x /bin/grpc_health_probe

FROM python:3.10-slim

ARG TARGETARCH
Expand All @@ -22,7 +14,6 @@ RUN if [ "${TARGETARCH}" = "ppc64le" ]; then \

ADD ./pkg/ ${TARGET_DIR}/pkg/
ADD ./${SUGGESTION_DIR}/ ${TARGET_DIR}/${SUGGESTION_DIR}/
COPY --from=downloader /bin/grpc_health_probe /bin/grpc_health_probe

WORKDIR ${TARGET_DIR}/${SUGGESTION_DIR}

Expand Down
9 changes: 0 additions & 9 deletions cmd/suggestion/pbt/v1beta1/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,3 @@
FROM alpine:3.15 AS downloader

ARG TARGETARCH
ENV GRPC_HEALTH_PROBE_VERSION v0.4.15

RUN wget -qO /bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-${TARGETARCH} \
&& chmod +x /bin/grpc_health_probe

FROM python:3.10-slim

ARG TARGETARCH
Expand All @@ -22,7 +14,6 @@ RUN if [ "${TARGETARCH}" = "ppc64le" ]; then \

ADD ./pkg/ ${TARGET_DIR}/pkg/
ADD ./${SUGGESTION_DIR}/ ${TARGET_DIR}/${SUGGESTION_DIR}/
COPY --from=downloader /bin/grpc_health_probe /bin/grpc_health_probe

WORKDIR ${TARGET_DIR}/${SUGGESTION_DIR}

Expand Down
9 changes: 0 additions & 9 deletions cmd/suggestion/skopt/v1beta1/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,3 @@
FROM alpine:3.15 AS downloader

ARG TARGETARCH
ENV GRPC_HEALTH_PROBE_VERSION v0.4.15

RUN wget -qO /bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-${TARGETARCH} \
&& chmod +x /bin/grpc_health_probe

FROM python:3.10-slim

ARG TARGETARCH
Expand All @@ -22,7 +14,6 @@ RUN if [ "${TARGETARCH}" = "ppc64le" ]; then \

ADD ./pkg/ ${TARGET_DIR}/pkg/
ADD ./${SUGGESTION_DIR}/ ${TARGET_DIR}/${SUGGESTION_DIR}/
COPY --from=downloader /bin/grpc_health_probe /bin/grpc_health_probe

WORKDIR ${TARGET_DIR}/${SUGGESTION_DIR}

Expand Down
4 changes: 2 additions & 2 deletions manifests/v1beta1/components/db-manager/db-manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ spec:
- name: api
containerPort: 6789
livenessProbe:
exec:
command: ["/bin/grpc_health_probe", "-addr=:6789"]
grpc:
port: 6789
initialDelaySeconds: 10
periodSeconds: 60
failureThreshold: 5
8 changes: 4 additions & 4 deletions pkg/controller.v1beta1/consts/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,6 @@ const (
// DefaultEarlyStoppingPort is the default port of EarlyStopping service.
DefaultEarlyStoppingPort = 6788

// DefaultGRPCService is the default suggestion service name,
// which is used to run healthz check using grpc probe.
DefaultGRPCService = "manager.v1beta1.Suggestion"

// DefaultGRPCRetryAttempts is the maximum number of retries for gRPC calls
DefaultGRPCRetryAttempts = 10
// DefaultGRPCRetryPeriod is a fixed period of time between gRPC call retries
Expand Down Expand Up @@ -171,6 +167,10 @@ var (
// DefaultKatibDBManagerServicePort is the default Port of Katib DB Manager
DefaultKatibDBManagerServicePort = env.GetEnvOrDefault(DefaultKatibDBManagerServicePortEnvName, "6789")

// DefaultGRPCService is the default suggestion service name,
// which is used to run healthz check using grpc probe.
DefaultGRPCService = "manager.v1beta1.Suggestion"

// List of all valid keys of trial metadata for substitution in Trial template
TrialTemplateMetaKeys = []string{
TrialTemplateMetaKeyOfName,
Expand Down
20 changes: 6 additions & 14 deletions pkg/controller.v1beta1/suggestion/composer/composer.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,6 @@ const (
defaultPeriodForReady = 10
defaultPeriodForLive = 120
defaultFailureThreshold = 12
// Ref https://github.com/grpc-ecosystem/grpc-health-probe/
defaultGRPCHealthCheckProbe = "/bin/grpc_health_probe"
)

var (
Expand Down Expand Up @@ -210,12 +208,9 @@ func (g *General) desiredContainers(s *suggestionsv1beta1.Suggestion,
if viper.GetBool(consts.ConfigEnableGRPCProbeInSuggestion) && suggestionContainer.ReadinessProbe == nil {
suggestionContainer.ReadinessProbe = &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
Exec: &corev1.ExecAction{
Command: []string{
defaultGRPCHealthCheckProbe,
fmt.Sprintf("-addr=:%d", consts.DefaultSuggestionPort),
fmt.Sprintf("-service=%s", consts.DefaultGRPCService),
},
GRPC: &corev1.GRPCAction{
Port: consts.DefaultSuggestionPort,
Service: &consts.DefaultGRPCService,
},
},
InitialDelaySeconds: defaultInitialDelaySeconds,
Expand All @@ -225,12 +220,9 @@ func (g *General) desiredContainers(s *suggestionsv1beta1.Suggestion,
if viper.GetBool(consts.ConfigEnableGRPCProbeInSuggestion) && suggestionContainer.LivenessProbe == nil {
suggestionContainer.LivenessProbe = &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
Exec: &corev1.ExecAction{
Command: []string{
defaultGRPCHealthCheckProbe,
fmt.Sprintf("-addr=:%d", consts.DefaultSuggestionPort),
fmt.Sprintf("-service=%s", consts.DefaultGRPCService),
},
GRPC: &corev1.GRPCAction{
Port: consts.DefaultSuggestionPort,
Service: &consts.DefaultGRPCService,
},
},
// Ref https://srcco.de/posts/kubernetes-liveness-probes-are-dangerous.html
Expand Down
18 changes: 6 additions & 12 deletions pkg/controller.v1beta1/suggestion/composer/composer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -817,25 +817,19 @@ func newFakeContainers() []corev1.Container {
},
ReadinessProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
Exec: &corev1.ExecAction{
Command: []string{
defaultGRPCHealthCheckProbe,
fmt.Sprintf("-addr=:%d", consts.DefaultSuggestionPort),
fmt.Sprintf("-service=%s", consts.DefaultGRPCService),
},
GRPC: &corev1.GRPCAction{
Port: consts.DefaultSuggestionPort,
Service: &consts.DefaultGRPCService,
},
},
InitialDelaySeconds: defaultInitialDelaySeconds,
PeriodSeconds: defaultPeriodForReady,
},
LivenessProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
Exec: &corev1.ExecAction{
Command: []string{
defaultGRPCHealthCheckProbe,
fmt.Sprintf("-addr=:%d", consts.DefaultSuggestionPort),
fmt.Sprintf("-service=%s", consts.DefaultGRPCService),
},
GRPC: &corev1.GRPCAction{
Port: consts.DefaultSuggestionPort,
Service: &consts.DefaultGRPCService,
},
},
InitialDelaySeconds: defaultInitialDelaySeconds,
Expand Down