Skip to content
This repository has been archived by the owner on Jun 19, 2022. It is now read-only.

Tune broker components #1269

Merged
merged 3 commits into from
Jun 12, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions pkg/broker/handler/providers.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package handler
import (
"context"
"net/http"
"time"

"cloud.google.com/go/pubsub"
cepubsub "github.com/cloudevents/sdk-go/protocol/pubsub/v2"
Expand All @@ -37,6 +38,12 @@ var (

// DefaultHTTPClient is an HTTP client whose transport is an ochttp.Transport
// wrapping a tuned http.Transport, with tracecontext.HTTPFormat used for
// propagation.
// NOTE(review): no Client.Timeout is set here; presumably callers bound each
// request with a context deadline — confirm.
DefaultHTTPClient = &http.Client{
Transport: &ochttp.Transport{
Base: &http.Transport{
// Cap on idle (keep-alive) connections kept across all hosts.
MaxIdleConns: 1000,
// Cap on idle (keep-alive) connections kept per host.
MaxIdleConnsPerHost: 500,
// Cap on total connections per host (dialing, active and idle).
MaxConnsPerHost: 500,
// Idle connections are closed after remaining idle for this long.
IdleConnTimeout: 30 * time.Second,
},
Propagation: &tracecontext.HTTPFormat{},
},
}
Expand Down
20 changes: 15 additions & 5 deletions pkg/reconciler/brokercell/brokercell.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ func (r *Reconciler) makeIngressHPAArgs(bc *intv1alpha1.BrokerCell) resources.Au
ComponentName: resources.IngressName,
BrokerCell: bc,
AvgCPUUtilization: 95,
AvgMemoryUsage: "450Mi",
AvgMemoryUsage: "700Mi",
MaxReplicas: 10,
}
}
Expand All @@ -246,8 +246,13 @@ func (r *Reconciler) makeFanoutHPAArgs(bc *intv1alpha1.BrokerCell) resources.Aut
ComponentName: resources.FanoutName,
BrokerCell: bc,
AvgCPUUtilization: 95,
AvgMemoryUsage: "900Mi",
MaxReplicas: 20,
// The memory limit is set to 3000Mi, mainly to guard against OOM caused by
// surging memory usage.
// The HPA target is set to only half of that limit so that, when memory
// usage surges, the HPA has enough time to kick in.
// See: https://github.com/google/knative-gcp/issues/1265
AvgMemoryUsage: "1500Mi",
MaxReplicas: 10,
}
}

Expand All @@ -268,8 +273,13 @@ func (r *Reconciler) makeRetryHPAArgs(bc *intv1alpha1.BrokerCell) resources.Auto
ComponentName: resources.RetryName,
BrokerCell: bc,
AvgCPUUtilization: 95,
AvgMemoryUsage: "1400Mi",
MaxReplicas: 20,
// The memory limit is set to 3000Mi, mainly to guard against OOM caused by
// surging memory usage.
// The HPA target is set to only half of that limit so that, when memory
// usage surges, the HPA has enough time to kick in.
// See: https://github.com/google/knative-gcp/issues/1265
AvgMemoryUsage: "1500Mi",
MaxReplicas: 10,
}
}

Expand Down
20 changes: 12 additions & 8 deletions pkg/reconciler/brokercell/resources/deployments.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,11 @@ func MakeIngressDeployment(args IngressArgs) *appsv1.Deployment {
InitialDelaySeconds: 5,
PeriodSeconds: 2,
SuccessThreshold: 1,
TimeoutSeconds: 1,
TimeoutSeconds: 5,
}
container.Resources = corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceMemory: resource.MustParse("500Mi"),
corev1.ResourceMemory: resource.MustParse("1000Mi"),
},
Requests: corev1.ResourceList{
corev1.ResourceMemory: resource.MustParse("500Mi"),
Expand All @@ -67,10 +67,10 @@ func MakeFanoutDeployment(args FanoutArgs) *appsv1.Deployment {
container := containerTemplate(args.Args)
container.Resources = corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceMemory: resource.MustParse("1000Mi"),
corev1.ResourceMemory: resource.MustParse("3000Mi"),
},
Requests: corev1.ResourceList{
corev1.ResourceMemory: resource.MustParse("1000Mi"),
corev1.ResourceMemory: resource.MustParse("500Mi"),
corev1.ResourceCPU: resource.MustParse("1500m"),
},
}
Expand All @@ -80,6 +80,10 @@ func MakeFanoutDeployment(args FanoutArgs) *appsv1.Deployment {
ContainerPort: handler.DefaultHealthCheckPort,
},
)
container.Env = append(container.Env, corev1.EnvVar{
Name: "MAX_CONCURRENCY_PER_EVENT",
Value: "100",
})
container.LivenessProbe = &corev1.Probe{
Handler: corev1.Handler{
HTTPGet: &corev1.HTTPGetAction{
Expand All @@ -92,7 +96,7 @@ func MakeFanoutDeployment(args FanoutArgs) *appsv1.Deployment {
InitialDelaySeconds: 15,
PeriodSeconds: 15,
SuccessThreshold: 1,
TimeoutSeconds: 1,
TimeoutSeconds: 5,
}
return deploymentTemplate(args.Args, []corev1.Container{container})
}
Expand All @@ -102,10 +106,10 @@ func MakeRetryDeployment(args RetryArgs) *appsv1.Deployment {
container := containerTemplate(args.Args)
container.Resources = corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceMemory: resource.MustParse("1500Mi"),
corev1.ResourceMemory: resource.MustParse("3000Mi"),
},
Requests: corev1.ResourceList{
corev1.ResourceMemory: resource.MustParse("1500Mi"),
corev1.ResourceMemory: resource.MustParse("500Mi"),
corev1.ResourceCPU: resource.MustParse("1000m"),
},
}
Expand All @@ -127,7 +131,7 @@ func MakeRetryDeployment(args RetryArgs) *appsv1.Deployment {
InitialDelaySeconds: 15,
PeriodSeconds: 15,
SuccessThreshold: 1,
TimeoutSeconds: 1,
TimeoutSeconds: 5,
}
return deploymentTemplate(args.Args, []corev1.Container{container})
}
Expand Down
8 changes: 5 additions & 3 deletions pkg/reconciler/brokercell/testingdata/fanout_deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ spec:
initialDelaySeconds: 15
periodSeconds: 15
successThreshold: 1
timeoutSeconds: 1
timeoutSeconds: 5
env:
- name: GOOGLE_APPLICATION_CREDENTIALS
value: /var/secrets/google/key.json
Expand All @@ -65,17 +65,19 @@ spec:
value: config-observability
- name: METRICS_DOMAIN
value: knative.dev/internal/eventing
- name: MAX_CONCURRENCY_PER_EVENT
value: "100"
volumeMounts:
- name: broker-config
mountPath: /var/run/cloud-run-events/broker
- name: google-broker-key
mountPath: /var/secrets/google
resources:
limits:
memory: 1000Mi
memory: 3000Mi
requests:
cpu: 1500m
memory: 1000Mi
memory: 500Mi
ports:
- name: metrics
containerPort: 9090
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ spec:
initialDelaySeconds: 15
periodSeconds: 15
successThreshold: 1
timeoutSeconds: 1
timeoutSeconds: 5
env:
- name: GOOGLE_APPLICATION_CREDENTIALS
value: /var/secrets/google/key.json
Expand All @@ -66,17 +66,19 @@ spec:
value: config-observability
- name: METRICS_DOMAIN
value: knative.dev/internal/eventing
- name: MAX_CONCURRENCY_PER_EVENT
value: "100"
volumeMounts:
- name: broker-config
mountPath: /var/run/cloud-run-events/broker
- name: google-broker-key
mountPath: /var/secrets/google
resources:
limits:
memory: 1000Mi
memory: 3000Mi
requests:
cpu: 1500m
memory: 1000Mi
memory: 500Mi
ports:
- name: metrics
containerPort: 9090
Expand Down
4 changes: 2 additions & 2 deletions pkg/reconciler/brokercell/testingdata/fanout_hpa.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ spec:
kind: Deployment
name: test-brokercell-brokercell-fanout
minReplicas: 1
maxReplicas: 20
maxReplicas: 10
metrics:
- type: Resource
resource:
Expand All @@ -44,4 +44,4 @@ spec:
name: memory
target:
type: AverageValue
averageValue: 900Mi
averageValue: 1500Mi
4 changes: 2 additions & 2 deletions pkg/reconciler/brokercell/testingdata/ingress_deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ spec:
initialDelaySeconds: 5
periodSeconds: 2
successThreshold: 1
timeoutSeconds: 1
timeoutSeconds: 5
env:
- name: GOOGLE_APPLICATION_CREDENTIALS
value: /var/secrets/google/key.json
Expand All @@ -74,7 +74,7 @@ spec:
mountPath: /var/secrets/google
resources:
limits:
memory: 500Mi
memory: 1000Mi
requests:
cpu: 1000m
memory: 500Mi
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ spec:
initialDelaySeconds: 5
periodSeconds: 2
successThreshold: 1
timeoutSeconds: 1
timeoutSeconds: 5
env:
- name: GOOGLE_APPLICATION_CREDENTIALS
value: /var/secrets/google/key.json
Expand All @@ -75,7 +75,7 @@ spec:
mountPath: /var/secrets/google
resources:
limits:
memory: 500Mi
memory: 1000Mi
requests:
cpu: 1000m
memory: 500Mi
Expand Down
2 changes: 1 addition & 1 deletion pkg/reconciler/brokercell/testingdata/ingress_hpa.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,4 @@ spec:
name: memory
target:
type: AverageValue
averageValue: 450Mi
averageValue: 700Mi
6 changes: 3 additions & 3 deletions pkg/reconciler/brokercell/testingdata/retry_deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ spec:
initialDelaySeconds: 15
periodSeconds: 15
successThreshold: 1
timeoutSeconds: 1
timeoutSeconds: 5
env:
- name: GOOGLE_APPLICATION_CREDENTIALS
value: /var/secrets/google/key.json
Expand All @@ -72,10 +72,10 @@ spec:
mountPath: /var/secrets/google
resources:
limits:
memory: 1500Mi
memory: 3000Mi
requests:
cpu: 1000m
memory: 1500Mi
memory: 500Mi
ports:
- name: metrics
containerPort: 9090
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ spec:
initialDelaySeconds: 15
periodSeconds: 15
successThreshold: 1
timeoutSeconds: 1
timeoutSeconds: 5
env:
- name: GOOGLE_APPLICATION_CREDENTIALS
value: /var/secrets/google/key.json
Expand All @@ -73,10 +73,10 @@ spec:
mountPath: /var/secrets/google
resources:
limits:
memory: 1500Mi
memory: 3000Mi
requests:
cpu: 1000m
memory: 1500Mi
memory: 500Mi
ports:
- name: metrics
containerPort: 9090
Expand Down
4 changes: 2 additions & 2 deletions pkg/reconciler/brokercell/testingdata/retry_hpa.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ spec:
kind: Deployment
name: test-brokercell-brokercell-retry
minReplicas: 1
maxReplicas: 20
maxReplicas: 10
metrics:
- type: Resource
resource:
Expand All @@ -44,4 +44,4 @@ spec:
name: memory
target:
type: AverageValue
averageValue: 1400Mi
averageValue: 1500Mi