diff --git a/README.md b/README.md index 5b0b595ff7..4471b6d48e 100644 --- a/README.md +++ b/README.md @@ -147,6 +147,17 @@ Default: Unset Valid Values: stdout or a file path Specifies where to write the logging output. Either to stdout or to override the default file. +`DISABLE_INTROSPECTION` +Type: Boolean +Default: `false` +Specifies whether introspection endpoints are disabled on a worker node. Setting this to `true` will reduce the debugging +information we can get from the node when running the `aws-cni-support.sh` script. + +`DISABLE_METRICS` +Type: Boolean +Default: `false` +Specifies whether Prometheus metrics endpoints are disabled on a worker node. + ### Notes `L-IPAMD`(aws-node daemonSet) running on every worker node requires access to kubernetes API server. If it can **not** reach kubernetes API server, ipamD will exit and CNI will not be able to get any IP address for Pods. Here is a way to confirm if `L-IPAMD` has access to the kubernetes API server. diff --git a/config/v1.3/aws-k8s-cni.yaml b/config/v1.3/aws-k8s-cni.yaml index 33956b84c7..7cc05ac10b 100644 --- a/config/v1.3/aws-k8s-cni.yaml +++ b/config/v1.3/aws-k8s-cni.yaml @@ -69,7 +69,7 @@ spec: tolerations: - operator: Exists containers: - - image: 602401143452.dkr.ecr.us-west-2.amazonaws.com/amazon-k8s-cni:v1.3.3 + - image: 602401143452.dkr.ecr.us-west-2.amazonaws.com/amazon-k8s-cni:v1.3.4 imagePullPolicy: Always ports: - containerPort: 61678 @@ -126,5 +126,3 @@ spec: plural: eniconfigs singular: eniconfig kind: ENIConfig - - diff --git a/ipamd/introspect.go b/ipamd/introspect.go index 8a58510d4a..f58f07b908 100644 --- a/ipamd/introspect.go +++ b/ipamd/introspect.go @@ -16,20 +16,22 @@ package ipamd import ( "encoding/json" "net/http" + "os" "strconv" "sync" "time" - log "github.com/cihub/seelog" - "github.com/prometheus/client_golang/prometheus/promhttp" - "github.com/aws/amazon-vpc-cni-k8s/pkg/networkutils" "github.com/aws/amazon-vpc-cni-k8s/pkg/utils" + log "github.com/cihub/seelog" ) const ( 
- // IntrospectionPort is the port for ipamd introspection - IntrospectionPort = 61678 + // introspectionAddress is listening on localhost 61679 for ipamd introspection + introspectionAddress = "127.0.0.1:61679" + + // Environment variable to disable the introspection endpoints + envDisableIntrospection = "DISABLE_INTROSPECTION" ) type rootResponse struct { @@ -37,43 +39,43 @@ type rootResponse struct { } // LoggingHandler is a object for handling http request -type LoggingHandler struct{ h http.Handler } - -// NewLoggingHandler creates a new LoggingHandler object. -func NewLoggingHandler(handler http.Handler) LoggingHandler { - return LoggingHandler{h: handler} +type LoggingHandler struct { + h http.Handler } func (lh LoggingHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { - log.Info("Handling http request", "method", r.Method, "from", r.RemoteAddr, "uri", r.RequestURI) + log.Info("Handling http request: ", ", method: ", r.Method, ", from: ", r.RemoteAddr, ", URI: ", r.RequestURI) lh.h.ServeHTTP(w, r) } -// SetupHTTP sets up ipamd introspection service endpoint -func (c *IPAMContext) SetupHTTP() { - server := c.setupServer() +// ServeIntrospection sets up ipamd introspection endpoints +func (c *IPAMContext) ServeIntrospection() { + if disableIntrospection() { + log.Info("Introspection endpoints disabled") + return + } + log.Info("Serving introspection endpoints on ", introspectionAddress) + server := c.setupIntrospectionServer() for { once := sync.Once{} - utils.RetryWithBackoff(utils.NewSimpleBackoff(time.Second, time.Minute, 0.2, 2), func() error { - // TODO, make this cancellable and use the passed in context; for - // now, not critical if this gets interrupted + _ = utils.RetryWithBackoff(utils.NewSimpleBackoff(time.Second, time.Minute, 0.2, 2), func() error { err := server.ListenAndServe() once.Do(func() { - log.Error("Error running http api", "err", err) + log.Error("Error running http API: ", err) }) return err }) } } -func (c *IPAMContext) 
setupServer() *http.Server { +func (c *IPAMContext) setupIntrospectionServer() *http.Server { serverFunctions := map[string]func(w http.ResponseWriter, r *http.Request){ "/v1/enis": eniV1RequestHandler(c), - "/v1/pods": podV1RequestHandler(c), - "/v1/networkutils-env-settings": networkEnvV1RequestHandler(c), - "/v1/ipamd-env-settings": ipamdEnvV1RequestHandler(c), "/v1/eni-configs": eniConfigRequestHandler(c), + "/v1/pods": podV1RequestHandler(c), + "/v1/networkutils-env-settings": networkEnvV1RequestHandler(), + "/v1/ipamd-env-settings": ipamdEnvV1RequestHandler(), } paths := make([]string, 0, len(serverFunctions)) for path := range serverFunctions { @@ -84,11 +86,11 @@ func (c *IPAMContext) setupServer() *http.Server { availableCommandResponse, err := json.Marshal(&availableCommands) if err != nil { - log.Error("Failed to Marshal: %v", err) + log.Errorf("Failed to marshal: %v", err) } defaultHandler := func(w http.ResponseWriter, r *http.Request) { - w.Write(availableCommandResponse) + logErr(w.Write(availableCommandResponse)) } serveMux := http.NewServeMux() @@ -96,19 +98,17 @@ func (c *IPAMContext) setupServer() *http.Server { for key, fn := range serverFunctions { serveMux.HandleFunc(key, fn) } - serveMux.Handle("/metrics", promhttp.Handler()) // Log all requests and then pass through to serveMux loggingServeMux := http.NewServeMux() loggingServeMux.Handle("/", LoggingHandler{serveMux}) server := &http.Server{ - Addr: ":" + strconv.Itoa(IntrospectionPort), + Addr: introspectionAddress, Handler: loggingServeMux, ReadTimeout: 5 * time.Second, WriteTimeout: 5 * time.Second, } - return server } @@ -116,11 +116,11 @@ func eniV1RequestHandler(ipam *IPAMContext) func(http.ResponseWriter, *http.Requ return func(w http.ResponseWriter, r *http.Request) { responseJSON, err := json.Marshal(ipam.dataStore.GetENIInfos()) if err != nil { - log.Error("Failed to marshal ENI data: %v", err) + log.Errorf("Failed to marshal ENI data: %v", err) http.Error(w, 
http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError) return } - w.Write(responseJSON) + logErr(w.Write(responseJSON)) } } @@ -128,11 +128,11 @@ func podV1RequestHandler(ipam *IPAMContext) func(http.ResponseWriter, *http.Requ return func(w http.ResponseWriter, r *http.Request) { responseJSON, err := json.Marshal(ipam.dataStore.GetPodInfos()) if err != nil { - log.Error("Failed to marshal pod data: %v", err) + log.Errorf("Failed to marshal pod data: %v", err) http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError) return } - w.Write(responseJSON) + logErr(w.Write(responseJSON)) } } @@ -140,40 +140,56 @@ func eniConfigRequestHandler(ipam *IPAMContext) func(http.ResponseWriter, *http. return func(w http.ResponseWriter, r *http.Request) { responseJSON, err := json.Marshal(ipam.eniConfig.Getter()) if err != nil { - log.Error("Failed to marshal pod data: %v", err) + log.Errorf("Failed to marshal ENI config: %v", err) http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError) return } - w.Write(responseJSON) + logErr(w.Write(responseJSON)) } } -func networkEnvV1RequestHandler(ipam *IPAMContext) func(http.ResponseWriter, *http.Request) { +func networkEnvV1RequestHandler() func(http.ResponseWriter, *http.Request) { return func(w http.ResponseWriter, r *http.Request) { responseJSON, err := json.Marshal(networkutils.GetConfigForDebug()) if err != nil { - log.Error("Failed to marshal env var data: %v", err) + log.Errorf("Failed to marshal network env var data: %v", err) http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError) return } - w.Write(responseJSON) + logErr(w.Write(responseJSON)) } } -func ipamdEnvV1RequestHandler(ipam *IPAMContext) func(http.ResponseWriter, *http.Request) { +func ipamdEnvV1RequestHandler() func(http.ResponseWriter, *http.Request) { return func(w http.ResponseWriter, r *http.Request) { responseJSON, err := 
json.Marshal(GetConfigForDebug()) if err != nil { - log.Error("Failed to marshal env var data: %v", err) + log.Errorf("Failed to marshal ipamd env var data: %v", err) http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError) return } - w.Write(responseJSON) + logErr(w.Write(responseJSON)) } } -func metricsHandler(ipam *IPAMContext) func(http.ResponseWriter, *http.Request) { - return func(w http.ResponseWriter, r *http.Request) { - promhttp.Handler() +func logErr(_ int, err error) { + if err != nil { + log.Errorf("Write failed: %v", err) + } +} + +// disableIntrospection returns true if we should disable the introspection +func disableIntrospection() bool { + return getEnvBoolWithDefault(envDisableIntrospection, false) +} + +func getEnvBoolWithDefault(envName string, def bool) bool { + if strValue := os.Getenv(envName); strValue != "" { + parsedValue, err := strconv.ParseBool(strValue) + if err == nil { + return parsedValue + } + log.Errorf("Failed to parse %s, using default `%t`: %v", envName, def, err.Error()) } + return def } diff --git a/ipamd/metrics.go b/ipamd/metrics.go new file mode 100644 index 0000000000..8c36df8b31 --- /dev/null +++ b/ipamd/metrics.go @@ -0,0 +1,72 @@ +// Copyright 2014-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. 
+ +package ipamd + +import ( + "net/http" + "strconv" + "sync" + "time" + + log "github.com/cihub/seelog" + "github.com/prometheus/client_golang/prometheus/promhttp" + + "github.com/aws/amazon-vpc-cni-k8s/pkg/utils" +) + +const ( + // metricsPort is the port for prometheus metrics + metricsPort = 61678 + + // Environment variable to disable the metrics endpoint on 61678 + envDisableMetrics = "DISABLE_METRICS" +) + +// ServeMetrics sets up ipamd metrics and introspection endpoints +func (c *IPAMContext) ServeMetrics() { + if disableMetrics() { + log.Info("Metrics endpoint disabled") + return + } + + log.Info("Serving metrics on port ", metricsPort) + server := c.setupMetricsServer() + for { + once := sync.Once{} + _ = utils.RetryWithBackoff(utils.NewSimpleBackoff(time.Second, time.Minute, 0.2, 2), func() error { + err := server.ListenAndServe() + once.Do(func() { + log.Error("Error running http API: ", err) + }) + return err + }) + } +} + +func (c *IPAMContext) setupMetricsServer() *http.Server { + serveMux := http.NewServeMux() + serveMux.Handle("/metrics", promhttp.Handler()) + server := &http.Server{ + Addr: ":" + strconv.Itoa(metricsPort), + Handler: serveMux, + ReadTimeout: 5 * time.Second, + WriteTimeout: 5 * time.Second, + } + return server +} + +// disableMetrics returns true if we should disable metrics +func disableMetrics() bool { + return getEnvBoolWithDefault(envDisableMetrics, false) +} diff --git a/main.go b/main.go index 3f2d0d45e2..ee284947a3 100644 --- a/main.go +++ b/main.go @@ -60,13 +60,24 @@ func _main() int { awsK8sAgent, err := ipamd.New(discoverController, eniConfigController) if err != nil { - log.Error("initialization failure", err) + log.Error("Initialization failure ", err) return 1 } + // Pool manager go awsK8sAgent.StartNodeIPPoolManager() - go awsK8sAgent.SetupHTTP() - awsK8sAgent.RunRPCHandler() + + // Prometheus metrics + go awsK8sAgent.ServeMetrics() + + // CNI introspection endpoints + go awsK8sAgent.ServeIntrospection() + + err = 
awsK8sAgent.RunRPCHandler() + if err != nil { + log.Error("Failed to set up gRPC handler ", err) + return 1 + } return 0 } diff --git a/scripts/aws-cni-support.sh b/scripts/aws-cni-support.sh index 4b1ec43ddd..8b374373b3 100755 --- a/scripts/aws-cni-support.sh +++ b/scripts/aws-cni-support.sh @@ -17,18 +17,18 @@ # Set language to C to make sorting consistent among different environments. export LANG=C -set -euo pipefail +set -uo pipefail LOG_DIR="/var/log/aws-routed-eni" mkdir -p ${LOG_DIR} -# collecting L-IPAMD introspection data -curl http://localhost:61678/v1/enis > ${LOG_DIR}/eni.out -curl http://localhost:61678/v1/pods > ${LOG_DIR}/pod.out -curl http://localhost:61678/v1/networkutils-env-settings > ${LOG_DIR}/networkutils-env.out -curl http://localhost:61678/v1/ipamd-env-settings > ${LOG_DIR}/ipamd-env.out -curl http://localhost:61678/v1/eni-configs > ${LOG_DIR}/eni-configs.out +# Collecting L-IPAMD introspection data +curl http://localhost:61679/v1/enis > ${LOG_DIR}/eni.out +curl http://localhost:61679/v1/pods > ${LOG_DIR}/pod.out +curl http://localhost:61679/v1/networkutils-env-settings > ${LOG_DIR}/networkutils-env.out +curl http://localhost:61679/v1/ipamd-env-settings > ${LOG_DIR}/ipamd-env.out +curl http://localhost:61679/v1/eni-configs > ${LOG_DIR}/eni-configs.out -# metrics +# Metrics curl http://localhost:61678/metrics 2>&1 > ${LOG_DIR}/metrics.out # Collecting kubelet introspection data @@ -80,4 +80,4 @@ for f in /proc/sys/net/ipv4/conf/*/rp_filter; do echo "$f = $(cat ${f})" >> ${LOG_DIR}/sysctls.out done -tar -cvzf ${LOG_DIR}/aws-cni-support.tar.gz ${LOG_DIR}/ +tar --exclude 'aws-cni-support.tar.gz' -cvzf ${LOG_DIR}/aws-cni-support.tar.gz ${LOG_DIR}/