Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add flag to disable metrics and introspection #436

Merged
merged 1 commit into from
May 2, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,17 @@ Default: Unset
Valid Values: stdout or a file path
Specifies where to write the logging output. Either to stdout or to override the default file.

`DISABLE_INTROSPECTION`
Type: Boolean
Default: `false`
Specifies whether introspection endpoints are disabled on a worker node. Setting this to `true` will reduce the debugging
information we can get from the node when running the `aws-cni-support.sh` script.

`DISABLE_METRICS`
Type: Boolean
Default: `false`
Specifies whether Prometheus metrics endpoints are disabled on a worker node.

### Notes

`L-IPAMD` (aws-node daemonSet) running on every worker node requires access to the Kubernetes API server. If it **cannot** reach the Kubernetes API server, ipamD will exit and CNI will not be able to get any IP address for Pods. Here is a way to confirm if `L-IPAMD` has access to the Kubernetes API server.
Expand Down
4 changes: 1 addition & 3 deletions config/v1.3/aws-k8s-cni.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ spec:
tolerations:
- operator: Exists
containers:
- image: 602401143452.dkr.ecr.us-west-2.amazonaws.com/amazon-k8s-cni:v1.3.3
- image: 602401143452.dkr.ecr.us-west-2.amazonaws.com/amazon-k8s-cni:v1.3.4
imagePullPolicy: Always
ports:
- containerPort: 61678
Expand Down Expand Up @@ -126,5 +126,3 @@ spec:
plural: eniconfigs
singular: eniconfig
kind: ENIConfig


100 changes: 58 additions & 42 deletions ipamd/introspect.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,64 +16,66 @@ package ipamd
import (
"encoding/json"
"net/http"
"os"
"strconv"
"sync"
"time"

log "github.com/cihub/seelog"
"github.com/prometheus/client_golang/prometheus/promhttp"

"github.com/aws/amazon-vpc-cni-k8s/pkg/networkutils"
"github.com/aws/amazon-vpc-cni-k8s/pkg/utils"
log "github.com/cihub/seelog"
)

const (
// IntrospectionPort is the port for ipamd introspection
IntrospectionPort = 61678
// introspectionAddress is listening on localhost 61679 for ipamd introspection
introspectionAddress = "127.0.0.1:61679"

// Environment variable to disable the introspection endpoints
envDisableIntrospection = "DISABLE_INTROSPECTION"
)

type rootResponse struct {
AvailableCommands []string
}

// LoggingHandler is a object for handling http request
type LoggingHandler struct{ h http.Handler }

// NewLoggingHandler creates a new LoggingHandler object.
func NewLoggingHandler(handler http.Handler) LoggingHandler {
return LoggingHandler{h: handler}
type LoggingHandler struct {
h http.Handler
}

func (lh LoggingHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
log.Info("Handling http request", "method", r.Method, "from", r.RemoteAddr, "uri", r.RequestURI)
log.Info("Handling http request: ", ", method: ", r.Method, ", from: ", r.RemoteAddr, ", URI: ", r.RequestURI)
lh.h.ServeHTTP(w, r)
}

// SetupHTTP sets up ipamd introspection service endpoint
func (c *IPAMContext) SetupHTTP() {
server := c.setupServer()
// ServeIntrospection sets up ipamd introspection endpoints
func (c *IPAMContext) ServeIntrospection() {
if disableIntrospection() {
log.Info("Introspection endpoints disabled")
return
}

log.Info("Serving introspection endpoints on ", introspectionAddress)
server := c.setupIntrospectionServer()
for {
once := sync.Once{}
utils.RetryWithBackoff(utils.NewSimpleBackoff(time.Second, time.Minute, 0.2, 2), func() error {
// TODO, make this cancellable and use the passed in context; for
// now, not critical if this gets interrupted
_ = utils.RetryWithBackoff(utils.NewSimpleBackoff(time.Second, time.Minute, 0.2, 2), func() error {
err := server.ListenAndServe()
once.Do(func() {
log.Error("Error running http api", "err", err)
log.Error("Error running http API: ", err)
})
return err
})
}
}

func (c *IPAMContext) setupServer() *http.Server {
func (c *IPAMContext) setupIntrospectionServer() *http.Server {
serverFunctions := map[string]func(w http.ResponseWriter, r *http.Request){
"/v1/enis": eniV1RequestHandler(c),
"/v1/pods": podV1RequestHandler(c),
"/v1/networkutils-env-settings": networkEnvV1RequestHandler(c),
"/v1/ipamd-env-settings": ipamdEnvV1RequestHandler(c),
"/v1/eni-configs": eniConfigRequestHandler(c),
"/v1/pods": podV1RequestHandler(c),
"/v1/networkutils-env-settings": networkEnvV1RequestHandler(),
"/v1/ipamd-env-settings": ipamdEnvV1RequestHandler(),
}
paths := make([]string, 0, len(serverFunctions))
for path := range serverFunctions {
Expand All @@ -84,96 +86,110 @@ func (c *IPAMContext) setupServer() *http.Server {
availableCommandResponse, err := json.Marshal(&availableCommands)

if err != nil {
log.Error("Failed to Marshal: %v", err)
log.Errorf("Failed to marshal: %v", err)
}

defaultHandler := func(w http.ResponseWriter, r *http.Request) {
w.Write(availableCommandResponse)
logErr(w.Write(availableCommandResponse))
}

serveMux := http.NewServeMux()
serveMux.HandleFunc("/", defaultHandler)
for key, fn := range serverFunctions {
serveMux.HandleFunc(key, fn)
}
serveMux.Handle("/metrics", promhttp.Handler())

// Log all requests and then pass through to serveMux
loggingServeMux := http.NewServeMux()
loggingServeMux.Handle("/", LoggingHandler{serveMux})

server := &http.Server{
Addr: ":" + strconv.Itoa(IntrospectionPort),
Addr: introspectionAddress,
Handler: loggingServeMux,
ReadTimeout: 5 * time.Second,
WriteTimeout: 5 * time.Second,
}

return server
}

func eniV1RequestHandler(ipam *IPAMContext) func(http.ResponseWriter, *http.Request) {
return func(w http.ResponseWriter, r *http.Request) {
responseJSON, err := json.Marshal(ipam.dataStore.GetENIInfos())
if err != nil {
log.Error("Failed to marshal ENI data: %v", err)
log.Errorf("Failed to marshal ENI data: %v", err)
http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
return
}
w.Write(responseJSON)
logErr(w.Write(responseJSON))
}
}

func podV1RequestHandler(ipam *IPAMContext) func(http.ResponseWriter, *http.Request) {
return func(w http.ResponseWriter, r *http.Request) {
responseJSON, err := json.Marshal(ipam.dataStore.GetPodInfos())
if err != nil {
log.Error("Failed to marshal pod data: %v", err)
log.Errorf("Failed to marshal pod data: %v", err)
http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
return
}
w.Write(responseJSON)
logErr(w.Write(responseJSON))
}
}

func eniConfigRequestHandler(ipam *IPAMContext) func(http.ResponseWriter, *http.Request) {
return func(w http.ResponseWriter, r *http.Request) {
responseJSON, err := json.Marshal(ipam.eniConfig.Getter())
if err != nil {
log.Error("Failed to marshal pod data: %v", err)
log.Errorf("Failed to marshal ENI config: %v", err)
http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
return
}
w.Write(responseJSON)
logErr(w.Write(responseJSON))
}
}

func networkEnvV1RequestHandler(ipam *IPAMContext) func(http.ResponseWriter, *http.Request) {
func networkEnvV1RequestHandler() func(http.ResponseWriter, *http.Request) {
return func(w http.ResponseWriter, r *http.Request) {
responseJSON, err := json.Marshal(networkutils.GetConfigForDebug())
if err != nil {
log.Error("Failed to marshal env var data: %v", err)
log.Errorf("Failed to marshal network env var data: %v", err)
http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
return
}
w.Write(responseJSON)
logErr(w.Write(responseJSON))
}
}

func ipamdEnvV1RequestHandler(ipam *IPAMContext) func(http.ResponseWriter, *http.Request) {
func ipamdEnvV1RequestHandler() func(http.ResponseWriter, *http.Request) {
return func(w http.ResponseWriter, r *http.Request) {
responseJSON, err := json.Marshal(GetConfigForDebug())
if err != nil {
log.Error("Failed to marshal env var data: %v", err)
log.Errorf("Failed to marshal ipamd env var data: %v", err)
http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
return
}
w.Write(responseJSON)
logErr(w.Write(responseJSON))
}
}

func metricsHandler(ipam *IPAMContext) func(http.ResponseWriter, *http.Request) {
return func(w http.ResponseWriter, r *http.Request) {
promhttp.Handler()
func logErr(_ int, err error) {
if err != nil {
log.Errorf("Write failed: %v", err)
}
}

// disableIntrospection reports whether the introspection endpoints should be
// turned off. The decision comes from the DISABLE_INTROSPECTION environment
// variable and falls back to false (endpoints stay enabled) when it is unset
// or cannot be parsed as a boolean.
func disableIntrospection() bool {
	disabled := getEnvBoolWithDefault(envDisableIntrospection, false)
	return disabled
}

// getEnvBoolWithDefault looks up the environment variable envName and parses
// it with strconv.ParseBool.
//
// It returns def when the variable is unset or empty. When the value is
// present but is not a valid boolean, the failure is logged and def is
// returned as well.
func getEnvBoolWithDefault(envName string, def bool) bool {
	raw := os.Getenv(envName)
	if raw == "" {
		// Unset and empty are treated the same way: keep the default.
		return def
	}

	parsed, parseErr := strconv.ParseBool(raw)
	if parseErr != nil {
		log.Errorf("Failed to parse %s, using default `%t`: %v", envName, def, parseErr.Error())
		return def
	}
	return parsed
}
72 changes: 72 additions & 0 deletions ipamd/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// Copyright 2014-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"). You may
// not use this file except in compliance with the License. A copy of the
// License is located at
//
// http://aws.amazon.com/apache2.0/
//
// or in the "license" file accompanying this file. This file is distributed
// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
// express or implied. See the License for the specific language governing
// permissions and limitations under the License.

package ipamd

import (
"net/http"
"strconv"
"sync"
"time"

log "github.com/cihub/seelog"
"github.com/prometheus/client_golang/prometheus/promhttp"

"github.com/aws/amazon-vpc-cni-k8s/pkg/utils"
)

const (
// metricsPort is the TCP port on which the Prometheus "/metrics" endpoint
// is served.
metricsPort = 61678

// envDisableMetrics is the name of the environment variable that, when it
// parses as true, disables the metrics endpoint on port 61678.
envDisableMetrics = "DISABLE_METRICS"
)

// ServeMetrics serves the Prometheus metrics endpoint on metricsPort.
//
// If metrics are disabled through the DISABLE_METRICS environment variable it
// logs that fact and returns immediately. Otherwise it never returns: each
// time utils.RetryWithBackoff comes back, another round is started with a
// fresh backoff, so callers are expected to run it in its own goroutine.
func (c *IPAMContext) ServeMetrics() {
	if disableMetrics() {
		log.Info("Metrics endpoint disabled")
		return
	}

	log.Info("Serving metrics on port ", metricsPort)
	server := c.setupMetricsServer()
	for {
		// Only the first failure of each retry round is logged, so a
		// persistently failing listener does not flood the log.
		var logFirstFailure sync.Once
		listen := func() error {
			serveErr := server.ListenAndServe()
			logFirstFailure.Do(func() {
				log.Error("Error running http API: ", serveErr)
			})
			return serveErr
		}

		backoff := utils.NewSimpleBackoff(time.Second, time.Minute, 0.2, 2)
		// The result is intentionally dropped: the enclosing loop simply
		// starts over regardless of how the retry round ended.
		_ = utils.RetryWithBackoff(backoff, listen)
	}
}

// setupMetricsServer builds, but does not start, the HTTP server that exposes
// the Prometheus handler at "/metrics". The server listens on all interfaces
// at metricsPort and uses 5-second read and write timeouts.
func (c *IPAMContext) setupMetricsServer() *http.Server {
	const ioTimeout = 5 * time.Second

	mux := http.NewServeMux()
	mux.Handle("/metrics", promhttp.Handler())

	return &http.Server{
		Addr:         ":" + strconv.Itoa(metricsPort),
		Handler:      mux,
		ReadTimeout:  ioTimeout,
		WriteTimeout: ioTimeout,
	}
}

// disableMetrics reports whether the metrics endpoint should be turned off.
// The decision comes from the DISABLE_METRICS environment variable and falls
// back to false (metrics stay enabled) when it is unset or cannot be parsed
// as a boolean.
func disableMetrics() bool {
	disabled := getEnvBoolWithDefault(envDisableMetrics, false)
	return disabled
}
17 changes: 14 additions & 3 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,24 @@ func _main() int {
awsK8sAgent, err := ipamd.New(discoverController, eniConfigController)

if err != nil {
log.Error("initialization failure", err)
log.Error("Initialization failure ", err)
return 1
}

// Pool manager
go awsK8sAgent.StartNodeIPPoolManager()
go awsK8sAgent.SetupHTTP()
awsK8sAgent.RunRPCHandler()

// Prometheus metrics
go awsK8sAgent.ServeMetrics()

// CNI introspection endpoints
go awsK8sAgent.ServeIntrospection()

err = awsK8sAgent.RunRPCHandler()
if err != nil {
log.Error("Failed to set up gRPC handler ", err)
return 1
}

return 0
}
18 changes: 9 additions & 9 deletions scripts/aws-cni-support.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,18 @@
# Set language to C to make sorting consistent among different environments.
export LANG=C

set -euo pipefail
set -uo pipefail
LOG_DIR="/var/log/aws-routed-eni"
mkdir -p ${LOG_DIR}

# collecting L-IPAMD introspection data
curl http://localhost:61678/v1/enis > ${LOG_DIR}/eni.out
curl http://localhost:61678/v1/pods > ${LOG_DIR}/pod.out
curl http://localhost:61678/v1/networkutils-env-settings > ${LOG_DIR}/networkutils-env.out
curl http://localhost:61678/v1/ipamd-env-settings > ${LOG_DIR}/ipamd-env.out
curl http://localhost:61678/v1/eni-configs > ${LOG_DIR}/eni-configs.out
# Collecting L-IPAMD introspection data
curl http://localhost:61679/v1/enis > ${LOG_DIR}/eni.out
curl http://localhost:61679/v1/pods > ${LOG_DIR}/pod.out
curl http://localhost:61679/v1/networkutils-env-settings > ${LOG_DIR}/networkutils-env.out
curl http://localhost:61679/v1/ipamd-env-settings > ${LOG_DIR}/ipamd-env.out
curl http://localhost:61679/v1/eni-configs > ${LOG_DIR}/eni-configs.out

# metrics
# Metrics
curl http://localhost:61678/metrics 2>&1 > ${LOG_DIR}/metrics.out

# Collecting kubelet introspection data
Expand Down Expand Up @@ -80,4 +80,4 @@ for f in /proc/sys/net/ipv4/conf/*/rp_filter; do
echo "$f = $(cat ${f})" >> ${LOG_DIR}/sysctls.out
done

tar -cvzf ${LOG_DIR}/aws-cni-support.tar.gz ${LOG_DIR}/
tar --exclude 'aws-cni-support.tar.gz' -cvzf ${LOG_DIR}/aws-cni-support.tar.gz ${LOG_DIR}/