Skip to content

Commit

Permalink
Fetch Region and CLUSTER_ID information from cni-metrics-helper env v…
Browse files Browse the repository at this point in the history
…ariables if available

removed unnecessary logs

Update failing test

Updated ClusterRole permissions

Rename mType to metricType
Fetch Region only if not available

Remove redundant logging
  • Loading branch information
cgchinmay committed Nov 1, 2021
1 parent db4e993 commit 2d7f32d
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 47 deletions.
16 changes: 15 additions & 1 deletion cmd/cni-metrics-helper/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,23 @@ func main() {
}
}

// Fetch region, if using IRSA it be will auto injected as env variable in pod spec
// If not found then it will be empty, in which case we will try to fetch it from IMDS (existing approach)
// This can also mean that Cx is not using IRSA and we shouldn't enforce IRSA requirement
region, _ := os.LookupEnv("AWS_REGION")

// should be name/identifier for the cluster if specified
clusterID, _ := os.LookupEnv("AWS_CLUSTER_ID")

log.Infof("Using REGION=%s and CLUSTER_ID=%s", region, clusterID)

log.Infof("Starting CNIMetricsHelper. Sending metrics to CloudWatch: %v, LogLevel %s", options.submitCW, logConfig.LogLevel)

clientSet, err := k8sapi.GetKubeClientSet()
if err != nil {
log.Fatalf("Error Fetching Kubernetes Client: %s", err)
os.Exit(1)
}

rawK8SClient, err := k8sapi.CreateKubeClient()
if err != nil {
Expand All @@ -98,7 +112,7 @@ func main() {
var cw publisher.Publisher

if options.submitCW {
cw, err = publisher.New(ctx)
cw, err = publisher.New(ctx, region, clusterID)
if err != nil {
log.Fatalf("Failed to create publisher: %v", err)
}
Expand Down
10 changes: 5 additions & 5 deletions cmd/cni-metrics-helper/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -238,11 +238,11 @@ func postProcessingHistogram(convert metricsConvert, log logger.Logger) bool {
func processMetric(family *dto.MetricFamily, convert metricsConvert, log logger.Logger) (bool, error) {
resetDetected := false

mType := family.GetType()
metricType := family.GetType()
for _, metric := range family.GetMetric() {
for _, act := range convert.actions {
if act.matchFunc(metric) {
switch mType {
switch metricType {
case dto.MetricType_GAUGE:
processGauge(metric, &act)
case dto.MetricType_HISTOGRAM:
Expand All @@ -256,7 +256,7 @@ func processMetric(family *dto.MetricFamily, convert metricsConvert, log logger.
}
}

switch mType {
switch metricType {
case dto.MetricType_COUNTER:
curResetDetected := postProcessingCounter(convert, log)
if curResetDetected {
Expand Down Expand Up @@ -316,9 +316,9 @@ func filterMetrics(originalMetrics map[string]*dto.MetricFamily,
func produceCloudWatchMetrics(t metricsTarget, families map[string]*dto.MetricFamily, convertDef map[string]metricsConvert, cw publisher.Publisher) {
for key, family := range families {
convertMetrics := convertDef[key]
mType := family.GetType()
metricType := family.GetType()
for _, action := range convertMetrics.actions {
switch mType {
switch metricType {
case dto.MetricType_COUNTER:
if t.submitCloudWatch() {
dataPoint := &cloudwatch.MetricDatum{
Expand Down
33 changes: 4 additions & 29 deletions config/master/cni-metrics-helper.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,37 +18,9 @@ metadata:
rules:
- apiGroups: [""]
resources:
- nodes
- pods
- pods/proxy
- services
- resourcequotas
- replicationcontrollers
- limitranges
- persistentvolumeclaims
- persistentvolumes
- namespaces
- endpoints
verbs: ["list", "watch", "get"]
- apiGroups: ["extensions"]
resources:
- daemonsets
- deployments
- replicasets
verbs: ["list", "watch"]
- apiGroups: ["apps"]
resources:
- statefulsets
verbs: ["list", "watch"]
- apiGroups: ["batch"]
resources:
- cronjobs
- jobs
verbs: ["list", "watch"]
- apiGroups: ["autoscaling"]
resources:
- horizontalpodautoscalers
verbs: ["list", "watch"]
verbs: ["get", "watch", "list"]
---
# Source: cni-metrics-helper/templates/clusterrolebinding.yaml
apiVersion: rbac.authorization.k8s.io/v1
Expand Down Expand Up @@ -89,6 +61,9 @@ spec:
- env:
- name: USE_CLOUDWATCH
value: "true"
# Optional: Should be ClusterName/ClusterIdentifier used as the metric dimension
- name: AWS_CLUSTER_ID
value: ""
name: cni-metrics-helper
image: "602401143452.dkr.ecr.us-west-2.amazonaws.com/cni-metrics-helper:v1.9.3"
serviceAccountName: cni-metrics-helper
36 changes: 24 additions & 12 deletions pkg/publisher/publisher.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,21 +85,33 @@ type cloudWatchPublisher struct {
}

// New returns a new instance of `Publisher`
func New(ctx context.Context) (Publisher, error) {
func New(ctx context.Context, region string, clusterID string) (Publisher, error) {
sess := awssession.New()
// Get cluster-ID
ec2Client, err := ec2wrapper.NewMetricsClient()
if err != nil {
return nil, errors.Wrap(err, "publisher: unable to obtain EC2 service client")
}
clusterID := getClusterID(ec2Client)

// Get ec2metadata client
ec2MetadataClient := ec2metadatawrapper.New(sess)
// Case 1: Cx not using IRSA, we need to get region and clusterID using IMDS (existing approach)
// Case 2: Cx using IRSA but not specified clusterID, we can still get this info if IMDS is not blocked
// Case 3: Cx blocked IMDS access and not using IRSA (which means region == "") OR
// not specified clusterID then its a Cx error
if region == "" || clusterID == "" {
ec2Client, err := ec2wrapper.NewMetricsClient()
if err != nil {
return nil, errors.Wrap(err, "publisher: unable to obtain EC2 service client")
}

region, err := ec2MetadataClient.Region()
if err != nil {
return nil, errors.Wrap(err, "publisher: Unable to obtain region")
// If Customers have explicitly specified clusterID then skip auto generating it
if clusterID == "" {
clusterID = getClusterID(ec2Client)
}

// Get ec2metadata client
ec2MetadataClient := ec2metadatawrapper.New(sess)

if region == "" {
region, err = ec2MetadataClient.Region()
if err != nil {
return nil, errors.Wrap(err, "publisher: Unable to obtain region")
}
}
}

// Get AWS session
Expand Down
12 changes: 12 additions & 0 deletions pkg/publisher/publisher_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,18 @@ const (
testMonitorDuration = time.Millisecond * 10
)

func TestCloudWatchPublisherWithNoIMDS(t *testing.T) {
ctx, cancel := context.WithCancel(context.TODO())
defer cancel()

region := "us-west-2"
clusterID := testClusterID

cw, err := New(ctx, region, clusterID)
assert.NoError(t, err)
assert.NotNil(t, cw)
}

func TestCloudWatchPublisherWithSingleDatum(t *testing.T) {
cloudwatchPublisher := getCloudWatchPublisher(t)

Expand Down

0 comments on commit 2d7f32d

Please sign in to comment.