e2e mem sanity test (#617)

* add mem e2e test, clean up integ test workflow/docs
* fix placeholder runner, add mem test to benchmark suite
* implement mem fetcher, code cleanup
* implement mem fetcher, code cleanup
* remove bad metric dimension
* fix measured metrics
* fix fetcher names
* update README, formatting
* add license header
aws · Oct 20, 2022 · 49314b8 · 49314b8
1 parent 94c654d
commit 49314b8
Show file tree

Hide file tree

Showing 14 changed files with 224 additions and 61 deletions.
diff --git a/.github/workflows/integrationTest.yml b/.github/workflows/integrationTest.yml
@@ -7,8 +7,6 @@ env:
   TERRAFORM_AWS_ASSUME_ROLE: ${{ secrets.TERRAFORM_AWS_ASSUME_ROLE }}
   S3_INTEGRATION_BUCKET: ${{ secrets.S3_INTEGRATION_BUCKET }}
   KEY_NAME: ${{ secrets.KEY_NAME }}
-  VPC_SECURITY_GROUPS_IDS: ${{ secrets.VPC_SECURITY_GROUPS_IDS }}
-  IAM_ROLE: ${{ secrets.IAM_ROLE }}
   GPG_PRIVATE_KEY: ${{ secrets.GPG_PRIVATE_KEY }}
   PASSPHRASE: ${{ secrets.PASSPHRASE }}
   GPG_KEY_NAME: ${{ secrets.GPG_KEY_NAME }}

diff --git a/integration/terraform/ec2/README.md b/integration/terraform/ec2/README.md
@@ -128,8 +128,8 @@ Outputs:
 
 4. The UI will ask you for inputs for the parameters. In `GitHubOrg`, type in your GitHub username. In `RepositoryName`, type in your fork repo's name, e.g. amazon-cloudwatch-agent.
 5. Choose a stack name. Any name works, e.g. Terraform-IntegTest-Role
-6. After creating the stack, navigate to IAM console
-7. Search for an IAM role with the stack name you chose above. e.g. Terraform-IntegTest-Role...
+6. After creating the stack, navigate to the `Resources` tab of the created stack
+7. Click on the role ID that was created by CloudFormation
 8. Click add permission
 9. Click attach policy, and then click create policy.
 10. Click JSON tab and copy and paste the following
@@ -232,7 +232,8 @@ Outputs:
         "iam:ListPolicyVersions",
         "iam:DeleteInstanceProfile",
         "iam:DeletePolicy",
-        "iam:ListInstanceProfilesForRole"
+        "iam:ListInstanceProfilesForRole",
+        "iam:DeleteRole"
       ],
       "Resource": "*"
     }

diff --git a/integration/test/metric/cpu.go b/integration/test/metric/cpu.go
@@ -9,17 +9,19 @@ package metric
 import (
 	"log"
 
+	"github.com/aws/aws-sdk-go-v2/aws"
 	"github.com/aws/aws-sdk-go-v2/service/cloudwatch/types"
-	"github.com/aws/aws-sdk-go/aws"
 )
 
 type CPUMetricValueFetcher struct {
 	baseMetricValueFetcher
 }
 
-func (f *CPUMetricValueFetcher) Fetch(namespace string, metricName string, stat Statistics) ([]float64, error) {
+var _ MetricValueFetcher = (*CPUMetricValueFetcher)(nil)
+
+func (f *CPUMetricValueFetcher) Fetch(namespace, metricName string, stat Statistics) ([]float64, error) {
 	dimensions := f.getMetricSpecificDimensions()
-	values, err := f.fetch(namespace, dimensions, metricName, stat)
+	values, err := f.fetch(namespace, metricName, dimensions, stat)
 	if err != nil {
 		log.Printf("Error while fetching metric value for %v: %v", metricName, err.Error())
 	}

diff --git a/integration/test/metric/mem.go b/integration/test/metric/mem.go
@@ -0,0 +1,49 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: MIT
+
+//go:build linux && integration
+// +build linux,integration
+
+package metric
+
+import (
+	"github.com/aws/aws-sdk-go-v2/service/cloudwatch/types"
+	"log"
+)
+
+var memSupportedMetricValues = map[string]struct{}{
+	"mem_active":            {},
+	"mem_available":         {},
+	"mem_available_percent": {},
+	"mem_buffered":          {},
+	"mem_cached":            {},
+	"mem_free":              {},
+	"mem_inactive":          {},
+	"mem_total":             {},
+	"mem_used":              {},
+	"mem_used_percent":      {},
+}
+
+type MemMetricValueFetcher struct {
+	baseMetricValueFetcher
+}
+
+var _ MetricValueFetcher = (*MemMetricValueFetcher)(nil)
+
+func (f *MemMetricValueFetcher) Fetch(namespace, metricName string, stat Statistics) ([]float64, error) {
+	dims := f.getMetricSpecificDimensions()
+	values, err := f.fetch(namespace, metricName, dims, stat)
+	if err != nil {
+		log.Printf("Error while fetching metric value for %s: %v", metricName, err)
+	}
+	return values, err
+}
+
+func (f *MemMetricValueFetcher) isApplicable(metricName string) bool {
+	_, exists := memSupportedMetricValues[metricName]
+	return exists
+}
+
+func (f *MemMetricValueFetcher) getMetricSpecificDimensions() []types.Dimension {
+	return []types.Dimension{}
+}
diff --git a/integration/test/metric/metric_value_query.go b/integration/test/metric/metric_value_query.go
@@ -12,13 +12,14 @@ import (
 	"time"
 
 	"github.com/aws/amazon-cloudwatch-agent/integration/test"
+	"github.com/aws/aws-sdk-go-v2/aws"
 	"github.com/aws/aws-sdk-go-v2/service/cloudwatch"
 	"github.com/aws/aws-sdk-go-v2/service/cloudwatch/types"
-	"github.com/aws/aws-sdk-go/aws"
 )
 
 var metricValueFetchers = []MetricValueFetcher{
 	&CPUMetricValueFetcher{},
+	&MemMetricValueFetcher{},
 }
 
 func GetMetricFetcher(metricName string) (MetricValueFetcher, error) {
@@ -33,15 +34,15 @@ func GetMetricFetcher(metricName string) (MetricValueFetcher, error) {
 }
 
 type MetricValueFetcher interface {
-	Fetch(namespace string, metricName string, stat Statistics) ([]float64, error)
-	fetch(namespace string, metricSpecificDimensions []types.Dimension, metricName string, stat Statistics) ([]float64, error)
+	Fetch(namespace, metricName string, stat Statistics) ([]float64, error)
+	fetch(namespace, metricName string, metricSpecificDimensions []types.Dimension, stat Statistics) ([]float64, error)
 	isApplicable(metricName string) bool
 	getMetricSpecificDimensions() []types.Dimension
 }
 
 type baseMetricValueFetcher struct{}
 
-func (f *baseMetricValueFetcher) fetch(namespace string, metricSpecificDimensions []types.Dimension, metricName string, stat Statistics) ([]float64, error) {
+func (f *baseMetricValueFetcher) fetch(namespace, metricName string, metricSpecificDimensions []types.Dimension, stat Statistics) ([]float64, error) {
 	ec2InstanceId := test.GetInstanceId()
 	instanceIdDimension := types.Dimension{
 		Name:  aws.String("InstanceId"),

diff --git a/integration/test/metric/query-json.json b/integration/test/metric/query-json.json
diff --git a/integration/test/metric_value_benchmark/agent_configs/base_linux_config.json b/integration/test/metric_value_benchmark/agent_configs/base_linux_config.json
@@ -0,0 +1,31 @@
+{
+  "agent": {
+    "metrics_collection_interval": 60,
+    "run_as_user": "root",
+    "debug": true,
+    "logfile": ""
+  },
+  "metrics": {
+    "metrics_collected": {
+      "mem": {
+        "measurement": [
+          "mem_used_percent"
+        ]
+      },
+      "disk": {
+        "measurement": [
+          "used_percent"
+        ],
+        "resources": [
+          "*"
+        ]
+      }
+    },
+    "append_dimensions": {
+      "ImageId": "${aws:ImageId}",
+      "InstanceId": "${aws:InstanceId}",
+      "InstanceType": "${aws:InstanceType}",
+      "AutoScalingGroupName": "${aws:AutoScalingGroupName}"
+    }
+  }
+}
diff --git a/integration/test/metric_value_benchmark/agent_configs/base_config.json b/integration/test/metric_value_benchmark/agent_configs/cpu_config.json
diff --git a/integration/test/metric_value_benchmark/agent_configs/mem_config.json b/integration/test/metric_value_benchmark/agent_configs/mem_config.json
@@ -0,0 +1,22 @@
+{
+  "agent": {
+    "metrics_collection_interval": 60,
+    "run_as_user": "root",
+    "debug": true,
+    "logfile": ""
+  },
+  "metrics": {
+    "namespace": "MetricValueBenchmarkTest",
+    "append_dimensions": {
+      "InstanceId": "${aws:InstanceId}"
+    },
+    "metrics_collected": {
+      "mem": {
+        "measurement": [
+          "active", "available", "available_percent", "buffered", "cached", "free", "inactive", "total",
+          "used", "used_percent"
+        ]
+      }
+    }
+  }
+}
diff --git a/integration/test/metric_value_benchmark/base_test.go b/integration/test/metric_value_benchmark/base_test.go
@@ -9,22 +9,25 @@ package metric_value_benchmark
 import (
 	"fmt"
 	"log"
+	"path/filepath"
 	"time"
 
 	"github.com/aws/amazon-cloudwatch-agent/integration/test"
 	"github.com/aws/amazon-cloudwatch-agent/integration/test/status"
 )
 
-const configOutputPath = "/opt/aws/amazon-cloudwatch-agent/bin/config.json"
-const agentConfigDirectory = "agent_configs"
-const agentConfigFileName = "/base_config.json"
-const minimumAgentRuntime = 3 * time.Minute
+const (
+	configOutputPath     = "/opt/aws/amazon-cloudwatch-agent/bin/config.json"
+	agentConfigDirectory = "agent_configs"
+	minimumAgentRuntime  = 3 * time.Minute
+)
 
 type ITestRunner interface {
 	validate() status.TestGroupResult
 	getTestName() string
 	getAgentConfigFileName() string
 	getAgentRunDuration() time.Duration
+	getMeasuredMetrics() []string
 }
 
 type TestRunner struct {
@@ -55,7 +58,7 @@ func (t *TestRunner) runAgent() (status.TestGroupResult, error) {
 		},
 	}
 
-	agentConfigPath := agentConfigDirectory + t.testRunner.getAgentConfigFileName()
+	agentConfigPath := filepath.Join(agentConfigDirectory, t.testRunner.getAgentConfigFileName())
 	log.Printf("Starting agent using agent config file %s", agentConfigPath)
 	test.CopyFile(agentConfigPath, configOutputPath)
 	err := test.StartAgent(configOutputPath, false)

diff --git a/integration/test/metric_value_benchmark/cpu_test.go b/integration/test/metric_value_benchmark/cpu_test.go
@@ -13,22 +13,16 @@ import (
 	"github.com/aws/amazon-cloudwatch-agent/integration/test/status"
 )
 
-const cpuTestName = "CPU"
-
 type CPUTestRunner struct {
 }
 
-var metricsToFetch = []string{
-	"cpu_time_active", "cpu_time_guest", "cpu_time_guest_nice", "cpu_time_idle", "cpu_time_iowait", "cpu_time_irq",
-	"cpu_time_nice", "cpu_time_softirq", "cpu_time_steal", "cpu_time_system", "cpu_time_user",
-	"cpu_usage_active", "cpu_usage_guest", "cpu_usage_guest_nice", "cpu_usage_idle", "cpu_usage_iowait",
-	"cpu_usage_irq", "cpu_usage_nice", "cpu_usage_softirq", "cpu_usage_steal", "cpu_usage_system", "cpu_usage_user"}
+var _ ITestRunner = (*CPUTestRunner)(nil)
 
 func (t *CPUTestRunner) validate() status.TestGroupResult {
-	testResults := []status.TestResult{}
-	for _, metricName := range metricsToFetch {
-		testResult := validateCpuMetric(metricName)
-		testResults = append(testResults, testResult)
+	metricsToFetch := t.getMeasuredMetrics()
+	testResults := make([]status.TestResult, len(metricsToFetch))
+	for i, metricName := range metricsToFetch {
+		testResults[i] = validateCpuMetric(metricName)
 	}
 
 	return status.TestGroupResult{
@@ -38,17 +32,25 @@ func (t *CPUTestRunner) validate() status.TestGroupResult {
 }
 
 func (t *CPUTestRunner) getTestName() string {
-	return cpuTestName
+	return "CPU"
 }
 
 func (t *CPUTestRunner) getAgentConfigFileName() string {
-	return agentConfigFileName
+	return "cpu_config.json"
 }
 
 func (t *CPUTestRunner) getAgentRunDuration() time.Duration {
 	return minimumAgentRuntime
 }
 
+func (t *CPUTestRunner) getMeasuredMetrics() []string {
+	return []string{
+		"cpu_time_active", "cpu_time_guest", "cpu_time_guest_nice", "cpu_time_idle", "cpu_time_iowait", "cpu_time_irq",
+		"cpu_time_nice", "cpu_time_softirq", "cpu_time_steal", "cpu_time_system", "cpu_time_user",
+		"cpu_usage_active", "cpu_usage_guest", "cpu_usage_guest_nice", "cpu_usage_idle", "cpu_usage_iowait",
+		"cpu_usage_irq", "cpu_usage_nice", "cpu_usage_softirq", "cpu_usage_steal", "cpu_usage_system", "cpu_usage_user"}
+}
+
 func validateCpuMetric(metricName string) status.TestResult {
 	testResult := status.TestResult{
 		Name:   metricName,

diff --git a/integration/test/metric_value_benchmark/mem_test.go b/integration/test/metric_value_benchmark/mem_test.go
@@ -0,0 +1,73 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: MIT
+
+//go:build linux && integration
+// +build linux,integration
+
+package metric_value_benchmark
+
+import (
+	"github.com/aws/amazon-cloudwatch-agent/integration/test/metric"
+	"github.com/aws/amazon-cloudwatch-agent/integration/test/status"
+	"time"
+)
+
+type MemTestRunner struct {
+}
+
+var _ ITestRunner = (*MemTestRunner)(nil)
+
+func (m *MemTestRunner) validate() status.TestGroupResult {
+	metricsToFetch := m.getMeasuredMetrics()
+	testResults := make([]status.TestResult, len(metricsToFetch))
+	for i, name := range metricsToFetch {
+		testResults[i] = m.validateMemMetric(name)
+	}
+
+	return status.TestGroupResult{
+		Name:        m.getTestName(),
+		TestResults: testResults,
+	}
+}
+
+func (m *MemTestRunner) getTestName() string {
+	return "Mem"
+}
+
+func (m *MemTestRunner) getAgentConfigFileName() string {
+	return "mem_config.json"
+}
+
+func (m *MemTestRunner) getAgentRunDuration() time.Duration {
+	return minimumAgentRuntime
+}
+
+func (m *MemTestRunner) getMeasuredMetrics() []string {
+	return []string{
+		"mem_active", "mem_available", "mem_available_percent", "mem_buffered", "mem_cached",
+		"mem_free", "mem_inactive", "mem_total", "mem_used", "mem_used_percent"}
+}
+
+func (m *MemTestRunner) validateMemMetric(metricName string) status.TestResult {
+	testResult := status.TestResult{
+		Name:   metricName,
+		Status: status.FAILED,
+	}
+
+	fetcher, err := metric.GetMetricFetcher(metricName)
+	if err != nil {
+		return testResult
+	}
+
+	values, err := fetcher.Fetch(namespace, metricName, metric.AVERAGE)
+	if err != nil {
+		return testResult
+	}
+
+	if !isAllValuesGreaterThanOrEqualToZero(metricName, values) {
+		return testResult
+	}
+
+	testResult.Status = status.SUCCESSFUL
+	return testResult
+}