Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(aws): Use go-cache and clean up metrics #37

Merged
merged 1 commit into from
Jan 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions pkg/providers/aws/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"net/url"
"time"

"github.com/patrickmn/go-cache"
"github.com/re-cinq/cloud-carbon/pkg/config"

"github.com/aws/aws-sdk-go-v2/aws"
Expand All @@ -22,11 +23,16 @@ var (
)

// Client contains the AWS config and service clients
// and is used to call the API
// and is used to access the API
type Client struct {
cfg *aws.Config
// AWS specific config for auth and client creation
cfg *aws.Config

// service APIs
ec2Client *ec2Client
cloudWatchClient *cloudWatchClient

cache *cache.Cache
}

// NewClient creates a struct with the AWS config, EC2 Client, and CloudWatch Client
Expand Down Expand Up @@ -59,6 +65,8 @@ func NewClient(ctx context.Context, currentConfig *config.Account, customTranspo
cfg: &cfg,
ec2Client: ec2Client,
cloudWatchClient: cloudWatchClient,
// TODO: configure expiry and deletion
cache: cache.New(12*time.Hour, 36*time.Minute),
}, nil
}

Expand Down
181 changes: 0 additions & 181 deletions pkg/providers/aws/cache.go

This file was deleted.

40 changes: 0 additions & 40 deletions pkg/providers/aws/cache_test.go

This file was deleted.

96 changes: 54 additions & 42 deletions pkg/providers/aws/cloudwatch.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import (
"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/service/cloudwatch"
"github.com/aws/aws-sdk-go-v2/service/cloudwatch/types"
"github.com/patrickmn/go-cache"
"github.com/re-cinq/cloud-carbon/pkg/providers/util"
v1 "github.com/re-cinq/cloud-carbon/pkg/types/v1"
"k8s.io/klog/v2"
)
Expand Down Expand Up @@ -38,54 +40,66 @@ func NewCloudWatchClient(cfg *aws.Config) *cloudWatchClient {
}

// Get the resource consumption of an ec2 instance
func (e *cloudWatchClient) GetEC2Metrics(region awsRegion, interval time.Duration, cache *awsCache) (map[string]v1.Instance, error) {
serviceMetrics := make(map[string]v1.Instance)
func (e *cloudWatchClient) GetEC2Metrics(ca *cache.Cache, region string, interval time.Duration) ([]v1.Instance, error) {
instances := []v1.Instance{}
local := make(map[string]*v1.Instance)

end := time.Now().UTC()
start := end.Add(-interval)

// Get the cpu consumption for all the instances in the region
cpuMetrics, err := e.getEC2CPU(region, start, end, interval)
if err != nil || len(cpuMetrics) < 1 {
return serviceMetrics, err
if err != nil {
return instances, err
}

if len(cpuMetrics) == 0 {
return instances, fmt.Errorf("no cpu metrics collected from CloudWatch")
}
for _, cpuMetric := range cpuMetrics {
// load the instance metadata from the cache, because the query does not give us
instanceMetadata := cache.Get(region, ec2Service, cpuMetric.instanceID)
if instanceMetadata == nil {
klog.Warningf("instance id %s is not present in the metadata, temporarily skipping collecting metrics", cpuMetric.instanceID)

// TODO: Will need to iterate cpuMetrics and memMetrics
for i := range cpuMetrics {
// to avoid Implicit memory aliasing in for loop
metric := cpuMetrics[i]

instanceID, ok := metric.Labels().Get("instanceID")
if !ok {
klog.Errorf("error metric doesn't have an instanceID: %+v", metric)
continue
}

// load the instance metadata from the cache, because the query does not give us instance info
cachedInstance, exists := ca.Get(util.CacheKey(region, ec2Service, instanceID))
if cachedInstance == nil || !exists {
klog.Warningf("instance id %s is not present in the metadata, temporarily skipping collecting metrics", instanceID)
continue
}

// if we got here it means that we do have the instance metadata
instanceService, exists := serviceMetrics[cpuMetric.instanceID]
meta := cachedInstance.(*resource)

// update local instance metadata map
s, exists := local[instanceID]
if !exists {
// Then create a new one
s := v1.NewInstance(cpuMetric.instanceID, provider)
// Then create a new local instance from cached
s = v1.NewInstance(instanceID, provider)
s.SetService("EC2")
s.SetKind(instanceMetadata.kind).SetRegion(region)
s.AddLabel("Name", instanceMetadata.name)
serviceMetrics[cpuMetric.instanceID] = *s

// Makes it easier to use it
instanceService = serviceMetrics[cpuMetric.instanceID]
instanceService.SetKind(instanceMetadata.kind).SetRegion(region)
s.SetKind(meta.kind)
s.SetRegion(region)
}

// Build the resource
cpu := v1.NewMetric(v1.CPU.String()).SetResourceUnit(cpuMetric.unit).SetUnitAmount(float64(instanceMetadata.coreCount))
cpu.SetUsage(cpuMetric.value).SetType(cpuMetric.kind)
s.AddLabel("Name", meta.name)
metric.SetUnitAmount(float64(meta.coreCount))
s.Metrics().Upsert(&metric)

// Update the CPU information now
instanceService.Metrics().Upsert(cpu)
local[instanceID] = s
instances = append(instances, *s)
}

// Return the collected metrics
return serviceMetrics, nil
return instances, nil
}

// Get the CPU resource consumption of an ec2 instance
func (e *cloudWatchClient) getEC2CPU(region awsRegion, start, end time.Time, interval time.Duration) ([]awsMetric, error) {
func (e *cloudWatchClient) getEC2CPU(region string, start, end time.Time, interval time.Duration) ([]v1.Metric, error) {
// Override the region
withRegion := func(o *cloudwatch.Options) {
o.Region = region
Expand Down Expand Up @@ -114,24 +128,22 @@ func (e *cloudWatchClient) getEC2CPU(region awsRegion, start, end time.Time, int
}

// Collector
var cpuMetrics []awsMetric
var cpuMetrics []v1.Metric

// Loop through the result and build the intermediate awsMetric model
for _, metric := range output.MetricDataResults {
instanceID := aws.ToString(metric.Label)
if instanceID == "Other" {
return nil, errors.New("error bad query passed to GetMetricData - instanceID not found in label")
}

if len(metric.Values) > 0 {
cpuMetric := awsMetric{
value: metric.Values[0],
instanceID: aws.ToString(metric.Label),
kind: v1.CPU,
unit: v1.Core,
name: v1.CPU.String(),
}

if cpuMetric.instanceID == "Other" {
return nil, errors.New("error bad query passed to GetMetricData - instanceID not found in label")
}

cpuMetrics = append(cpuMetrics, cpuMetric)
cpu := v1.NewMetric(v1.CPU.String())
cpu.SetResourceUnit(v1.Core).SetUsage(metric.Values[0]).SetType(v1.CPU)
cpu.SetLabels(map[string]string{
"instanceID": instanceID,
})
cpuMetrics = append(cpuMetrics, *cpu)
}
}

Expand Down
Loading