Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add memory for gcp #34

Merged
merged 1 commit into from
Jan 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions pkg/calculator/calculator.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ package calculator

import (
"time"

v1 "github.com/re-cinq/cloud-carbon/pkg/types/v1"
)

// TODO add links / sources for where numbers and calculations are gathered
Expand All @@ -14,7 +12,7 @@ const lifespan = 4

type calculate struct {
cores float64
usageCPU v1.Percentage
usageCPU float64
minWatts float64
maxWatts float64
chip float64
Expand All @@ -37,7 +35,7 @@ func (c *calculate) operationalCPUEmissions(interval time.Duration) float64 {
// architecture is unknown the Min and Max wattage is the average of all machines
// for that provider, and is supplied in the provider defaults. This is being
// handled in the types/factors package (the point of reading in coefficient data).
avgWatts := c.minWatts + float64(c.usageCPU)*(c.maxWatts-c.minWatts)
avgWatts := c.minWatts + c.usageCPU*(c.maxWatts-c.minWatts)

// Operational Emissions are calculated by multiplying the avgWatts, vCPUHours, PUE,
// and region grid CO2e. The PUE is collected from the providers. The CO2e grid data
Expand Down
11 changes: 5 additions & 6 deletions pkg/calculator/calculator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"testing"
"time"

v1 "github.com/re-cinq/cloud-carbon/pkg/types/v1"
"github.com/stretchr/testify/assert"
)

Expand All @@ -20,7 +19,7 @@ type testcase struct {
func defaultCalc() *calculate {
return &calculate{
cores: 4,
usageCPU: v1.Percentage(25),
usageCPU: 25.0,
minWatts: 1.3423402398570,
maxWatts: 4.00498247528,
chip: 35.23458732,
Expand All @@ -47,7 +46,7 @@ func TestCalculateEmissions(t *testing.T) {
interval: 30 * time.Second,
calc: &calculate{
cores: 0,
usageCPU: v1.Percentage(0),
usageCPU: 0,
minWatts: 0,
maxWatts: 0,
chip: 0,
Expand Down Expand Up @@ -95,7 +94,7 @@ func TestCalculateEmissions(t *testing.T) {

func() *testcase {
c := defaultCalc()
c.usageCPU = v1.Percentage(50)
c.usageCPU = 50
// test with vCPU utilization at 50%
return &testcase{
name: "50% utilization",
Expand All @@ -107,7 +106,7 @@ func TestCalculateEmissions(t *testing.T) {

func() *testcase {
c := defaultCalc()
c.usageCPU = v1.Percentage(100)
c.usageCPU = 100
// test with vCPU utilization at 100%
return &testcase{
name: "100% utilization",
Expand Down Expand Up @@ -152,7 +151,7 @@ func TestCalculateEmissions(t *testing.T) {
interval: 30 * time.Second,
calc: &calculate{
cores: 32,
usageCPU: v1.Percentage(90),
usageCPU: 90,
minWatts: 3.0369270833333335,
maxWatts: 8.575357663690477,
chip: 129.77777777777777,
Expand Down
2 changes: 1 addition & 1 deletion pkg/providers/aws/cloudwatch.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ func (e *cloudWatchClient) GetEc2Metrics(region awsRegion, cache *awsCache) map[

// Build the resource
cpu := v1.NewMetric(v1.CPU.String()).SetResourceUnit(cpuMetric.unit).SetUnitAmount(float64(instanceMetadata.coreCount))
cpu.SetUsagePercentage(cpuMetric.value).SetType(cpuMetric.kind)
cpu.SetUsage(cpuMetric.value).SetType(cpuMetric.kind)

// Update the CPU information now
instanceService.Metrics().Upsert(cpu)
Expand Down
171 changes: 60 additions & 111 deletions pkg/providers/gcp/gcp.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package gcp

import (
"context"
"errors"
"fmt"
"net/url"
"path"
Expand All @@ -11,7 +12,6 @@ import (
compute "cloud.google.com/go/compute/apiv1"
"cloud.google.com/go/compute/apiv1/computepb"
monitoring "cloud.google.com/go/monitoring/apiv3/v2"
monitoringpb "cloud.google.com/go/monitoring/apiv3/v2/monitoringpb"
cache "github.com/patrickmn/go-cache"
"github.com/re-cinq/cloud-carbon/pkg/config"
v1 "github.com/re-cinq/cloud-carbon/pkg/types/v1"
Expand All @@ -20,35 +20,6 @@ import (
"k8s.io/klog/v2"
)

var (
/*
* An MQL query that will return data from Google Cloud with the
* - Instance Name
* - Region
* - Zone
* - Machine Type
* - Reserved CPUs
* - Utilization
*/
CPUQuery = `
fetch gce_instance
| { metric 'compute.googleapis.com/instance/cpu/utilization'
; metric 'compute.googleapis.com/instance/cpu/reserved_cores' }
| outer_join 0
| filter project_id = '%s'
| group_by [
resource.instance_id,
metric.instance_name,
metadata.system.region,
resource.zone,
metadata.system.machine_type,
reserved_cores: format(t_1.value.reserved_cores, '%%f')
], [max(t_0.value.utilization)]
| window %s
| within %s
`
)

// GCP is the structure used as the provider for Google Cloud Platform
type GCP struct {
// GCP Clients
Expand Down Expand Up @@ -155,123 +126,101 @@ func (g *GCP) GetMetricsForInstances(
) ([]v1.Instance, error) {
var instances []v1.Instance

metrics, err := g.instanceMetrics(
cpumetrics, err := g.instanceCPUMetrics(
// TODO these parameters can be cleaned up
ctx, project, fmt.Sprintf(CPUQuery, project, window, window),
)
if err != nil {
return instances, err
}

memmetrics, err := g.instanceMemoryMetrics(
ctx, project, fmt.Sprintf(MEMQuery, project, window, window),
)
if err != nil {
return instances, err
}

// we use a lookup to add different metrics to the same instance
lookup := make(map[string]*v1.Instance)

// TODO there seems to be duplicated logic here
// Why not create the instance while collecting metrics instead of handling
// it in two steps?
for _, m := range metrics {
for _, m := range append(cpumetrics, memmetrics...) {
metric := *m

zone, ok := metric.Labels().Get("zone")
if !ok {
continue
}

region, ok := metric.Labels().Get("region")
if !ok {
continue
}

instanceName, ok := metric.Labels().Get("name")
if !ok {
continue
}

instanceID, ok := metric.Labels().Get("id")
if !ok {
continue
}

machineType, ok := metric.Labels().Get("machine_type")
if !ok {
meta, err := getMetadata(&metric)
if err != nil {
continue
}

// Load the cache
// TODO: make this more explicit; I'm not sure why this
// is needed, as we don't use the cache anywhere
cachedInstance, ok := g.cache.Get(cacheKey(zone, service, instanceName))
cachedInstance, ok := g.cache.Get(cacheKey(meta.zone, service, meta.name))
if cachedInstance == nil && !ok {
continue
}

instance := v1.NewInstance(instanceID, provider).SetService(service)
i, ok := lookup[meta.id]
if !ok {
i = v1.NewInstance(meta.id, provider).SetService(service)
}

i.SetKind(meta.machineType)
i.SetRegion(meta.region)
i.SetZone(meta.zone)
i.Metrics().Upsert(&metric)

instance.SetKind(machineType)
instance.SetRegion(region)
instance.SetZone(zone)
instance.Metrics().Upsert(&metric)
lookup[meta.id] = i
}

instances = append(instances, *instance)
// create list of instances
// TODO: this seems repetitive
for _, v := range lookup {
instances = append(instances, *v)
}

return instances, nil
}

// instanceMetrics runs a query on googe cloud monitoring using MQL
// and responds with a list of metrics
func (g *GCP) instanceMetrics(
ctx context.Context,
project, query string,
) ([]*v1.Metric, error) {
var metrics []*v1.Metric
type metadata struct {
zone, region, name, id, machineType string
}

it := g.monitoring.QueryTimeSeries(ctx, &monitoringpb.QueryTimeSeriesRequest{
Name: fmt.Sprintf("projects/%s", project),
Query: query,
})
func getMetadata(m *v1.Metric) (*metadata, error) {
zone, ok := m.Labels().Get("zone")
if !ok {
return &metadata{}, errors.New("zone not found")
}

for {
resp, err := it.Next()
if err == iterator.Done {
break
}
if err != nil {
return nil, err
}
region, ok := m.Labels().Get("region")
if !ok {
return &metadata{}, errors.New("region not found")
}

// This is dependant on the MQL query
// label ordering
instanceID := resp.GetLabelValues()[0].GetStringValue()
instanceName := resp.GetLabelValues()[1].GetStringValue()
region := resp.GetLabelValues()[2].GetStringValue()
zone := resp.GetLabelValues()[3].GetStringValue()
instanceType := resp.GetLabelValues()[4].GetStringValue()
totalCores := resp.GetLabelValues()[5].GetStringValue()

m := v1.NewMetric("cpu")
m.SetResourceUnit(v1.Core)
m.SetType(v1.CPU).SetUsagePercentage(
// translate fraction to a percentage
resp.GetPointData()[0].GetValues()[0].GetDoubleValue() * 100,
)
name, ok := m.Labels().Get("name")
if !ok {
return &metadata{}, errors.New("instance name not found")
}

f, err := strconv.ParseFloat(totalCores, 64)
// TODO: we should not fail here but collect errors
if err != nil {
klog.Errorf("failed to parse GCP metric %s", err)
continue
}
id, ok := m.Labels().Get("id")
if !ok {
return &metadata{}, errors.New("instance id not found")
}

m.SetUnitAmount(f)
m.SetLabels(v1.Labels{
"id": instanceID,
"name": instanceName,
"region": region,
"zone": zone,
"machine_type": instanceType,
})
metrics = append(metrics, m)
machineType, ok := m.Labels().Get("machine_type")
if !ok {
return &metadata{}, errors.New("machine type not found")
}
return metrics, nil
return &metadata{
zone: zone,
region: region,
name: name,
id: id,
machineType: machineType,
}, nil
}

// Refresh fetches all the Instances
Expand Down
Loading
Loading