Skip to content

Commit

Permalink
[COST-5027] opt-in per namespace ROS OCP recommendations (#355)
Browse files Browse the repository at this point in the history
  • Loading branch information
maskarb authored May 21, 2024
1 parent 16ae929 commit 58145e2
Show file tree
Hide file tree
Showing 13 changed files with 348 additions and 62 deletions.
2 changes: 1 addition & 1 deletion api/v1beta1/metricsconfig_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,7 @@ type PrometheusStatus struct {
DisabledMetricsCollectionCostManagement *bool `json:"disabled_metrics_collection_cost_management,omitempty"`

// DisabledMetricsCollectionResourceOptimization is a field of KokuMetricsConfigStatus to represent whether or not collecting
// resource-optimzation metrics is disabled. The default is true.
// resource-optimization metrics is disabled. The default is true.
// +kubebuilder:default=true
DisabledMetricsCollectionResourceOptimization *bool `json:"disabled_metrics_collection_resource_optimization,omitempty"`

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -865,7 +865,7 @@ spec:
default: true
description: |-
DisabledMetricsCollectionResourceOptimization is a field of KokuMetricsConfigStatus to represent whether or not collecting
resource-optimzation metrics is disabled. The default is true.
resource-optimization metrics is disabled. The default is true.
type: boolean
last_query_start_time:
description: LastQueryStartTime is a field of KokuMetricsConfigStatus
Expand Down
37 changes: 34 additions & 3 deletions internal/collector/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ var (

log = logr.Log.WithName("collector")

ErrNoData = errors.New("no data to collect")
ErrNoData = errors.New("no data to collect")
ErrROSNoEnabledNamespaces = errors.New("no enabled namespaces for ROS")
)

type mappedCSVStruct map[string]csvStruct
Expand Down Expand Up @@ -239,17 +240,23 @@ func GenerateReports(cr *metricscfgv1beta1.MetricsConfig, dirCfg *dirconfig.Dire
timeRange := c.TimeSeries
start := timeRange.Start.Add(1 * time.Second)
end := start.Add(14*time.Minute + 59*time.Second)
var err error
for i := 1; i < 5; i++ {
timeRange.Start = start
timeRange.End = end
rosCollector.TimeSeries = timeRange
if err := generateResourceOpimizationReports(log, rosCollector, dirCfg, nodeRows, yearMonth); err != nil {
return err
if err = generateResourceOpimizationReports(log, rosCollector, dirCfg, nodeRows, yearMonth); err != nil {
if !errors.Is(err, ErrROSNoEnabledNamespaces) {
return err
}
}
start = start.Add(15 * time.Minute)
end = end.Add(15 * time.Minute)
}

if errors.Is(err, ErrROSNoEnabledNamespaces) {
return ErrROSNoEnabledNamespaces
}
}

//################################################################################################################
Expand Down Expand Up @@ -386,6 +393,15 @@ func generateResourceOpimizationReports(log gologr.Logger, c *PrometheusCollecto
ts := c.TimeSeries.End
log.Info(fmt.Sprintf("querying for resource-optimization for ts: %+v", ts))
rosResults := mappedResults{}

namespacesAreEnabled, err := areNamespacesEnabled(c, ts)
if err != nil {
return err
}
if !namespacesAreEnabled {
return ErrROSNoEnabledNamespaces
}

if err := c.getQueryResults(ts, resourceOptimizationQueries, &rosResults, MaxRetries); err != nil {
return err
}
Expand Down Expand Up @@ -424,6 +440,21 @@ func generateResourceOpimizationReports(log gologr.Logger, c *PrometheusCollecto
return nil
}

func areNamespacesEnabled(c *PrometheusCollector, ts time.Time) (bool, error) {
vector, err := c.getVectorQuerySimple(rosNamespaceFilter, ts)
if err != nil {
return false, fmt.Errorf("failed to query for namespaces: %v", err)
}

namespaces := []string{}
for _, sample := range vector {
for _, field := range rosNamespaceFilter.MetricKey {
namespaces = append(namespaces, string(sample.Metric[field]))
}
}
return len(namespaces) > 0, nil
}

func findFields(input model.Metric, str string) string {
result := []string{}
for name, val := range input {
Expand Down
83 changes: 69 additions & 14 deletions internal/collector/collector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,18 +143,21 @@ func TestGenerateReports(t *testing.T) {
mapResults[query.QueryString] = &mockPromResult{value: *res}
}
}
for _, query := range *resourceOptimizationQueries {

qs := append(*resourceOptimizationQueries, rosNamespaceFilter)
for _, query := range qs {
res := &model.Vector{}
Load(filepath.Join("test_files", "test_data", query.Name), res, t)
mapResults[query.QueryString] = &mockPromResult{value: *res}
}

copyfakeTimeRange := fakeTimeRange
fakeCollector := &PrometheusCollector{
PromConn: mockPrometheusConnection{
mappedResults: &mapResults,
t: t,
},
TimeSeries: &fakeTimeRange,
TimeSeries: &copyfakeTimeRange,
}
if err := GenerateReports(fakeCR, fakeDirCfg, fakeCollector); err != nil {
t.Errorf("Failed to generate reports: %v", err)
Expand Down Expand Up @@ -194,18 +197,21 @@ func TestGenerateReportsNoROS(t *testing.T) {
mapResults[query.QueryString] = &mockPromResult{value: *res}
}
}
for _, query := range *resourceOptimizationQueries {

qs := append(*resourceOptimizationQueries, rosNamespaceFilter)
for _, query := range qs {
res := &model.Vector{}
Load(filepath.Join("test_files", "test_data", query.Name), res, t)
mapResults[query.QueryString] = &mockPromResult{value: *res}
}

copyfakeTimeRange := fakeTimeRange
fakeCollector := &PrometheusCollector{
PromConn: mockPrometheusConnection{
mappedResults: &mapResults,
t: t,
},
TimeSeries: &fakeTimeRange,
TimeSeries: &copyfakeTimeRange,
}
noRosCR := fakeCR.DeepCopy()
noRosCR.Spec.PrometheusConfig.DisableMetricsCollectionResourceOptimization = &trueDef
Expand All @@ -226,6 +232,50 @@ func TestGenerateReportsNoROS(t *testing.T) {
}
}

func TestGenerateReportsNoEnabledROS(t *testing.T) {
mapResults := make(mappedMockPromResult)
queryList := []*querys{nodeQueries, namespaceQueries, podQueries, volQueries}
for _, q := range queryList {
for _, query := range *q {
res := &model.Matrix{}
Load(filepath.Join("test_files", "test_data", query.Name), res, t)
mapResults[query.QueryString] = &mockPromResult{value: *res}
}
}

// add the namespace specific query
res := &model.Vector{}
mapResults[rosNamespaceFilter.QueryString] = &mockPromResult{value: *res}

copyfakeTimeRange := fakeTimeRange
fakeCollector := &PrometheusCollector{
PromConn: mockPrometheusConnection{
mappedResults: &mapResults,
t: t,
},
TimeSeries: &copyfakeTimeRange,
}
var err error
if err = GenerateReports(fakeCR, fakeDirCfg, fakeCollector); err == nil {
t.Errorf("Something failed: %v", err)
}
if !errors.Is(err, ErrROSNoEnabledNamespaces) {
t.Errorf("incorrect error returned: %v", err)
}

// ####### everything below compares the generated reports to the expected reports #######
expectedMap := getFiles("expected_reports", t)
generatedMap := getFiles("test_reports", t)
expectedDiff := 1 // The expected diff is equal to the number of ROS reports we generate. If we add or remove reports, this number should change

if len(expectedMap)-len(generatedMap) != expectedDiff {
t.Errorf("incorrect number of reports generated")
}
if err := fakeDirCfg.Reports.RemoveContents(); err != nil {
t.Fatal("failed to cleanup reports directory")
}
}

func TestGenerateReportsNoCost(t *testing.T) {
mapResults := make(mappedMockPromResult)
queryList := []*querys{nodeQueries, namespaceQueries, podQueries, volQueries}
Expand All @@ -236,18 +286,21 @@ func TestGenerateReportsNoCost(t *testing.T) {
mapResults[query.QueryString] = &mockPromResult{value: *res}
}
}
for _, query := range *resourceOptimizationQueries {

qs := append(*resourceOptimizationQueries, rosNamespaceFilter)
for _, query := range qs {
res := &model.Vector{}
Load(filepath.Join("test_files", "test_data", query.Name), res, t)
mapResults[query.QueryString] = &mockPromResult{value: *res}
}

copyfakeTimeRange := fakeTimeRange
fakeCollector := &PrometheusCollector{
PromConn: mockPrometheusConnection{
mappedResults: &mapResults,
t: t,
},
TimeSeries: &fakeTimeRange,
TimeSeries: &copyfakeTimeRange,
}
noCostCR := fakeCR.DeepCopy()
noCostCR.Spec.PrometheusConfig.DisableMetricsCollectionCostManagement = &trueDef
Expand All @@ -258,7 +311,7 @@ func TestGenerateReportsNoCost(t *testing.T) {
// ####### everything below compares the generated reports to the expected reports #######
expectedMap := getFiles("expected_reports", t)
generatedMap := getFiles("test_reports", t)
expectedDiff := 4 // The expected diff is equal to the number of ROS reports we generate. If we add or remove reports, this number should change
expectedDiff := 4 // The expected diff is equal to the number of Cost reports we generate. If we add or remove reports, this number should change

if len(expectedMap)-len(generatedMap) != expectedDiff {
t.Errorf("incorrect number of reports generated")
Expand All @@ -271,12 +324,13 @@ func TestGenerateReportsNoCost(t *testing.T) {
func TestGenerateReportsQueryErrors(t *testing.T) {
MaxRetries = 1
mapResults := make(mappedMockPromResult)
copyfakeTimeRange := fakeTimeRange
fakeCollector := &PrometheusCollector{
PromConn: mockPrometheusConnection{
mappedResults: &mapResults,
t: t,
},
TimeSeries: &fakeTimeRange,
TimeSeries: &copyfakeTimeRange,
}

queryList := []*querys{nodeQueries, podQueries, volQueries, namespaceQueries}
Expand All @@ -287,11 +341,11 @@ func TestGenerateReportsQueryErrors(t *testing.T) {
mapResults[query.QueryString] = &mockPromResult{value: *res}
}
}
for _, query := range *resourceOptimizationQueries {
res := &model.Vector{}
Load(filepath.Join("test_files", "test_data", query.Name), res, t)
mapResults[query.QueryString] = &mockPromResult{value: *res}
}

// add the namespace specific query
res := &model.Vector{}
Load(filepath.Join("test_files", "test_data", rosNamespaceFilter.Name), res, t)
mapResults[rosNamespaceFilter.QueryString] = &mockPromResult{value: *res}

resourceOptimizationError := "resourceOptimization error"
for _, q := range *resourceOptimizationQueries {
Expand Down Expand Up @@ -349,12 +403,13 @@ func TestGenerateReportsNoNodeData(t *testing.T) {
}
}

copyfakeTimeRange := fakeTimeRange
fakeCollector := &PrometheusCollector{
PromConn: mockPrometheusConnection{
mappedResults: &mapResults,
t: t,
},
TimeSeries: &fakeTimeRange,
TimeSeries: &copyfakeTimeRange,
}
if err := GenerateReports(fakeCR, fakeDirCfg, fakeCollector); err != nil && err != ErrNoData {
t.Errorf("Failed to generate reports: %v", err)
Expand Down
18 changes: 18 additions & 0 deletions internal/collector/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -289,3 +289,21 @@ func (c *PrometheusCollector) getQueryResults(ts time.Time, queries *querys, res

return nil
}

func (c *PrometheusCollector) getVectorQuerySimple(q query, ts time.Time) (model.Vector, error) {
ctx, cancel := context.WithTimeout(context.Background(), c.ContextTimeout)
defer cancel()

queryResult, warnings, err := c.PromConn.Query(ctx, q.QueryString, ts)
if err != nil {
return nil, fmt.Errorf("query: %s: error querying prometheus: %v", q.QueryString, err)
}
if len(warnings) > 0 {
log.Info("query warnings", "Warnings", warnings)
}
vector, ok := queryResult.(model.Vector)
if !ok {
return vector, fmt.Errorf("expected a vector in response to query, got a %v", queryResult.Type())
}
return vector, nil
}
Loading

0 comments on commit 58145e2

Please sign in to comment.