Skip to content

Commit

Permalink
Add support for setting cpu&mem limit strategy (#433)
Browse files Browse the repository at this point in the history
  • Loading branch information
mszostok authored Dec 19, 2024
1 parent c7f19ff commit 213ebb7
Show file tree
Hide file tree
Showing 7 changed files with 527 additions and 37 deletions.
97 changes: 93 additions & 4 deletions castai/resource_workload_scaling_policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@ import (
"github.com/castai/terraform-provider-castai/castai/sdk"
)

const minResourceMultiplierValue = 1.0

const (
FieldLimitStrategy = "limit"
FieldLimitStrategyType = "type"
FieldLimitStrategyMultiplier = "multiplier"
)

var (
k8sNameRegex = regexp.MustCompile("^[a-z0-9A-Z][a-z0-9A-Z._-]{0,61}[a-z0-9A-Z]$")
)
Expand Down Expand Up @@ -51,7 +59,7 @@ func resourceWorkloadScalingPolicy() *schema.Resource {
"apply_type": {
Type: schema.TypeString,
Required: true,
Description: `Recommendation apply type.
Description: `Recommendation apply type.
- IMMEDIATE - pods are restarted immediately when new recommendation is generated.
- DEFERRED - pods are not restarted and recommendation values are applied during natural restarts only (new deployment, etc.)`,
ValidateDiagFunc: validation.ToDiagFunc(validation.StringInSlice([]string{"IMMEDIATE", "DEFERRED"}, false)),
Expand Down Expand Up @@ -203,6 +211,34 @@ func workloadScalingPolicyResourceSchema(function string, overhead, minRecommend
Optional: true,
Description: "Max values for the recommendation, applies to every container. For memory - this is in MiB, for CPU - this is in cores.",
},
FieldLimitStrategy: {
Type: schema.TypeList,
Optional: true,
MaxItems: 1,
Description: "Resource limit settings",
Elem: workloadScalingPolicyResourceLimitSchema(),
},
},
}
}

func workloadScalingPolicyResourceLimitSchema() *schema.Resource {
return &schema.Resource{
Schema: map[string]*schema.Schema{
FieldLimitStrategyType: {
Type: schema.TypeString,
Required: true,
Description: fmt.Sprintf(`Defines limit strategy type.
- %s - removes the resource limit even if it was specified in the workload spec.
- %s - used to calculate the resource limit. The final value is determined by multiplying the resource request by the specified factor.`, sdk.NOLIMIT, sdk.MULTIPLIER),
ValidateDiagFunc: validation.ToDiagFunc(validation.StringInSlice([]string{string(sdk.MULTIPLIER), string(sdk.NOLIMIT)}, false)),
},
FieldLimitStrategyMultiplier: {
Type: schema.TypeFloat,
Optional: true,
Description: "Multiplier used to calculate the resource limit. It must be defined for the MULTIPLIER strategy.",
ValidateDiagFunc: validation.ToDiagFunc(validation.FloatAtLeast(minResourceMultiplierValue)),
},
},
}
}
Expand Down Expand Up @@ -375,19 +411,44 @@ func resourceWorkloadScalingPolicyDiff(_ context.Context, d *schema.ResourceDiff
cpu := toWorkloadScalingPolicies(d.Get("cpu").([]interface{})[0].(map[string]interface{}))
memory := toWorkloadScalingPolicies(d.Get("memory").([]interface{})[0].(map[string]interface{}))

if err := validateArgs(cpu, "cpu"); err != nil {
if err := validateResourcePolicy(cpu, "cpu"); err != nil {
return err
}
return validateArgs(memory, "memory")
return validateResourcePolicy(memory, "memory")
}

func validateArgs(r sdk.WorkloadoptimizationV1ResourcePolicies, res string) error {
func validateResourcePolicy(r sdk.WorkloadoptimizationV1ResourcePolicies, res string) error {
if r.Function == "QUANTILE" && len(r.Args) == 0 {
return fmt.Errorf("field %q: QUANTILE function requires args to be provided", res)
}
if r.Function == "MAX" && len(r.Args) > 0 {
return fmt.Errorf("field %q: MAX function doesn't accept any args", res)
}

err := validateResourceLimit(r)
if err != nil {
return fmt.Errorf("field %q: %w", res, err)
}
return nil
}

func validateResourceLimit(r sdk.WorkloadoptimizationV1ResourcePolicies) error {
if r.Limit == nil {
return nil
}

switch r.Limit.Type {
case sdk.NOLIMIT:
if r.Limit.Multiplier != nil {
return fmt.Errorf(`field %q: %q limit type doesn't accept multiplier value`, FieldLimitStrategy, sdk.NOLIMIT)
}
case sdk.MULTIPLIER:
if r.Limit.Multiplier == nil {
return fmt.Errorf(`field %q: %q limit type requires multiplier value to be provided`, FieldLimitStrategy, sdk.MULTIPLIER)
}
default:
return fmt.Errorf(`field %q: unknown limit type %q`, FieldLimitStrategy, r.Limit.Type)
}
return nil
}

Expand Down Expand Up @@ -449,10 +510,28 @@ func toWorkloadScalingPolicies(obj map[string]interface{}) sdk.Workloadoptimizat
if v, ok := obj["max"].(float64); ok && v > 0 {
out.Max = lo.ToPtr(v)
}
if v, ok := obj[FieldLimitStrategy].([]any); ok && len(v) > 0 {
out.Limit = toWorkloadResourceLimit(v[0].(map[string]any))
}

return out
}

func toWorkloadResourceLimit(obj map[string]any) *sdk.WorkloadoptimizationV1ResourceLimitStrategy {
if len(obj) == 0 {
return nil
}

out := &sdk.WorkloadoptimizationV1ResourceLimitStrategy{}
if v, ok := obj[FieldLimitStrategyType].(string); ok {
out.Type = sdk.WorkloadoptimizationV1ResourceLimitStrategyType(v)
}
if v, ok := obj[FieldLimitStrategyMultiplier].(float64); ok && v > 0 {
out.Multiplier = lo.ToPtr(v)
}
return out
}

func toWorkloadScalingPoliciesMap(p sdk.WorkloadoptimizationV1ResourcePolicies) []map[string]interface{} {
m := map[string]interface{}{
"function": p.Function,
Expand All @@ -467,6 +546,16 @@ func toWorkloadScalingPoliciesMap(p sdk.WorkloadoptimizationV1ResourcePolicies)
m["look_back_period_seconds"] = int(*p.LookBackPeriodSeconds)
}

if p.Limit != nil {
limit := map[string]any{}

limit[FieldLimitStrategyType] = p.Limit.Type
if p.Limit.Multiplier != nil {
limit[FieldLimitStrategyMultiplier] = *p.Limit.Multiplier
}
m[FieldLimitStrategy] = []map[string]any{limit}
}

return []map[string]interface{}{m}
}

Expand Down
58 changes: 50 additions & 8 deletions castai/resource_workload_scaling_policy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ import (
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/acctest"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/resource"
"github.com/hashicorp/terraform-plugin-sdk/v2/terraform"
"github.com/samber/lo"
"github.com/stretchr/testify/require"

"github.com/castai/terraform-provider-castai/castai/sdk"
)
Expand Down Expand Up @@ -39,11 +41,15 @@ func TestAccResourceWorkloadScalingPolicy(t *testing.T) {
resource.TestCheckResourceAttr(resourceName, "cpu.0.look_back_period_seconds", "86401"),
resource.TestCheckResourceAttr(resourceName, "cpu.0.min", "0.1"),
resource.TestCheckResourceAttr(resourceName, "cpu.0.max", "1"),
resource.TestCheckResourceAttr(resourceName, "cpu.0.limit.0.type", "MULTIPLIER"),
resource.TestCheckResourceAttr(resourceName, "cpu.0.limit.0.multiplier", "1.2"),
resource.TestCheckResourceAttr(resourceName, "memory.0.function", "MAX"),
resource.TestCheckResourceAttr(resourceName, "memory.0.overhead", "0.25"),
resource.TestCheckResourceAttr(resourceName, "memory.0.apply_threshold", "0.1"),
resource.TestCheckResourceAttr(resourceName, "memory.0.args.#", "0"),
resource.TestCheckResourceAttr(resourceName, "memory.0.min", "100"),
resource.TestCheckResourceAttr(resourceName, "memory.0.limit.0.type", "MULTIPLIER"),
resource.TestCheckResourceAttr(resourceName, "memory.0.limit.0.multiplier", "1.8"),
),
},
{
Expand All @@ -67,12 +73,14 @@ func TestAccResourceWorkloadScalingPolicy(t *testing.T) {
resource.TestCheckResourceAttr(resourceName, "cpu.0.args.0", "0.9"),
resource.TestCheckResourceAttr(resourceName, "cpu.0.look_back_period_seconds", "86402"),
resource.TestCheckResourceAttr(resourceName, "cpu.0.min", "0.1"),
resource.TestCheckResourceAttr(resourceName, "cpu.0.limit.0.type", "NO_LIMIT"),
resource.TestCheckResourceAttr(resourceName, "memory.0.function", "QUANTILE"),
resource.TestCheckResourceAttr(resourceName, "memory.0.overhead", "0.35"),
resource.TestCheckResourceAttr(resourceName, "memory.0.apply_threshold", "0.2"),
resource.TestCheckResourceAttr(resourceName, "memory.0.args.0", "0.9"),
resource.TestCheckResourceAttr(resourceName, "memory.0.min", "100"),
resource.TestCheckResourceAttr(resourceName, "memory.0.max", "512"),
resource.TestCheckResourceAttr(resourceName, "memory.0.limit.0.type", "NO_LIMIT"),
resource.TestCheckResourceAttr(resourceName, "startup.0.period_seconds", "123"),
resource.TestCheckResourceAttr(resourceName, "downscaling.0.apply_type", "DEFERRED"),
resource.TestCheckResourceAttr(resourceName, "memory_event.0.apply_type", "DEFERRED"),
Expand Down Expand Up @@ -108,12 +116,20 @@ func scalingPolicyConfig(clusterName, projectID, name string) string {
min = 0.1
max = 1
look_back_period_seconds = 86401
limit {
type = "MULTIPLIER"
multiplier = 1.2
}
}
memory {
function = "MAX"
overhead = 0.25
apply_threshold = 0.1
min = 100
limit {
type = "MULTIPLIER"
multiplier = 1.8
}
}
}`, name)

Expand All @@ -135,6 +151,9 @@ func scalingPolicyConfigUpdated(clusterName, projectID, name string) string {
args = ["0.9"]
look_back_period_seconds = 86402
min = 0.1
limit {
type = "NO_LIMIT"
}
}
memory {
function = "QUANTILE"
Expand All @@ -143,6 +162,9 @@ func scalingPolicyConfigUpdated(clusterName, projectID, name string) string {
args = ["0.9"]
min = 100
max = 512
limit {
type = "NO_LIMIT"
}
}
startup {
period_seconds = 123
Expand All @@ -152,7 +174,7 @@ func scalingPolicyConfigUpdated(clusterName, projectID, name string) string {
}
memory_event {
apply_type = "DEFERRED"
}
}
anti_affinity {
consider_anti_affinity = true
}
Expand Down Expand Up @@ -187,10 +209,10 @@ func testAccCheckScalingPolicyDestroy(s *terraform.State) error {
return nil
}

func Test_validateArgs(t *testing.T) {
func Test_validateResourcePolicy(t *testing.T) {
tests := map[string]struct {
args sdk.WorkloadoptimizationV1ResourcePolicies
wantErr bool
args sdk.WorkloadoptimizationV1ResourcePolicies
errMsg string
}{
"should not return error when QUANTILE has args provided": {
args: sdk.WorkloadoptimizationV1ResourcePolicies{
Expand All @@ -202,20 +224,40 @@ func Test_validateArgs(t *testing.T) {
args: sdk.WorkloadoptimizationV1ResourcePolicies{
Function: "QUANTILE",
},
wantErr: true,
errMsg: `field "cpu": QUANTILE function requires args to be provided`,
},
"should return error when MAX has args provided": {
args: sdk.WorkloadoptimizationV1ResourcePolicies{
Function: "MAX",
Args: []string{"0.5"},
},
wantErr: true,
errMsg: `field "cpu": MAX function doesn't accept any args`,
},
"should return error when no value is specified for the multiplier strategy": {
args: sdk.WorkloadoptimizationV1ResourcePolicies{
Limit: &sdk.WorkloadoptimizationV1ResourceLimitStrategy{
Type: sdk.MULTIPLIER,
},
},
errMsg: `field "cpu": field "limit": "MULTIPLIER" limit type requires multiplier value to be provided`,
},
"should return error when a value is specified for the no limit strategy": {
args: sdk.WorkloadoptimizationV1ResourcePolicies{
Limit: &sdk.WorkloadoptimizationV1ResourceLimitStrategy{
Type: sdk.NOLIMIT,
Multiplier: lo.ToPtr(4.2),
},
},
errMsg: `field "cpu": field "limit": "NO_LIMIT" limit type doesn't accept multiplier value`,
},
}
for name, tt := range tests {
t.Run(name, func(t *testing.T) {
if err := validateArgs(tt.args, ""); (err != nil) != tt.wantErr {
t.Errorf("validateArgs() error = %v, wantErr %v", err, tt.wantErr)
err := validateResourcePolicy(tt.args, "cpu")
if tt.errMsg == "" {
require.NoError(t, err)
} else {
require.EqualError(t, err, tt.errMsg)
}
})
}
Expand Down
Loading

0 comments on commit 213ebb7

Please sign in to comment.