Skip to content

Commit

Permalink
feat: Add new service configuration for service CIDR to enable Windows
Browse files Browse the repository at this point in the history
See aws#4625 for more
  • Loading branch information
tzifudzi committed Sep 14, 2023
1 parent 2498587 commit 02ee419
Show file tree
Hide file tree
Showing 13 changed files with 100 additions and 40 deletions.
2 changes: 2 additions & 0 deletions charts/karpenter/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,8 @@ settings:
# -- Cluster endpoint. If not set, will be discovered during startup (EKS only)
clusterEndpoint: ""
# -- The CIDR block to assign Kubernetes service IP addresses from for IPv4. If not set, will be discovered during startup (EKS only)
clusterServiceIpv4Cidr: ""
# -- The default instance profile to use when launching nodes
defaultInstanceProfile: ""
# -- If true then instances that support pod ENI will report a vpc.amazonaws.com/pod-eni resource
enablePodENI: false
Expand Down
3 changes: 3 additions & 0 deletions pkg/apis/settings/settings.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ var defaultSettings = &Settings{
ClusterCABundle: "",
ClusterName: "",
ClusterEndpoint: "",
ClusterServiceIpv4Cidr: "",
DefaultInstanceProfile: "",
EnablePodENI: false,
EnableENILimitedPodDensity: true,
Expand All @@ -51,6 +52,7 @@ type Settings struct {
ClusterCABundle string
ClusterName string
ClusterEndpoint string
ClusterServiceIpv4Cidr string //TODO: Test this in integration/unit tests and update docs.
DefaultInstanceProfile string
EnablePodENI bool
EnableENILimitedPodDensity bool
Expand All @@ -75,6 +77,7 @@ func (*Settings) Inject(ctx context.Context, cm *v1.ConfigMap) (context.Context,
configmap.AsString("aws.clusterCABundle", &s.ClusterCABundle),
configmap.AsString("aws.clusterName", &s.ClusterName),
configmap.AsString("aws.clusterEndpoint", &s.ClusterEndpoint),
configmap.AsString("aws.clusterServiceIpv4Cidr", &s.ClusterServiceIpv4Cidr),
configmap.AsString("aws.defaultInstanceProfile", &s.DefaultInstanceProfile),
configmap.AsBool("aws.enablePodENI", &s.EnablePodENI),
configmap.AsBool("aws.enableENILimitedPodDensity", &s.EnableENILimitedPodDensity),
Expand Down
13 changes: 13 additions & 0 deletions pkg/apis/settings/settings_validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package settings

import (
"fmt"
"net"
"net/url"
"time"

Expand Down Expand Up @@ -62,6 +63,18 @@ func (s Settings) validateEndpoint() (errs *apis.FieldError) {
return nil
}

// validateClusterServiceIPV4Cidr checks that ClusterServiceIpv4Cidr, when set,
// is a well-formed IPv4 CIDR block (for example "10.100.0.0/16").
//
// An empty value is accepted and yields no error. Any value that fails to
// parse, or that parses as an IPv6 block, is reported as an invalid value on
// the "clusterServiceIpv4Cidr" field.
//
// TODO: add unit tests covering empty, malformed, IPv4 and IPv6 inputs.
func (s Settings) validateClusterServiceIPV4Cidr() (errs *apis.FieldError) {
	if s.ClusterServiceIpv4Cidr == "" {
		return nil
	}
	_, ipNet, err := net.ParseCIDR(s.ClusterServiceIpv4Cidr)
	// net.ParseCIDR accepts IPv6 blocks as well, so a successful parse is not
	// enough: an IPv4 block is the only kind that yields a 4-byte mask.
	if err != nil || len(ipNet.Mask) != net.IPv4len {
		return errs.Also(apis.ErrInvalidValue(fmt.Sprintf("%q not a valid IPV4 CIDR", s.ClusterServiceIpv4Cidr), "clusterServiceIpv4Cidr"))
	}
	return nil
}

func (s Settings) validateTags() (errs *apis.FieldError) {
for k := range s.Tags {
for _, pattern := range v1alpha1.RestrictedTagPatterns {
Expand Down
47 changes: 42 additions & 5 deletions pkg/operator/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,12 +102,19 @@ func NewOperator(ctx context.Context, operator *operator.Operator) (context.Cont
logging.FromContext(ctx).Fatalf("Checking EC2 API connectivity, %s", err)
}
logging.FromContext(ctx).With("region", *sess.Config.Region).Debugf("discovered region")
clusterEndpoint, err := ResolveClusterEndpoint(ctx, eks.New(sess))
describeCluster := DescribeClusterClosure(ctx, eks.New(sess))
clusterEndpoint, err := ResolveClusterEndpoint(ctx, describeCluster)
if err != nil {
logging.FromContext(ctx).Fatalf("unable to detect the cluster endpoint, %s", err)
} else {
logging.FromContext(ctx).With("cluster-endpoint", clusterEndpoint).Debugf("discovered cluster endpoint")
}
clusterServiceIpv4Cidr, err := ResolveClusterServiceIpv4Cidr(ctx, describeCluster)
if err != nil {
logging.FromContext(ctx).Fatalf("unable to detect the cluster service ipv4 cidr, %s", err)
} else {
logging.FromContext(ctx).With("cluster-service-ipv4-cidr", clusterServiceIpv4Cidr).Debugf("discovered cluster service ipv4 cidr")
}
// We perform best-effort on resolving the kube-dns IP
kubeDNSIP, err := kubeDNSIP(ctx, operator.KubernetesInterface)
if err != nil {
Expand Down Expand Up @@ -141,6 +148,7 @@ func NewOperator(ctx context.Context, operator *operator.Operator) (context.Cont
operator.Elected(),
kubeDNSIP,
clusterEndpoint,
clusterServiceIpv4Cidr,
)
instanceTypeProvider := instancetype.NewProvider(
*sess.Config.Region,
Expand Down Expand Up @@ -194,14 +202,43 @@ func checkEC2Connectivity(ctx context.Context, api *ec2.EC2) error {
return err
}

func ResolveClusterEndpoint(ctx context.Context, eksAPI eksiface.EKSAPI) (string, error) {
// DescribeClusterClosure returns a function that calls the EKS DescribeCluster
// API for the cluster named in the settings stored on ctx and memoizes the
// first successful response, so that several resolvers can share one API call.
//
// Failed calls are not cached: the next invocation retries the API. The
// returned closure performs no locking, so it must not be invoked
// concurrently.
//
// TODO: Write tests to ensure DescribeCluster is called only once when the
// closure is invoked more than once.
func DescribeClusterClosure(ctx context.Context, eksAPI eksiface.EKSAPI) func() (*eks.DescribeClusterOutput, error) {
	var cachedResult *eks.DescribeClusterOutput
	return func() (*eks.DescribeClusterOutput, error) {
		if cachedResult != nil {
			return cachedResult, nil
		}

		out, err := eksAPI.DescribeCluster(&eks.DescribeClusterInput{
			Name: aws.String(settings.FromContext(ctx).ClusterName),
		})
		if err != nil {
			return nil, fmt.Errorf("failed to resolve cluster information, %w", err)
		}
		// Remember the response; without this assignment the cache check above
		// never hits and every call goes back to the EKS API.
		cachedResult = out
		return cachedResult, nil
	}
}

// ResolveClusterServiceIpv4Cidr returns the IPv4 CIDR block that the cluster
// assigns Kubernetes service IP addresses from.
//
// The value from settings (ClusterServiceIpv4Cidr) wins when it is set.
// Otherwise the CIDR is read from the DescribeCluster response supplied by
// describeCluster. If the response carries no IPv4 service CIDR, an empty
// string is returned without an error instead of dereferencing a nil pointer.
// An error is returned when describeCluster fails or returns no cluster.
//
// TODO: Only fetch the IPv4 CIDR if using IPv4. Is there a way to know if the
// operator is configured for Windows? If so, only fetch it for Windows.
func ResolveClusterServiceIpv4Cidr(ctx context.Context, describeCluster func() (*eks.DescribeClusterOutput, error)) (string, error) {
	if cidrFromSettings := settings.FromContext(ctx).ClusterServiceIpv4Cidr; cidrFromSettings != "" {
		return cidrFromSettings, nil // cluster service ipv4 cidr is explicitly set
	}
	out, err := describeCluster()
	if err != nil {
		return "", fmt.Errorf("failed to resolve cluster service ipv4 cidr, %w", err)
	}
	if out == nil || out.Cluster == nil {
		return "", fmt.Errorf("failed to resolve cluster service ipv4 cidr, DescribeCluster returned no cluster")
	}
	// Every level of the response is a pointer; guard each one so that a
	// cluster without an IPv4 service CIDR does not crash the operator.
	networkConfig := out.Cluster.KubernetesNetworkConfig
	if networkConfig == nil || networkConfig.ServiceIpv4Cidr == nil {
		return "", nil
	}
	return *networkConfig.ServiceIpv4Cidr, nil
}

func ResolveClusterEndpoint(ctx context.Context, describeCluster func() (*eks.DescribeClusterOutput, error)) (string, error) {
clusterEndpointFromSettings := settings.FromContext(ctx).ClusterEndpoint
if clusterEndpointFromSettings != "" {
return clusterEndpointFromSettings, nil // cluster endpoint is explicitly set
}
out, err := eksAPI.DescribeCluster(&eks.DescribeClusterInput{
Name: aws.String(settings.FromContext(ctx).ClusterName),
})
out, err := describeCluster()
if err != nil {
return "", fmt.Errorf("failed to resolve cluster endpoint, %w", err)
}
Expand Down
1 change: 1 addition & 0 deletions pkg/providers/amifamily/bootstrap/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
type Options struct {
ClusterName string
ClusterEndpoint string
ClusterServiceIpv4Cidr string
KubeletConfig *corev1beta1.KubeletConfiguration
Taints []core.Taint `hash:"set"`
Labels map[string]string `hash:"set"`
Expand Down
2 changes: 1 addition & 1 deletion pkg/providers/amifamily/bootstrap/windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ func (w Windows) Script() (string, error) {
}

userData.WriteString("[string]$EKSBootstrapScriptFile = \"$env:ProgramFiles\\Amazon\\EKS\\Start-EKSBootstrap.ps1\"\n")
userData.WriteString(fmt.Sprintf(`& $EKSBootstrapScriptFile -EKSClusterName '%s' -APIServerEndpoint '%s'`, w.ClusterName, w.ClusterEndpoint))
userData.WriteString(fmt.Sprintf(`& $EKSBootstrapScriptFile -EKSClusterName '%s' -APIServerEndpoint '%s' -ServiceCIDR '%s'`, w.ClusterName, w.ClusterEndpoint, w.ClusterServiceIpv4Cidr))
if w.CABundle != nil {
userData.WriteString(fmt.Sprintf(` -Base64ClusterCA '%s'`, *w.CABundle))
}
Expand Down
1 change: 1 addition & 0 deletions pkg/providers/amifamily/resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ type Resolver struct {
type Options struct {
ClusterName string
ClusterEndpoint string
ClusterServiceIpv4Cidr string
AWSENILimitedPodDensity bool
InstanceProfile string
CABundle *string `hash:"ignore"`
Expand Down
15 changes: 8 additions & 7 deletions pkg/providers/amifamily/windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,14 @@ func (w Windows) DefaultAMIs(version string, _ bool) []DefaultAMIOutput {
func (w Windows) UserData(kubeletConfig *corev1beta1.KubeletConfiguration, taints []v1.Taint, labels map[string]string, caBundle *string, _ []*cloudprovider.InstanceType, customUserData *string) bootstrap.Bootstrapper {
return bootstrap.Windows{
Options: bootstrap.Options{
ClusterName: w.Options.ClusterName,
ClusterEndpoint: w.Options.ClusterEndpoint,
KubeletConfig: kubeletConfig,
Taints: taints,
Labels: labels,
CABundle: caBundle,
CustomUserData: customUserData,
ClusterName: w.Options.ClusterName,
ClusterEndpoint: w.Options.ClusterEndpoint,
ClusterServiceIpv4Cidr: w.Options.ClusterServiceIpv4Cidr,
KubeletConfig: kubeletConfig,
Taints: taints,
Labels: labels,
CABundle: caBundle,
CustomUserData: customUserData,
},
}
}
Expand Down
41 changes: 22 additions & 19 deletions pkg/providers/launchtemplate/launchtemplate.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,28 +55,30 @@ const (

type Provider struct {
sync.Mutex
ec2api ec2iface.EC2API
amiFamily *amifamily.Resolver
securityGroupProvider *securitygroup.Provider
subnetProvider *subnet.Provider
cache *cache.Cache
caBundle *string
cm *pretty.ChangeMonitor
KubeDNSIP net.IP
ClusterEndpoint string
ec2api ec2iface.EC2API
amiFamily *amifamily.Resolver
securityGroupProvider *securitygroup.Provider
subnetProvider *subnet.Provider
cache *cache.Cache
caBundle *string
cm *pretty.ChangeMonitor
KubeDNSIP net.IP
ClusterEndpoint string
ClusterServiceIpv4Cidr string
}

func NewProvider(ctx context.Context, cache *cache.Cache, ec2api ec2iface.EC2API, amiFamily *amifamily.Resolver, securityGroupProvider *securitygroup.Provider, subnetProvider *subnet.Provider, caBundle *string, startAsync <-chan struct{}, kubeDNSIP net.IP, clusterEndpoint string) *Provider {
func NewProvider(ctx context.Context, cache *cache.Cache, ec2api ec2iface.EC2API, amiFamily *amifamily.Resolver, securityGroupProvider *securitygroup.Provider, subnetProvider *subnet.Provider, caBundle *string, startAsync <-chan struct{}, kubeDNSIP net.IP, clusterEndpoint string, clusterServiceIpv4Cidr string) *Provider {
l := &Provider{
ec2api: ec2api,
amiFamily: amiFamily,
securityGroupProvider: securityGroupProvider,
subnetProvider: subnetProvider,
cache: cache,
caBundle: caBundle,
cm: pretty.NewChangeMonitor(),
KubeDNSIP: kubeDNSIP,
ClusterEndpoint: clusterEndpoint,
ec2api: ec2api,
amiFamily: amiFamily,
securityGroupProvider: securityGroupProvider,
subnetProvider: subnetProvider,
cache: cache,
caBundle: caBundle,
cm: pretty.NewChangeMonitor(),
KubeDNSIP: kubeDNSIP,
ClusterEndpoint: clusterEndpoint,
ClusterServiceIpv4Cidr: clusterServiceIpv4Cidr,
}
l.cache.OnEvicted(l.cachedEvictedFunc(ctx))
go func() {
Expand Down Expand Up @@ -162,6 +164,7 @@ func (p *Provider) createAMIOptions(ctx context.Context, nodeClass *v1beta1.Node
options := &amifamily.Options{
ClusterName: settings.FromContext(ctx).ClusterName,
ClusterEndpoint: p.ClusterEndpoint,
ClusterServiceIpv4Cidr: p.ClusterServiceIpv4Cidr,
AWSENILimitedPodDensity: settings.FromContext(ctx).EnableENILimitedPodDensity,
InstanceProfile: instanceProfile,
SecurityGroups: lo.Map(securityGroups, func(s *ec2.SecurityGroup, _ int) v1beta1.SecurityGroup {
Expand Down
1 change: 1 addition & 0 deletions pkg/test/settings.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
type SettingOptions struct {
ClusterName *string
ClusterEndpoint *string
ClusterServiceIpv4Cidr *string
DefaultInstanceProfile *string
EnablePodENI *bool
EnableENILimitedPodDensity *bool
Expand Down
7 changes: 0 additions & 7 deletions website/content/en/preview/concepts/node-templates.md
Original file line number Diff line number Diff line change
Expand Up @@ -585,13 +585,6 @@ Write-Host "Running custom user data script"
</powershell>
```
{{% alert title="Windows Support Notice" color="warning" %}}
Currently, Karpenter does not specify `-ServiceCIDR` to [EKS Windows AMI Bootstrap script](https://docs.aws.amazon.com/eks/latest/userguide/eks-optimized-windows-ami.html#bootstrap-script-configuration-parameters).
Windows worker nodes will use `172.20.0.0/16` or `10.100.0.0/16` for Kubernetes service IP address ranges based on the IP address of the primary interface.
The effective ServiceCIDR can be verified at `$env:ProgramData\Amazon\EKS\cni\config\vpc-bridge.conf` on the worker node.
Support for the Windows ServiceCIDR argument can be tracked in a [Karpenter Github Issue](https://github.com/aws/karpenter/issues/4088). Currently, if the effective ServiceCIDR is incorrect for your windows worker nodes, you can add the following userData as a workaround.
```yaml
spec:
userData: |
Expand Down
2 changes: 2 additions & 0 deletions website/content/en/preview/concepts/settings.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ data:
aws.clusterName: karpenter-cluster
# The external kubernetes cluster endpoint for new nodes to connect with. If not specified, will discover the cluster endpoint using DescribeCluster API
aws.clusterEndpoint: https://00000000000000000000000000000000.gr7.us-west-2.eks.amazonaws.com
# The IPv4 CIDR block from which Kubernetes service IP addresses are assigned. If not specified, Karpenter will discover the cluster service IPv4 CIDR using the DescribeCluster API
aws.clusterServiceIpv4Cidr: 10.100.0.0/16
# The default instance profile to use when provisioning nodes
aws.defaultInstanceProfile: karpenter-instance-profile
# If true, then instances that support pod ENI will report a vpc.amazonaws.com/pod-eni resource
Expand Down
5 changes: 4 additions & 1 deletion website/content/en/preview/upgrade-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,10 @@ We release a snapshot release for every commit that gets merged into the main re
Snapshot releases are suitable for testing, and troubleshooting but users should exercise great care if they decide to use them in production environments.
Snapshot releases are tagged with the git commit hash prefixed by the Karpenter major version. For example `v0-fc17bfc89ebb30a3b102a86012b3e3992ec08adf`. For more detailed examples on how to use snapshot releases look under "Usage" in [Karpenter Helm Chart](https://gallery.ecr.aws/karpenter/karpenter).

## Released Upgrade Notes
## Upgrading to v0.x.x

### Upgrading to v0.30.0+
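* If you run Windows nodes and previously set `-ServiceCIDR` through custom `userData` as a workaround, you can now remove it: Karpenter passes `-ServiceCIDR` to the EKS Windows bootstrap script, using `aws.clusterServiceIpv4Cidr` when it is set and otherwise the value discovered through the DescribeCluster API.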

### Upgrading to v0.30.0+

Expand Down

0 comments on commit 02ee419

Please sign in to comment.