diff --git a/src/cloud-api-adaptor/docs/SecureComms.md b/src/cloud-api-adaptor/docs/SecureComms.md index d73e2c4a76..46ac8c4176 100644 --- a/src/cloud-api-adaptor/docs/SecureComms.md +++ b/src/cloud-api-adaptor/docs/SecureComms.md @@ -32,8 +32,8 @@ See [Secure Comms Architecture Slides](./SecureComms.pdf) for more details. ### Deploy CAA Use any of the option for installing CAA depending on the cloud driver used. -### Deploy KBS-Operator -Deploy KBS-Operator by following instructions at [KBS Operator Getting Started](https://github.com/confidential-containers/kbs-operator?tab=readme-ov-file#getting-started). +### Deploy Trustee-Operator +Deploy Trustee-Operator by following instructions at [Trustee Operator Getting Started](https://github.com/confidential-containers/trustee-operator?tab=readme-ov-file#getting-started). Make sure to uncomment the secret generation as recommended for both public and private key (`kbs-auth-public-key` and `kbs-client` secrets). @@ -46,15 +46,10 @@ kubectl get secret kbs-client -n kbs-operator-system -o json|jq --arg ns "confid For a testing environment, you may need to change the policy of the KBS and AS using the KBS Client to allow all or fit your own policy. 
One way to do that is: ```sh -kubectl -n kbs-operator-system exec deployment/kbs-deployment --container as -it -- /bin/bash - apt update - apt install vim - vim /opt/confidential-containers/attestation-service/opa/default.rego // replace to `default allow = true` - -kubectl -n kbs-operator-system exec deployment/kbs-deployment --container kbs -it -- /bin/bash - apt update - apt install vim - vim /opa/confidential-containers/kbs/policy.rego // replace to `default allow = true` +kubectl -n kbs-operator-system exec deployment/trustee-deployment --container as -it -- /bin/bash + sed -i.bak 's/^default allow = false/default allow = true/' /opt/confidential-containers/attestation-service/opa/default.rego + +kubectl -n kbs-operator-system get cm resource-policy -o yaml | sed "s/default allow = false/default allow = true/"|kubectl apply -f - ``` ### Build a podvm that enforces Secure-Comms diff --git a/src/cloud-api-adaptor/pkg/adaptor/cloud/cloud.go b/src/cloud-api-adaptor/pkg/adaptor/cloud/cloud.go index 6bbb7f7483..ffed48e961 100644 --- a/src/cloud-api-adaptor/pkg/adaptor/cloud/cloud.go +++ b/src/cloud-api-adaptor/pkg/adaptor/cloud/cloud.go @@ -43,7 +43,6 @@ const ( var logger = log.New(log.Writer(), "[adaptor/cloud] ", log.LstdFlags|log.Lmsgprefix) func (s *cloudService) addSandbox(sid sandboxID, sandbox *sandbox) error { - s.mutex.Lock() defer s.mutex.Unlock() @@ -57,7 +56,6 @@ func (s *cloudService) addSandbox(sid sandboxID, sandbox *sandbox) error { } func (s *cloudService) getSandbox(sid sandboxID) (*sandbox, error) { - if sid == "" { return nil, errors.New("empty sandbox id") } @@ -122,7 +120,6 @@ func (s *cloudService) ConfigVerifier() error { } func (s *cloudService) setInstance(sid sandboxID, instanceID, instanceName string) error { - s.mutex.Lock() defer s.mutex.Unlock() @@ -140,7 +137,6 @@ func (s *cloudService) setInstance(sid sandboxID, instanceID, instanceName strin } func (s *cloudService) GetInstanceID(ctx context.Context, podNamespace, podName string, 
wait bool) (string, error) { - s.mutex.Lock() defer s.mutex.Unlock() @@ -170,7 +166,6 @@ func (s *cloudService) Version(ctx context.Context, req *pb.VersionRequest) (*pb } func (s *cloudService) CreateVM(ctx context.Context, req *pb.CreateVMRequest) (res *pb.CreateVMResponse, err error) { - defer func() { if err != nil { logger.Print(err) @@ -267,7 +262,7 @@ func (s *cloudService) CreateVM(ctx context.Context, req *pb.CreateVMRequest) (r // Store daemon.json in worker node for debugging daemonJSONPath := filepath.Join(podDir, "daemon.json") - if err := os.WriteFile(daemonJSONPath, daemonJSON, 0666); err != nil { + if err := os.WriteFile(daemonJSONPath, daemonJSON, 0o666); err != nil { return nil, fmt.Errorf("storing %s: %w", daemonJSONPath, err) } logger.Printf("stored %s", daemonJSONPath) @@ -332,10 +327,9 @@ func (s *cloudService) CreateVM(ctx context.Context, req *pb.CreateVMRequest) (r } func (s *cloudService) StartVM(ctx context.Context, req *pb.StartVMRequest) (res *pb.StartVMResponse, err error) { - defer func() { if err != nil { - logger.Print(err) + logger.Printf("error starting instance: %v", err) } }() @@ -353,7 +347,7 @@ func (s *cloudService) StartVM(ctx context.Context, req *pb.StartVMRequest) (res if s.ppService != nil { if err := s.ppService.OwnPeerPod(sandbox.podName, sandbox.podNamespace, instance.ID); err != nil { - logger.Printf("failed to create PeerPod: %s", err.Error()) + logger.Printf("failed to create PeerPod: %v", err) } } @@ -373,7 +367,7 @@ func (s *cloudService) StartVM(ctx context.Context, req *pb.StartVMRequest) (res } if err := ci.Start(); err != nil { - return nil, fmt.Errorf("failed SshClientInstance.Start: %s", err) + return nil, fmt.Errorf("failed SshClientInstance.Start: %w", err) } // Set agentProxy @@ -406,19 +400,23 @@ func (s *cloudService) StartVM(ctx context.Context, req *pb.StartVMRequest) (res select { case <-ctx.Done(): - _ = sandbox.agentProxy.Shutdown() + // Start VM operation interrupted (calling context canceled) + 
logger.Printf("Error: start instance interrupted (%v). Cleaning up...", ctx.Err()) + if err := sandbox.agentProxy.Shutdown(); err != nil { + logger.Printf("stopping agent proxy: %v", err) + } return nil, ctx.Err() case err := <-errCh: return nil, err case <-sandbox.agentProxy.Ready(): } - logger.Printf("agent proxy is ready") + logger.Print("agent proxy is ready") + return &pb.StartVMResponse{}, nil } func (s *cloudService) StopVM(ctx context.Context, req *pb.StopVMRequest) (*pb.StopVMResponse, error) { - sid := sandboxID(req.Id) sandbox, err := s.getSandbox(sid) diff --git a/src/cloud-api-adaptor/pkg/adaptor/proxy/proxy.go b/src/cloud-api-adaptor/pkg/adaptor/proxy/proxy.go index 29cc1c2af5..366477c292 100644 --- a/src/cloud-api-adaptor/pkg/adaptor/proxy/proxy.go +++ b/src/cloud-api-adaptor/pkg/adaptor/proxy/proxy.go @@ -54,7 +54,6 @@ type agentProxy struct { } func NewAgentProxy(serverName, socketPath, pauseImage string, tlsConfig *tlsutil.TLSConfig, caService tlsutil.CAService, proxyTimeout time.Duration) AgentProxy { - return &agentProxy{ serverName: serverName, socketPath: socketPath, @@ -68,7 +67,6 @@ func NewAgentProxy(serverName, socketPath, pauseImage string, tlsConfig *tlsutil } func (p *agentProxy) dial(ctx context.Context, address string) (net.Conn, error) { - var conn net.Conn var dialer interface { @@ -109,14 +107,15 @@ func (p *agentProxy) dial(ctx context.Context, address string) (net.Conn, error) err := retry.Do( func() error { var err error - conn, err = dialer.DialContext(ctx, "tcp", address) + if conn, err = dialer.DialContext(ctx, "tcp", address); err != nil { + logger.Printf("Retrying agent proxy connection to %s...", address) + } return err }, retry.Attempts(0), retry.Context(ctx), retry.MaxDelay(5*time.Second), ) - if err != nil { err = fmt.Errorf("failed to establish agent proxy connection to %s: %w", address, err) logger.Print(err) @@ -128,7 +127,6 @@ func (p *agentProxy) dial(ctx context.Context, address string) (net.Conn, error) } func (p 
*agentProxy) Start(ctx context.Context, serverURL *url.URL) error { - if err := os.MkdirAll(filepath.Dir(p.socketPath), os.ModePerm); err != nil { return fmt.Errorf("failed to create parent directories for socket: %s", p.socketPath) } @@ -136,7 +134,7 @@ func (p *agentProxy) Start(ctx context.Context, serverURL *url.URL) error { return fmt.Errorf("failed to remove %s: %w", p.socketPath, err) } - logger.Printf("Listening on %s\n", p.socketPath) + logger.Printf("Listening on %s", p.socketPath) listener, err := net.Listen("unix", p.socketPath) if err != nil { @@ -203,7 +201,7 @@ func (p *agentProxy) Ready() chan struct{} { } func (p *agentProxy) Shutdown() error { - logger.Printf("shutting down socket forwarder") + logger.Print("shutting down socket forwarder") p.stopOnce.Do(func() { close(p.stopCh) }) @@ -215,7 +213,6 @@ func (p *agentProxy) CAService() tlsutil.CAService { } func (p *agentProxy) ClientCA() (certPEM []byte) { - if p.tlsConfig == nil { return nil } diff --git a/src/cloud-providers/ibmcloud/provider.go b/src/cloud-providers/ibmcloud/provider.go index dca051d266..73fed0f1ee 100644 --- a/src/cloud-providers/ibmcloud/provider.go +++ b/src/cloud-providers/ibmcloud/provider.go @@ -90,6 +90,9 @@ func NewProvider(config *Config) (provider.Provider, error) { // If this label exists assume we are in an IKS cluster primarySubnetID, iks := nodeLabels["ibm-provider.kubernetes.io/subnet-id"] + if !iks { + primarySubnetID, iks = nodeLabels["ibm-cloud.kubernetes.io/subnet-id"] + } if iks { if config.ZoneName == "" { config.ZoneName = nodeLabels["topology.kubernetes.io/zone"] @@ -249,7 +252,7 @@ func (p *ibmcloudVPCProvider) CreateInstance(ctx context.Context, podName, sandb return nil, err } - imageID, err := p.selectImage(ctx, spec) + imageID, err := p.selectImage(ctx, spec, instanceProfile) if err != nil { return nil, err } @@ -321,11 +324,11 @@ func (p *ibmcloudVPCProvider) updateInstanceProfileSpecList() error { // Iterate over the instance types and populate the 
instanceProfileSpecList for _, profileType := range instanceProfiles { - vcpus, memory, err := p.getProfileNameInformation(profileType) + vcpus, memory, arch, err := p.getProfileNameInformation(profileType) if err != nil { return err } - instanceProfileSpecList = append(instanceProfileSpecList, provider.InstanceTypeSpec{InstanceType: profileType, VCPUs: vcpus, Memory: memory}) + instanceProfileSpecList = append(instanceProfileSpecList, provider.InstanceTypeSpec{InstanceType: profileType, VCPUs: vcpus, Memory: memory, Arch: arch}) } // Sort the instanceProfileSpecList by Memory and update the serviceConfig @@ -334,8 +337,8 @@ func (p *ibmcloudVPCProvider) updateInstanceProfileSpecList() error { return nil } -// Add a method to retrieve cpu, memory, and storage from the profile name -func (p *ibmcloudVPCProvider) getProfileNameInformation(profileName string) (vcpu int64, memory int64, err error) { +// Add a method to retrieve cpu, memory, and arch from the profile name +func (p *ibmcloudVPCProvider) getProfileNameInformation(profileName string) (vcpu int64, memory int64, arch string, err error) { // Get the profile information from the instance type using IBMCloud API result, details, err := p.vpc.GetInstanceProfileWithContext(context.Background(), @@ -345,19 +348,31 @@ func (p *ibmcloudVPCProvider) getProfileNameInformation(profileName string) (vcp ) if err != nil { - return 0, 0, fmt.Errorf("instance profile name %s not found, due to %w\nFurther Details:\n%v", profileName, err, details) + return 0, 0, "", fmt.Errorf("instance profile name %s not found, due to %w\nFurther Details:\n%v", profileName, err, details) } vcpu = int64(*result.VcpuCount.(*vpcv1.InstanceProfileVcpu).Value) // Value returned is in GiB, convert to MiB memory = int64(*result.Memory.(*vpcv1.InstanceProfileMemory).Value) * 1024 - return vcpu, memory, nil + arch = string(*result.VcpuArchitecture.Value) + return vcpu, memory, arch, nil } // Select Image from list, invalid image IDs should have 
already been removed -func (p *ibmcloudVPCProvider) selectImage(ctx context.Context, spec provider.InstanceTypeSpec) (string, error) { +func (p *ibmcloudVPCProvider) selectImage(ctx context.Context, spec provider.InstanceTypeSpec, selectedInstanceProfile string) (string, error) { + + specArch := spec.Arch + if specArch == "" { + for _, instanceProfileSpec := range p.serviceConfig.InstanceProfileSpecList { + if instanceProfileSpec.InstanceType == selectedInstanceProfile { + specArch = instanceProfileSpec.Arch + break + } + } + } + for _, image := range p.serviceConfig.Images { - if spec.Arch != "" && image.Arch != spec.Arch { + if specArch != "" && image.Arch != specArch { continue } logger.Printf("selected image with ID <%s> out of %d images", image.ID, len(p.serviceConfig.Images)) diff --git a/src/cloud-providers/ibmcloud/provider_test.go b/src/cloud-providers/ibmcloud/provider_test.go index 456e27be6c..ade8f9c0be 100644 --- a/src/cloud-providers/ibmcloud/provider_test.go +++ b/src/cloud-providers/ibmcloud/provider_test.go @@ -93,7 +93,8 @@ func (v *mockVPC) GetInstanceProfileWithContext(context context.Context, options vcpu := int64(2) mem := int64(8) - return &vpcv1.InstanceProfile{VcpuCount: &vpcv1.InstanceProfileVcpu{Value: &vcpu}, Memory: &vpcv1.InstanceProfileMemory{Value: &mem}}, nil, nil + arch := "amd64" + return &vpcv1.InstanceProfile{VcpuCount: &vpcv1.InstanceProfileVcpu{Value: &vcpu}, Memory: &vpcv1.InstanceProfileMemory{Value: &mem}, VcpuArchitecture: &vpcv1.InstanceProfileVcpuArchitecture{Value: &arch}}, nil, nil } func (v *mockVPC) GetImageWithContext(context context.Context, options *vpcv1.GetImageOptions) (*vpcv1.Image, *core.DetailedResponse, error) { @@ -184,6 +185,7 @@ func TestGetInstanceTypeInformation(t *testing.T) { wantVcpu int64 wantMemory int64 wantErr bool + wantArch string }{ // Test getting instance type information for a valid instance type { @@ -197,6 +199,7 @@ func TestGetInstanceTypeInformation(t *testing.T) { }, wantVcpu: 2, 
wantMemory: 8192, + wantArch: "amd64", // Test should not return an error wantErr: false, }, @@ -212,13 +215,14 @@ func TestGetInstanceTypeInformation(t *testing.T) { }, wantVcpu: 0, wantMemory: 0, + wantArch: "", // Test should return an error wantErr: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - gotVcpu, gotMemory, err := tt.provider.getProfileNameInformation(tt.args.instanceType) + gotVcpu, gotMemory, gotArch, err := tt.provider.getProfileNameInformation(tt.args.instanceType) if (err != nil) != tt.wantErr { t.Errorf("ibmcloudProvider.getProfileNameInformation() error = %v, wantErr %v", err, tt.wantErr) return @@ -229,6 +233,9 @@ func TestGetInstanceTypeInformation(t *testing.T) { if gotMemory != tt.wantMemory { t.Errorf("ibmcloudProvider.getProfileNameInformation() gotMemory = %v, want %v", gotMemory, tt.wantMemory) } + if gotArch != tt.wantArch { + t.Errorf("ibmcloudProvider.getProfileNameInformation() gotArch = %v, want %v", gotArch, tt.wantArch) + } }) } } @@ -254,6 +261,7 @@ func TestGetImageDetails(t *testing.T) { expectListErr bool expectSelectErr bool wantID string + profileInstance string }{ // Test selecting an image from a valid image list { @@ -270,6 +278,7 @@ func TestGetImageDetails(t *testing.T) { expectListErr: false, expectSelectErr: false, wantID: "valid-id-1", + profileInstance: "bz2-2x8", }, // Test selecting an image from an empty image list { @@ -286,6 +295,7 @@ func TestGetImageDetails(t *testing.T) { expectListErr: true, expectSelectErr: false, wantID: "", + profileInstance: "bz2-2x8", }, // Test selecting an image from an image list with no valid ids { @@ -302,6 +312,7 @@ func TestGetImageDetails(t *testing.T) { expectListErr: true, expectSelectErr: false, wantID: "", + profileInstance: "bz2-2x8", }, // Test selecting an image from an image list with no valid archs { @@ -318,6 +329,38 @@ func TestGetImageDetails(t *testing.T) { expectListErr: false, expectSelectErr: true, wantID: "", + profileInstance: 
"bx2-2x8", + }, + { + name: "selectImageForValidInstanceArch", + provider: &ibmcloudVPCProvider{ + vpc: &mockVPC{}, + serviceConfig: &Config{ + Images: validImageList, + InstanceProfileSpecList: []provider.InstanceTypeSpec{{InstanceType: "bz2-2x8", Arch: "s390x"}}, + }, + }, + instanceSpec: provider.InstanceTypeSpec{}, + expectListErr: false, + expectSelectErr: false, + wantID: "valid-id-1", + profileInstance: "bz2-2x8", + }, + // Test selecting an image from an image list with no valid archs because of profile instance arch difference + { + name: "selectImageForInvalidInstanceArch", + provider: &ibmcloudVPCProvider{ + vpc: &mockVPC{}, + serviceConfig: &Config{ + Images: validImageList, + InstanceProfileSpecList: []provider.InstanceTypeSpec{{InstanceType: "bx2-2x8", Arch: "amd64"}}, + }, + }, + instanceSpec: provider.InstanceTypeSpec{}, + expectListErr: false, + expectSelectErr: true, + wantID: "", + profileInstance: "bx2-2x8", }, } for _, tt := range tests { @@ -329,7 +372,7 @@ func TestGetImageDetails(t *testing.T) { } return } - id, err := tt.provider.selectImage(context.Background(), tt.instanceSpec) + id, err := tt.provider.selectImage(context.Background(), tt.instanceSpec, tt.profileInstance) if tt.expectSelectErr { if err == nil { t.Errorf("ibmcloudProvider.selectImage() error = %v, expectSelectErr %v", err, tt.expectSelectErr)