diff --git a/.vscode/launch.json b/.vscode/launch.json index 2552aa34..89a08cdd 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -56,7 +56,7 @@ "-ginkgo.failFast", ], "env": { - "KUBEBUILDER_ASSETS": "${workspaceRoot}/bin/k8s/1.25.0-darwin-amd64", + "KUBEBUILDER_ASSETS": "${workspaceRoot}/bin/k8s/1.28.0-darwin-arm64", "GOMEGA_DEFAULT_EVENTUALLY_TIMEOUT": "10m", "GOMEGA_DEFAULT_EVENTUALLY_POLLING_INTERVAL": "500ms", }, diff --git a/cmd/main.go b/cmd/main.go index 83139428..e9325e52 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -179,6 +179,7 @@ func (*defaultController) SetOptions(opts *ctrl.Options) { namespaceCache := make(map[string]cache.Config) namespaceCache[corev1.NamespaceDefault] = cache.Config{} namespaceCache[nnfv1alpha1.DataMovementNamespace] = cache.Config{} + namespaceCache[nnfv1alpha1.DataMovementProfileNamespace] = cache.Config{} opts.Cache = cache.Options{DefaultNamespaces: namespaceCache} } @@ -205,6 +206,7 @@ func (*nodeController) SetOptions(opts *ctrl.Options) { namespaceCache := make(map[string]cache.Config) namespaceCache[corev1.NamespaceDefault] = cache.Config{} namespaceCache[nnfv1alpha1.DataMovementNamespace] = cache.Config{} + namespaceCache[nnfv1alpha1.DataMovementProfileNamespace] = cache.Config{} namespaceCache[os.Getenv("NNF_NODE_NAME")] = cache.Config{} opts.Cache = cache.Options{DefaultNamespaces: namespaceCache} } diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml index a662aad0..59ab6298 100644 --- a/config/default/kustomization.yaml +++ b/config/default/kustomization.yaml @@ -15,7 +15,6 @@ namePrefix: nnf-dm- resources: - ../rbac - ../manager - - ../dm_config # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in # crd/kustomization.yaml # - ../webhook diff --git a/config/dm_config/kustomization.yaml b/config/dm_config/kustomization.yaml deleted file mode 100644 index ef7be497..00000000 --- a/config/dm_config/kustomization.yaml +++ /dev/null @@ -1,10 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization - -generatorOptions: - disableNameSuffixHash: true - -configMapGenerator: -- files: - - nnf-dm-config.yaml - name: config \ No newline at end of file diff --git a/config/dm_config/nnf-dm-config.yaml b/config/dm_config/nnf-dm-config.yaml deleted file mode 100644 index ab84e2b0..00000000 --- a/config/dm_config/nnf-dm-config.yaml +++ /dev/null @@ -1,44 +0,0 @@ -# Each profile is capable of providing different configurations for data movement. -profiles: - # Default profile that is used for all data movement activity. - default: - # The number of slots specified in the MPI hostfile. A value less than 1 disables the use - # of slots in the hostfile. - slots: 8 - - # The number of max_slots specified in the MPI hostfile. A value less than 1 disables the use - # of max_slots in the hostfile. - maxSlots: 0 - - # The full command to execute data movement. $VARS are replaced by the nnf software. Available - # $VARS: - # HOSTFILE: hostfile that is created and used for mpirun. Contains a list of hosts and the - # slots/max_slots for each host. This hostfile is created at `/tmp//hostfile` - # UID: User ID that is inherited from the Workflow - # GID: Group ID that is inherited from the Workflow - # SRC: source for the data movement - # DEST destination for the data movement - # default: command: ulimit -n 2048 && mpirun --allow-run-as-root --hostfile $HOSTFILE dcp --progress 1 --uid $UID --gid $GID $SRC $DEST - command: ulimit -n 2048 && mpirun --allow-run-as-root --hostfile $HOSTFILE dcp --progress 1 --uid $UID --gid $GID $SRC $DEST - - # If true, enable the command's stdout to be saved in the log when the command completes - # successfully. On failure, the output is always logged. - logStdout: false - - # Similar to logStdout, store the command's stdout in Status.Message when the command - # completes successfully. On failure, the output is always stored. - storeStdout: false - - # Same as default profile but tell dcp not to copy xattrs - no-xattr: - slots: 8 - maxSlots: 0 - command: ulimit -n 2048 && mpirun --allow-run-as-root --hostfile $HOSTFILE dcp --progress 1 --xattrs none --uid $UID --gid $GID $SRC $DEST - logStdout: false - storeStdout: false - -# NnfDataMovement resources have the ability to collect and store the progress percentage and the -# last few lines of output in the CommandStatus field. This number is used for the interval to collect -# the progress data. `dcp --progress N` must be included in the data movement command in order for -# progress to be collected. A value less than 1 disables this functionality. -progressIntervalSeconds: 5 diff --git a/config/rbac/daemon_role.yaml b/config/rbac/daemon_role.yaml index 9d24b176..c3cabb0f 100644 --- a/config/rbac/daemon_role.yaml +++ b/config/rbac/daemon_role.yaml @@ -53,6 +53,14 @@ rules: - patch - update - watch +- apiGroups: + - nnf.cray.hpe.com + resources: + - nnfdatamovementprofiles + verbs: + - get + - list + - watch - apiGroups: - nnf.cray.hpe.com resources: diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index e1c525ce..118aa94a 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -136,6 +136,14 @@ rules: - get - patch - update +- apiGroups: + - nnf.cray.hpe.com + resources: + - nnfdatamovementprofiles + verbs: + - get + - list + - watch - apiGroups: - nnf.cray.hpe.com resources: diff --git a/daemons/compute/server/servers/server_default.go b/daemons/compute/server/servers/server_default.go index 14be41d9..0f2134df 100644 --- a/daemons/compute/server/servers/server_default.go +++ b/daemons/compute/server/servers/server_default.go @@ -96,6 +96,7 @@ type defaultServer struct { // Ensure permissions are granted to access the system configuration; this is done so the NNF // Node Name can be found given a Node Name. //+kubebuilder:rbac:groups=dataworkflowservices.github.io,resources=systemconfigurations,verbs=get;list;watch +//+kubebuilder:rbac:groups=nnf.cray.hpe.com,resources=nnfdatamovementprofiles,verbs=get;list;watch func CreateDefaultServer(opts *ServerOptions) (*defaultServer, error) { @@ -237,7 +238,7 @@ func (s *defaultServer) StartManager() error { } // Setup two managers for watching the individual data movement type resources. They behave -// similarily, performing a reconcile only for updates to this node +// similarly, performing a reconcile only for updates to this node func (s *defaultServer) setupWithManager(mgr ctrl.Manager) error { p := predicate.Funcs{ @@ -355,6 +356,20 @@ func (s *defaultServer) Create(ctx context.Context, req *pb.DataMovementCreateRe }, nil } + // Dm Profile - no pinned profiles here since copy_offload could use any profile + profile, err := s.getProfile(ctx, req.Profile) + if err != nil { + return &pb.DataMovementCreateResponse{ + Status: pb.DataMovementCreateResponse_FAILED, + Message: "Error finding profile: " + err.Error(), + }, nil + } + dm.Spec.ProfileReference = corev1.ObjectReference{ + Kind: reflect.TypeOf(nnfv1alpha1.NnfDataMovementProfile{}).Name(), + Name: profile.Name, + Namespace: profile.Namespace, + } + dm.Spec.UserId = userId dm.Spec.GroupId = groupId @@ -398,6 +413,47 @@ func setUserConfig(req *pb.DataMovementCreateRequest, dm *nnfv1alpha1.NnfDataMov } } +func (s *defaultServer) getProfile(ctx context.Context, profileName string) (*nnfv1alpha1.NnfDataMovementProfile, error) { + ns := "nnf-system" + + // If a profile is named then verify that it exists. Otherwise, verify that a default profile + // can be found. + if len(profileName) == 0 { + NnfDataMovementProfiles := &nnfv1alpha1.NnfDataMovementProfileList{} + if err := s.client.List(ctx, NnfDataMovementProfiles, &client.ListOptions{Namespace: ns}); err != nil { + return nil, err + } + profilesFound := make([]string, 0, len(NnfDataMovementProfiles.Items)) + for _, profile := range NnfDataMovementProfiles.Items { + if profile.Data.Default { + objkey := client.ObjectKeyFromObject(&profile) + profilesFound = append(profilesFound, objkey.Name) + } + } + // Require that there be one and only one default. + if len(profilesFound) == 0 { + return nil, fmt.Errorf("unable to find a default NnfDataMovementProfile to use") + } else if len(profilesFound) > 1 { + return nil, fmt.Errorf("more than one default NnfDataMovementProfile found; unable to pick one: %v", profilesFound) + } + profileName = profilesFound[0] + } + + profile := &nnfv1alpha1.NnfDataMovementProfile{ + ObjectMeta: metav1.ObjectMeta{ + Name: profileName, + Namespace: ns, + }, + } + + err := s.client.Get(ctx, client.ObjectKeyFromObject(profile), profile) + if err != nil { + return nil, fmt.Errorf("unable to retrieve NnfDataMovementProfile: %s", profileName) + } + + return profile, nil +} + func getDirectiveIndexFromClientMount(object *dwsv1alpha2.ClientMount) (string, error) { // Find the DW index for our work. labels := object.GetLabels() @@ -459,7 +515,6 @@ func (s *defaultServer) createNnfDataMovement(ctx context.Context, req *pb.DataM Name: lustrefs.Name, }, }, - Profile: req.Profile, }, } @@ -489,7 +544,6 @@ func (s *defaultServer) createNnfNodeDataMovement(ctx context.Context, req *pb.D Destination: &nnfv1alpha1.NnfDataMovementSpecSourceDestination{ Path: req.Destination, }, - Profile: req.Profile, }, } diff --git a/go.mod b/go.mod index 7b0ec4b3..33599af5 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,7 @@ go 1.21 require ( github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20240709160956-40dcbac0aadf - github.com/NearNodeFlash/nnf-sos v0.0.1-0.20240729130730-8f01fd2116ea + github.com/NearNodeFlash/nnf-sos v0.0.1-0.20240801190336-58f2d99dd6cb github.com/onsi/ginkgo/v2 v2.17.1 github.com/onsi/gomega v1.32.0 github.com/prometheus/client_golang v1.16.0 @@ -69,11 +69,11 @@ require ( k8s.io/utils v0.0.0-20230505201702-9f6742963106 // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect - sigs.k8s.io/yaml v1.3.0 + sigs.k8s.io/yaml v1.3.0 // indirect ) require ( - github.com/DataWorkflowServices/dws v0.0.1-0.20240726140248-bd9f85356c73 + github.com/DataWorkflowServices/dws v0.0.1-0.20240801173757-1fb2188d84b9 go.openly.dev/pointy v1.3.0 ) diff --git a/go.sum b/go.sum index 23a60758..e6ae3187 100644 --- a/go.sum +++ b/go.sum @@ -1,11 +1,11 @@ -github.com/DataWorkflowServices/dws v0.0.1-0.20240726140248-bd9f85356c73 h1:bq1AaHhdaeuaq+ibNk/UIMExXuLicfQCir25xKHnEck= -github.com/DataWorkflowServices/dws v0.0.1-0.20240726140248-bd9f85356c73/go.mod h1:6MrEEHISskyooSKcKU6R3mFqH6Yh6KzWgajhcw2s+nM= +github.com/DataWorkflowServices/dws v0.0.1-0.20240801173757-1fb2188d84b9 h1:Jf5AxKXNxNtDC4YBbnJfpKWr4DicrEJ0aCtIgarIIqU= +github.com/DataWorkflowServices/dws v0.0.1-0.20240801173757-1fb2188d84b9/go.mod h1:6MrEEHISskyooSKcKU6R3mFqH6Yh6KzWgajhcw2s+nM= github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20240709160956-40dcbac0aadf h1:ArBI1LR+BBZ9lF+Aohv49RhTpmRqIXLz4L/h45qQT4k= github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20240709160956-40dcbac0aadf/go.mod h1:N5X1obpl0mBI0VoCJdQhv7cFXOC6g3VlXj712qWj0JE= github.com/NearNodeFlash/nnf-ec v0.0.1-0.20240708183336-34d1295977f5 h1:7eT5mOVSNwtacIcGhzMAsi8EVbeS4zk9QwThG2f0GHE= github.com/NearNodeFlash/nnf-ec v0.0.1-0.20240708183336-34d1295977f5/go.mod h1:oxdwMqfttOF9dabJhqrWlirCnMk8/8eyLMwl+hducjk= -github.com/NearNodeFlash/nnf-sos v0.0.1-0.20240729130730-8f01fd2116ea h1:WZCoDgPMs8+3jle4iJnx4EaPlBLd6pr2ks+o8U4yy10= -github.com/NearNodeFlash/nnf-sos v0.0.1-0.20240729130730-8f01fd2116ea/go.mod h1:WBbrMgTEGvnFbbuQglDBoLl/KvcsC3F/CKY9jq5hWgU= +github.com/NearNodeFlash/nnf-sos v0.0.1-0.20240801190336-58f2d99dd6cb h1:tHAn3aqU61vQ+FCo+6uLZrhamrfwq4qV5EBlBlqDOpk= +github.com/NearNodeFlash/nnf-sos v0.0.1-0.20240801190336-58f2d99dd6cb/go.mod h1:uJvjwkSLrPk4A/13U/E4vnO8R+utDE/lGclsKVp/88s= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/benbjohnson/clock v1.3.0 h1:ip6w0uFQkncKQ979AypyG0ER7mqUSBdKLOgAle/AT8A= github.com/benbjohnson/clock v1.3.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= diff --git a/internal/controller/datamovement_controller.go b/internal/controller/datamovement_controller.go index bf5a7e23..6396a60e 100644 --- a/internal/controller/datamovement_controller.go +++ b/internal/controller/datamovement_controller.go @@ -50,7 +50,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/predicate" - "sigs.k8s.io/yaml" dwsv1alpha2 "github.com/DataWorkflowServices/dws/api/v1alpha2" "github.com/NearNodeFlash/nnf-dm/internal/controller/metrics" @@ -61,13 +60,6 @@ import ( const ( finalizer = "dm.cray.hpe.com" - - // DM ConfigMap Info - configMapName = "nnf-dm-config" - configMapNamespace = nnfv1alpha1.DataMovementNamespace - - // DM ConfigMap Data Keys - configMapKeyData = "nnf-dm-config.yaml" ) // Regex to scrape the progress output of the `dcp` command. Example output: @@ -129,21 +121,6 @@ type dataMovementCancelContext struct { cancel context.CancelFunc } -// Configuration that matches the nnf-dm-config ConfigMap -type dmConfig struct { - Profiles map[string]dmConfigProfile `yaml:"profiles"` - ProgressIntervalSeconds int `yaml:"progressIntervalSeconds,omitempty"` -} - -// Each profile can have different settings -type dmConfigProfile struct { - Slots int `yaml:"slots,omitempty"` - MaxSlots int `yaml:"maxSlots,omitempty"` - Command string `yaml:"command"` - LogStdout bool `yaml:"logStdout"` - StoreStdout bool `yaml:"storeStdout"` -} - // Invalid error is a non-recoverable error type that implies the Data Movement resource is invalid type invalidError struct { err error @@ -161,6 +138,7 @@ func (i *invalidError) Unwrap() error { return i.err } //+kubebuilder:rbac:groups=nnf.cray.hpe.com,resources=nnfdatamovements,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=nnf.cray.hpe.com,resources=nnfdatamovements/status,verbs=get;update;patch //+kubebuilder:rbac:groups=nnf.cray.hpe.com,resources=nnfdatamovements/finalizers,verbs=update +//+kubebuilder:rbac:groups=nnf.cray.hpe.com,resources=nnfdatamovementprofiles,verbs=get;list;watch //+kubebuilder:rbac:groups=nnf.cray.hpe.com,resources=nnfstorages,verbs=get;list;watch //+kubebuilder:rbac:groups=nnf.cray.hpe.com,resources=nnfnodestorages,verbs=get;list;watch //+kubebuilder:rbac:groups=dataworkflowservices.github.io,resources=clientmounts,verbs=get;list @@ -276,29 +254,12 @@ func (r *DataMovementReconciler) Reconcile(ctx context.Context, req ctrl.Request cancel: cancel, }) - // Get DM Config map - configMap := &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{ - Name: configMapName, - Namespace: configMapNamespace, - }, - } - if err := r.Get(ctx, client.ObjectKeyFromObject(configMap), configMap); err != nil { - return ctrl.Result{}, dwsv1alpha2.NewResourceError("could not get data movement config map: %v", client.ObjectKeyFromObject(configMap)).WithError(err).WithMajor() - } - - cfg := dmConfig{} - if err := yaml.Unmarshal([]byte(configMap.Data[configMapKeyData]), &cfg); err != nil { - return ctrl.Result{}, dwsv1alpha2.NewResourceError("invalid data for config map: %v", client.ObjectKeyFromObject(configMap)).WithError(err).WithFatal() - } - log.Info("Using config map", "config", cfg) - - // Ensure requested DM profile exists - profile, found := cfg.Profiles[dm.Spec.Profile] - if !found { - return ctrl.Result{}, dwsv1alpha2.NewResourceError("").WithUserMessage("'%s' profile not found in config map: %v", dm.Spec.Profile, client.ObjectKeyFromObject(configMap)).WithUser().WithFatal() + // Get DM Profile + profile, err := r.getDMProfile(ctx, dm) + if err != nil { + return ctrl.Result{}, dwsv1alpha2.NewResourceError("could not get profile for data movement").WithError(err).WithMajor() } - log.Info("Using profile", "profile name", dm.Spec.Profile, "profile", profile) + log.Info("Using profile", "profile", profile) // Create the hostfile. This is needed for preparing the destination and the data movement // command itself. @@ -351,7 +312,7 @@ func (r *DataMovementReconciler) Reconcile(ctx context.Context, req ctrl.Request // While the command is running, capture and process the output. Read lines until EOF to // ensure we have the latest output. Then use the last regex match to obtain the most recent // progress. - progressCollectInterval := time.Duration(cfg.ProgressIntervalSeconds) * time.Second + progressCollectInterval := time.Duration(profile.Data.ProgressIntervalSeconds) * time.Second if progressCollectionEnabled(progressCollectInterval) { go func() { var elapsed metav1.Duration @@ -460,12 +421,12 @@ func (r *DataMovementReconciler) Reconcile(ctx context.Context, req ctrl.Request log.Info("Data movement operation completed", "cmdStatus", cmdStatus) // Profile or DM request has enabled stdout logging - if profile.LogStdout || (dm.Spec.UserConfig != nil && dm.Spec.UserConfig.LogStdout) { + if profile.Data.LogStdout || (dm.Spec.UserConfig != nil && dm.Spec.UserConfig.LogStdout) { log.Info("Data movement operation output", "output", combinedOutBuf.String()) } // Profile or DM request has enabled storing stdout - if profile.StoreStdout || (dm.Spec.UserConfig != nil && dm.Spec.UserConfig.StoreStdout) { + if profile.Data.StoreStdout || (dm.Spec.UserConfig != nil && dm.Spec.UserConfig.StoreStdout) { dm.Status.Message = combinedOutBuf.String() } } @@ -570,7 +531,28 @@ func parseDcpStats(line string, cmdStatus *nnfv1alpha1.NnfDataMovementCommandSta return nil } -func buildDMCommand(ctx context.Context, profile dmConfigProfile, hostfile string, dm *nnfv1alpha1.NnfDataMovement) ([]string, error) { +func (r *DataMovementReconciler) getDMProfile(ctx context.Context, dm *nnfv1alpha1.NnfDataMovement) (*nnfv1alpha1.NnfDataMovementProfile, error) { + + var profile *nnfv1alpha1.NnfDataMovementProfile + + if dm.Spec.ProfileReference.Kind != reflect.TypeOf(nnfv1alpha1.NnfDataMovementProfile{}).Name() { + return profile, fmt.Errorf("invalid NnfDataMovementProfile kind %s", dm.Spec.ProfileReference.Kind) + } + + profile = &nnfv1alpha1.NnfDataMovementProfile{ + ObjectMeta: metav1.ObjectMeta{ + Name: dm.Spec.ProfileReference.Name, + Namespace: dm.Spec.ProfileReference.Namespace, + }, + } + if err := r.Get(ctx, client.ObjectKeyFromObject(profile), profile); err != nil { + return nil, err + } + + return profile, nil +} + +func buildDMCommand(ctx context.Context, profile *nnfv1alpha1.NnfDataMovementProfile, hostfile string, dm *nnfv1alpha1.NnfDataMovement) ([]string, error) { log := log.FromContext(ctx) userConfig := dm.Spec.UserConfig != nil @@ -580,7 +562,7 @@ func buildDMCommand(ctx context.Context, profile dmConfigProfile, hostfile strin return []string{"true"}, nil } - cmd := profile.Command + cmd := profile.Data.Command cmd = strings.ReplaceAll(cmd, "$HOSTFILE", hostfile) cmd = strings.ReplaceAll(cmd, "$UID", fmt.Sprintf("%d", dm.Spec.UserId)) cmd = strings.ReplaceAll(cmd, "$GID", fmt.Sprintf("%d", dm.Spec.GroupId)) @@ -601,11 +583,11 @@ func buildDMCommand(ctx context.Context, profile dmConfigProfile, hostfile strin cmd = cmd[:idx] + opts + " " + cmd[idx:] } else { log.Info("spec.config.dpcOptions is set but no source path is found in the DM command", - "command", profile.Command, "DCPOptions", opts) + "command", profile.Data.Command, "DCPOptions", opts) } } else { log.Info("spec.config.dpcOptions is set but no dcp command found in the DM command", - "command", profile.Command, "DCPOptions", opts) + "command", profile.Data.Command, "DCPOptions", opts) } } } @@ -904,12 +886,12 @@ func createDestinationDir(dest string, uid, gid uint32, mpiHostfile string, log } // Create an MPI hostfile given settings from a profile and user config from the dm -func createMpiHostfile(profile dmConfigProfile, hosts []string, dm *nnfv1alpha1.NnfDataMovement) (string, error) { +func createMpiHostfile(profile *nnfv1alpha1.NnfDataMovementProfile, hosts []string, dm *nnfv1alpha1.NnfDataMovement) (string, error) { userConfig := dm.Spec.UserConfig != nil // Create MPI hostfile only if included in the provided command - slots := profile.Slots - maxSlots := profile.MaxSlots + slots := profile.Data.Slots + maxSlots := profile.Data.MaxSlots // Allow the user to override the slots and max_slots in the hostfile. if userConfig && dm.Spec.UserConfig.Slots != nil && *dm.Spec.UserConfig.Slots >= 0 { diff --git a/internal/controller/datamovement_controller_test.go b/internal/controller/datamovement_controller_test.go index b7a32bf8..9bf36ff8 100644 --- a/internal/controller/datamovement_controller_test.go +++ b/internal/controller/datamovement_controller_test.go @@ -25,6 +25,7 @@ import ( "fmt" "os" "path/filepath" + "reflect" "strings" "time" @@ -42,7 +43,6 @@ import ( "k8s.io/client-go/util/retry" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" - "sigs.k8s.io/yaml" ) // This is dumped into a temporary file and then ran as a bash script. @@ -56,10 +56,8 @@ var _ = Describe("Data Movement Test", func() { Describe("Reconciler Tests", func() { var dm *nnfv1alpha1.NnfDataMovement - var cm *corev1.ConfigMap - var dmCfg *dmConfig - var dmCfgProfile dmConfigProfile - createCm := true + var dmProfile *nnfv1alpha1.NnfDataMovementProfile + createDmProfile := true var tmpDir string var srcPath string var destPath string @@ -85,7 +83,7 @@ var _ = Describe("Data Movement Test", func() { BeforeEach(func() { var err error - createCm = true + createDmProfile = true testLabel = fmt.Sprintf("%s-%s", testLabelKey, uuid.NewString()) tmpDir, err = os.MkdirTemp("/tmp", "dm-test") @@ -104,16 +102,21 @@ var _ = Describe("Data Movement Test", func() { } k8sClient.Create(context.TODO(), ns) - // Default config map data - dmCfg = &dmConfig{ - Profiles: map[string]dmConfigProfile{ - nnfv1alpha1.DataMovementProfileDefault: { - Command: defaultCommand, + // Default DM profile + dmProfile = &nnfv1alpha1.NnfDataMovementProfile{ + ObjectMeta: metav1.ObjectMeta{ + Name: "default", + Namespace: corev1.NamespaceDefault, + Labels: map[string]string{ + testLabelKey: testLabel, }, }, - ProgressIntervalSeconds: 1, + Data: nnfv1alpha1.NnfDataMovementProfileData{ + Command: defaultCommand, + ProgressIntervalSeconds: 1, + Default: true, + }, } - dmCfgProfile = dmCfg.Profiles[nnfv1alpha1.DataMovementProfileDefault] dm = &nnfv1alpha1.NnfDataMovement{ ObjectMeta: metav1.ObjectMeta{ @@ -133,37 +136,22 @@ var _ = Describe("Data Movement Test", func() { UserId: 0, GroupId: 0, Cancel: false, + ProfileReference: corev1.ObjectReference{ + Kind: reflect.TypeOf(nnfv1alpha1.NnfDataMovementProfile{}).Name(), + Name: dmProfile.Name, + Namespace: dmProfile.Namespace, + }, }, } }) JustBeforeEach(func() { - // Create CM and verify label - if createCm { - // allow test to override the values in the default cfg profile - dmCfg.Profiles[nnfv1alpha1.DataMovementProfileDefault] = dmCfgProfile - - // Convert the config to raw - b, err := yaml.Marshal(dmCfg) - Expect(err).ToNot(HaveOccurred()) - - cm = &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{ - Name: configMapName, - Namespace: v1alpha1.DataMovementNamespace, - Labels: map[string]string{ - testLabelKey: testLabel, - }, - }, - Data: map[string]string{ - configMapKeyData: string(b), - }, - } - - Expect(k8sClient.Create(context.TODO(), cm)).To(Succeed()) + // Create DM Profile and verify label + if createDmProfile { + Expect(k8sClient.Create(context.TODO(), dmProfile)).To(Succeed()) Eventually(func(g Gomega) string { - g.Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(cm), cm)).To(Succeed()) - return cm.Labels[testLabelKey] + g.Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(dmProfile), dmProfile)).To(Succeed()) + return dmProfile.Labels[testLabelKey] }).Should(Equal(testLabel)) } @@ -186,10 +174,10 @@ var _ = Describe("Data Movement Test", func() { } // Remove configmap - if createCm { - Expect(k8sClient.Delete(context.TODO(), cm)).To(Succeed()) + if createDmProfile { + Expect(k8sClient.Delete(context.TODO(), dmProfile)).To(Succeed()) Eventually(func() error { - return k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(cm), cm) + return k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(dmProfile), dmProfile) }).ShouldNot(Succeed()) } @@ -199,7 +187,7 @@ var _ = Describe("Data Movement Test", func() { Context("when a data movement operation succeeds", func() { BeforeEach(func() { - dmCfgProfile.Command = "sleep 1" + dmProfile.Data.Command = "sleep 1" }) It("should have a state and status of 'Finished' and 'Success'", func() { Eventually(func(g Gomega) nnfv1alpha1.NnfDataMovementStatus { @@ -214,7 +202,7 @@ var _ = Describe("Data Movement Test", func() { Context("when a data movement command has no progress output", func() { BeforeEach(func() { - dmCfgProfile.Command = "sleep 1" + dmProfile.Data.Command = "sleep 1" }) It("CommandStatus should not have a ProgressPercentage", func() { Eventually(func(g Gomega) nnfv1alpha1.NnfDataMovementStatus { @@ -232,7 +220,7 @@ var _ = Describe("Data Movement Test", func() { Context("when the dm configmap has specified an overrideCmd", func() { BeforeEach(func() { - dmCfgProfile.Command = "ls -l" + dmProfile.Data.Command = "ls -l" }) It("should use that command instead of the default mpirun", func() { Eventually(func(g Gomega) string { @@ -242,13 +230,13 @@ var _ = Describe("Data Movement Test", func() { cmd = dm.Status.CommandStatus.Command } return cmd - }).Should(Equal(cmdBashPrefix + dmCfgProfile.Command)) + }).Should(Equal(cmdBashPrefix + dmProfile.Data.Command)) }) }) Context("when the dm configmap does not have $HOSTFILE in the command", func() { BeforeEach(func() { - dmCfgProfile.Command = "ls -l" + dmProfile.Data.Command = "ls -l" }) It("should use that command instead of the default mpirun", func() { Eventually(func(g Gomega) string { @@ -258,14 +246,14 @@ var _ = Describe("Data Movement Test", func() { cmd = dm.Status.CommandStatus.Command } return cmd - }).Should(Equal(cmdBashPrefix + dmCfgProfile.Command)) + }).Should(Equal(cmdBashPrefix + dmProfile.Data.Command)) }) }) Context("when the dm config map has specified a dmProgressInterval of less than 1s", func() { BeforeEach(func() { - dmCfgProfile.Command = "sleep .5" - dmCfg.ProgressIntervalSeconds = 0 + dmProfile.Data.Command = "sleep .5" + dmProfile.Data.ProgressIntervalSeconds = 0 }) It("the data movement should skip progress collection", func() { @@ -288,8 +276,8 @@ var _ = Describe("Data Movement Test", func() { Context("when the dm config map has specified to store Stdout", func() { output := "this is not a test" BeforeEach(func() { - dmCfgProfile.Command = "echo " + output - dmCfgProfile.StoreStdout = true + dmProfile.Data.Command = "echo " + output + dmProfile.Data.StoreStdout = true }) It("should store the output in Status.Message", func() { @@ -311,8 +299,8 @@ var _ = Describe("Data Movement Test", func() { Context("when the dm config map has specified to not store Stdout", func() { output := "this is not a test" BeforeEach(func() { - dmCfgProfile.Command = "echo " + output - dmCfgProfile.StoreStdout = false + dmProfile.Data.Command = "echo " + output + dmProfile.Data.StoreStdout = false }) It("should not store anything in Status.Message", func() { @@ -334,7 +322,7 @@ var _ = Describe("Data Movement Test", func() { Context("when the UserConfig has specified to store Stdout", func() { output := "this is not a test" BeforeEach(func() { - dmCfgProfile.Command = "echo " + output + dmProfile.Data.Command = "echo " + output dm.Spec.UserConfig = &nnfv1alpha1.NnfDataMovementConfig{ StoreStdout: true, } @@ -359,7 +347,7 @@ var _ = Describe("Data Movement Test", func() { Context("when the UserConfig has specified not to store Stdout", func() { output := "this is not a test" BeforeEach(func() { - dmCfgProfile.Command = "echo " + output + dmProfile.Data.Command = "echo " + output dm.Spec.UserConfig = &nnfv1alpha1.NnfDataMovementConfig{ StoreStdout: false, } @@ -383,7 +371,7 @@ var _ = Describe("Data Movement Test", func() { Context("when there is no dm config map", func() { BeforeEach(func() { - createCm = false + createDmProfile = false }) It("should requeue and not start running", func() { @@ -395,15 +383,27 @@ var _ = Describe("Data Movement Test", func() { }) Context("when a non-default profile is supplied (and present)", func() { - p := "test-profile" + p := &nnfv1alpha1.NnfDataMovementProfile{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-profile", + Namespace: corev1.NamespaceDefault, + }, + } cmd := "sleep .1" BeforeEach(func() { - dmCfgProfile = dmConfigProfile{ - Command: cmd, + p.Data.Command = cmd + dm.Spec.ProfileReference = corev1.ObjectReference{ + Kind: reflect.TypeOf(nnfv1alpha1.NnfDataMovementProfile{}).Name(), + Name: p.Name, + Namespace: p.Namespace, } - dmCfg.Profiles[p] = dmCfgProfile - dm.Spec.Profile = p + + Expect(k8sClient.Create(context.TODO(), p)).To(Succeed(), "create nnfDataMovementProfile") + + Eventually(func(g Gomega) { + g.Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(p), p)).To(Succeed()) + }, "3s", "1s").Should(Succeed(), "wait for create of NnfDataMovementProfile") }) It("should use that profile to perform data movement", func() { @@ -417,21 +417,33 @@ var _ = Describe("Data Movement Test", func() { })) By("verify that profile is used") - Expect(dm.Spec.Profile).To(Equal(p)) + Expect(dm.Spec.ProfileReference).To(MatchFields(IgnoreExtras, + Fields{ + "Kind": Equal(reflect.TypeOf(nnfv1alpha1.NnfDataMovementProfile{}).Name()), + "Name": Equal(p.Name), + "Namespace": Equal(p.Namespace), + }, + )) Expect(dm.Status.CommandStatus.Command).To(Equal(cmdBashPrefix + cmd)) }) }) Context("when a non-default profile is supplied (and NOT present)", func() { - m := "missing-test-profile" + m := &nnfv1alpha1.NnfDataMovementProfile{ + ObjectMeta: metav1.ObjectMeta{ + Name: "missing-test-profile", + Namespace: corev1.NamespaceDefault, + }, + } cmd := "sleep .1" BeforeEach(func() { - dmCfgProfile = dmConfigProfile{ - Command: cmd, + m.Data.Command = cmd + dm.Spec.ProfileReference = corev1.ObjectReference{ + Kind: reflect.TypeOf(nnfv1alpha1.NnfDataMovementProfile{}).Name(), + Name: m.Name, + Namespace: m.Namespace, } - dmCfg.Profiles["test-profile"] = dmCfgProfile - dm.Spec.Profile = m }) It("should use that profile to perform data movement and fail", func() { @@ -445,13 +457,19 @@ var _ = Describe("Data Movement Test", func() { })) By("verify that profile is used") - Expect(dm.Spec.Profile).To(Equal(m)) + Expect(dm.Spec.ProfileReference).To(MatchFields(IgnoreExtras, + Fields{ + "Kind": Equal(reflect.TypeOf(nnfv1alpha1.NnfDataMovementProfile{}).Name()), + "Name": Equal(m.Name), + "Namespace": Equal(m.Namespace), + }, + )) }) }) Context("when a data movement command fails", func() { BeforeEach(func() { - dmCfgProfile.Command = "false" + dmProfile.Data.Command = "false" }) It("should have a State/Status of 'Finished'/'Failed'", func() { Eventually(func(g Gomega) nnfv1alpha1.NnfDataMovementStatus { @@ -470,7 +488,7 @@ var _ = Describe("Data Movement Test", func() { BeforeEach(func() { // Set cancel on creation so that data movement doesn't start dm.Spec.Cancel = true - dmCfgProfile.Command = "false" + dmProfile.Data.Command = "false" }) It("should have a State/Status of 'Finished'/'Cancelled' and StartTime/EndTime should be set to now", func() { @@ -530,14 +548,14 @@ var _ = Describe("Data Movement Test", func() { Expect(err).ToNot(HaveOccurred()) commandWithArgs = fmt.Sprintf("/bin/bash %s %d %f", scriptFilePath, commandDuration, commandIntervalInSec) - dmCfgProfile.Command = commandWithArgs + dmProfile.Data.Command = commandWithArgs }) It("should update progress by parsing the output of the command", func() { startTime := metav1.NowMicro() - Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(cm), cm)).To(Succeed()) - Expect(dmCfgProfile.Command).To(Equal(commandWithArgs)) + Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(dmProfile), dmProfile)).To(Succeed()) + Expect(dmProfile.Data.Command).To(Equal(commandWithArgs)) By("ensuring that we do not have a progress to start") Eventually(func(g Gomega) *int32 { @@ -614,8 +632,8 @@ var _ = Describe("Data Movement Test", func() { _, err := os.Stat(scriptFilePath) Expect(err).ToNot(HaveOccurred()) - dmCfgProfile.Command = fmt.Sprintf("/bin/bash %s 10 .25", scriptFilePath) - dmCfg.ProgressIntervalSeconds = 1 + dmProfile.Data.Command = fmt.Sprintf("/bin/bash %s 10 .25", scriptFilePath) + dmProfile.Data.ProgressIntervalSeconds = 1 }) It("LastMessage should not include multiple lines of output", func() { @@ -635,7 +653,7 @@ var _ = Describe("Data Movement Test", func() { Context("when a data movement operation is cancelled", func() { BeforeEach(func() { - dmCfgProfile.Command = "sleep 5" + dmProfile.Data.Command = "sleep 5" }) It("should have a state and status of 'Finished' and 'Cancelled'", func() { By("ensuring the data movement started") @@ -737,11 +755,10 @@ var _ = Describe("Data Movement Test", func() { } When("$HOSTFILE is present", func() { It("should create the hostfile", func() { - profile := dmConfigProfile{ - Command: "mpirun --hostfile $HOSTFILE dcp src dest", - } + profile := nnfv1alpha1.NnfDataMovementProfile{} + profile.Data.Command = "mpirun --hostfile $HOSTFILE dcp src dest" - hostfile, err := createMpiHostfile(profile, hosts, &dm) + hostfile, err := createMpiHostfile(&profile, hosts, &dm) Expect(err).ToNot(HaveOccurred()) Expect(len(hostfile)).Should((BeNumerically(">", 0))) info, err := os.Stat(hostfile) @@ -773,9 +790,9 @@ var _ = Describe("Data Movement Test", func() { When("DCPOptions are specified", func() { It("should inject the extra options before the $SRC argument to dcp", func() { - profile := dmConfigProfile{ - Command: defaultCommand, - } + profile := nnfv1alpha1.NnfDataMovementProfile{} + profile.Data.Command = defaultCommand + dm.Spec.UserConfig = &nnfv1alpha1.NnfDataMovementConfig{ DCPOptions: "--extra opts", } @@ -783,7 +800,7 @@ var _ = Describe("Data Movement Test", func() { "mpirun --allow-run-as-root --hostfile /tmp/hostfile dcp --progress 1 --uid %d --gid %d --extra opts %s %s", expectedUid, expectedGid, srcPath, destPath) - cmd, err := buildDMCommand(context.TODO(), profile, "/tmp/hostfile", &dm) + cmd, err := buildDMCommand(context.TODO(), &profile, "/tmp/hostfile", &dm) Expect(err).ToNot(HaveOccurred()) Expect(strings.Join(cmd, " ")).Should(MatchRegexp(expectedCmdRegex)) }) @@ -794,17 +811,19 @@ var _ = Describe("Data Movement Test", func() { func(numSlots *int) { profileSlots, profileMaxSlots := 3, 8 - profile := dmConfigProfile{ - Command: defaultCommand, - Slots: profileSlots, - MaxSlots: profileMaxSlots, + profile := nnfv1alpha1.NnfDataMovementProfile{ + Data: nnfv1alpha1.NnfDataMovementProfileData{ + Command: defaultCommand, + Slots: profileSlots, + MaxSlots: profileMaxSlots, + }, } dm.Spec.UserConfig = &nnfv1alpha1.NnfDataMovementConfig{ Slots: numSlots, MaxSlots: numSlots, } - hostfilePath, err := createMpiHostfile(profile, hosts, &dm) + hostfilePath, err := createMpiHostfile(&profile, hosts, &dm) Expect(err).ToNot(HaveOccurred()) Expect(hostfilePath).ToNot(BeEmpty()) DeferCleanup(func() { diff --git a/vendor/github.com/DataWorkflowServices/dws/api/v1alpha2/resource.go b/vendor/github.com/DataWorkflowServices/dws/api/v1alpha2/resource.go index 62157d6d..95d8671f 100644 --- a/vendor/github.com/DataWorkflowServices/dws/api/v1alpha2/resource.go +++ b/vendor/github.com/DataWorkflowServices/dws/api/v1alpha2/resource.go @@ -34,7 +34,7 @@ const ( ) // ResourceStatus is the enumeration of the status of a DWS resource -// +kubebuilder:validation:Enum:=Starting;Ready;Disabled;NotPresent;Offline;Failed;Degraded;Unknown +// +kubebuilder:validation:Enum:=Starting;Ready;Disabled;NotPresent;Offline;Failed;Degraded;Drained;Unknown type ResourceStatus string const ( @@ -56,7 +56,7 @@ const ( NotPresentStatus ResourceStatus = "NotPresent" // Offline means the resource is offline and cannot be communicated with. This differs - // fro the NotPresent state in that the resource is known to exist. + // from the NotPresent state in that the resource is known to exist. OfflineStatus ResourceStatus = "Offline" // Failed means the resource has failed during startup or execution. @@ -66,6 +66,9 @@ const ( // recovery actions may alleviate a degraded status. DegradedStatus ResourceStatus = "Degraded" + // Drained means the resource has had its pods drained from the node. + DrainedStatus ResourceStatus = "Drained" + // Unknown means the resource status is unknown. UnknownStatus ResourceStatus = "Unknown" ) diff --git a/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/nnf_datamovement_types.go b/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/nnf_datamovement_types.go index 2a37a77d..a561ba9c 100644 --- a/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/nnf_datamovement_types.go +++ b/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/nnf_datamovement_types.go @@ -32,9 +32,8 @@ const ( // NNF Node Name as the namespace. DataMovementNamespace = "nnf-dm-system" - // The name of the default profile stored in the nnf-dm-config ConfigMap that is used to - // configure Data Movement. - DataMovementProfileDefault = "default" + // The namespace for NnfDataMovementProfiles that are not pinned. + DataMovementProfileNamespace = "nnf-system" ) // NnfDataMovementSpec defines the desired state of NnfDataMovement @@ -62,14 +61,13 @@ type NnfDataMovementSpec struct { // +kubebuilder:default:=false Cancel bool `json:"cancel,omitempty"` - // Profile specifies the name of profile in the nnf-dm-config ConfigMap to be used for - // configuring data movement. Defaults to the default profile. - // +kubebuilder:default:=default - Profile string `json:"profile,omitempty"` + // ProfileReference is an object reference to an NnfDataMovementProfile that is used to + // configure data movement. If empty, the default profile is used. + ProfileReference corev1.ObjectReference `json:"profileReference,omitempty"` // User defined configuration on how data movement should be performed. This overrides the - // configuration defined in the nnf-dm-config ConfigMap. These values are typically set by the - // Copy Offload API. + // configuration defined in the supplied ProfileReference/NnfDataMovementProfile. These values + // are typically set by the Copy Offload API. UserConfig *NnfDataMovementConfig `json:"userConfig,omitempty"` } @@ -109,11 +107,11 @@ type NnfDataMovementConfig struct { StoreStdout bool `json:"storeStdout,omitempty"` // The number of slots specified in the MPI hostfile. A value of 0 disables the use of slots in - // the hostfile. Nil will defer to the value specified in the nnf-dm-config ConfigMap. + // the hostfile. Nil will defer to the value specified in the NnfDataMovementProfile. Slots *int `json:"slots,omitempty"` // The number of max_slots specified in the MPI hostfile. A value of 0 disables the use of slots - // in the hostfile. Nil will defer to the value specified in the nnf-dm-config ConfigMap. + // in the hostfile. Nil will defer to the value specified in the NnfDataMovementProfile. MaxSlots *int `json:"maxSlots,omitempty"` } diff --git a/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/nnfdatamovementprofile_types.go b/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/nnfdatamovementprofile_types.go new file mode 100644 index 00000000..fec73ad5 --- /dev/null +++ b/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/nnfdatamovementprofile_types.go @@ -0,0 +1,100 @@ +/* + * Copyright 2024 Hewlett Packard Enterprise Development LP + * Other additional copyright holders may be indicated within. + * + * The entirety of this work is licensed under the Apache License, + * Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. + * + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// NnfDataMovementProfileData defines the desired state of NnfDataMovementProfile +type NnfDataMovementProfileData struct { + + // Default is true if this instance is the default resource to use + // +kubebuilder:default:=false + Default bool `json:"default,omitempty"` + + // Pinned is true if this instance is an immutable copy + // +kubebuilder:default:=false + Pinned bool `json:"pinned,omitempty"` + + // Slots is the number of slots specified in the MPI hostfile. A value less than 1 disables the + // use of slots in the hostfile. + // +kubebuilder:default:=8 + Slots int `json:"slots"` + + // MaxSlots is the number of max_slots specified in the MPI hostfile. A value less than 1 + // disables the use of max_slots in the hostfile. + // +kubebuilder:default:=0 + MaxSlots int `json:"maxSlots"` + + // Command to execute to perform data movement. $VARS are replaced by the nnf software and must + // be present in the command. + // Available $VARS: + // HOSTFILE: hostfile that is created and used for mpirun. Contains a list of hosts and the + // slots/max_slots for each host. This hostfile is created at `/tmp//hostfile` + // UID: User ID that is inherited from the Workflow + // GID: Group ID that is inherited from the Workflow + // SRC: source for the data movement + // DEST destination for the data movement + // +kubebuilder:default:="ulimit -n 2048 && mpirun --allow-run-as-root --hostfile $HOSTFILE dcp --progress 1 --uid $UID --gid $GID $SRC $DEST" + Command string `json:"command"` + + // If true, enable the command's stdout to be saved in the log when the command completes + // successfully. On failure, the output is always logged. + // +kubebuilder:default:=false + LogStdout bool `json:"logStdout,omitempty"` + + // Similar to logStdout, store the command's stdout in Status.Message when the command completes + // successfully. On failure, the output is always stored. + // +kubebuilder:default:=false + StoreStdout bool `json:"storeStdout,omitempty"` + + // NnfDataMovement resources have the ability to collect and store the progress percentage and the + // last few lines of output in the CommandStatus field. This number is used for the interval to collect + // the progress data. `dcp --progress N` must be included in the data movement command in order for + // progress to be collected. A value less than 1 disables this functionality. + // +kubebuilder:default:=5 + ProgressIntervalSeconds int `json:"progressIntervalSeconds,omitempty"` +} + +// +kubebuilder:object:root=true +// +kubebuilder:printcolumn:name="DEFAULT",type="boolean",JSONPath=".data.default",description="True if this is the default instance" +// +kubebuilder:printcolumn:name="AGE",type="date",JSONPath=".metadata.creationTimestamp" + +// NnfDataMovementProfile is the Schema for the nnfdatamovementprofiles API +type NnfDataMovementProfile struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Data NnfDataMovementProfileData `json:"data,omitempty"` +} + +// +kubebuilder:object:root=true + +// NnfDataMovementProfileList contains a list of NnfDataMovementProfile +type NnfDataMovementProfileList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []NnfDataMovementProfile `json:"items"` +} + +func init() { + SchemeBuilder.Register(&NnfDataMovementProfile{}, &NnfDataMovementProfileList{}) +} diff --git a/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/zz_generated.deepcopy.go b/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/zz_generated.deepcopy.go index e0f2a631..29c36ccf 100644 --- a/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/zz_generated.deepcopy.go +++ b/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/zz_generated.deepcopy.go @@ -477,6 +477,79 @@ func (in *NnfDataMovementManagerStatus) DeepCopy() *NnfDataMovementManagerStatus return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NnfDataMovementProfile) DeepCopyInto(out *NnfDataMovementProfile) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + out.Data = in.Data +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NnfDataMovementProfile. +func (in *NnfDataMovementProfile) DeepCopy() *NnfDataMovementProfile { + if in == nil { + return nil + } + out := new(NnfDataMovementProfile) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *NnfDataMovementProfile) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NnfDataMovementProfileData) DeepCopyInto(out *NnfDataMovementProfileData) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NnfDataMovementProfileData. +func (in *NnfDataMovementProfileData) DeepCopy() *NnfDataMovementProfileData { + if in == nil { + return nil + } + out := new(NnfDataMovementProfileData) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NnfDataMovementProfileList) DeepCopyInto(out *NnfDataMovementProfileList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]NnfDataMovementProfile, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NnfDataMovementProfileList. +func (in *NnfDataMovementProfileList) DeepCopy() *NnfDataMovementProfileList { + if in == nil { + return nil + } + out := new(NnfDataMovementProfileList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *NnfDataMovementProfileList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *NnfDataMovementSpec) DeepCopyInto(out *NnfDataMovementSpec) { *out = *in @@ -490,6 +563,7 @@ func (in *NnfDataMovementSpec) DeepCopyInto(out *NnfDataMovementSpec) { *out = new(NnfDataMovementSpecSourceDestination) **out = **in } + out.ProfileReference = in.ProfileReference if in.UserConfig != nil { in, out := &in.UserConfig, &out.UserConfig *out = new(NnfDataMovementConfig) diff --git a/vendor/github.com/NearNodeFlash/nnf-sos/config/crd/bases/nnf.cray.hpe.com_nnfdatamovementprofiles.yaml b/vendor/github.com/NearNodeFlash/nnf-sos/config/crd/bases/nnf.cray.hpe.com_nnfdatamovementprofiles.yaml new file mode 100644 index 00000000..e947df4e --- /dev/null +++ b/vendor/github.com/NearNodeFlash/nnf-sos/config/crd/bases/nnf.cray.hpe.com_nnfdatamovementprofiles.yaml @@ -0,0 +1,114 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.14.0 + name: nnfdatamovementprofiles.nnf.cray.hpe.com +spec: + group: nnf.cray.hpe.com + names: + kind: NnfDataMovementProfile + listKind: NnfDataMovementProfileList + plural: nnfdatamovementprofiles + singular: nnfdatamovementprofile + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: True if this is the default instance + jsonPath: .data.default + name: DEFAULT + type: boolean + - jsonPath: .metadata.creationTimestamp + name: AGE + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: NnfDataMovementProfile is the Schema for the nnfdatamovementprofiles + API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + data: + description: NnfDataMovementProfileData defines the desired state of NnfDataMovementProfile + properties: + command: + default: ulimit -n 2048 && mpirun --allow-run-as-root --hostfile $HOSTFILE + dcp --progress 1 --uid $UID --gid $GID $SRC $DEST + description: |- + Command to execute to perform data movement. $VARS are replaced by the nnf software and must + be present in the command. + Available $VARS: + HOSTFILE: hostfile that is created and used for mpirun. Contains a list of hosts and the + slots/max_slots for each host. This hostfile is created at `/tmp//hostfile` + UID: User ID that is inherited from the Workflow + GID: Group ID that is inherited from the Workflow + SRC: source for the data movement + DEST destination for the data movement + type: string + default: + default: false + description: Default is true if this instance is the default resource + to use + type: boolean + logStdout: + default: false + description: |- + If true, enable the command's stdout to be saved in the log when the command completes + successfully. On failure, the output is always logged. + type: boolean + maxSlots: + default: 0 + description: |- + MaxSlots is the number of max_slots specified in the MPI hostfile. A value less than 1 + disables the use of max_slots in the hostfile. + type: integer + pinned: + default: false + description: Pinned is true if this instance is an immutable copy + type: boolean + progressIntervalSeconds: + default: 5 + description: |- + NnfDataMovement resources have the ability to collect and store the progress percentage and the + last few lines of output in the CommandStatus field. This number is used for the interval to collect + the progress data. `dcp --progress N` must be included in the data movement command in order for + progress to be collected. A value less than 1 disables this functionality. + type: integer + slots: + default: 8 + description: |- + Slots is the number of slots specified in the MPI hostfile. A value less than 1 disables the + use of slots in the hostfile. + type: integer + storeStdout: + default: false + description: |- + Similar to logStdout, store the command's stdout in Status.Message when the command completes + successfully. On failure, the output is always stored. + type: boolean + required: + - command + - maxSlots + - slots + type: object + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + type: object + served: true + storage: true + subresources: {} diff --git a/vendor/github.com/NearNodeFlash/nnf-sos/config/crd/bases/nnf.cray.hpe.com_nnfdatamovements.yaml b/vendor/github.com/NearNodeFlash/nnf-sos/config/crd/bases/nnf.cray.hpe.com_nnfdatamovements.yaml index d4e1660b..e9f001d7 100644 --- a/vendor/github.com/NearNodeFlash/nnf-sos/config/crd/bases/nnf.cray.hpe.com_nnfdatamovements.yaml +++ b/vendor/github.com/NearNodeFlash/nnf-sos/config/crd/bases/nnf.cray.hpe.com_nnfdatamovements.yaml @@ -121,12 +121,52 @@ spec: the data movement operation. format: int32 type: integer - profile: - default: default + profileReference: description: |- - Profile specifies the name of profile in the nnf-dm-config ConfigMap to be used for - configuring data movement. Defaults to the default profile. - type: string + ProfileReference is an object reference to an NnfDataMovementProfile that is used to + configure data movement. If empty, the default profile is used. + properties: + apiVersion: + description: API version of the referent. + type: string + fieldPath: + description: |- + If referring to a piece of an object instead of an entire object, this string + should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. + For example, if the object reference is to a container within a pod, this would take on a value like: + "spec.containers{name}" (where "name" refers to the name of the container that triggered + the event) or if no container name is specified "spec.containers[2]" (container with + index 2 in this pod). This syntax is chosen only to have some well-defined way of + referencing a part of an object. + TODO: this design is not final and this field is subject to change in the future. + type: string + kind: + description: |- + Kind of the referent. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + namespace: + description: |- + Namespace of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ + type: string + resourceVersion: + description: |- + Specific resourceVersion to which this reference is made, if any. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency + type: string + uid: + description: |- + UID of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids + type: string + type: object + x-kubernetes-map-type: atomic source: description: Source describes the source of the data movement operation properties: @@ -184,8 +224,8 @@ spec: userConfig: description: |- User defined configuration on how data movement should be performed. This overrides the - configuration defined in the nnf-dm-config ConfigMap. These values are typically set by the - Copy Offload API. + configuration defined in the supplied ProfileReference/NnfDataMovementProfile. These values + are typically set by the Copy Offload API. properties: dcpOptions: description: Extra options to pass to the dcp command (used to @@ -207,12 +247,12 @@ spec: maxSlots: description: |- The number of max_slots specified in the MPI hostfile. A value of 0 disables the use of slots - in the hostfile. Nil will defer to the value specified in the nnf-dm-config ConfigMap. + in the hostfile. Nil will defer to the value specified in the NnfDataMovementProfile. type: integer slots: description: |- The number of slots specified in the MPI hostfile. A value of 0 disables the use of slots in - the hostfile. Nil will defer to the value specified in the nnf-dm-config ConfigMap. + the hostfile. Nil will defer to the value specified in the NnfDataMovementProfile. type: integer storeStdout: default: false @@ -289,7 +329,7 @@ spec: type: integer progress: description: |- - Progress refects the progress of the underlying data movement command as captured from + ProgressPercentage refects the progress of the underlying data movement command as captured from standard output. A best effort is made to parse the command output as a percentage. If no progress has yet to be measured than this field is omitted. If the latest command output does not contain a valid percentage, then the value is unchanged from the previously parsed value. diff --git a/vendor/modules.txt b/vendor/modules.txt index 94b350c2..5d96ddca 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1,4 +1,4 @@ -# github.com/DataWorkflowServices/dws v0.0.1-0.20240726140248-bd9f85356c73 +# github.com/DataWorkflowServices/dws v0.0.1-0.20240801173757-1fb2188d84b9 ## explicit; go 1.21 github.com/DataWorkflowServices/dws/api/v1alpha2 github.com/DataWorkflowServices/dws/utils/dwdparse @@ -10,7 +10,7 @@ github.com/NearNodeFlash/lustre-fs-operator/config/crd/bases # github.com/NearNodeFlash/nnf-ec v0.0.1-0.20240708183336-34d1295977f5 ## explicit; go 1.19 github.com/NearNodeFlash/nnf-ec/pkg/rfsf/pkg/models -# github.com/NearNodeFlash/nnf-sos v0.0.1-0.20240729130730-8f01fd2116ea +# github.com/NearNodeFlash/nnf-sos v0.0.1-0.20240801190336-58f2d99dd6cb ## explicit; go 1.21 github.com/NearNodeFlash/nnf-sos/api/v1alpha1 github.com/NearNodeFlash/nnf-sos/config/crd/bases