diff --git a/cmd/experimental/kjobctl/docs/commands/kjobctl_create/kjobctl_create_slurm.md b/cmd/experimental/kjobctl/docs/commands/kjobctl_create/kjobctl_create_slurm.md
index cf79a1cc77..0dba3309b4 100644
--- a/cmd/experimental/kjobctl/docs/commands/kjobctl_create/kjobctl_create_slurm.md
+++ b/cmd/experimental/kjobctl/docs/commands/kjobctl_create/kjobctl_create_slurm.md
@@ -71,7 +71,7 @@ kjobctl create slurm --profile APPLICATION_PROFILE_NAME [--localqueue LOCAL_QUEU
- --init-image string Default: "bash:5-alpine3.20" |
+ --init-image string Default: "registry.k8s.io/busybox:1.27.2" |
|
diff --git a/cmd/experimental/kjobctl/pkg/builder/slurm_builder.go b/cmd/experimental/kjobctl/pkg/builder/slurm_builder.go
index 6b674a1674..bf0d4ee61b 100644
--- a/cmd/experimental/kjobctl/pkg/builder/slurm_builder.go
+++ b/cmd/experimental/kjobctl/pkg/builder/slurm_builder.go
@@ -24,7 +24,6 @@ import (
"fmt"
"math"
"os"
- "slices"
"strconv"
"strings"
"text/template"
@@ -282,7 +281,7 @@ func (b *slurmBuilder) build(ctx context.Context) (runtime.Object, []runtime.Obj
job.Spec.Template.Spec.InitContainers = append(job.Spec.Template.Spec.InitContainers, corev1.Container{
Name: "slurm-init-env",
Image: b.initImage,
- Command: []string{"bash", slurmInitEntrypointFilenamePath},
+ Command: []string{"sh", slurmInitEntrypointFilenamePath},
VolumeMounts: []corev1.VolumeMount{
{
Name: "slurm-scripts",
@@ -485,22 +484,37 @@ func (b *slurmBuilder) build(ctx context.Context) (runtime.Object, []runtime.Obj
return job, []runtime.Object{configMap, service}, nil
}
-func (b *slurmBuilder) buildIndexesMap() map[int32][]int32 {
- indexMap := make(map[int32][]int32)
+// buildArrayIndexes serializes b.arrayIndexes.Indexes for the init entrypoint
+// script. Indexes are split, in order, into groups of nTasks (default 1), one
+// group per job completion index; indexes within a group are joined with ","
+// and groups are joined with ";" (e.g. 0..4 with nTasks=2 gives "0,1;2,3;4").
+func (b *slurmBuilder) buildArrayIndexes() string {
nTasks := ptr.Deref(b.nTasks, 1)
+ // The grouping loop below starts a new group after every index when nTasks < 1,
+ // so size the result as if nTasks were 1 instead of dividing by zero.
+ groupSize := int(max(nTasks, 1))
+ length := (len(b.arrayIndexes.Indexes) + groupSize - 1) / groupSize
+ containerIndexes := make([][]string, length)
+
var (
completionIndex int32
containerIndex int32
)
for _, index := range b.arrayIndexes.Indexes {
- indexMap[completionIndex] = append(indexMap[completionIndex], index)
+ containerIndexes[completionIndex] = append(containerIndexes[completionIndex], fmt.Sprint(index))
containerIndex++
if containerIndex >= nTasks {
containerIndex = 0
completionIndex++
}
}
- return indexMap
+
+ completionIndexes := make([]string, length)
+ for i, group := range containerIndexes {
+ completionIndexes[i] = strings.Join(group, ",")
+ }
+
+ return strings.Join(completionIndexes, ";")
}
type slurmInitEntrypointScript struct {
@@ -546,18 +560,6 @@ func (b *slurmBuilder) buildInitEntrypointScript() (string, error) {
nTasks := ptr.Deref(b.nTasks, 1)
nodes := ptr.Deref(b.nodes, 1)
- indexesMap := b.buildIndexesMap()
- keyValues := make([]string, 0, len(indexesMap))
- for key, value := range indexesMap {
- strIndexes := make([]string, 0, len(value))
- for _, index := range value {
- strIndexes = append(strIndexes, fmt.Sprintf("%d", index))
- }
- keyValues = append(keyValues, fmt.Sprintf(`["%d"]="%s"`, key, strings.Join(strIndexes, ",")))
- }
-
- slices.Sort(keyValues)
-
var gpusPerTask, memPerCPU, memPerGPU string
if b.gpusPerTask != nil {
gpus := make([]string, 0)
@@ -579,7 +581,7 @@ func (b *slurmBuilder) buildInitEntrypointScript() (string, error) {
}
scriptValues := slurmInitEntrypointScript{
- ArrayIndexes: strings.Join(keyValues, " "),
+ ArrayIndexes: b.buildArrayIndexes(),
EnvsPath: slurmEnvsPath,
SbatchEnvFilename: slurmSbatchEnvFilename,
diff --git a/cmd/experimental/kjobctl/pkg/builder/slurm_builder_test.go b/cmd/experimental/kjobctl/pkg/builder/slurm_builder_test.go
index da2e400bcb..3193443987 100644
--- a/cmd/experimental/kjobctl/pkg/builder/slurm_builder_test.go
+++ b/cmd/experimental/kjobctl/pkg/builder/slurm_builder_test.go
@@ -238,7 +238,7 @@ func TestSlurmBuilderDo(t *testing.T) {
Mode(v1alpha1.SlurmMode).
Subdomain("profile-slurm").
WithInitContainer(*wrappers.MakeContainer("slurm-init-env", "bash:latest").
- Command("bash", "/slurm/scripts/init-entrypoint.sh").
+ Command("sh", "/slurm/scripts/init-entrypoint.sh").
WithVolumeMount(corev1.VolumeMount{Name: "slurm-scripts", MountPath: "/slurm/scripts"}).
WithVolumeMount(corev1.VolumeMount{Name: "slurm-env", MountPath: "/slurm/env"}).
Obj()).
@@ -282,7 +282,7 @@ func TestSlurmBuilderDo(t *testing.T) {
Mode(v1alpha1.SlurmMode).
Data(map[string]string{
"script": "#!/bin/bash\nsleep 300'",
- "init-entrypoint.sh": `#!/usr/local/bin/bash
+ "init-entrypoint.sh": `#!/bin/sh
set -o errexit
set -o nounset
@@ -292,16 +292,14 @@ set -x
# External variables
# JOB_COMPLETION_INDEX - completion index of the job.
-for i in {0..1}
-do
- # ["COMPLETION_INDEX"]="CONTAINER_INDEX_1,CONTAINER_INDEX_2"
- declare -A array_indexes=(["0"]="1" ["1"]="2" ["2"]="3" ["3"]="4" ["4"]="5") # Requires bash v4+
+array_indexes="1;2;3;4;5"
+container_indexes=$(echo "$array_indexes" | awk -F';' -v idx="$JOB_COMPLETION_INDEX" '{print $((idx + 1))}')
- container_indexes=${array_indexes[${JOB_COMPLETION_INDEX}]}
- container_indexes=(${container_indexes//,/ })
+for i in $(seq 0 1)
+do
+ container_index=$(echo "$container_indexes" | awk -F',' -v idx="$i" '{print $((idx + 1))}')
- if [[ ! -v container_indexes[$i] ]];
- then
+ if [ -z "$container_index" ]; then
break
fi
@@ -341,9 +339,9 @@ SLURM_SUBMIT_DIR=/slurm/scripts
SLURM_SUBMIT_HOST=$HOSTNAME
SLURM_JOB_NODELIST=profile-slurm-0.profile-slurm,profile-slurm-1.profile-slurm
SLURM_JOB_FIRST_NODE=profile-slurm-0.profile-slurm
-SLURM_JOB_ID=$(( JOB_COMPLETION_INDEX * 1 + i + 1 ))
-SLURM_JOBID=$(( JOB_COMPLETION_INDEX * 1 + i + 1 ))
-SLURM_ARRAY_TASK_ID=${container_indexes[$i]}
+SLURM_JOB_ID=$(expr $JOB_COMPLETION_INDEX \* 1 + $i + 1)
+SLURM_JOBID=$(expr $JOB_COMPLETION_INDEX \* 1 + $i + 1)
+SLURM_ARRAY_TASK_ID=$container_index
EOF
done
diff --git a/cmd/experimental/kjobctl/pkg/builder/templates/slurm_init_entrypoint_script.sh.tmpl b/cmd/experimental/kjobctl/pkg/builder/templates/slurm_init_entrypoint_script.sh.tmpl
index d5d747288a..5f1e02942b 100644
--- a/cmd/experimental/kjobctl/pkg/builder/templates/slurm_init_entrypoint_script.sh.tmpl
+++ b/cmd/experimental/kjobctl/pkg/builder/templates/slurm_init_entrypoint_script.sh.tmpl
@@ -1,4 +1,4 @@
-#!/usr/local/bin/bash
+#!/bin/sh
set -o errexit
set -o nounset
@@ -8,16 +8,14 @@ set -x
# External variables
# JOB_COMPLETION_INDEX - completion index of the job.
-for i in {0..{{.SlurmNTasksPerNode}}}
-do
- # ["COMPLETION_INDEX"]="CONTAINER_INDEX_1,CONTAINER_INDEX_2"
- declare -A array_indexes=({{.ArrayIndexes}}) # Requires bash v4+
+array_indexes="{{.ArrayIndexes}}"
+container_indexes=$(echo "$array_indexes" | awk -F';' -v idx="$JOB_COMPLETION_INDEX" '{print $((idx + 1))}')
- container_indexes=${array_indexes[${JOB_COMPLETION_INDEX}]}
- container_indexes=(${container_indexes//,/ })
+for i in $(seq 0 {{.SlurmNTasksPerNode}})
+do
+ container_index=$(echo "$container_indexes" | awk -F',' -v idx="$i" '{print $((idx + 1))}')
- if [[ ! -v container_indexes[$i] ]];
- then
+ if [ -z "$container_index" ]; then
break
fi
@@ -57,9 +55,9 @@ SLURM_SUBMIT_DIR={{.SlurmSubmitDir}}
SLURM_SUBMIT_HOST=$HOSTNAME
SLURM_JOB_NODELIST={{.SlurmJobNodeList}}
SLURM_JOB_FIRST_NODE={{.SlurmJobFirstNode}}
-SLURM_JOB_ID=$(( JOB_COMPLETION_INDEX * {{.SlurmNTasksPerNode}} + i + {{.SlurmArrayJobID}} ))
-SLURM_JOBID=$(( JOB_COMPLETION_INDEX * {{.SlurmNTasksPerNode}} + i + {{.SlurmArrayJobID}} ))
-SLURM_ARRAY_TASK_ID=${container_indexes[$i]}
+SLURM_JOB_ID=$(expr $JOB_COMPLETION_INDEX \* {{.SlurmNTasksPerNode}} + $i + {{.SlurmArrayJobID}})
+SLURM_JOBID=$(expr $JOB_COMPLETION_INDEX \* {{.SlurmNTasksPerNode}} + $i + {{.SlurmArrayJobID}})
+SLURM_ARRAY_TASK_ID=$container_index
EOF
done
diff --git a/cmd/experimental/kjobctl/pkg/cmd/create/create.go b/cmd/experimental/kjobctl/pkg/cmd/create/create.go
index ccec998d2f..cd49c8a44a 100644
--- a/cmd/experimental/kjobctl/pkg/cmd/create/create.go
+++ b/cmd/experimental/kjobctl/pkg/cmd/create/create.go
@@ -351,7 +351,7 @@ var createModeSubcommands = map[string]modeSubcommand{
subcmd.Flags().BoolVar(&o.IgnoreUnknown, ignoreUnknownFlagName, false,
"Ignore all the unsupported flags in the bash script.")
- subcmd.Flags().StringVar(&o.InitImage, initImageFlagName, "bash:5-alpine3.20",
+ subcmd.Flags().StringVar(&o.InitImage, initImageFlagName, "registry.k8s.io/busybox:1.27.2",
"The image used for the init container.")
subcmd.Flags().BoolVar(&o.SkipPriorityValidation, skipPriorityValidationFlagName, false,
"Skip workload priority class validation. Add priority class label even if the class does not exist.")
diff --git a/cmd/experimental/kjobctl/pkg/cmd/create/create_test.go b/cmd/experimental/kjobctl/pkg/cmd/create/create_test.go
index 7aa1b47b76..0058e561fa 100644
--- a/cmd/experimental/kjobctl/pkg/cmd/create/create_test.go
+++ b/cmd/experimental/kjobctl/pkg/cmd/create/create_test.go
@@ -724,8 +724,8 @@ func TestCreateCmd(t *testing.T) {
Profile("profile").
Mode(v1alpha1.SlurmMode).
Subdomain("profile-slurm").
- WithInitContainer(*wrappers.MakeContainer("slurm-init-env", "bash:5-alpine3.20").
- Command("bash", "/slurm/scripts/init-entrypoint.sh").
+ WithInitContainer(*wrappers.MakeContainer("slurm-init-env", "registry.k8s.io/busybox:1.27.2").
+ Command("sh", "/slurm/scripts/init-entrypoint.sh").
WithVolumeMount(corev1.VolumeMount{Name: "slurm-scripts", MountPath: "/slurm/scripts"}).
WithVolumeMount(corev1.VolumeMount{Name: "slurm-env", MountPath: "/slurm/env"}).
Obj()).
@@ -778,7 +778,7 @@ func TestCreateCmd(t *testing.T) {
Mode(v1alpha1.SlurmMode).
Data(map[string]string{
"script": "#!/bin/bash\nsleep 300'",
- "init-entrypoint.sh": `#!/usr/local/bin/bash
+ "init-entrypoint.sh": `#!/bin/sh
set -o errexit
set -o nounset
@@ -788,16 +788,14 @@ set -x
# External variables
# JOB_COMPLETION_INDEX - completion index of the job.
-for i in {0..1}
-do
- # ["COMPLETION_INDEX"]="CONTAINER_INDEX_1,CONTAINER_INDEX_2"
- declare -A array_indexes=(["0"]="0") # Requires bash v4+
+array_indexes="0"
+container_indexes=$(echo "$array_indexes" | awk -F';' -v idx="$JOB_COMPLETION_INDEX" '{print $((idx + 1))}')
- container_indexes=${array_indexes[${JOB_COMPLETION_INDEX}]}
- container_indexes=(${container_indexes//,/ })
+for i in $(seq 0 1)
+do
+ container_index=$(echo "$container_indexes" | awk -F',' -v idx="$i" '{print $((idx + 1))}')
- if [[ ! -v container_indexes[$i] ]];
- then
+ if [ -z "$container_index" ]; then
break
fi
@@ -837,9 +835,9 @@ SLURM_SUBMIT_DIR=/slurm/scripts
SLURM_SUBMIT_HOST=$HOSTNAME
SLURM_JOB_NODELIST=profile-slurm-0.profile-slurm
SLURM_JOB_FIRST_NODE=profile-slurm-0.profile-slurm
-SLURM_JOB_ID=$(( JOB_COMPLETION_INDEX * 1 + i + 1 ))
-SLURM_JOBID=$(( JOB_COMPLETION_INDEX * 1 + i + 1 ))
-SLURM_ARRAY_TASK_ID=${container_indexes[$i]}
+SLURM_JOB_ID=$(expr $JOB_COMPLETION_INDEX \* 1 + $i + 1)
+SLURM_JOBID=$(expr $JOB_COMPLETION_INDEX \* 1 + $i + 1)
+SLURM_ARRAY_TASK_ID=$container_index
EOF
done
@@ -982,7 +980,7 @@ error_path=$(unmask_filename "$SBATCH_ERROR")
LocalQueue("lq1").
Subdomain("profile-slurm").
WithInitContainer(*wrappers.MakeContainer("slurm-init-env", "bash:latest").
- Command("bash", "/slurm/scripts/init-entrypoint.sh").
+ Command("sh", "/slurm/scripts/init-entrypoint.sh").
WithVolumeMount(corev1.VolumeMount{Name: "slurm-scripts", MountPath: "/slurm/scripts"}).
WithVolumeMount(corev1.VolumeMount{Name: "slurm-env", MountPath: "/slurm/env"}).
Obj()).
@@ -1055,7 +1053,7 @@ error_path=$(unmask_filename "$SBATCH_ERROR")
LocalQueue("lq1").
Data(map[string]string{
"script": "#!/bin/bash\nsleep 300'",
- "init-entrypoint.sh": `#!/usr/local/bin/bash
+ "init-entrypoint.sh": `#!/bin/sh
set -o errexit
set -o nounset
@@ -1065,16 +1063,14 @@ set -x
# External variables
# JOB_COMPLETION_INDEX - completion index of the job.
-for i in {0..3}
-do
- # ["COMPLETION_INDEX"]="CONTAINER_INDEX_1,CONTAINER_INDEX_2"
- declare -A array_indexes=(["0"]="0,1,2" ["1"]="3,4,5" ["2"]="6,7,8" ["3"]="9,10,11" ["4"]="12,13,14" ["5"]="15,16,17" ["6"]="18,19,20" ["7"]="21,22,23" ["8"]="24,25") # Requires bash v4+
+array_indexes="0,1,2;3,4,5;6,7,8;9,10,11;12,13,14;15,16,17;18,19,20;21,22,23;24,25"
+container_indexes=$(echo "$array_indexes" | awk -F';' -v idx="$JOB_COMPLETION_INDEX" '{print $((idx + 1))}')
- container_indexes=${array_indexes[${JOB_COMPLETION_INDEX}]}
- container_indexes=(${container_indexes//,/ })
+for i in $(seq 0 3)
+do
+ container_index=$(echo "$container_indexes" | awk -F',' -v idx="$i" '{print $((idx + 1))}')
- if [[ ! -v container_indexes[$i] ]];
- then
+ if [ -z "$container_index" ]; then
break
fi
@@ -1114,9 +1110,9 @@ SLURM_SUBMIT_DIR=/slurm/scripts
SLURM_SUBMIT_HOST=$HOSTNAME
SLURM_JOB_NODELIST=profile-slurm-fpxnj-0.profile-slurm-fpxnj,profile-slurm-fpxnj-1.profile-slurm-fpxnj
SLURM_JOB_FIRST_NODE=profile-slurm-fpxnj-0.profile-slurm-fpxnj
-SLURM_JOB_ID=$(( JOB_COMPLETION_INDEX * 3 + i + 1 ))
-SLURM_JOBID=$(( JOB_COMPLETION_INDEX * 3 + i + 1 ))
-SLURM_ARRAY_TASK_ID=${container_indexes[$i]}
+SLURM_JOB_ID=$(expr $JOB_COMPLETION_INDEX \* 3 + $i + 1)
+SLURM_JOBID=$(expr $JOB_COMPLETION_INDEX \* 3 + $i + 1)
+SLURM_ARRAY_TASK_ID=$container_index
EOF
done