Skip to content

Commit

Permalink
Use busybox init image. (#3238)
Browse files: browse the repository at this point in the history
  • Loading branch information
mbobrovskyi authored Oct 16, 2024
1 parent 3298f05 commit 6c18082
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 73 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ kjobctl create slurm --profile APPLICATION_PROFILE_NAME [--localqueue LOCAL_QUEU
</td>
</tr>
<tr>
<td colspan="2">--init-image string&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Default: &#34;bash:5-alpine3.20&#34;</td>
<td colspan="2">--init-image string&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Default: &#34;registry.k8s.io/busybox:1.27.2&#34;</td>
</tr>
<tr>
<td></td>
Expand Down
33 changes: 14 additions & 19 deletions cmd/experimental/kjobctl/pkg/builder/slurm_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ import (
"fmt"
"math"
"os"
"slices"
"strconv"
"strings"
"text/template"
Expand Down Expand Up @@ -282,7 +281,7 @@ func (b *slurmBuilder) build(ctx context.Context) (runtime.Object, []runtime.Obj
job.Spec.Template.Spec.InitContainers = append(job.Spec.Template.Spec.InitContainers, corev1.Container{
Name: "slurm-init-env",
Image: b.initImage,
Command: []string{"bash", slurmInitEntrypointFilenamePath},
Command: []string{"sh", slurmInitEntrypointFilenamePath},
VolumeMounts: []corev1.VolumeMount{
{
Name: "slurm-scripts",
Expand Down Expand Up @@ -485,22 +484,30 @@ func (b *slurmBuilder) build(ctx context.Context) (runtime.Object, []runtime.Obj
return job, []runtime.Object{configMap, service}, nil
}

func (b *slurmBuilder) buildIndexesMap() map[int32][]int32 {
indexMap := make(map[int32][]int32)
func (b *slurmBuilder) buildArrayIndexes() string {
nTasks := ptr.Deref(b.nTasks, 1)
length := int64(math.Ceil(float64(len(b.arrayIndexes.Indexes)) / float64(nTasks)))
containerIndexes := make([][]string, length)

var (
completionIndex int32
containerIndex int32
)
for _, index := range b.arrayIndexes.Indexes {
indexMap[completionIndex] = append(indexMap[completionIndex], index)
containerIndexes[completionIndex] = append(containerIndexes[completionIndex], fmt.Sprint(index))
containerIndex++
if containerIndex >= nTasks {
containerIndex = 0
completionIndex++
}
}
return indexMap

completionIndexes := make([]string, length)
for completionIndex, containerIndexes := range containerIndexes {
completionIndexes[completionIndex] = strings.Join(containerIndexes, ",")
}

return strings.Join(completionIndexes, ";")
}

type slurmInitEntrypointScript struct {
Expand Down Expand Up @@ -546,18 +553,6 @@ func (b *slurmBuilder) buildInitEntrypointScript() (string, error) {
nTasks := ptr.Deref(b.nTasks, 1)
nodes := ptr.Deref(b.nodes, 1)

indexesMap := b.buildIndexesMap()
keyValues := make([]string, 0, len(indexesMap))
for key, value := range indexesMap {
strIndexes := make([]string, 0, len(value))
for _, index := range value {
strIndexes = append(strIndexes, fmt.Sprintf("%d", index))
}
keyValues = append(keyValues, fmt.Sprintf(`["%d"]="%s"`, key, strings.Join(strIndexes, ",")))
}

slices.Sort(keyValues)

var gpusPerTask, memPerCPU, memPerGPU string
if b.gpusPerTask != nil {
gpus := make([]string, 0)
Expand All @@ -579,7 +574,7 @@ func (b *slurmBuilder) buildInitEntrypointScript() (string, error) {
}

scriptValues := slurmInitEntrypointScript{
ArrayIndexes: strings.Join(keyValues, " "),
ArrayIndexes: b.buildArrayIndexes(),

EnvsPath: slurmEnvsPath,
SbatchEnvFilename: slurmSbatchEnvFilename,
Expand Down
24 changes: 11 additions & 13 deletions cmd/experimental/kjobctl/pkg/builder/slurm_builder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ func TestSlurmBuilderDo(t *testing.T) {
Mode(v1alpha1.SlurmMode).
Subdomain("profile-slurm").
WithInitContainer(*wrappers.MakeContainer("slurm-init-env", "bash:latest").
Command("bash", "/slurm/scripts/init-entrypoint.sh").
Command("sh", "/slurm/scripts/init-entrypoint.sh").
WithVolumeMount(corev1.VolumeMount{Name: "slurm-scripts", MountPath: "/slurm/scripts"}).
WithVolumeMount(corev1.VolumeMount{Name: "slurm-env", MountPath: "/slurm/env"}).
Obj()).
Expand Down Expand Up @@ -282,7 +282,7 @@ func TestSlurmBuilderDo(t *testing.T) {
Mode(v1alpha1.SlurmMode).
Data(map[string]string{
"script": "#!/bin/bash\nsleep 300'",
"init-entrypoint.sh": `#!/usr/local/bin/bash
"init-entrypoint.sh": `#!/bin/sh
set -o errexit
set -o nounset
Expand All @@ -292,16 +292,14 @@ set -x
# External variables
# JOB_COMPLETION_INDEX - completion index of the job.
for i in {0..1}
do
# ["COMPLETION_INDEX"]="CONTAINER_INDEX_1,CONTAINER_INDEX_2"
declare -A array_indexes=(["0"]="1" ["1"]="2" ["2"]="3" ["3"]="4" ["4"]="5") # Requires bash v4+
array_indexes="1;2;3;4;5"
container_indexes=$(echo "$array_indexes" | awk -F';' -v idx="$JOB_COMPLETION_INDEX" '{print $((idx + 1))}')
container_indexes=${array_indexes[${JOB_COMPLETION_INDEX}]}
container_indexes=(${container_indexes//,/ })
for i in $(seq 0 1)
do
container_index=$(echo "$container_indexes" | awk -F',' -v idx="$i" '{print $((idx + 1))}')
if [[ ! -v container_indexes[$i] ]];
then
if [ -z "$container_index" ]; then
break
fi
Expand Down Expand Up @@ -341,9 +339,9 @@ SLURM_SUBMIT_DIR=/slurm/scripts
SLURM_SUBMIT_HOST=$HOSTNAME
SLURM_JOB_NODELIST=profile-slurm-0.profile-slurm,profile-slurm-1.profile-slurm
SLURM_JOB_FIRST_NODE=profile-slurm-0.profile-slurm
SLURM_JOB_ID=$(( JOB_COMPLETION_INDEX * 1 + i + 1 ))
SLURM_JOBID=$(( JOB_COMPLETION_INDEX * 1 + i + 1 ))
SLURM_ARRAY_TASK_ID=${container_indexes[$i]}
SLURM_JOB_ID=$(expr $JOB_COMPLETION_INDEX \* 1 + $i + 1)
SLURM_JOBID=$(expr $JOB_COMPLETION_INDEX \* 1 + $i + 1)
SLURM_ARRAY_TASK_ID=$container_index
EOF
done
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/local/bin/bash
#!/bin/sh

set -o errexit
set -o nounset
Expand All @@ -8,16 +8,14 @@ set -x
# External variables
# JOB_COMPLETION_INDEX - completion index of the job.

for i in {0..{{.SlurmNTasksPerNode}}}
do
# ["COMPLETION_INDEX"]="CONTAINER_INDEX_1,CONTAINER_INDEX_2"
declare -A array_indexes=({{.ArrayIndexes}}) # Requires bash v4+
array_indexes="{{.ArrayIndexes}}"
container_indexes=$(echo "$array_indexes" | awk -F';' -v idx="$JOB_COMPLETION_INDEX" '{print $((idx + 1))}')

container_indexes=${array_indexes[${JOB_COMPLETION_INDEX}]}
container_indexes=(${container_indexes//,/ })
for i in $(seq 0 {{.SlurmNTasksPerNode}})
do
container_index=$(echo "$container_indexes" | awk -F',' -v idx="$i" '{print $((idx + 1))}')

if [[ ! -v container_indexes[$i] ]];
then
if [ -z "$container_index" ]; then
break
fi

Expand Down Expand Up @@ -57,9 +55,9 @@ SLURM_SUBMIT_DIR={{.SlurmSubmitDir}}
SLURM_SUBMIT_HOST=$HOSTNAME
SLURM_JOB_NODELIST={{.SlurmJobNodeList}}
SLURM_JOB_FIRST_NODE={{.SlurmJobFirstNode}}
SLURM_JOB_ID=$(( JOB_COMPLETION_INDEX * {{.SlurmNTasksPerNode}} + i + {{.SlurmArrayJobID}} ))
SLURM_JOBID=$(( JOB_COMPLETION_INDEX * {{.SlurmNTasksPerNode}} + i + {{.SlurmArrayJobID}} ))
SLURM_ARRAY_TASK_ID=${container_indexes[$i]}
SLURM_JOB_ID=$(expr $JOB_COMPLETION_INDEX \* {{.SlurmNTasksPerNode}} + $i + 1)
SLURM_JOBID=$(expr $JOB_COMPLETION_INDEX \* {{.SlurmNTasksPerNode}} + $i + 1)
SLURM_ARRAY_TASK_ID=$container_index
EOF

done
2 changes: 1 addition & 1 deletion cmd/experimental/kjobctl/pkg/cmd/create/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ var createModeSubcommands = map[string]modeSubcommand{

subcmd.Flags().BoolVar(&o.IgnoreUnknown, ignoreUnknownFlagName, false,
"Ignore all the unsupported flags in the bash script.")
subcmd.Flags().StringVar(&o.InitImage, initImageFlagName, "bash:5-alpine3.20",
subcmd.Flags().StringVar(&o.InitImage, initImageFlagName, "registry.k8s.io/busybox:1.27.2",
"The image used for the init container.")
subcmd.Flags().BoolVar(&o.SkipPriorityValidation, skipPriorityValidationFlagName, false,
"Skip workload priority class validation. Add priority class label even if the class does not exist.")
Expand Down
50 changes: 23 additions & 27 deletions cmd/experimental/kjobctl/pkg/cmd/create/create_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -724,8 +724,8 @@ func TestCreateCmd(t *testing.T) {
Profile("profile").
Mode(v1alpha1.SlurmMode).
Subdomain("profile-slurm").
WithInitContainer(*wrappers.MakeContainer("slurm-init-env", "bash:5-alpine3.20").
Command("bash", "/slurm/scripts/init-entrypoint.sh").
WithInitContainer(*wrappers.MakeContainer("slurm-init-env", "registry.k8s.io/busybox:1.27.2").
Command("sh", "/slurm/scripts/init-entrypoint.sh").
WithVolumeMount(corev1.VolumeMount{Name: "slurm-scripts", MountPath: "/slurm/scripts"}).
WithVolumeMount(corev1.VolumeMount{Name: "slurm-env", MountPath: "/slurm/env"}).
Obj()).
Expand Down Expand Up @@ -778,7 +778,7 @@ func TestCreateCmd(t *testing.T) {
Mode(v1alpha1.SlurmMode).
Data(map[string]string{
"script": "#!/bin/bash\nsleep 300'",
"init-entrypoint.sh": `#!/usr/local/bin/bash
"init-entrypoint.sh": `#!/bin/sh
set -o errexit
set -o nounset
Expand All @@ -788,16 +788,14 @@ set -x
# External variables
# JOB_COMPLETION_INDEX - completion index of the job.
for i in {0..1}
do
# ["COMPLETION_INDEX"]="CONTAINER_INDEX_1,CONTAINER_INDEX_2"
declare -A array_indexes=(["0"]="0") # Requires bash v4+
array_indexes="0"
container_indexes=$(echo "$array_indexes" | awk -F';' -v idx="$JOB_COMPLETION_INDEX" '{print $((idx + 1))}')
container_indexes=${array_indexes[${JOB_COMPLETION_INDEX}]}
container_indexes=(${container_indexes//,/ })
for i in $(seq 0 1)
do
container_index=$(echo "$container_indexes" | awk -F',' -v idx="$i" '{print $((idx + 1))}')
if [[ ! -v container_indexes[$i] ]];
then
if [ -z "$container_index" ]; then
break
fi
Expand Down Expand Up @@ -837,9 +835,9 @@ SLURM_SUBMIT_DIR=/slurm/scripts
SLURM_SUBMIT_HOST=$HOSTNAME
SLURM_JOB_NODELIST=profile-slurm-0.profile-slurm
SLURM_JOB_FIRST_NODE=profile-slurm-0.profile-slurm
SLURM_JOB_ID=$(( JOB_COMPLETION_INDEX * 1 + i + 1 ))
SLURM_JOBID=$(( JOB_COMPLETION_INDEX * 1 + i + 1 ))
SLURM_ARRAY_TASK_ID=${container_indexes[$i]}
SLURM_JOB_ID=$(expr $JOB_COMPLETION_INDEX \* 1 + $i + 1)
SLURM_JOBID=$(expr $JOB_COMPLETION_INDEX \* 1 + $i + 1)
SLURM_ARRAY_TASK_ID=$container_index
EOF
done
Expand Down Expand Up @@ -982,7 +980,7 @@ error_path=$(unmask_filename "$SBATCH_ERROR")
LocalQueue("lq1").
Subdomain("profile-slurm").
WithInitContainer(*wrappers.MakeContainer("slurm-init-env", "bash:latest").
Command("bash", "/slurm/scripts/init-entrypoint.sh").
Command("sh", "/slurm/scripts/init-entrypoint.sh").
WithVolumeMount(corev1.VolumeMount{Name: "slurm-scripts", MountPath: "/slurm/scripts"}).
WithVolumeMount(corev1.VolumeMount{Name: "slurm-env", MountPath: "/slurm/env"}).
Obj()).
Expand Down Expand Up @@ -1055,7 +1053,7 @@ error_path=$(unmask_filename "$SBATCH_ERROR")
LocalQueue("lq1").
Data(map[string]string{
"script": "#!/bin/bash\nsleep 300'",
"init-entrypoint.sh": `#!/usr/local/bin/bash
"init-entrypoint.sh": `#!/bin/sh
set -o errexit
set -o nounset
Expand All @@ -1065,16 +1063,14 @@ set -x
# External variables
# JOB_COMPLETION_INDEX - completion index of the job.
for i in {0..3}
do
# ["COMPLETION_INDEX"]="CONTAINER_INDEX_1,CONTAINER_INDEX_2"
declare -A array_indexes=(["0"]="0,1,2" ["1"]="3,4,5" ["2"]="6,7,8" ["3"]="9,10,11" ["4"]="12,13,14" ["5"]="15,16,17" ["6"]="18,19,20" ["7"]="21,22,23" ["8"]="24,25") # Requires bash v4+
array_indexes="0,1,2;3,4,5;6,7,8;9,10,11;12,13,14;15,16,17;18,19,20;21,22,23;24,25"
container_indexes=$(echo "$array_indexes" | awk -F';' -v idx="$JOB_COMPLETION_INDEX" '{print $((idx + 1))}')
container_indexes=${array_indexes[${JOB_COMPLETION_INDEX}]}
container_indexes=(${container_indexes//,/ })
for i in $(seq 0 3)
do
container_index=$(echo "$container_indexes" | awk -F',' -v idx="$i" '{print $((idx + 1))}')
if [[ ! -v container_indexes[$i] ]];
then
if [ -z "$container_index" ]; then
break
fi
Expand Down Expand Up @@ -1114,9 +1110,9 @@ SLURM_SUBMIT_DIR=/slurm/scripts
SLURM_SUBMIT_HOST=$HOSTNAME
SLURM_JOB_NODELIST=profile-slurm-fpxnj-0.profile-slurm-fpxnj,profile-slurm-fpxnj-1.profile-slurm-fpxnj
SLURM_JOB_FIRST_NODE=profile-slurm-fpxnj-0.profile-slurm-fpxnj
SLURM_JOB_ID=$(( JOB_COMPLETION_INDEX * 3 + i + 1 ))
SLURM_JOBID=$(( JOB_COMPLETION_INDEX * 3 + i + 1 ))
SLURM_ARRAY_TASK_ID=${container_indexes[$i]}
SLURM_JOB_ID=$(expr $JOB_COMPLETION_INDEX \* 3 + $i + 1)
SLURM_JOBID=$(expr $JOB_COMPLETION_INDEX \* 3 + $i + 1)
SLURM_ARRAY_TASK_ID=$container_index
EOF
done
Expand Down

0 comments on commit 6c18082

Please sign in to comment.