diff --git a/src/kube-runtime/build/kube-runtime.dockerfile b/src/kube-runtime/build/kube-runtime.dockerfile index d99685ae49..4b97c9022e 100644 --- a/src/kube-runtime/build/kube-runtime.dockerfile +++ b/src/kube-runtime/build/kube-runtime.dockerfile @@ -15,12 +15,16 @@ # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + FROM python:2.7-alpine3.8 ARG BARRIER_DIR=/opt/frameworkcontroller/frameworkbarrier -WORKDIR /pai-runtime -COPY --from=frameworkcontroller/frameworkbarrier:v0.3.0 $BARRIER_DIR/frameworkbarrier . +WORKDIR /usr/local/pai + COPY src/ ./ +COPY --from=frameworkcontroller/frameworkbarrier:v0.3.0 $BARRIER_DIR/frameworkbarrier ./init.d +RUN mkdir -p ./logs && \ + chmod -R +x ./ -CMD ["/bin/sh", "-c", "/pai-runtime/entry"] +CMD ["/bin/sh", "-c", "/usr/local/pai/init"] diff --git a/src/kube-runtime/src/entry b/src/kube-runtime/src/entry deleted file mode 100755 index 9eb25f5563..0000000000 --- a/src/kube-runtime/src/entry +++ /dev/null @@ -1,63 +0,0 @@ -#!/bin/sh - -# Copyright (c) Microsoft Corporation -# All rights reserved. -# -# MIT License -# -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the "Software"), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and -# to permit persons to whom the Software is furnished to do so, subject to the following conditions: -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING -# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - -# This script is entrypoint of pai runtime, this will copy run to /usr/local/pai/run -# which will be the entrypoint of task container. So `entry` running in runtime -# container, `/usr/local/pai/run` running in task container, and will be pid 1 in -# task container - -PAI_DIR=/usr/local/pai -INIT_DIR=${PAI_DIR}/init -PAI_LOG_DIR=${PAI_DIR}/logs - -MAIN_SH=${INIT_DIR}/main.sh -LOG_FILE=${PAI_LOG_DIR}/${FC_POD_UID}_entry.log - -mkdir -p $PAI_LOG_DIR -cp /pai-runtime/run $PAI_DIR -cp -r /pai-runtime/init $PAI_DIR - -# do preparation for each script -for i in `find /pai-runtime/prep/ -type f -regex ".*.sh"` ; do - file_name=`basename $i` - echo -e "##### [${i} start] ##### \n" - $i >> ${PAI_LOG_DIR}/${FC_POD_UID}_prep.log 2>&1 - echo -e "##### [${i} end] ##### \n\n" -done - -# do service discovery -cd $PAI_LOG_DIR - -/pai-runtime/frameworkbarrier > $PAI_LOG_DIR/${FC_POD_UID}_barrier.log 2>&1 -echo "barrier returns $?" >> $LOG_FILE -python /pai-runtime/parse.py framework.json > $PAI_DIR/runtime_env.sh 2> $PAI_LOG_DIR/${FC_POD_UID}_parse.log -echo "parser.py returns $?" >> $LOG_FILE - -# prepare main.sh, which is where user command get started -echo '#!/bin/sh' >> $MAIN_SH -echo -e "\n\n$USER_CMD" >> $MAIN_SH - -chmod +x $MAIN_SH - -# debug -echo -e "finished entry\nmain.sh has:" >> $LOG_FILE -cat $PAI_DIR/init/main.sh >> $LOG_FILE -echo -e "\nruntime_env.sh has:" >> $LOG_FILE -cat $PAI_DIR/runtime_env.sh >> $LOG_FILE diff --git a/src/kube-runtime/src/init b/src/kube-runtime/src/init new file mode 100644 index 0000000000..de0c47a64b --- /dev/null +++ b/src/kube-runtime/src/init @@ -0,0 +1,65 @@ +#!/bin/sh + +# Copyright (c) Microsoft Corporation +# All rights reserved. 
+# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +# This init script will be executed inside init container, +# all scripts under init.d will be executed in priority order. +# Init scripts will do preparations before user container starts. +# Runtime script will be executed as the entrypoint of user container +# and will be pid 1 process in user container. + +PAI_WORK_DIR=/usr/local/pai +PAI_INIT_DIR=${PAI_WORK_DIR}/init.d +PAI_RUNTIME_DIR=${PAI_WORK_DIR}/runtime.d + +PAI_LOG_DIR=${PAI_WORK_DIR}/logs +PAI_LOG_FILE=${PAI_LOG_DIR}/${FC_POD_UID}_init.log + + +# To run init scripts under init.d in init container, +# execute them here in priority order. +# Here're the steps to onboard a new init script, +# 1. put it under init.d +# 2. give it a priority in [0, 100] and insert below in order +# 3. 
add the following format block + +# comment for the script purpose +# priority=value +# ${PAI_INIT_DIR}/init.sh >> ${PAI_LOG_FILE} 2>&1 + + +# framework barrier +# priority=0 +${PAI_INIT_DIR}/frameworkbarrier > ${PAI_LOG_DIR}/${FC_POD_UID}_barrier.log 2>&1 +echo "barrier returns $?" >> ${PAI_LOG_FILE} + +# generate runtime env variables +# priority=10 +python ${PAI_INIT_DIR}/parse.py framework.json > ${PAI_RUNTIME_DIR}/runtime_env.sh 2> ${PAI_LOG_DIR}/${FC_POD_UID}_parse.log +echo "parser.py returns $?" >> ${PAI_LOG_FILE} + +# write user commands to user.sh (printf keeps backslashes and a leading -n/-e in USER_CMD verbatim) +# priority=100 +printf '%s\n' "${USER_CMD}" >> ${PAI_RUNTIME_DIR}/user.sh + +# for debug +echo -e "finished entry\nuser.sh has:" >> ${PAI_LOG_FILE} +cat ${PAI_RUNTIME_DIR}/user.sh >> ${PAI_LOG_FILE} +echo -e "\nruntime_env.sh has:" >> ${PAI_LOG_FILE} +cat ${PAI_RUNTIME_DIR}/runtime_env.sh >> ${PAI_LOG_FILE} diff --git a/src/kube-runtime/src/parse.py b/src/kube-runtime/src/init.d/parse.py similarity index 100% rename from src/kube-runtime/src/parse.py rename to src/kube-runtime/src/init.d/parse.py diff --git a/src/kube-runtime/src/run b/src/kube-runtime/src/runtime old mode 100755 new mode 100644 similarity index 53% rename from src/kube-runtime/src/run rename to src/kube-runtime/src/runtime index 0ac3294860..ef586f317f --- a/src/kube-runtime/src/run +++ b/src/kube-runtime/src/runtime @@ -18,32 +18,42 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# This script tries to behave like initd, will execute shell scripts under -# `/usr/local/pai/init`, main.sh will get a special treat: it will start after -# all other script and if main.sh exit this initd will kill all other processes in -# container. +# This runtime script will be executed inside task container, +# all scripts under runtime.d will be executed in priority order. +# User's commands will start in the end, and whole runtime script +# will exit after user's commands exit. 
+ +PAI_WORK_DIR=/usr/local/pai +PAI_RUNTIME_DIR=${PAI_WORK_DIR}/runtime.d -PAI_DIR=/usr/local/pai -INIT_DIR=${PAI_DIR}/init #PAI_LOG_DIR=/usr/local/pai/logs/attempt-${FC_FRAMEWORK_ATTEMPT_ID}/role-${FC_TASKROLE_NAME}/idx-${FC_TASK_INDEX}/attempt-${FC_TASK_ATTEMPT_ID}/ -PAI_LOG_DIR=${PAI_DIR}/logs - -. $PAI_DIR/runtime_env.sh - -for i in `find $INIT_DIR/ -type f -regex ".*.sh"` ; do - file_name=`basename $i` - if [ $file_name = "main.sh" ] ; then - echo "skip main.sh for now" - continue - else - echo "starting ${file_name}" - $i > ${PAI_LOG_DIR}/${FC_POD_UID}_${file_name}_init.log 2>&1 & - fi -done - -echo "starting main.sh" -$INIT_DIR/main.sh 2>&1 | tee ${PAI_LOG_DIR}/${FC_POD_UID}_main.log & # TODO tee may not exist in user's container -MAIN_PID=$! - -echo "wait for main" -wait $MAIN_PID +PAI_LOG_DIR=${PAI_WORK_DIR}/logs +PAI_LOG_FILE=${PAI_LOG_DIR}/${FC_POD_UID}_runtime.log + + +# To run runtime scripts under runtime.d in task container, +# execute them here in priority order. +# Here're the steps to onboard a new runtime script, +# 1. put it under runtime.d +# 2. give it a priority in [0, 100] and insert below in order +# 3. add the following format block + +# comment for the script purpose +# priority=value +# ${PAI_RUNTIME_DIR}/runtime.sh >> ${PAI_LOG_FILE} 2>&1 + + +# export runtime env variables (POSIX '.' so this also works when /bin/sh is not bash) +# priority=0 +. ${PAI_RUNTIME_DIR}/runtime_env.sh + +# prepare ssh + +# execute user commands +# priority=100 +echo "[INFO] USER COMMAND START" +${PAI_RUNTIME_DIR}/user.sh 2>&1 | tee ${PAI_LOG_DIR}/${FC_POD_UID}_main.log & # TODO tee may not exist in user's container +USER_PID=$! 
+ +wait ${USER_PID}; USER_EXIT_CODE=$? # save the wait status so the END log below cannot mask it +echo "[INFO] USER COMMAND END"; exit ${USER_EXIT_CODE} diff --git a/src/kube-runtime/src/init/sshd.sh b/src/kube-runtime/src/runtime.d/sshd.sh old mode 100755 new mode 100644 similarity index 95% rename from src/kube-runtime/src/init/sshd.sh rename to src/kube-runtime/src/runtime.d/sshd.sh index a6b3c76125..939f397a32 --- a/src/kube-runtime/src/init/sshd.sh +++ b/src/kube-runtime/src/runtime.d/sshd.sh @@ -43,7 +43,7 @@ function prepare_ssh() function prepare_job_ssh() { -# Job ssh files are mounted to /usr/local/pai/ssh-secret. +# Job ssh files are mounted to /usr/local/pai/ssh-secret. # Please refer to https://kubernetes.io/docs/concepts/configuration/secret/#use-case-pod-with-ssh-keys localPublicKeyPath=/etc/ssh-secret/ssh-publickey localPrivateKeyPath=/etc/ssh-secret/ssh-privatekey @@ -69,12 +69,12 @@ function prepare_user_ssh() function start_ssh() { - printf "%s %s\n" \ + printf "%s %s\n" \ "[INFO]" "start ssh service" service ssh restart } -# Try to install openssh if sshd is not found +# Try to install openssh if sshd is not found if [ ! -f /usr/sbin/sshd ] ; then apt-get update apt-get install -y openssh-client openssh-server diff --git a/src/kube-runtime/src/prep/ssh.sh b/src/kube-runtime/src/runtime.d/user.sh similarity index 97% rename from src/kube-runtime/src/prep/ssh.sh rename to src/kube-runtime/src/runtime.d/user.sh index 7318dc52f6..bc55ac586e 100644 --- a/src/kube-runtime/src/prep/ssh.sh +++ b/src/kube-runtime/src/runtime.d/user.sh @@ -17,4 +17,5 @@ # DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# TODO prepare sshd binary to user + +# user's commands here