diff --git a/bin/eggroll_boot_standalone.py b/bin/eggroll_boot_standalone.py deleted file mode 100644 index 489755602..000000000 --- a/bin/eggroll_boot_standalone.py +++ /dev/null @@ -1,97 +0,0 @@ -import os -import argparse -import platform -from subprocess import Popen, PIPE - - -def get_property(config_file, property_name): - with open(config_file) as i_file: - for line in i_file: - if line == "\n" or line.find("=") == -1: - continue - - values = line.strip("\n").strip(" ").split("=") - if values[0] == property_name: - return values[1] - - continue - - return None - - -if __name__ == '__main__': - args_parser = argparse.ArgumentParser() - args_parser.add_argument('-s', '--session-id') - args_parser.add_argument('-p', '--port', default='0') - args_parser.add_argument('-c', '--config') - - args = args_parser.parse_args() - - eggroll_home = os.environ.get('EGGROLL_HOME') - if eggroll_home is None: - raise Exception("EGGROLL_HOME not set") - - if args.config: - conf_file = args.config - else: - conf_file = f'{eggroll_home}/conf/eggroll.properties' - print(f'reading default config: {conf_file}') - - session_id = args.session_id - cluster_manager_port = args.port - - eggroll_logs_dir = os.environ.get('EGGROLL_LOGS_DIR') - if eggroll_logs_dir is None: - eggroll_logs_dir = get_property(conf_file, "eggroll.logs.dir") - if eggroll_logs_dir is None: - eggroll_logs_dir = os.path.join(eggroll_home, 'logs') - - os.environ["EGGROLL_LOGS_DIR"] = os.path.join(eggroll_logs_dir, session_id) - - eggroll_log_conf = eggroll_home + '/conf' + '/log4j2.properties' - - if os.path.exists(os.path.join(eggroll_logs_dir, 'eggroll')) is not True: - os.makedirs(os.path.join(eggroll_logs_dir, 'eggroll')) - - javahome = get_property(conf_file, "eggroll.resourcemanager.bootstrap.roll_pair_master.javahome") - classpath = os.path.join(eggroll_home, 'jvm/core/target/lib/*') + ";" + os.path.join(eggroll_home, 'lib/*') + ";" + os.path.join(eggroll_home, 'jvm/roll_pair/target/lib/*') - - if platform.system() == "Windows": - if javahome is None: - java_cmd = 'java.exe' - else: - java_cmd = '\"' + javahome + '\\bin\\java.exe ' + '\"' - else: - if javahome is None: - p = Popen(['which java'], stdout=PIPE, stderr=PIPE, stdin=PIPE) - java_cmd = p.stdout.read() - else: - java_cmd = javahome + '/bin/java' - - print("EGGROLL_HOME:", eggroll_home) - os.chdir(eggroll_home) - - standalone_tag = os.environ.get("EGGROLL_STANDALONE_TAG", None) - if standalone_tag == None: - java_define = ' -Dlog4j.configurationFile=' + eggroll_log_conf - else: - java_define = ' -Dlog4j.configurationFile=' + eggroll_log_conf + ' -Deggroll.standalone.tag=' + standalone_tag - - cmd = java_cmd + java_define + ' -cp ' + classpath +\ - ' com.webank.eggroll.core.Bootstrap ' +\ - ' --ignore-rebind ' +\ - ' --bootstraps com.webank.eggroll.core.resourcemanager.ClusterManagerBootstrap,com.webank.eggroll.core.resourcemanager.NodeManagerBootstrap ' +\ - ' -c ' + conf_file +\ - ' -s ' + session_id +\ - ' -p ' + cluster_manager_port - - eggroll_log_file = 'eggroll/bootstrap-standalone-manager.out' - eggroll_err_file = 'eggroll/bootstrap-standalone-manager.err' - - log_file = os.path.join(eggroll_logs_dir, eggroll_log_file) - err_file = os.path.join(eggroll_logs_dir, eggroll_err_file) - print(cmd) - - log_file_fp = open(log_file, 'ab') - err_file_fp = open(err_file, 'ab') - proc = Popen(cmd, shell=False, stdout=log_file_fp, stderr=err_file_fp) diff --git a/bin/eggroll_boot_standalone.sh b/bin/eggroll_boot_standalone.sh deleted file mode 100644 index 37da0f9b9..000000000 --- a/bin/eggroll_boot_standalone.sh +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env bash - -SHELL_FOLDER=$(dirname "$0") - -if [[ -z ${EGGROLL_HOME} ]]; then - echo "env variable EGGROLL_HOME not set" - exit -1 -fi - -get_property() { - property_value=`grep $2 $1 | cut -d '=' -f 2-` -} - -#set -x -session_id="null_sid" -version=2.0 -while getopts ":s:p:e:c:" opt; do - case $opt in - s) - session_id=$OPTARG - ;; - p) - manager_port=$OPTARG - ;; - e) - eggs=$OPTARG - ;; - c) - config=$OPTARG - ;; - ?) - echo "Invalid option: -$OPTARG index:$OPTIND" - ;; - esac -done - -if [[ -z ${EGGROLL_LOGS_DIR} ]]; then - EGGROLL_LOGS_DIR=${EGGROLL_HOME}/logs/ -fi - -if [[ -z ${config} ]]; then - config=${EGGROLL_HOME}/conf/eggroll.properties -fi - -if [[ ! -d "${EGGROLL_LOGS_DIR}/eggroll" ]]; then - mkdir -p ${EGGROLL_LOGS_DIR}/eggroll -fi - -if [[ -z ${manager_port} ]]; then - get_property ${config} "eggroll.resourcemanager.clustermanager.port" - manager_port=${property_value} -fi - - -cd ${EGGROLL_HOME} -echo "EGGROLL_HOME: ${EGGROLL_HOME}" - -if [[ -z ${EGGROLL_STANDALONE_TAG} ]]; then - java_define="-Dlog4j.configurationFile=${EGGROLL_HOME}/conf/log4j2.properties" -else - java_define="-Dlog4j.configurationFile=${EGGROLL_HOME}/conf/log4j2.properties -Deggroll.standalone.tag=${EGGROLL_STANDALONE_TAG}" -fi - -cmd="java $java_define -cp ${EGGROLL_HOME}/jvm/core/target/lib/*:${EGGROLL_HOME}/lib/*:${EGGROLL_HOME}/jvm/roll_pair/target/lib/* com.webank.eggroll.core.Bootstrap --ignore-rebind --bootstraps com.webank.eggroll.core.resourcemanager.ClusterManagerBootstrap,com.webank.eggroll.core.resourcemanager.NodeManagerBootstrap -c ${config} -s $session_id -p $manager_port &" -echo "cmd: ${cmd}" -eval ${cmd} >> ${EGGROLL_HOME}/logs/eggroll/bootstrap-standalone-manager.out 2>>${EGGROLL_HOME}/logs/eggroll/bootstrap-standalone-manager.err - -#while [ 1 ]; do -# sleep 1 -#done diff --git a/bin/eggroll_dashboard.sh b/bin/eggroll_dashboard.sh deleted file mode 100644 index 9602205ca..000000000 --- a/bin/eggroll_dashboard.sh +++ /dev/null @@ -1,192 +0,0 @@ -#!/bin/bash - -cwd=$(cd `dirname $0`; pwd) -cd $cwd/.. -export EGGROLL_HOME=`pwd` - -export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION='python' -cd ${EGGROLL_HOME} -echo "EGGROLL_HOME=${EGGROLL_HOME}" - -eval action=\$$# -start_mode=1 -modules=(dashboard) - -if [ $action = starting ];then - action=start - start_mode=0 -elif [ $action = restarting ];then - action=restart - start_mode=0 -fi - -get_property() { - property_value=`grep $1 ${EGGROLL_HOME}/conf/eggroll.properties | cut -d= -f 2-` -} - -get_property "eggroll.resourcemanager.process.tag" -processor_tag=${property_value} -if [ -z "${processor_tag}" ];then - processor_tag=EGGROLL_DAEMON -fi -echo "processor_tag=$processor_tag" - -main() { - case "$module" in - dashboard) - main_class=org.fedai.eggroll.webapp.JettyServer - get_property "eggroll.jetty.server.port" - port=${property_value} - ;; - *) - usage - exit -1 - esac -} -action() { - case "$action" in - debug) - stop - debug - status - ;; - start) - start - status - ;; - stop) - stop - status - ;; - kill) - shut - status - ;; - status) - status - ;; - restart) - stop - start - status - ;; - *) - usage - exit -1 - esac -} - -all() { - for module in "${modules[@]}"; do - main - echo - echo "[INFO] $module=${main_class}" - echo "[INFO] processing: ${module} ${action}" - echo "==================" - action - echo "--------------" - done -} - -usage() { - echo "usage: `basename ${0}` {dashboard | all} {start | stop | kill | restart | status}" -} - -multiple() { - total=$# - for (( i=1; i> ${EGGROLL_HOME}/logs/eggroll/bootstrap.${module}.out 2>>${EGGROLL_HOME}/logs/eggroll/bootstrap.${module}.err - else - nohup $cmd >> ${EGGROLL_HOME}/logs/eggroll/bootstrap.${module}.out 2>>${EGGROLL_HOME}/logs/eggroll/bootstrap.${module}.err & - fi - - getpid - if [[ $? -eq 0 ]]; then - echo "service start sucessfully. pid=${pid}" - else - echo "service start failed" - fi - else - echo "service already started. pid=${pid}" - fi -} -stop() { - getpid - if [[ -n ${pid} ]]; then - echo "killing: - `ps aux | grep ${pid} | grep ${processor_tag} | grep ${main_class} | grep -v grep`" - kill ${pid} - sleep 1 - flag=0 - while [ $flag -eq 0 ] - do - getpid - flag=$? - done - echo "killed" - else - echo "service not running" - fi -} -case "$1" in - all) - all $@ - ;; - usage) - usage - ;; - *) - multiple $@ - ;; -esac - -cd $cwd - diff --git a/bin/eggroll_old.sh b/bin/eggroll_old.sh deleted file mode 100644 index f1ed81dc3..000000000 --- a/bin/eggroll_old.sh +++ /dev/null @@ -1,271 +0,0 @@ -#!/bin/bash - -# -# Copyright 2019 The eggroll Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -cwd=$(cd `dirname $0`; pwd) -cd $cwd/.. -export EGGROLL_HOME=`pwd` - -export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION='python' -cd ${EGGROLL_HOME} -echo "EGGROLL_HOME=${EGGROLL_HOME}" - -eval action=\$$# -start_mode=1 -modules=(clustermanager nodemanager dashboard) - -if [ $action = starting ];then - action=start - start_mode=0 -elif [ $action = restarting ];then - action=restart - start_mode=0 -fi - -get_property() { - property_value=`grep $1 ${EGGROLL_HOME}/conf/eggroll.properties | cut -d= -f 2-` -} - -get_property "eggroll.resourcemanager.process.tag" -processor_tag=${property_value} -if [ -z "${processor_tag}" ];then - processor_tag=EGGROLL_DAEMON -fi -echo "processor_tag=$processor_tag" - -main() { - case "$module" in - clustermanager) - main_class=org.fedai.eggroll.clustermanager.Bootstrap - get_property "eggroll.resourcemanager.clustermanager.port" - port=${property_value} - get_property "eggroll.resourcemanager.clustermanager.jvm.options" - jvm_options=${property_value} - ;; - nodemanager) - main_class=org.fedai.eggroll.nodemanager.Bootstrap - get_property "eggroll.resourcemanager.nodemanager.port" - port=${property_value} - get_property "eggroll.resourcemanager.nodemanager.jvm.options" - jvm_options=${property_value} - ;; - dashboard) - main_class=org.fedai.eggroll.webapp.JettyServer - get_property "eggroll.dashboard.server.port" - port=${property_value} - ;; - *) - usage - exit -1 - esac -} - -action() { - case "$action" in - debug) - stop - debug - status - ;; - start) - start - status - ;; - stop) - stop - status - ;; - kill) - shut - status - ;; - status) - status - ;; - restart) - stop - start - status - ;; - *) - usage - exit -1 - esac -} - -all() { - for module in "${modules[@]}"; do - main - echo - echo "[INFO] $module=${main_class}" - echo "[INFO] processing: ${module} ${action}" - echo "==================" - action - echo "--------------" - done -} - -usage() { - echo "usage: `basename ${0}` {clustermanager | nodemanager | dashboard | all} {start | stop | kill | restart | status}" -} - -multiple() { - total=$# - for (( i=1; i> ${EGGROLL_HOME}/logs/eggroll/bootstrap.${module}.out 2>>${EGGROLL_HOME}/logs/eggroll/bootstrap.${module}.err - else - exec $cmd >> ${EGGROLL_HOME}/logs/eggroll/bootstrap.${module}.out 2>>${EGGROLL_HOME}/logs/eggroll/bootstrap.${module}.err & - fi - - inspect_pid 5 $! - getpid - if [[ $? -eq 0 ]]; then - echo "service start sucessfully. pid=${pid}" - else - echo "service start failed" - fi - else - echo "service already started. pid=${pid}" - fi -} - -debug() { - getpid - if [[ $? -eq 1 ]]; then - mklogsdir - export EGGROLL_LOG_FILE=${module} - export module=${module} - cmd="java -server -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=7007 ${jvm_options} -Dlog4j.configurationFile=${EGGROLL_HOME}/conf/log4j2.xml -Dmodule=${module} -cp ${EGGROLL_HOME}/lib/*: ${main_class} -p $port -s ${processor_tag}" - - echo $cmd - if [ $start_mode = 0 ];then - exec $cmd >> ${EGGROLL_HOME}/logs/eggroll/bootstrap.${module}.out 2>>${EGGROLL_HOME}/logs/eggroll/bootstrap.${module}.err - else - exec $cmd >> ${EGGROLL_HOME}/logs/eggroll/bootstrap.${module}.out 2>>${EGGROLL_HOME}/logs/eggroll/bootstrap.${module}.err & - fi - - getpid - if [[ $? -eq 0 ]]; then - echo "service debug sucessfully. pid=${pid}" - else - echo "service debug failed" - fi - else - echo "service already started. pid=${pid}" - fi -} - -stop() { - getpid - if [[ -n ${pid} ]]; then - echo "killing: - `ps aux | grep ${pid} | grep ${processor_tag} | grep ${main_class} | grep -v grep`" - kill ${pid} - sleep 1 - flag=0 - while [ $flag -eq 0 ] - do - getpid - flag=$? - done - echo "killed" - else - echo "service not running" - fi -} - -shut() { - getpid - if [[ -n ${pid} ]]; then - echo "killing: - `ps aux | grep ${pid} | grep ${processor_tag} | grep ${main_class} | grep -v grep`" - kill -9 ${pid} - sleep 1 - flag=0 - while [ $flag -eq 0 ] - do - getpid - flag=$? - done - echo "killed" - else - echo "service not running" - fi -} - -case "$1" in - all) - all $@ - ;; - usage) - usage - ;; - *) - multiple $@ - ;; -esac - -cd $cwd - diff --git a/conf/eggroll.properties b/conf/eggroll.properties index cb9a724a5..47671d146 100644 --- a/conf/eggroll.properties +++ b/conf/eggroll.properties @@ -40,8 +40,8 @@ eggroll.security.encrypt.public_key= eggroll.security.encrypt.private_key= eggroll.security.encrypt.enable=false -eggroll.data.dir=data/ -eggroll.logs.dir=logs/ +eggroll.data.dir=/data/projects/fate/eggroll/data/ +eggroll.logs.dir=/data/projects/fate/eggroll/logs/ eggroll.bootstrap.root.script=bin/eggroll_boot.sh diff --git a/jvm/node_manager/src/main/resources/eggroll.properties b/jvm/node_manager/src/main/resources/eggroll.properties index 844f944fc..aded593ba 100644 --- a/jvm/node_manager/src/main/resources/eggroll.properties +++ b/jvm/node_manager/src/main/resources/eggroll.properties @@ -26,8 +26,8 @@ eggroll.resourcemanager.clustermanager.jdbc.username=root eggroll.resourcemanager.clustermanager.jdbc.password=admin -eggroll.data.dir=data/ -eggroll.logs.dir=logs/ +eggroll.data.dir=/data/projects/fate/eggroll/data/ +eggroll.logs.dir=/data/projects/fate/eggroll/logs/ eggroll.resourcemanager.clustermanager.host=127.0.0.1 eggroll.resourcemanager.clustermanager.port=4670 eggroll.resourcemanager.nodemanager.host=127.0.0.1 diff --git a/python/eggroll/config/__main__.py b/python/eggroll/config/__main__.py new file mode 100644 index 000000000..912e2318d --- /dev/null +++ b/python/eggroll/config/__main__.py @@ -0,0 +1,29 @@ +def main(): + import argparse + import configparser + import omegaconf + + from .config import Config + + arguments = argparse.ArgumentParser() + arguments.add_argument("-c", "--config", type=str, required=True) + args = arguments.parse_args() + + config = Config() + config.load_default() + c = configparser.ConfigParser() + c.read(args.config) + for k, v in c.items("eggroll"): + try: + if v == "": + omegaconf.OmegaConf.select(config.config, k) + else: + config.config = omegaconf.OmegaConf.merge( + config.config, omegaconf.OmegaConf.from_dotlist([f"{k}={v}"]) + ) + except omegaconf.errors.ConfigKeyError: + print(f"Error: {k} is not set, please add it to eggroll/config/defaults") + + +if __name__ == "__main__": + main() diff --git a/python/eggroll/config/defaults.py b/python/eggroll/config/defaults.py index 49e50b877..5f9587a59 100644 --- a/python/eggroll/config/defaults.py +++ b/python/eggroll/config/defaults.py @@ -7,6 +7,14 @@ class DefaultConfig: @dataclass class EggrollConfig: + @dataclass + class DashboardConfig: + @dataclass + class ServerConfig: + port: int = 8083 + + server: ServerConfig = ServerConfig() + @dataclass class TransferConfig: @dataclass @@ -36,6 +44,18 @@ class JdbcConfig: @dataclass class NodemanagerConfig: + @dataclass + class GpuConfig: + @dataclass + class NumConfig: + shell: str = MISSING + + num: NumConfig = NumConfig() + + @dataclass + class NetConfig: + device: str = MISSING + @dataclass class ContainersConfig: @dataclass @@ -47,6 +67,8 @@ class DataConfig: host: str = MISSING port: int = MISSING containers: ContainersConfig = ContainersConfig() + net: NetConfig = NetConfig() + gpu: GpuConfig = GpuConfig() @dataclass class ProcessConfig: @@ -350,12 +372,28 @@ class ServerConfig: @dataclass class SecurityConfig: + @dataclass + class EncryptConfig: + public_key: str = MISSING + private_key: str = MISSING + enable: bool = False + + @dataclass + class SessionConfig: + @dataclass + class ExpiredConfig: + time: int = 30 + + expired: ExpiredConfig = ExpiredConfig() + @dataclass class LoginConfig: username: str = MISSING password: str = MISSING login: LoginConfig = LoginConfig() + session: SessionConfig = SessionConfig() + encrypt: EncryptConfig = EncryptConfig() @dataclass class ContainerConfig: @@ -604,5 +642,6 @@ class RootConfig: home: str = MISSING gc: GCConfig = GCConfig() transfer: TransferConfig = TransferConfig() + dashboard: DashboardConfig = DashboardConfig() eggroll: EggrollConfig = EggrollConfig()