-
Notifications
You must be signed in to change notification settings - Fork 10
/
experiments.sh
55 lines (49 loc) · 1.22 KB
/
experiments.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#! /bin/bash
# Cluster description (JSON) handed to deploy.py via --cluster and to
# runner.py via --server.
# TODO: You need to adapt CLUSTER_DEF to your own cluster, and run this script on 'ps.local'.
CLUSTER_DEF='{"ps": ["ps.local:7000"], "workers": ["a.workers.local:7000", "b.workers.local:7000"]}'

# PIDs of the background python3 processes. Both start at 0 and are
# overwritten with $! when the matching process is launched:
#   RUNNING_PID - the runner.py process of the current experiment
#   CLUSTER_PID - the deploy.py process
RUNNING_PID=0
CLUSTER_PID=0
# Launch deploy.py in the background and arm the abort handler.
# Globals:
#   CLUSTER_DEF (read)    - cluster description passed to --cluster
#   CLUSTER_PID (written) - PID of the background deploy.py process
# Side effects:
#   Installs run_abort as the handler for the INT and TERM signals.
function start_cluster {
  local -a deploy_cmd=(
    python3 deploy.py
    --cluster "${CLUSTER_DEF}"
    --deploy
    --id "ps:0"
    --omit
  )
  "${deploy_cmd[@]}" &
  CLUSTER_PID=$!
  trap 'run_abort' INT TERM
}
# Stop the background deploy.py process and wait for it to exit.
# Globals:
#   CLUSTER_PID (read) - PID recorded when deploy.py was launched
# Returns:
#   0 immediately when no cluster PID has been recorded; otherwise the
#   status of the final wait.
function stop_cluster {
  # A PID of 0 (or less) means nothing was started. Passing it to kill would
  # address a whole process group (or every process) instead of one child,
  # so bail out rather than signal ourselves.
  if [[ "${CLUSTER_PID:-0}" -le 0 ]]; then
    return 0
  fi
  # Signal 2 is SIGINT.
  kill -s 2 "${CLUSTER_PID}"
  # Two waits are kept from the original implementation.
  # NOTE(review): presumably the first wait can return early when a trapped
  # signal is delivered to this shell - confirm before collapsing to one.
  wait "${CLUSTER_PID}"
  wait "${CLUSTER_PID}"
}
# Run one experiment with runner.py and block until it finishes.
# Globals:
#   CLUSTER_DEF (read)    - cluster description passed to --server
#   RUNNING_PID (written) - PID of the background runner.py process
# Arguments:
#   $1 - experiment name          (--experiment)
#   $2 - aggregator name          (--aggregator)
#   $3 - number of workers        (--nb-workers)
#   $4 - declared Byzantine count (--nb-decl-byz-workers)
#   $5 - batch size               (--experiment-args "batch-size:$5")
#   $6 - maximum step             (--max-step)
# Outputs:
#   runner.py is told to write to "<name>.stdout" / "<name>.stderr", where
#   <name> is "E=$1-R=$2-N=$3-F=$4-B=$5".
# Returns:
#   The status of waiting on the runner.py process.
function run {
  local name="E=${1}-R=${2}-N=${3}-F=${4}-B=${5}"
  # Every expansion is quoted so that an argument containing whitespace or
  # glob characters reaches runner.py as exactly one word.
  # NOTE(review): the -1 periods/deltas presumably disable the corresponding
  # trigger - confirm against runner.py.
  local -a runner_args=(
    --server "${CLUSTER_DEF}"
    --experiment "${1}"
    --aggregator "${2}"
    --nb-workers "${3}"
    --nb-decl-byz-workers "${4}"
    --experiment-args "batch-size:${5}"
    --max-step "${6}"
    --stdout-to "${name}.stdout"
    --stderr-to "${name}.stderr"
    --evaluation-period -1
    --checkpoint-period 600
    --summary-period -1
    --evaluation-delta 1000
    --checkpoint-delta -1
    --summary-delta 1000
    --ev-job-name ps
    --no-wait
  )
  # Run in the background and wait explicitly, so the PID is recorded in
  # RUNNING_PID while the experiment is in progress.
  python3 runner.py "${runner_args[@]}" &
  RUNNING_PID=$!
  wait "${RUNNING_PID}"
}
# Signal handler: interrupt the running experiment (if any), stop the
# cluster, and terminate the script with status 0.
# Globals:
#   RUNNING_PID (read) - PID recorded when runner.py was launched
# Never returns: always ends with 'exit 0'.
function run_abort {
  # RUNNING_PID is still 0 when the signal arrives before any experiment was
  # launched. 'kill -s 2 0' would then signal this script's entire process
  # group, and 'wait 0' would fail, so only act on a real PID.
  if [[ "${RUNNING_PID:-0}" -gt 0 ]]; then
    # Signal 2 is SIGINT.
    kill -s 2 "${RUNNING_PID}"
    # Two waits are kept from the original implementation.
    # NOTE(review): presumably the first wait can return early when another
    # trapped signal is delivered - confirm before collapsing to one.
    wait "${RUNNING_PID}"
    wait "${RUNNING_PID}"
  fi
  stop_cluster
  exit 0
}
# Script entry point: bring the cluster up, run the experiments listed between
# the Begin/End markers one after another, then bring the cluster down.
function main {
  start_cluster
  # Begin experiments
  # Arguments to run: experiment aggregator nb-workers nb-decl-byz-workers batch-size max-step
  run mnist average 2 0 50 100000
  # End experiments
  stop_cluster
}
main