forked from FNNDSC/CHRIS_docs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcovidnet_anonymized.sh
executable file
·624 lines (530 loc) · 22.9 KB
/
covidnet_anonymized.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
#!/usr/local/bin/bash
#
# Load the helper library from the current working directory.
# NOTE(review): the helpers called throughout this script (title, boxcenter,
# plugin_run, waitForNodeState, ...) are not defined in this file and are
# presumably provided by ffe.sh -- confirm. Because the path is relative,
# the script has to be started from the directory that contains ffe.sh.
source ./ffe.sh
# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
# start feedflow specification section
# |||||||||||||||||||||||||||||||||||||
#
# The following array declares the specific containers in the workflow
# as well as the arguments to be passed to each. This is a WIP attempt
# to templatize/describe feedflow structure.
#
# One array element per node of the feed tree. Each element looks like
#
#       <previousNode>:<thisNode>[:<loop>]| <container>: ARGS; <arg>; <arg>...
#
# NOTE(review): the exact grammar (the "*_n" suffix, the "l1" loop tag and
# the "@name" placeholders such as @prev_id, @image[_n] and @patientID) is
# interpreted by code outside this file -- confirm against ffe.sh before
# changing whitespace or separators inside these strings. In this script the
# placeholders are given values through the last argument of each
# plugin_run call.
#
declare -a a_WORKFLOWSPEC=(
"0:0|
fnndsc/pl-lungct: ARGS;
--title=COVIDNET_lung_CT_subjects"
"0:1*_n:l1|
parrmi/pl-dcm_anon: ARGS;
--title=anon;
--inputFile=@image[_n];
--previous_id=@prev_id"
"1:2*_n:l1|
fnndsc/pl-med2img: ARGS;
--inputFile=@image[_n];
--convertOnlySingleDICOM;
--title=@image[_n];
--previous_id=@prev_id"
"2*_n:3*_n:l1|
fnndsc/pl-covidnet: ARGS;
--imagefile=sample.png;
--title=COVIDNET;
--previous_id=@prev_id"
"3*_n:4*_n:l1|
fnndsc/pl-pdfgeneration: ARGS;
--imagefile=sample.png;
--patientId=@patientID;
--title=report;
--previous_id=@prev_id"
)
# Draft JSON rendering of the feedflow declared in a_WORKFLOWSPEC.
#
# As its own "WARNING" member states, this structure is not used; WORKFLOW is
# not referenced anywhere else in this script. It is kept as a sketch of a
# possible future format and is maintained as *valid* JSON (comma after the
# "WARNING" member, no trailing comma after the last "tree" element) so that
# it can be piped through a JSON tool without a parse error.
#
# NOTE(review): the sketch has drifted from a_WORKFLOWSPEC (e.g. it carries
# no --title arguments and node 1 has no "loop" tag) -- reconcile the two
# before relying on it.
WORKFLOW=\
'{
"WARNING": "THIS JSON STRUCTURE IS NOT USED!!!",
"meta": {
"loops": [
{
"l1": {
"var": "n",
"iterate": [1, 5]
}
}
]
},
"feed": {
"tree": [
{
"node_previous": { "id": 0},
"node_self": { "id": 0},
"container": "fnndsc/pl-lungct",
"args": ["--NOARGS"]
},
{
"node_previous": { "id": 0},
"node_self": { "id": 1},
"container": "parrmi/pl-dcm_anon",
"args": [
"--previous_id=@prev_id"
]
},
{
"node_previous": { "id": 1},
"node_self": { "id": 2, "loop": "l1" },
"container": "fnndsc/pl-med2img",
"args": [
"--inputFile=@image[_n]",
"--convertOnlySingleDICOM",
"--previous_id=@prev_id"
]
},
{
"node_previous": { "id": 2, "loop": "l1" },
"node_self": { "id": 3, "loop": "l1" },
"container": "fnndsc/pl-covidnet",
"args": [
"--imagefile=sample.png",
"--previous_id=@prev_id"
]
},
{
"node_previous": { "id": 3, "loop": "l1" },
"node_self": { "id": 4, "loop": "l1" },
"container": "fnndsc/pl-pdfgeneration",
"args": [
"--imagefile=sample.png",
"--patientId=@patientID",
"--previous_id=@prev_id"
]
}
]
}
}'
# Two arrays derived from the workflow specification. Each helper is handed
# the *name* of the spec array (with its [@] subscript) and the *name* of the
# array it should populate.
# NOTE(review): presumably a_PLUGINS receives the container names and a_ARGS
# the per-node argument strings -- confirm against ffe.sh.
declare -a a_PLUGINS=() a_ARGS=()
pluginArray_filterFromWorkflow  "a_WORKFLOWSPEC[@]" "a_PLUGINS"
argArray_filterFromWorkflow     "a_WORKFLOWSPEC[@]" "a_ARGS"
# ||||||||||||||||||||||||||||||||||
# end feedflow specification section
# //////////////////////////////////
# Help text, printed verbatim by the -x option.
#
# The text lives in a double-quoted string, so the shell still processes it:
#   * \" yields a literal double quote and \\ a literal backslash;
#   * a backslash at the very end of a line joins that line with the next;
#   * each pair of adjacent backticks is an empty command substitution, so
#     the backticks around words such as pl-lungct do not appear in the
#     printed text.
SYNOPSIS="
NAME
covidnet.sh
SYNOPSIS
covidnet.sh [-C <CUBEjsonDetails>] \\
[-r <protocol>] \\
[-p <port>] \\
[-a <cubeIP>] \\
[-u <user>] \\
[-w <passwd>] \\
[-G <graphvizDotFile>] \\
[-i <listOfLungImagesToProcess>] \\
[-s <sleepAfterPluginRun>] \\
[-W] \\
[-S] \\
[-R] \\
[-J] \\
[-q]
DESC
'covidnet.sh' posts a workflow based off COVID-NET to CUBE:
███:0 pl-lungct
|
███:1 pl-dcm_anon
__/│\__
_ / / | \ \_
/ / │ \.. \
↓ ↓ ↓ ↓ ↓
███ ███ ███ ███ ███ :2 pl-med2img
│ │ │ │ │
│ │ │ │ │
↓ ↓ ↓ ↓ ↓
███ ███ ███ ███ ███ :3 pl-covidnet
│ │ │ │ │
│ │ │ │ │
↓ ↓ ↓ ↓ ↓
███ ███ ███ ███ ███ :4 pl-pdfgeneration
The FS plugin, ``pl-lungct``, generates an output directory containing
several candidate images. This workflow will process each of those
images, resulting in a fanned tree execution topology.
By specifying specific images in the [-i <listOfLungImagesToProcess>],
only one branch per listed image will be created.
Note, this does require some implicit knowledge since the user of
this script would need to know which images exist. By running this
script with a ``-q``, a hard coded list of available images to process
is printed.
ARGS
[-s <sleepAfterPluginRun>]
Default is '0'. Adds an explicit system ``sleep`` after executing
a plugin. This can be useful in not overloading the ancillary
services when a large number of plugins are being dispatched
concurrently.
[-S]
If specified, save each plugin POST command on the filesystem. Useful
for debugging.
[-W]
If specified, will wait at the end of a single branch for success
of termination node before building a subsequent branch. This
demonstrates how to script wait functionality. This logic can be
used for simulating a delay while waiting for a scarce computing
resource (like a GPU) to be released for subsequent branches to
use.
[-R]
If specified, print a final one line report of the prediction for the
image being processed on a given branch. Note that this implies a [-W].
[-J]
If specified, print the full JSON prediction generated by the
pl-covidnet. Note that this implies a [-W].
[-G <graphvizDotFile>]
If specified, write two graphviz .dot files called
<graphvizDotFile>-nodes.dot
<graphvizDotFile>-nodes-args.dot
that describes the workflow in graphviz format. The first dot file
contains only the nodes in the tree, while the second contains the nodes
with graph edges labeled with the CLI args denoting the transition from
one node to another.
These dot files are suitable for rendering by graphviz parsers, e.g.
http://dreampuf.github.io/GraphvizOnline
http://viz-js.com
[-i <listOfLungImagesToProcess>]
Runs the inference pipeline for each of the comma separated images
in the <listOfLungImagesToProcess> string. Note these images *MUST*
be valid image(s) that exist in the output of ``pl-lungct``.
To see a list of valid images run this script with a ``-q``.
[-q]
Print a list of valid images and exit.
[-r <protocol>] (http)
[-p <port>] (8000)
[-a <cubeIP>] (%HOSTIP)
[-u <cubeUser>] (chris)
[-w <cubeUserPasswd>] (chris1234)
A set of values to specify the details of the CUBE instance to use
for running the workflow. Each of the above has (defaults) as shown.
This information can also be specified by passing a JSON string with
the [-C <CUBEjsonDetails>].
Using one of these specific args, however, is generally simpler. Most
often, the [-a <cubeIP>] will be used.
[-C <CUBEjsonDetails>]
If specified, interpret passed JSON string as the CUBE instance
on which to schedule the run. The default is of the form:
'{
\"protocol\": \"http\",
\"port\": \"8000\",
\"address\": \"%HOSTIP\",
\"user\": \"chris\",
\"password\": \"chris1234\"
}'
Note the single quotes about the structure. The '%HOSTIP' is a special
construct that will be dynamically replaced by the fully qualified IP
of the current host. This is useful in some proxied networks where the
string 'localhost' can be problematic.
EXAMPLES
Typical execution:
$ ./covidnet.sh -C '{
\"protocol\": \"http\",
\"port\": \"8000\",
\"address\": \"megalodon.local\",
\"user\": \"chris\",
\"password\": \"chris1234\"
}'
or equivalently:
$ ./covidnet.sh -a megalodon.local
TIMING CONSIDERATIONS
While this client script should ideally not concern itself with execution
concerns beyond the logical structure of a feedflow, some notes are
important:
* Too many apps POSTed in quick succession *might* overwhelm the
scheduler;
To not overwhelm the scheduler, it is a good idea to pause for a few
seconds after POSTing each app to the backend with a '-s 3' (for 3s
pause) flag.
Thus,
$ ./covidnet.sh -a megalodon.local -s 3 -G feed
where the '-G feed' also produces two graphviz dot files suitable for
rendering with a graphviz viewer.
REPORTING CONSIDERATIONS
This script can also fetch final prediction data and write a summary
report to the console. This is set using a '-R' (report) flag. Note that
to generate the report forces execution to run essentially in series and
not in parallel. This is so as to print the correct report with the correct
compute branch.
"
# ---------------------------------------------------------------------------
# Defaults. Everything in this section can be overridden from the command
# line by the option parsing that follows.
# ---------------------------------------------------------------------------

# Connection details of the CUBE instance (-a, -p, -r, -u, -w).
ADDRESS="%%HOSTIP"
PORT="8000"
PROTOCOL="http"
USER="chris"
PASSWD="chris1234"

# printf template that turns the five connection values (in the order
# protocol, port, address, user, password) into a JSON object.
CUBE_FMT='{
"protocol": "%s",
"port": "%s",
"address": "%s",
"user": "%s",
"password": "%s"
}'

# Graphviz output: fixed preamble plus two initially empty accumulators that
# are handed (by name) to digraph_add while branches are built.
GRAPHVIZHEADER='digraph G {
rankdir="LR";
subgraph cluster_0 {
style=filled;
color=lightgrey;
label = "ChRIS COVID-NET Graph";
node [style=filled,fillcolor=white,fontname="mono",fontsize=8];
edge [fontname="mono", fontsize=8];
'
GRAPHVIZBODY=""
GRAPHVIZBODYARGS=""
GRAPHVIZFILE=""

# Comma separated image list given with -i.
IMAGESTOPROCESS=""

# Integer counters ...
declare -i STEP=0 b_respSuccess=0 b_respFail=0
# ... integer settings ...
declare -i sleepAfterPluginRun=0
# ... and 0/1 switches, one per boolean command line option.
declare -i b_CUBEjson=0 b_graphviz=0 b_imageList=0 b_onlyShowImageNames=0
declare -i b_waitOnBranchFinish=0 b_printReport=0 b_printJSONprediction=0
declare -i b_saveCalls=0
# Command line parsing. Options followed by ':' in the option string take a
# value, which getopts delivers in OPTARG. Any option getopts does not
# recognise ends the script with status 1.
while getopts "C:G:i:qxr:p:a:u:w:WRJs:S" opt ; do
    case "$opt" in
        # --- which CUBE to talk to ---
        C)  b_CUBEjson=1
            CUBEJSON=$OPTARG
            ;;
        r)  PROTOCOL=$OPTARG
            ;;
        p)  PORT=$OPTARG
            ;;
        a)  ADDRESS=$OPTARG
            ;;
        u)  USER=$OPTARG
            ;;
        w)  PASSWD=$OPTARG
            ;;
        # --- which images to process ---
        i)  b_imageList=1
            IMAGESTOPROCESS=$OPTARG
            ;;
        q)  b_onlyShowImageNames=1
            ;;
        # --- pacing; -R and -J both switch waiting on as well ---
        s)  sleepAfterPluginRun=$OPTARG
            ;;
        W)  b_waitOnBranchFinish=1
            ;;
        R)  b_waitOnBranchFinish=1
            b_printReport=1
            ;;
        J)  b_waitOnBranchFinish=1
            b_printJSONprediction=1
            ;;
        # --- output and debugging ---
        G)  b_graphviz=1
            GRAPHVIZFILE=$OPTARG
            ;;
        S)  b_saveCalls=1
            ;;
        # --- help ---
        x)  echo "$SYNOPSIS"
            exit 0
            ;;
        *)  exit 1
            ;;
    esac
done
# Assemble the JSON description of the target CUBE from the individual
# connection settings. A JSON string supplied with -C replaces it wholesale.
CUBE=$(printf "$CUBE_FMT" "$PROTOCOL" "$PORT" "$ADDRESS" "$USER" "$PASSWD")
if (( b_CUBEjson )) ; then
    CUBE="$CUBEJSON"
    # The status lines further down print "$ADDRESS:$PORT"; pick the port up
    # from the supplied JSON too so they do not show a stale value. A JSON
    # without a "port" member leaves PORT untouched.
    cubePort=$(jq -r '.port // empty' <<< "$CUBE")
    if [[ -n "$cubePort" ]] ; then
        PORT=$cubePort
    fi
fi
# Read the address back from whichever JSON is in effect. The JSON is fed to
# jq through a quoted here-string so that it is never word-split or
# glob-expanded (it contains the password, which may hold such characters).
ADDRESS=$(jq -r .address <<< "$CUBE")
# Global variable that contains the "current" ID returned
# from a call to CUBE
ID="-1"
# Stage 1: make sure the external programs this script shells out to are
# available. Stop with status 4 if the failure counter is non-zero afterwards.
title -d 1 "Checking on required dependencies..."
for _boxLine in \
    "Verify that various command line tools needed to construct this " \
    "workflow exist on the UNIX path. If any of the below files are " \
    "not found, please install them according to the requirements of " \
    "your OS. " \
    ""
do
    boxcenter "$_boxLine"
done
dep_check "jq,chrispl-search,chrispl-run,http"
windowBottom
(( b_respFail <= 0 )) || exit 4
# Stage 2: ask CUBE for the id of every plugin named in a_PLUGINS and show
# the outcome of each lookup. Stop with status 2 if the failure counter is
# non-zero once all plugins have been queried.
title -d 1 "Checking for plugin IDs on CUBE...." \
    "(ids below denote plugin ids)"
b_respSuccess=0
b_respFail=0
for _boxLine in \
    "Verify that all the plugins that constitute this workflow are " \
    "registered to the CUBE instance with which we are communicating." \
    ""
do
    boxcenter "$_boxLine"
done
for plugin in "${a_PLUGINS[@]}" ; do
    # Split the plugin spec into its parts; only CONTAINER is used below.
    cparse $plugin "REPO" "CONTAINER" "MMN" "ENV"
    opBlink_feedback "$ADDRESS:$PORT" "::CUBE->$plugin" "op-->" "search"
    windowBottom
    RESP=$(chrispl-search --for id --using name="$CONTAINER" --onCUBE "$CUBE")
    # Must be captured immediately: this is the exit status of the search.
    searchStatus=$?
    # The third whitespace separated word of the reply is shown as the id.
    pluginID=$(echo $RESP | awk '{print $3}')
    opRet_feedback "$searchStatus" \
        "$ADDRESS:$PORT" "::CUBE->$plugin" \
        "result-->" "pid = $pluginID"
done
postQuery_report
windowBottom
(( b_respFail <= 0 )) || exit 2
# Stage 3: run the root (FS) node, wait until CUBE reports it as
# finishedSuccessfully, and collect the names of the DICOM files it produced.
title -d 1 "Start constructing the Feed by POSTing the root FS node..."
ROOTID=-1 retState="" filesInNode="" dcmFiles=""
for _boxLine in \
    "Run the root node and dynamically capture a list of output " \
    "files created by the base FS plugin. This file list will be" \
    "processed to create the actual list of DICOMS to process -- " \
    "each DICOM will spawn a new parallel branch. " \
    ""
do
    boxcenter "$_boxLine"
done
windowBottom

# ---- core logic: post node 0:0; its id is stored in ROOTID -----------------
plugin_run "0:0" "a_WORKFLOWSPEC[@]" "$CUBE" ROOTID $sleepAfterPluginRun \
    && id_check $ROOTID
waitForNodeState "$CUBE" "finishedSuccessfully" $ROOTID retState
dataInNode_get fname "$CUBE" $ROOTID filesInNode
# -----------------------------------------------------------------------------

# From every line of the listing take the third whitespace separated word,
# split it on "/" and keep the fifth component; only entries containing
# "dcm" survive the final filter.
dcmFiles=$(
    echo "$filesInNode" |
        awk '{ split($3, pathPart, "/"); print pathPart[5] }' |
        grep dcm
)
echo -en "\033[2A\033[2K"
# a_lungCT is the working list of images; a_lungCTorig keeps a copy of what
# the root node actually contains.
read -a a_lungCT <<< $(echo $dcmFiles)
a_lungCTorig=("${a_lungCT[@]}")
windowBottom
# Stage 4 (only with -i and/or -q): validate and list the images to process.
#
#   -i <list>   replaces the working list a_lungCT with the comma separated
#               names given by the user; every name must be present in the
#               root node (a_lungCTorig), otherwise the script stops with
#               status 1.
#   -q          stops the script with status 0 once the list has been shown.
#               Without -i the list is everything found in the root node, so
#               "-q" on its own prints the valid images and exits.
#
# The stage has to run when *either* option is given: guarding it on -i alone
# meant that a bare -q was ignored and the whole workflow was scheduled.
if (( b_imageList || b_onlyShowImageNames )) ; then
    title -d 1 "Checking that images to process exist in root pl-lungct..."
    boxcenter "Verify that any DICOMs explicitly listed by the user "
    boxcenter "when calling this script actually exist in the root "
    boxcenter "node. "
    boxcenter ""
    b_respSuccess=0
    b_respFail=0
    if (( b_imageList )) ; then
        # Commas become blanks; read then splits the names into the array.
        read -r -a a_lungCT <<< "${IMAGESTOPROCESS//,/ }"
    fi
    for image in "${a_lungCT[@]}" ; do
        opBlink_feedback "Image to process" "::$image" \
            "valid-->" "checking"
        windowBottom
        # Whole-word membership test: both the blank-joined list of root
        # images and the candidate are padded with blanks, so a name only
        # matches a complete entry, never part of one.
        if [[ " ${a_lungCTorig[*]} " == *" ${image} "* ]] ; then
            status=0
        else
            status=1
        fi
        opRet_feedback "$status" \
            "Image to process" "::$image" \
            "can process-->" "valid"
    done
    postImageCheck_report
    windowBottom
    if (( b_respFail > 0 )) ; then exit 1 ; fi
    if (( b_onlyShowImageNames )) ; then exit 0 ; fi
fi
# title -d 1 "Anonymizing the files POSTed by the root FS node..."
# # Anonymize files from the root node, wait for it to finish, and
# # collect a list of output files
# boxcenter "Anonymize the root node and dynamically capture a list of output "
# boxcenter "files created by the base FS plugin. This file list will be"
# boxcenter "processed to create the actual list of DICOMS to process -- "
# boxcenter "each DICOM will spawn a new parallel branch. "
# boxcenter ""
# windowBottom
# plugin_run ":1" "a_WORKFLOWSPEC[@]" "$CUBE" ID1 $sleepAfterPluginRun \
# "@prev_id=$ROOTID" && id_check $ID1
# digraph_add "GRAPHVIZBODY" "GRAPHVIZBODYARGS" ":0;$ROOTID" ":1;$ID1" \
# "a_WORKFLOWSPEC[@]"
# waitForNodeState "$CUBE" "finishedSuccessfully" $ID1 retState
# dataInNode_get fname "$CUBE" $ID1 filesInNode
# # Now, parse the list of files for DICOMs, read into an
# # array, and print the pruned file list
# dcmFiles=$( echo "$filesInNode" |\
# awk '{print $3}' |\
# awk -F \/ '{print $5}' | grep dcm)
# echo -en "\033[2A\033[2K"
# read -a a_lungCT_anon <<< $(echo $dcmFiles)
# a_lungCToriganon=("${a_lungCT_anon[@]}")
# windowBottom
# Stage 5: build one branch per image in a_lungCT:
#
#     root -> :1 pl-dcm_anon -> :2 pl-med2img -> :3 pl-covidnet
#          -> :4 pl-pdfgeneration
#
# Each plugin_run stores the id of the node it created in the variable named
# by its fourth argument; that id is passed to the next node as @prev_id and
# recorded for the graphviz output with digraph_add.
#
# With -W/-R/-J the script waits for the pl-covidnet node of a branch before
# starting the next branch; with -R/-J it also downloads that node's
# prediction-default.json into the current directory and prints it (-J)
# and/or a one line summary (-R).
#
# Stops with status 3 if the failure counter is non-zero at the end.
title -d 1 "Building and Scheduling workflow..."
boxcenter "Construct and run each branch, one per input DICOM file. "
boxcenter "If a wait condition has been specified, pause at the end of "
boxcenter "each branch until the final compute is successful before "
boxcenter "building the next parallel branch. "
boxcenter ""
boxcenter "If a report has been specified, print a final report on the "
boxcenter "prediction of the input image for that branch. "
boxcenter ""
# Now the branch(es)
# NOTE(review): the success counter starts at 1 here but at 0 in the earlier
# stages -- presumably to account for the root node that has already run;
# confirm against postRun_report.
b_respSuccess=1
b_respFail=0
boxcenter ""
boxcenter ""

# Credentials for downloading the prediction. They are taken from the CUBE
# JSON in effect, so values given with -u/-w or -C are honoured instead of
# assuming the default account.
if (( b_printReport || b_printJSONprediction )) ; then
    cubeUser=$(jq -r .user <<< "$CUBE")
    cubePasswd=$(jq -r .password <<< "$CUBE")
fi

for image in "${a_lungCT[@]}" ; do
    echo -en "\033[2A\033[2K"
    boxcenter ""
    boxcenter "Building prediction branch for image $image..." ${LightGray}
    boxcenter ""
    boxcenter ""

    plugin_run ":1" "a_WORKFLOWSPEC[@]" "$CUBE" ID1 $sleepAfterPluginRun \
        "@prev_id=$ROOTID;@image[_n]=$image" && id_check $ID1
    digraph_add "GRAPHVIZBODY" "GRAPHVIZBODYARGS" ":0;$ROOTID" ":1;$ID1" \
        "a_WORKFLOWSPEC[@]"

    plugin_run ":2" "a_WORKFLOWSPEC[@]" "$CUBE" ID2 $sleepAfterPluginRun \
        "@prev_id=$ID1;@image[_n]=$image" && id_check $ID2
    digraph_add "GRAPHVIZBODY" "GRAPHVIZBODYARGS" ":1;$ID1" ":2;$ID2" \
        "a_WORKFLOWSPEC[@]"

    plugin_run ":3" "a_WORKFLOWSPEC[@]" "$CUBE" ID3 $sleepAfterPluginRun \
        "@prev_id=$ID2" && id_check $ID3
    digraph_add "GRAPHVIZBODY" "GRAPHVIZBODYARGS" ":2;$ID2" ":3;$ID3" \
        "a_WORKFLOWSPEC[@]"

    # The patient id handed to the report generator is synthesised from the
    # id of this branch's anonymisation node.
    plugin_run ":4" "a_WORKFLOWSPEC[@]" "$CUBE" ID4 $sleepAfterPluginRun \
        "@prev_id=$ID3;@patientID=$ID1-12345" && id_check $ID4
    digraph_add "GRAPHVIZBODY" "GRAPHVIZBODYARGS" ":3;$ID3" ":4;$ID4" \
        "a_WORKFLOWSPEC[@]"

    if (( b_waitOnBranchFinish )) ; then
        # NOTE(review): this waits for the pl-covidnet node (ID3), which is
        # all the report below needs, while the help text speaks of the
        # "termination node" (that would be ID4) -- confirm which is meant.
        waitForNodeState "$CUBE" "finishedSuccessfully" $ID3 retState
    fi

    if (( b_printReport || b_printJSONprediction )) ; then
        # get list of file resources for the prediction plugin (ID3)
        dataInNode_get file_resource "$CUBE" $ID3 linksInNode
        echo -en "\033[2A\033[2K"
        prediction=$(echo "$linksInNode" |
            grep "prediction-default.json" |
            awk '{print $3}')
        # Remove any copy left over from a previous branch so that a failed
        # download cannot be mistaken for this branch's result.
        rm -f prediction-default.json 2>/dev/null
        http -a "$cubeUser:$cubePasswd" --quiet --download "$prediction"

        if [[ -s prediction-default.json ]] ; then
            final=$(jq --raw-output .prediction prediction-default.json)
            # Break the JSON into 70 character rows for boxed display.
            RESULT=$(sed -E 's/(.{70})/\1\n/g' prediction-default.json)
            if (( b_printJSONprediction )) ; then
                echo "$RESULT" | ./boxes.sh ${LightGray}
            fi
            if (( b_printReport )) ; then
                # Per predicted class: the JSON key holding its probability,
                # the wording of the summary and the colour to print it in.
                case "$final" in
                    "normal")
                        probKey="Normal"
                        verdict="is predicted to be normal"
                        colour=${Green}
                        ;;
                    "pneumonia")
                        probKey="Pneumonia"
                        verdict="shows pneumonia"
                        colour=${LightPurple}
                        ;;
                    "COVID-19")
                        probKey="COVID-19"
                        verdict="shows COVID-19 infection"
                        colour=${Red}
                        ;;
                    *)
                        probKey=""
                        ;;
                esac
                if [[ -n "$probKey" ]] ; then
                    # Convert the probability to a percentage with bc.
                    # "-I %" is the portable spelling of the replace-string
                    # option ("-i%" is deprecated in GNU xargs and unknown
                    # to BSD xargs).
                    perc=$(jq --raw-output --arg key "$probKey" '.[$key]' \
                            prediction-default.json |
                        xargs -I % printf 'scale=2 ; (%*10000)/100\n' | bc)
                    boxcenter "ANALYSIS: image $image $verdict at $perc percent." $colour
                fi
            fi
        else
            boxcenter "WARNING: no prediction could be downloaded for image $image." ${Red}
        fi
        boxcenter ""
        boxcenter ""
        windowBottom
    fi
done
echo -en "\033[2A\033[2K"
postRun_report
windowBottom
if (( b_respFail > 0 )) ; then exit 3 ; fi
# Final stage (only with -G): hand the fixed header, the two bodies that were
# accumulated while the branches were built, and the file name given on the
# command line to graphVis_printFile.
if (( b_graphviz != 0 )) ; then
    graphVis_printFile "$GRAPHVIZHEADER" "$GRAPHVIZBODY" "$GRAPHVIZBODYARGS" "$GRAPHVIZFILE"
fi