-
Notifications
You must be signed in to change notification settings - Fork 3
/
runbatch_heatmaps.sh
106 lines (88 loc) · 2.92 KB
/
runbatch_heatmaps.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/bin/bash
## run Rscript on a server with R, eg hpc04,5,6, hpc-rc08, etc
# Prepare data for R heatmaps
# exclude GC, ref length, any host chr etc (all distort heatmaps)
# Sophia Poertner, Colin Davenport, 2020-2021
## FUNCTIONS ##
#######################################
# Prepare every haybaler CSV in the current directory for R heatmap creation.
# For each *haybaler.csv and *haybaler_short.csv file this writes
# "<file>.filt.heatmap.csv" containing the filtered, trimmed table.
# Arguments:
#   $1 - number_of_taxa: maximum number of lines kept from the top of each
#        filtered table (passed to head -n)
# Outputs:
#   Progress messages on stdout; usage error on stderr.
# Returns:
#   1 if $1 is missing, otherwise the status of the last command run.
#######################################
prepare_files () {
  local number_of_taxa=${1:-}
  local infile

  if [[ -z "$number_of_taxa" ]]; then
    echo "ERROR: prepare_files needs the number of taxa (lines to keep) as first argument" >&2
    return 1
  fi

  echo "INFO: Preparing files for R heatmap creation"
  # Glob directly instead of parsing ls, so names with spaces stay intact.
  for infile in *haybaler.csv *haybaler_short.csv; do
    # A glob without matches stays as the literal pattern; skip it.
    [[ -f "$infile" ]] || continue
    echo "Running on " "$infile"
    # 1. exclude mouse, human, mito rows (lines starting with chr or 1_1_1)
    # 2. using tab delimiters, keep column 1 and columns 4-2000
    #    (drops cols 2-3)
    # 3. restrict to number_of_taxa lines
    # 4. remove _complete_genome from labels
    # Done as one pipeline, so no temporary filt1/filt2 files are needed.
    grep -v -e "^chr" -e "^1_1_1" -- "$infile" \
      | cut -f1,4-2000 \
      | head -n "$number_of_taxa" \
      | sed "s/_complete_genome//g" \
      > "$infile.filt.heatmap.csv"
  done
}
#######################################
# Run create_heatmap.R (looked up in the current directory) once for every
# *.heatmap.csv file in the current directory.
# Globals:
#   rscript_bin (read) - path to the Rscript binary. It is expected to come
#                        from config.yaml; run setup.sh and restart the
#                        session if it is not set.
# Arguments:
#   None
# Outputs:
#   Progress messages on stdout; the abort message on stderr.
# Returns:
#   Exits the whole script with status 1 when the Rscript binary is missing.
#######################################
create_heatmaps () {
  local heatmapcsv

  echo "INFO: Starting batch heatmap creation"
  # check for rscript, exit if unavailable
  # rscript_bin="/usr/bin/Rscript"
  if [[ ! -f "${rscript_bin:-}" ]]; then
    echo "INFO: Rscript binary not found, aborting. Could not find Rscript here, is R installed? " "${rscript_bin:-}" >&2
    # A bare "exit" would return the status of the echo above (0) and hide
    # the failure from callers.
    exit 1
  fi
  echo "INFO: Using rscript binary: " "$rscript_bin"

  # create heatmaps for each heatmap.csv file
  for heatmapcsv in *.heatmap.csv; do
    # A glob without matches stays as the literal pattern; skip it.
    [[ -e "$heatmapcsv" ]] || continue
    echo "INFO: Creating heatmap for file: $heatmapcsv"
    # run local
    "$rscript_bin" create_heatmap.R "$heatmapcsv"
  done
}
## HEATMAP CREATION ##

# Returns 0 when at least one haybaler input CSV exists in the current
# directory, 1 otherwise.
have_haybaler_input () {
  local candidate
  for candidate in *haybaler.csv *haybaler_short.csv; do
    [[ -e "$candidate" ]] && return 0
  done
  return 1
}

# Moves the heatmap PDF and HTML files found in the current directory into
# the directory given as $1. Patterns without matches are skipped.
move_heatmap_outputs () {
  local dest_dir=$1
  local outfile
  for outfile in *heatmap*.pdf *heatmap*.html; do
    [[ -e "$outfile" ]] || continue
    mv -- "$outfile" "$dest_dir"
  done
}

# Runs one complete heatmap pass restricted to the top $1 taxa and collects
# the resulting PDF/HTML files in top_<$1>_taxa/.
make_heatmaps_for_top_n () {
  local taxa_count=$1
  local out_dir="top_${taxa_count}_taxa"

  mkdir -p -- "$out_dir" || return 1
  # Run bash functions for the requested number of taxa
  prepare_files "$taxa_count"
  create_heatmaps
  move_heatmap_outputs "$out_dir"
}

# Setup conda and directories
# Load settings (e.g. rscript_bin) from config.yaml via parse_yaml.sh.
parse_yaml_script="${WOCHENENDE_DIR:-}/scripts/parse_yaml.sh"
if [[ -n "${WOCHENENDE_DIR:-}" && -f "$parse_yaml_script" ]]; then
  # shellcheck source=/dev/null
  source "$parse_yaml_script"
  # NOTE(review): eval executes the parsed config as shell code, so
  # config.yaml must be trusted. Quoted so values are not word-split/globbed.
  eval "$(parse_yaml "$WOCHENENDE_DIR/config.yaml")"
else
  # Best effort: keep going, create_heatmaps verifies rscript_bin itself.
  echo "WARNING: could not load config.yaml: WOCHENENDE_DIR is unset or $parse_yaml_script is missing. Run setup.sh and restart session" >&2
fi

if have_haybaler_input; then
  # Create heatmaps with 50 taxa
  make_heatmaps_for_top_n 50
  # Create heatmaps with 200 taxa
  make_heatmaps_for_top_n 200
  echo "INFO: Script completed"
else
  echo "no input files found for heatmaps creation. Needs *haybaler.csv as input"
fi