forked from MariaNattestad/Nchart
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsummarize_reads.sh
executable file
·51 lines (43 loc) · 1.24 KB
/
summarize_reads.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/bin/sh
# summarize_reads.sh - per-file and overall read-length summary for the
# FASTQ files (*.fastq, *.fq) in the current directory.
#
# Pipeline (each step is skipped when its output already exists):
#   1. <fq>.lens        fastq_to_fasta_fast <fq> | getlengths
#   2. <fq>.lens.stats  stats -f 2 <fq>.lens -big ...
#   3. allreads.stats   stats over column 2 of every *.lens file
#   4. allreads.csv     one tab-separated row per *.stats file
# The table is finally pretty-printed with `column -t`.
#
# External tools invoked: GNU parallel, fastq_to_fasta_fast, getlengths,
# stats, awk, head, tr, column.
#
# Work lists left behind in the current directory:
#   fqlist.txt        every FASTQ file found
#   fqlist.lens.txt   files whose .lens had to be computed
#   fqlist.stats.txt  files whose .lens.stats had to be computed

set -u

# Build the input list from globs instead of parsing `ls`, so file names
# containing whitespace survive. An unmatched pattern stays literal and is
# filtered out by the -e test.
: > fqlist.txt
for fq in *.fastq *.fq
do
  [ -e "$fq" ] || continue
  printf '%s\n' "$fq" >> fqlist.txt
done

echo "Computing Read lens"
rm -f fqlist.lens.txt
while IFS= read -r fq
do
  if [ ! -r "$fq.lens" ]
  then
    printf '%s\n' "$fq"
    printf '%s\n' "$fq" >> fqlist.lens.txt
  fi
done < fqlist.txt

if [ -s fqlist.lens.txt ]
then
  parallel -t 'fastq_to_fasta_fast {} | getlengths > {}.lens' < fqlist.lens.txt

  # Fresh lengths invalidate everything derived from them: drop the per-file
  # stats so the next stage rebuilds them, and drop the global summaries.
  while IFS= read -r fq
  do
    rm -f -- "$fq.lens.stats"
  done < fqlist.lens.txt
  rm -f allreads.csv allreads.stats
fi

echo "Checking Read len stats"
# This stage keeps its own work list. Sharing fqlist.lens.txt with the
# previous stage queued the same file twice, letting two parallel jobs write
# one .lens.stats file at the same time.
rm -f fqlist.stats.txt
while IFS= read -r fq
do
  if [ ! -r "$fq.lens.stats" ]
  then
    printf '%s\n' "$fq"
    printf '%s\n' "$fq" >> fqlist.stats.txt
  fi
done < fqlist.txt

if [ -s fqlist.stats.txt ]
then
  parallel -t 'stats -f 2 {}.lens -big 10000,20000,30000,40000,50000 > {}.lens.stats' < fqlist.stats.txt

  # The table is assembled from *.stats, so it is stale once any of them changes.
  rm -f allreads.csv
fi

if [ ! -r allreads.stats ]
then
  echo "Summarizing all reads"
  awk '{print $2}' *.lens | stats -big 10000,20000,30000,40000,50000 > allreads.stats
fi

if [ ! -r allreads.csv ]
then
  # printf rather than `echo -e`: under /bin/sh the -e flag is not portable
  # and may be printed literally. The header is followed by one empty line,
  # exactly as the original wrote it when run by bash.
  printf 'run\t#reads\tmax\tmean\tn50\tMb_total\tMb>10kb\tMb>20kbp\tMb>30kbp\tMb>40kb\tMb>50kb\n\n' > allreads.csv

  # With several files, head prints a "==> name <==" banner before each one;
  # awk glues every three input lines into one record, tr strips '=', '[' and
  # ']', and the second awk picks the columns (byte totals become whole Mb).
  # NOTE(review): this appears to assume every .stats file holds exactly one
  # line and that at least two .stats files exist (head prints no banner for
  # a single file) - confirm against the output format of `stats`.
  head *.stats | awk '{printf("%s", $0); if(NR%3==0){print}}' | tr '=' ' ' | tr -d '[' | tr -d ']' | \
    awk '{print $2,$6,$8,$9,$15,int($13/1000000),int($23/1000000),int($27/1000000),int($31/1000000),int($35/1000000),int($39/1000000)}' | tr ' ' '\t' >> allreads.csv
fi

column -t allreads.csv