-
Notifications
You must be signed in to change notification settings - Fork 1
/
counts_merge.sh
executable file
·81 lines (62 loc) · 3.1 KB
/
counts_merge.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env bash
# counts_merge.sh
#
# Created by Bruno Costa on 01/02/2016
# Copyright 2016 ITQB / UNL. All rights reserved.
# Script to produce and merge all count tables together
# Call: counts_merge.sh
#if [[ "$#" != 1 ]]; then
# echo "Error - This script runs with x arguments $# arguments were given"
# exit 127
#fi
#Gets the scipt directory
DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
. $DIR/"config/workdirs.cfg"
SCRIPTS_DIR=$DIR"/scripts"
mkdir -p ${workdir}/count
novel=${workdir}/count/all_seq_counts_novel.tsv
novelNonCons=${workdir}/count/all_seq_counts_nonCons.tsv
tasi=${workdir}/count/all_seq_counts_tasi.tsv
novelTasi=${workdir}/count/all_seq_counts_novelTasi.tsv
cons=${workdir}/count/all_seq_counts_cons.tsv
consSeq=${workdir}/count/all_seq_cons.seq
star=${workdir}/count/all_seq_star.seq
reunion=${workdir}/count/all_seq.tsv
tasiSeq=`mktemp /tmp/tasiSeq.XXXXXX`
tasiNovel=`mktemp /tmp/tasiNovel.XXXXXX`
#Get count matrix save to counts
$SCRIPTS_DIR/count_abundance.sh "${workdir}/data/*_cons.fa" "cons" $THREADS > $cons
$SCRIPTS_DIR/merge_conserved.py -i $cons
$SCRIPTS_DIR/count_abundance.sh "${workdir}/data/mircat/*noncons_miRNA_filtered.fa" "novel" $THREADS > $novel
$SCRIPTS_DIR/count_abundance.sh "${workdir}/data/tasi/Lib*-tasi.fa" "tasi" $THREADS > $tasi
#$SCRIPTS_DIR/count_abundance.sh "${workdir}/data/*_cons.fa ${workdir}/data/mircat/*noncons_miRNA_filtered.fa" "none" $THREADS > ${workdir}/count/all_seq_counts.tsv
#This has a script why the snippet instead directly here?
##Get the list of seqs with star
cat ${workdir}/data/mircat/*output_filtered.csv | awk -F ',' '{if($14!="NO"){if($7!="Sequence"){print $7}}}' | sort | uniq > $star
###Merging classifications
awk '{if(NR>1){print $1}}' $tasi > $tasiSeq
#Merge tasi with novel
grep -wf $tasiSeq $novel | awk '{print $1}' | xargs -n 1 -I pattern sed -ir "s:pattern\tnovel\t:pattern\tnovel-tasi\t:g" $novel
##!!!!Losing tasi if cons
awk '{if(NR>1){print $1}}' $cons > $consSeq
grep -wvf $consSeq $novel > $novelNonCons
grep "tasi" $novelNonCons | awk '{print $1}' > $tasiNovel
#Create new file for all conserved
cp $cons $novelTasi
#Add tasi that aren't novel
grep -v 'Lib' $tasi | grep -wvf $tasiNovel >> $novelTasi
#Add novel and novel tasi
grep -v "Lib" $novelNonCons >> $novelTasi
#Add header to new file with all classifications
head -1 $novelTasi > $reunion
#Find seq that have star and add to new file
grep -wf $star $novelTasi | awk '{printf $1"\t"$2" star";for(i=3;i<=NF;i++){printf "\t"$i};printf "\n"}' >> $reunion
#Add the sequences that aren't star
grep -v "Lib" $novelTasi | grep -vwf $star >> $reunion
#clean up
rm $tasiNovel $tasiSeq
GENOME_SUFF=$( echo "$GENOME_BASENAME" | awk -F "part" '{print $1}')
#Merge all outputs from miRCat correct chromossome column to remove "," and output relevant columns with no-redundant lines.
#Loses part, abundance, star,
cat $DIR/data/mircat/Lib*_${FILTER_SUF}_${GENOME_SUFF}_mirbase_noncons_output_filtered.csv | sed -r "s:(^[0-9]+,)(.*)(,[0-9]+,[0-9]+,[+-])(.*$):\1@£\2@£\3\4:g" | awk -F "@£" '{gsub(/,/,"",$2); print}' | cut -d "," -f 2-5,7-13 | sort | uniq > $DIR/count/global-miRCat.csv
exit 0