-
Notifications
You must be signed in to change notification settings - Fork 0
/
Pipeline.py
61 lines (46 loc) · 1.96 KB
/
Pipeline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# =============================================================================
# bmle
# G4Pipeline: Pipeline.py
# A wrapper module to run through all modules in the G4 annotation pipeline
# =============================================================================
def main():
    """Run every stage of the G4 annotation pipeline interactively.

    Prompts on standard input for a working directory and for the names
    of four input files inside it (genomic FASTA, annotation GFF,
    QuadBase2 Tetraplex Finder BED, blastn SAM). It then calls, in order:

    1. ``Utils.reformatGFF`` (only if needed), ``BedToGFF.reformatBED``
       and ``Utils.reformatSAM`` on the inputs,
    2. ``NonAlignments.main`` and ``GeneOverlap.main``,
    3. ``NearestAnnot.generate`` / ``NearestAnnot.summarize`` twice,
       writing to ``<dir>/analyses/gplex.txt`` and
       ``<dir>/analyses/nal.txt``.

    Every path handed to the pipeline modules already includes the
    directory prefix.

    Raises:
        OSError: if the ``.gff`` annotation file cannot be opened for the
            header check.
    """
    import Utils
    import BedToGFF
    import NearestAnnot
    import NonAlignments
    import GeneOverlap

    # =========================================================================
    # File input and formatting modules
    # =========================================================================
    prefix = input('Path to directory containing all files: ')
    if not prefix.endswith('/'):
        prefix += '/'

    fasta = prefix + input('Filename of genomic FASTA file: ')
    annot = prefix + input('Filename of genomic annotation GFF file: ')

    if annot.endswith('.gff'):
        # `annot` already carries the directory prefix; prepending it a
        # second time (as the previous code did) pointed at a path that
        # does not exist.
        with open(annot, 'r') as f:
            first_line = f.readline()
        # A file whose first line does not start with '##' is treated as
        # needing a reformat. The handle is closed before reformatting.
        if not first_line.startswith('##'):
            Utils.reformatGFF(annot, fasta)
            # NOTE(review): presumably reformatGFF writes '<name>.gff3'
            # next to the input, so later stages should read that file.
            # The source indentation was lost; confirm that the suffix
            # switch belongs only to this branch.
            annot += '3'

    qb = prefix + input('Filename of QuadBase2 Tetraplex Finder BED file: ')
    BedToGFF.reformatBED(qb, fasta)
    # Swap the last three characters of the name (e.g. 'bed') for 'gff3'.
    gplex = qb[:-3] + 'gff3'

    sam = prefix + input('Filename of blastn SAM file: ')
    Utils.reformatSAM(sam, fasta)

    # =========================================================================
    # Main pipeline modules
    # =========================================================================
    NonAlignments.main(sam)
    # Same suffix swap as above, applied to the SAM filename.
    nal = sam[:-3] + 'gff3'
    GeneOverlap.main(annot, gplex, nal)

    # =========================================================================
    # Summary data modules
    # =========================================================================
    output1 = prefix + 'analyses/gplex.txt'
    NearestAnnot.generate(gplex, annot, output1)
    NearestAnnot.summarize(output1, fasta)

    output2 = prefix + 'analyses/nal.txt'
    NearestAnnot.generate(gplex, nal, output2)
    NearestAnnot.summarize(output2, fasta)
# =============================================================================
# Start the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()