-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathgffToBed.py
executable file
·71 lines (59 loc) · 2.51 KB
/
gffToBed.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/env python
"""
gffToBed.py: given a GFF file, output its contents in BED format.
Translate GFF to BED coordinates. Optionally output only those entries
of a type or feature (GFF field 3) that matches the type parameter.
Use the type as the BED name by default, or optionally specify an element from
the attributes field (which are semicolon-delimited key-value pairs) to use as
the name.
Arguments:
inputGff: name of the GFF file
Options:
-t <type>: specifies a type of GFF record to translate, given as a regular
expression that is anchored at both ends of the type. The default is to
translate everything.
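-n <name>: specifies an attribute (or a comma-separated list of attributes)
from the attributes field to use as the name. When several attributes are
given, their values are joined with commas. The type is used as the name
by default. If a name attribute is specified but is not found in some
record, the script fails with a KeyError.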
Usage:
gffToBed.py -t miRNA -n ID hsa.gff > miRNA.bed
Assumptions:
This code assumes that each line in the GFF file represents a single
ungapped alignment, equivalent to a single block in BED format. This
does not support generating a multi-block BED from GFF.
Note that the type and name parameters are both case-sensitive.
"""
import argparse
from BCBio import GFF
import re
def strandSymbol(strand):
    """Translate a Biopython strand value into a BED strand character.

    Arguments:
    strand: 1 for the forward strand, -1 for the reverse strand. Any other
            value (0 or None) is treated as unstranded.

    Returns "+", "-" or "." respectively.
    """
    # Compare explicitly: -1 is truthy, so a bare truth test would report
    # reverse-strand features as "+".
    if strand == 1:
        return "+"
    if strand == -1:
        return "-"
    return "."


def bedName(featureType, qualifiers, nameKeys):
    """Choose the BED name for one feature.

    Arguments:
    featureType: the GFF type of the feature; used when nameKeys is empty.
    qualifiers: mapping from attribute key to a list of values.
    nameKeys: attribute keys whose first values make up the name.

    Returns the feature type, or the first value of each requested attribute
    joined with commas.

    Raises KeyError if a requested attribute is missing from qualifiers.
    """
    if not nameKeys:
        return featureType
    return ",".join("%s" % qualifiers[key][0] for key in nameKeys)


def bedLine(chrom, chromStart, chromEnd, name, strand):
    """Format one feature as a single-block, 12-column BED line.

    The score is fixed at 1, thickStart/thickEnd repeat the feature bounds,
    itemRgb is 0, and the single block spans the whole feature.
    """
    chromStart = int(chromStart)
    chromEnd = int(chromEnd)
    return "%s\t%d\t%d\t%s\t1\t%s\t%d\t%d\t0\t1\t%d\t0" % (
        chrom, chromStart, chromEnd, name, strand,
        chromStart, chromEnd, chromEnd - chromStart)


def main():
    """Parse the command line and print the selected GFF features as BED."""
    parser = argparse.ArgumentParser()
    parser.add_argument('inputGff', type=str, help="Input GFF file")
    parser.add_argument('-t', dest="type", default=".*",
                        help="Type to output")
    parser.add_argument('-n', dest="name", default="",
                        help="Name of the field to use in the BED name field")
    args = parser.parse_args()

    # Group the user's pattern so that an alternation such as "a|b" is
    # anchored as a whole, and compile it once rather than per feature.
    typePattern = re.compile("^(?:%s)$" % (args.type))
    # An empty -n means "use the feature type as the name".
    nameKeys = args.name.split(",") if args.name else []

    for chrom in GFF.parse(args.inputGff):
        for hit in chrom.features:
            if not typePattern.search(hit.type):
                continue
            # NOTE(review): start/end/strand are read from the location
            # object because nofuzzy_start/nofuzzy_end and SeqFeature.strand
            # appear to be deprecated in recent Biopython - confirm against
            # the installed version.
            location = hit.location
            print(bedLine(chrom.id, location.start, location.end,
                          bedName(hit.type, hit.qualifiers, nameKeys),
                          strandSymbol(location.strand)))


if __name__ == "__main__":
    main()