-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_kegg_pathway_genes.py
32 lines (24 loc) · 1.08 KB
/
get_kegg_pathway_genes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#!/usr/bin/python
"""
Jeroen Merks
Fetches the genes of the specified pathway from KEGG.
"""
from Bio.KEGG import REST
def main(pathway):
    """Fetch a KEGG pathway record and name the promoter regions of its genes.

    Args:
        pathway: KEGG pathway identifier, passed unchanged to
            ``REST.kegg_get`` (presumably an id such as ``"hsa03440"`` --
            TODO confirm against callers).

    Returns:
        A list of strings of the form
        ``"promotor_region_<gene_symbol>_<gene_id>"``, one per distinct
        gene entry in the GENE section of the record, in file order.
    """
    # Parenthesized single-argument form prints identically on Python 2 and 3.
    print("Fetching gene names related to pathway %s "
          "from the current KEGG database..." % pathway)
    handle = REST.kegg_get(pathway)  # query the pathway record
    try:
        pathway_file = handle.read()
    finally:
        # Release the underlying connection even if reading fails.
        handle.close()
    return _promotor_accessions_from_record(pathway_file)


def _promotor_accessions_from_record(pathway_file):
    """Return promoter-region names for the GENE entries of a record's text."""
    promotor_gene_accessions = []
    seen_accessions = set()
    # Walk the record line by line, tracking which section we are in;
    # continuation lines of a section have a blank 12-column header.
    current_section = None
    for line in pathway_file.rstrip().splitlines():
        section = line[:12].strip()  # section names are within 12 columns
        if section:
            current_section = section
        if current_section != "GENE":
            continue
        # Split only at the first "; " so a description that itself contains
        # "; " cannot break the parse.
        gene_identifiers, separator, _description = line[12:].partition("; ")
        if not separator:
            # No "<id> <symbol>; <description>" shape (e.g. entry without a
            # symbol, or a blank line): no accession name can be built.
            continue
        fields = gene_identifiers.split()
        if len(fields) != 2:
            continue
        gene_id, gene_symbol = fields
        accession = "promotor_region_" + gene_symbol + "_" + gene_id
        # De-duplicate on the stored value; checking the bare symbol against
        # the prefixed names would never match.
        if accession not in seen_accessions:
            seen_accessions.add(accession)
            promotor_gene_accessions.append(accession)
    return promotor_gene_accessions