-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
adding scripts to normalize and score kegg data from IMG
- Loading branch information
Showing
2 changed files
with
210 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
#!/usr/bin/python | ||
|
||
import sys | ||
from optparse import OptionParser | ||
|
||
script_info = {} | ||
script_info['brief_description'] = """This script normalizes the KO count according to the number of read in the library.""" | ||
|
||
script_info['script_usage'] = [] | ||
|
||
usage = ''' | ||
Usage: | ||
python normalize_kegg.py -i <kegg table input> -d <list_of_files.txt> | ||
-o <output text file> | ||
''' | ||
|
||
parser = OptionParser(usage) | ||
parser.add_option("-i", "--input_kegg_count_file", dest = "input_file", | ||
help = 'The tabular input [REQUIRED]') | ||
parser.add_option("-d", "--library_read_ciybt", dest = "dictionary_file", | ||
help = 'The dictionary file [REQUIRED]') | ||
parser.add_option("-o", "--output_table", dest = "output_file", default = 'None', | ||
help = 'The output file [OPTIONAL]') | ||
parser.add_option("-v", action="store_true", dest="verbose") | ||
|
||
|
||
# checks if the supplied arguments are adequate | ||
def valid_arguments(opts, args): | ||
if (opts.input_file == None or opts.dictionary_file == None ): | ||
return True | ||
else: | ||
return False | ||
|
||
def main(argv): | ||
(opts, args) = parser.parse_args() | ||
verbose = opts.verbose | ||
parser.set_defaults(verbose=False) | ||
if verbose: | ||
print '' | ||
print "Verbosity is on" | ||
if verbose: | ||
print 'Initializing...' | ||
|
||
if valid_arguments(opts, args): | ||
print usage | ||
sys.exit(0) | ||
|
||
# initialize the input file, dictionary, and output file | ||
input_file = opts.input_file | ||
dictionary_file = opts.dictionary_file | ||
output_file = opts.output_file | ||
|
||
if output_file == 'None' : | ||
print 'No output file was specified' | ||
output_file = input_file + '_normalized' | ||
print 'Output will be directed to file: %s' %output_file | ||
|
||
# Open input and outputs | ||
filedict = open(dictionary_file, 'r') | ||
filein = open(input_file, 'r') | ||
fileout = open(output_file, 'w') | ||
if verbose: | ||
print 'Loading dictionary: %s' %dictionary_file | ||
|
||
file_read_count_dict = {} | ||
|
||
for line in filedict: | ||
#print line | ||
line = line.strip('\n\r').split('\t') | ||
#print line | ||
my_file = line[0] | ||
file_read_count = line[1] | ||
file_read_count_dict[my_file] = file_read_count | ||
if verbose: | ||
print 'Loading complete' | ||
|
||
# Normalizing the output | ||
# This is how the summary file is formatted | ||
# TXA-OM3C0-O3-3300001461.u.ko_summary.txt | ||
# This is how the summary file "file" column looks like | ||
# TXA/TXA-OM3C0/TXA-OM3C0-O3-3300001461.u.ko.txt | ||
# or | ||
# TXA-OM3C0-O3-3300001461.u.ko.txt | ||
|
||
if verbose: | ||
print 'Starting normalization' | ||
the_read_count = file_read_count_dict.get(input_file, 0) | ||
|
||
if the_read_count == 0: | ||
print "No read count was found for %s" %input_file | ||
sys.exit(0) | ||
|
||
for line2 in filein: | ||
line2 = line2.strip('\n\r').split('\t') # split the file into three | ||
the_file0 = line2[0] | ||
# This takes care of the wd path if it appears | ||
the_file = the_file0.split('/')[-1] | ||
the_KEGG_number = line2[1] | ||
the_KEGG_count = int(line2[2]) | ||
the_new_KEGG_count = (the_KEGG_count/float(the_read_count)) *1000 | ||
fileout.write('%s\t%s\t%f\n' %(the_file, the_KEGG_number, the_new_KEGG_count)) | ||
if verbose: | ||
print 'Normalization complete' | ||
filein.close() | ||
fileout.close() | ||
filedict.close() | ||
|
||
|
||
# the main function | ||
main(sys.argv[1:]) | ||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
#!/usr/bin/python | ||
|
||
import sys | ||
import subprocess | ||
import re | ||
from optparse import OptionParser | ||
|
||
script_info = {} | ||
script_info['brief_description'] = """This script converts a tabular KO summary | ||
into a table with count for each KEGG Module """ | ||
|
||
script_info['script_usage'] = [] | ||
|
||
usage = ''' | ||
Usage: | ||
python score_keggs_to_module.py -i <IMG kegg tabular input> -d <kegg dictionary> | ||
-o <output text file> | ||
''' | ||
|
||
parser = OptionParser(usage) | ||
parser.add_option("-i", "--input_kegg_input", dest = "input_file", | ||
help = 'The tabular input [REQUIRED]') | ||
parser.add_option("-d", "--kegg_dictionary", dest = "dictionary_file", | ||
help = 'The dictionary file [REQUIRED]') | ||
parser.add_option("-o", "--output_table", dest = "output_file", default = 'None', | ||
help = 'The output file [OPTIONAL]') | ||
parser.add_option("-v", action="store_true", dest="verbose") | ||
|
||
|
||
|
||
|
||
# checks if the supplied arguments are adequate | ||
def valid_arguments(opts, args): | ||
if (opts.input_file == None or opts.dictionary_file == None ): | ||
return True | ||
else: | ||
return False | ||
|
||
|
||
def main(argv): | ||
(opts, args) = parser.parse_args() | ||
verbose = opts.verbose | ||
parser.set_defaults(verbose=False) | ||
|
||
if verbose: | ||
print '' | ||
print "Verbosity is on" | ||
if verbose: | ||
print 'Initializing...' | ||
|
||
if valid_arguments(opts, args): | ||
print usage | ||
sys.exit(0) | ||
|
||
# initialize the input file, dictionary, and output file | ||
input_file = opts.input_file | ||
dictionary_file = opts.dictionary_file | ||
output_file = opts.output_file | ||
if output_file == 'None': | ||
output_file = input_file + '_' + dictionary_file | ||
if verbose: | ||
print 'No output file was specified' | ||
print 'Output will be directed to file: %s' %output_file | ||
|
||
# Open input and outputs | ||
filedict = open(dictionary_file, 'r') | ||
filein = open(input_file, 'r') | ||
fileout = open(output_file, 'w') | ||
|
||
ko_count = {} | ||
# Ko6506 K06478 K06458 K06459 K01488 K06713 K06450 K06451 K06454 | ||
for line in filedict: | ||
line = line.strip('\n\r').split("\t") | ||
my_key = line[0] | ||
my_results = line[1:len(line)] | ||
for result in my_results: | ||
p = subprocess.Popen('grep %s %s' %(result, input_file), stdout=subprocess.PIPE, shell=True) | ||
node0 = p.communicate()[0] | ||
if len(node0) == 0: | ||
continue | ||
else: | ||
node1 = node0.strip('\r\n').split('\t') | ||
count = float(node1[2]) | ||
current_count = ko_count.get(my_key, 0) | ||
new_count = current_count + count | ||
ko_count[my_key] = new_count | ||
|
||
# print ko_count | ||
# write family_dictionary | ||
for key, value in ko_count.iteritems(): | ||
fileout.write("%s\t%s\t%f\n" %(input_file, key, value)) | ||
# the main function | ||
main(sys.argv[1:]) |