Skip to content

Commit

Permalink
Updating
Browse files Browse the repository at this point in the history
  • Loading branch information
carden24 committed Mar 7, 2018
1 parent 2e9f752 commit bad23a0
Show file tree
Hide file tree
Showing 3 changed files with 156 additions and 26 deletions.
48 changes: 48 additions & 0 deletions filter_mothur_biome.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@


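# Script to remove unwanted samples from a mothur shared file.
#
# Usage: python filter_mothur_biome.py <shared file> <sample list file>
#
# 1. Read the list of unwanted sample names (one name per line).
# 2. Read the shared file line by line.
# 3. Write every line whose sample (second column) is not in the list
#    to "<shared file name without .shared>_filtered.shared".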


import sys
import re

def read_requested_samples(list_path):
    """Return the sample names listed one per line in *list_path*.

    Line endings are stripped and blank lines are ignored, so an empty
    trailing line in the list never counts as a sample name.
    """
    with open(list_path, 'r') as filelist:
        stripped = [row.strip('\r\n') for row in filelist]
    return [name for name in stripped if name]


def filter_shared_file(shared_path, list_path):
    """Copy *shared_path* without the rows of the samples in *list_path*.

    The result is written next to the input as
    "<shared_path without .shared>_filtered.shared".

    Args:
        shared_path: tab-separated mothur shared file; the second column
            holds the sample name.
        list_path: text file with one unwanted sample name per line.

    Returns:
        The path of the filtered file that was written.
    """
    requested_samples = read_requested_samples(list_path)
    # Print number of requested samples
    print("%s records requested" % len(requested_samples))
    # A set gives constant-time membership tests for every line of the table.
    unwanted = set(requested_samples)

    # flags must be passed by keyword: the fourth positional argument of
    # re.sub is `count`, so a positional re.I silently disables
    # case-insensitive matching.
    shared_shortname = re.sub('[.](shared)', '', shared_path, flags=re.I)
    fileout_path = shared_shortname + "_filtered.shared"

    found_counter = 0
    with open(shared_path, 'r') as filein, open(fileout_path, 'w') as fileout:
        for line in filein:
            columns = line.strip('\r\n').split('\t')
            # Second column corresponds to the sample in the OTU table.
            # Lines without a second column (e.g. blank lines) are copied
            # through unchanged instead of raising IndexError.
            if len(columns) > 1 and columns[1] in unwanted:
                found_counter += 1
                continue
            fileout.write(line)

    print("%s samples removed" % found_counter)
    return fileout_path


def main(argv):
    """Command-line entry point: argv is [<shared file>, <sample list file>]."""
    if len(argv) != 2:
        sys.exit("Usage: python filter_mothur_biome.py "
                 "<shared file> <sample list file>")
    filter_shared_file(argv[0], argv[1])


if __name__ == "__main__":
    main(sys.argv[1:])
52 changes: 26 additions & 26 deletions score_blast_results_from_cazy.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,27 +11,27 @@

#config = load_config()
script_info = {}
script_info['brief_description'] = """This script converts a tabular BLAST output
into a table with count for each CAZy family.
script_info['brief_description'] = """This script converts a tabular BLAST
output into a table with count for each CAZy family.
It requires a pregenerated dictionary (pkl) from the CAZy file used
as BLAST database """

script_info['script_usage'] = []

usage = '''
Usage:
python score_blast_results_from_cazy.py -i <blast tabular input> -d <cazy dictionary>
Usage:
python score_blast_results_from_cazy.py -i <blast tabular input>
-d <cazy dictionary>
-o <output text file>
'''

parser = OptionParser(usage)
parser.add_option("-i", "--input_blast_result", dest = "input_file",
help = 'The blast tabular input [REQUIRED]')
parser.add_option("-d", "--dictionary_file", dest = "dictionary_file",
help = 'The dictionary file [REQUIRED]')
parser.add_option("-o", "--output_table", dest = "output_file", default = 'None',
help = 'The output file [OPTIONAL]')

parser.add_option("-i", "--input_blast_result", dest="input_file",
help='The blast tabular input [REQUIRED]')
parser.add_option("-d", "--dictionary_file", dest="dictionary_file",
help='The dictionary file [REQUIRED]')
parser.add_option("-o", "--output_table", dest="output_file", default='None',
help='The output file [OPTIONAL]')


# checks if the supplied arguments are adequate
Expand All @@ -41,7 +41,7 @@ def valid_arguments(opts, args):
else:
return False


def main(argv):
(opts, args) = parser.parse_args()
print ''
Expand All @@ -52,21 +52,21 @@ def main(argv):
sys.exit(0)

# initialize the input file, dictionary, and output file
input_file = opts.input_file
input_file = opts.input_file
dictionary_file = opts.dictionary_file
output_file = opts.output_file
if output_file == 'None' :
if output_file == 'None':
print 'No output file was specified'
output_file = input_file + '_cazy_family'
print 'Output will be directed to file: %s' %output_file
print 'Output will be directed to file: %s' % output_file
# else:
# continue

# Open input and outputs
filedict = open(dictionary_file, 'rb')
filein = open(input_file, 'r')
fileout = open(output_file, 'w')
print 'Loading dictionary %s' %dictionary_file
print 'Loading dictionary %s' % dictionary_file
cazy_dictionary = pickle.load(filedict)
print 'Loading complete'
family_output_dictionary = {}
Expand All @@ -76,33 +76,33 @@ def main(argv):
for line in filein:
entry_counter = entry_counter + 1
# Split line from blast output
line = line.split('\t')
line = line.split('\t')
# Get subject from blast output
subject = line[1]
# print subject
# Get info for that subject in dictionary
new_dictionary_entry = cazy_dictionary.get(subject)
if new_dictionary_entry == None:
print 'Could not find data for %s' %subject
print 'Could not find data for %s' % subject
print 'Please check you are using the correct dictionary file'
sys.exit(0)
# Obtain CAZy family
family = new_dictionary_entry[1]
# Entry family name in results dictionary
# If there is no result use zero as the initial count
initial_count = family_output_dictionary.get(family,0)
initial_count = family_output_dictionary.get(family, 0)
# Update count
updated_count = initial_count + 1
# Update entry in dictionary
family_output_dictionary[family] = updated_count
updated_count = initial_count + 1
# Update entry in dictionary
family_output_dictionary[family] = updated_count

print 'Found a total of %s entries' %entry_counter
print 'And a total of %d families' %len(family_output_dictionary.keys())
print 'Writing to %s' %output_file
print 'Found a total of %s entries' % entry_counter
print 'And a total of %d families' % len(family_output_dictionary.keys())
print 'Writing to %s' % output_file

# write family_dictionary
for key, value in family_output_dictionary.iteritems():
fileout.write("%s\t%d\n" %(key, value))
fileout.write("%s\t%d\n" % (key, value))
fileout.close()
filein.close()
filedict.close()
Expand Down
82 changes: 82 additions & 0 deletions score_butyrate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#! /usr/bin/python

# author__ = "Erick Cardenas Poire"
# Usage python ./score_butyrate.py -i <input file> -d <dictionary file>

import sys
import re
from optparse import OptionParser


# Free-form description of the script; nothing else in this script reads it.
script_info = {}
script_info['brief_description'] = """Converts a tabular BLAST output
into a table with count for butyrate synthesis genes.
It requires a table that links the img ids with the gene names"""

script_info['script_usage'] = []

# The usage text names this script and lists only the options the parser
# actually defines; there is no -o option because the output file name is
# derived from the input file name.
usage = '''
Usage:
python score_butyrate.py -i <blast tabular input> -d <dictionary file>
Output is written to <input name without .txt>_summary.txt
'''

# Command-line interface: both options are required by main().
parser = OptionParser(usage)
parser.add_option("-i", "--input_blast_result", dest="input_file",
                  help='The blast tabular input [REQUIRED]')
parser.add_option("-d", "--dictionary_file", dest="dictionary_file",
                  help='The dictionary file [REQUIRED]')


def main(argv):
    """Summarise per-gene hit counts from a counted BLAST subject list.

    Reads a dictionary file (tab-separated: gene id, gene name) and an
    input file whose lines hold a count followed by a subject id, then
    writes one "<input file>\\t<gene>\\t<total count>" line per gene to
    "<input file name without .txt>_summary.txt". Subjects missing from
    the dictionary are accumulated under 'no_match'.

    Args:
        argv: command-line arguments without the program name; parsed
            with the module-level ``parser`` (-i input, -d dictionary).

    Raises:
        SystemExit: via ``parser.error`` when -i or -d is missing.
        ValueError: when the first field of an input line is not an integer.
    """
    # Parse the arguments that were actually passed in rather than
    # implicitly re-reading sys.argv.
    (opts, args) = parser.parse_args(argv)
    print('')
    print('Initializing...')

    # initialize the input file, dictionary, and output file
    input_file = opts.input_file
    dictionary_file = opts.dictionary_file
    if not input_file or not dictionary_file:
        # Without this check a missing option surfaces as an opaque
        # TypeError inside re.sub / open.
        parser.error('Both -i <input file> and -d <dictionary file> '
                     'are required')

    # flags must be passed by keyword: the fourth positional argument of
    # re.sub is `count`, not `flags`.
    shortname = re.sub('[.](txt)', '', input_file, flags=re.I)
    output_file = shortname + "_summary.txt"

    # Map gene id -> gene name.
    gene_id_dictionary = {}
    with open(dictionary_file, 'r') as filedict:
        for line in filedict:
            fields = line.rstrip(' \r\n').split('\t')
            if len(fields) < 2:
                # Blank or malformed line: nothing to map.
                continue
            gene_id_dictionary[fields[0]] = fields[1]

    # Accumulate the counts per gene name.
    gene_count_dictionary = {}
    with open(input_file, 'r') as filein:
        for line2 in filein:
            # Whitespace split tolerates the leading padding and any run
            # of separators between the count and the subject id.
            fields = line2.split()
            if len(fields) < 2:
                continue
            line_count = int(fields[0])
            subject = fields[1]
            the_gene = gene_id_dictionary.get(subject, 'no_match')
            # Echo each resolved gene name to stdout, one per input line.
            print(the_gene)
            # Get current count and update, if not found use zero
            current_gene_count = gene_count_dictionary.get(the_gene, 0)
            gene_count_dictionary[the_gene] = current_gene_count + line_count

    # Sorted so the summary is reproducible regardless of dict ordering.
    with open(output_file, 'w') as fileout:
        for key, value in sorted(gene_count_dictionary.items()):
            fileout.write("%s\t%s\t%s\n" % (input_file, key, value))


# Run main() only when this file is executed as a script, so importing the
# module does not trigger argument parsing and file I/O.
if __name__ == "__main__":
    main(sys.argv[1:])

0 comments on commit bad23a0

Please sign in to comment.