diff --git a/GET_LINEAGES_NCBI.EC.py b/GET_LINEAGES_NCBI.EC.py
index 1dfe710..0ed9869 100644
--- a/GET_LINEAGES_NCBI.EC.py
+++ b/GET_LINEAGES_NCBI.EC.py
@@ -1,3 +1,5 @@
 #!/usr/bin/python
+# Author: Roli Wilhelm
+
 import sys, os, re, getopt, glob, subprocess, os.path, numpy as np, time
 import timeit
diff --git a/GET_LINEAGES_NCBI.py b/GET_LINEAGES_NCBI.py
index e38d400..5ebd273 100755
--- a/GET_LINEAGES_NCBI.py
+++ b/GET_LINEAGES_NCBI.py
@@ -1,3 +1,5 @@
 #!/usr/bin/python
+# Author: Roli Wilhelm
+
 import sys, os, re, getopt, glob, subprocess, os.path, numpy as np, time
 import timeit
diff --git a/HMM_search_and_parse_and_extract.py b/HMM_search_and_parse_and_extract.py
index e6da306..14f279e 100644
--- a/HMM_search_and_parse_and_extract.py
+++ b/HMM_search_and_parse_and_extract.py
@@ -21,7 +21,7 @@
 #config = load_config()
 
-script_info={}
+script_info = {}
 script_info['brief_description'] = """Filters sequence according to a minimum size parameter"""
 script_info['script_description'] = """HMMER parser. Runs hmmscan, filters
@@ -64,12 +64,12 @@
                   '\n(d) \'all\' -- Extract hits, contigs, and all\
 proteins from hits\n')
 
-#Compiling frequently used regular expression patterns
+# Compiling frequently used regular expression patterns
 hmm_pattern = re.compile('[.](hmm)')
 query_pattern = re.compile('[.](fasta$|fas$|faa$|fsa$|fa$)')
 
-# checks if the supplied arguments are adequate
+# Checks if the supplied arguments are adequate
 def valid_arguments(opts, args):
     if (opts.input_model == None or opts.input_fp == None ):
         return True
@@ -98,13 +98,12 @@ def update_progress(progress):
     sys.stderr.flush()
 
-#Get HMM length function
+# Get HMM length function
 def get_hmm_len(input_model):
-#    hmmshortname = re.sub('[.](hmm)','',input_model, re.I)
-    hmmshortname = re.sub(hmm_pattern,'',input_model, re.I)
-    hmm_leng_file = hmmshortname+".length.txt"
-    hmm_fileout = open(hmm_leng_file,'w')
-    hmm_filein = open(input_model,'r')
+    hmmshortname = re.sub(hmm_pattern, '', input_model, re.I)
+    hmm_leng_file = hmmshortname + ".length.txt"
+    hmm_fileout = open(hmm_leng_file, 'w')
+    hmm_filein = open(input_model, 'r')
     for line in hmm_filein:
         if line.startswith('NAME'):
             line = line.strip('\n')
@@ -121,32 +120,28 @@ def get_hmm_len(input_model):
             continue
     hmm_fileout.close()
     hmm_filein.close()
-    os.system(' '.join(['cp',hmm_leng_file,'all.hmm.ps.len']))
+    os.system(' '.join(['cp', hmm_leng_file, 'all.hmm.ps.len']))
 
-#Function to run hmmscan and parse
+# Function to run hmmscan and parse
 def run_hmm_scan (model,query,output):
-    #removes extension, case insensitive search
-#    hmmshortname = re.sub('[.](hmm)','',model, re.I)
-    hmmshortname = re.sub(hmm_pattern,'',model, re.I)
-    #finds file format removes extension, case insensitive search
-#    shortname = re.sub('[.](fasta$|fas$|faa$|fsa$|fa$)','',query, re.I)
-    shortname = re.sub(query_pattern,'',query, re.I)
+    # Removes the extension (case-insensitive search)
+    hmmshortname = re.sub(hmm_pattern, '', model, re.I)
+    # Finds the file format and removes the extension (case-insensitive search)
+    shortname = re.sub(query_pattern, '', query, re.I)
     output_file = output + "/" + shortname + "_" + hmmshortname + '.hmm.out'
     output_file2 = output +"/" + shortname + "_" + hmmshortname + '.txt'
     print 'Running hmmscan...'
-    os.system(' '.join(['hmmscan',model,query,">",output_file]))
+    os.system(' '.join(['hmmscan', model, query, ">", output_file]))
    print 'Parsing results...'
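# Illustrative sketch, not part of the patch: joining shell commands with
# os.system, as above, breaks on paths containing spaces and hides non-zero
# exit codes. A subprocess-based equivalent of the hmmscan call could look
# like this; the function name is hypothetical.
import subprocess

def run_hmmscan_safely(model, query, output_file):
    # Write hmmscan's stdout straight to the output file; raises
    # CalledProcessError if hmmscan exits non-zero.
    with open(output_file, 'w') as out:
        subprocess.check_call(['hmmscan', model, query], stdout=out)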
- os.system(' '.join(['sh','hmmscan-parser.sh',output_file,'>',output_file2])) + os.system(' '.join(['sh', 'hmmscan-parser.sh', output_file, '>', output_file2])) -#Filtering by evalue and coverage -def filtering_by_evalue_and_coverage(model,query,output,evalue,coverage): - #removes extension, case insensitive search -# hmmshortname = re.sub('[.](hmm)','',model, re.I) +# Filtering by evalue and coverage +def filtering_by_evalue_and_coverage(model, query, output, evalue, coverage): + # Removes extension, case insensitive search hmmshortname = re.sub(hmm_pattern,'',model, re.I) - #finds file format removes extension, case insensitive search -# shortname = re.sub('[.](fasta$|fas$|faa$|fsa$|fa$)','',query, re.I) - shortname = re.sub(query_pattern,'',query, re.I) + # Finds file format removes extension, case insensitive search + shortname = re.sub(query_pattern, '', query, re.I) output_file2 = output+"/" + shortname + "_" + hmmshortname + '.txt' hmm_table = open(output_file2, 'r') output_file3 = output + "/" + shortname + "_" + hmmshortname+'.filtered.txt' @@ -157,7 +152,7 @@ def filtering_by_evalue_and_coverage(model,query,output,evalue,coverage): line2 = line.strip('\n').split('\t') result_evalue = float(line2[2]) result_model_coverage = float(line2[7]) - if (result_evalue <=evalue) and (result_model_coverage*100 >= coverage): + if (result_evalue <= evalue) and (result_model_coverage * 100 >= coverage): hmm_filtered_table.write('%s' %line) else: continue @@ -165,59 +160,56 @@ def filtering_by_evalue_and_coverage(model,query,output,evalue,coverage): hmm_filtered_table.close() -#Function to extract hits from filtered results +# Function to extract hits from filtered results def extract_protein_hits(query,model,output): - #removes extension, case insensitive search - hmmshortname = re.sub(hmm_pattern,'',model, re.I) -# hmmshortname = re.sub('[.](hmm)', '', model, re.I) - #finds file format removes extension, case insensitive search -# shortname = re.sub('[.](fasta$|fas$|faa$|fsa$|fa$)','',query, re.I) - shortname = re.sub(query_pattern,'',query, re.I) - input_file4 = output+"/"+shortname+"_"+hmmshortname+'.filtered.txt' + # Removes extension, case insensitive search + hmmshortname = re.sub(hmm_pattern, '', model, re.I) + # Finds file format removes extension, case insensitive search + shortname = re.sub(query_pattern, '', query, re.I) + input_file4 = output + "/" + shortname + "_" + hmmshortname + '.filtered.txt' hmm_filtered_table2 = open(input_file4, 'r') - print ' Extracting proteins for %s and HMM database=%s' %(query,model) - #Create dictionary with protein:[list of model it hits] + print ' Extracting proteins for %s and HMM database=%s' %(query, model) + # Create dictionary with protein:[list of model it hits] protein_hit_dictionary = {} all_models_hits = [] for line3 in hmm_filtered_table2: line4 = line3.strip('\n').split('\t') protein_hit = line4[0] model_of_protein_hit = line4[1].rstrip(' ') - - #update list of proteins + # Update list of proteins all_models_hits.append(model_of_protein_hit) - #Get list of proteins hits, if non existent create empty list + # Get list of proteins hits, if non existent create empty list models = protein_hit_dictionary.get(protein_hit, []) - #Append current model hit to list + # Append current model hit to list models.append(model_of_protein_hit) - #Update dictionary entry + # Update dictionary entry protein_hit_dictionary[protein_hit] = models - #Print message - count_of_models=list(set(all_models_hits)) - 
count_of_proteins=len(protein_hit_dictionary.keys()) + # Print message + count_of_models = list(set(all_models_hits)) + count_of_proteins = len(protein_hit_dictionary.keys()) print ' Extracting %s unique proteins corresponding to %s HMM models' \ %(count_of_proteins,len(count_of_models)) - #open one output file per model - #Generate list of output files - #for item in all_models_hits: + # Open one output file per model + # Generate list of output files + # For item in all_models_hits: files = [open(output + '/' + shortname + '_' + hmmshortname + '_' + item + '.fasta', 'w') \ for item in set(all_models_hits)] - #Open original file, find if name is in hit list, - #Then get models hits and write to model result files + # Open original file, find if name is in hit list, + # Then get models hits and write to model result files filein = open(query, 'r') - for record in SeqIO.parse(filein,"fasta"): + for record in SeqIO.parse(filein, "fasta"): name = record.name if name in protein_hit_dictionary.keys(): what_models_list = protein_hit_dictionary.get(name) - #Iterate this list + # Iterate this list for what_model in what_models_list: - #Find index + # Find index index = count_of_models.index(what_model) files[index].write('>%s\n%s\n' % (name, record.seq)) #Close files @@ -226,46 +218,44 @@ def extract_protein_hits(query,model,output): #Function to extract contigs -def extract_contigs(query,model,output,assembly_file): - # removes extension, case insensitive search -# hmmshortname = re.sub('[.](hmm)', '', model, re.I) - hmmshortname = re.sub(hmm_pattern,'',model, re.I) - #finds file format removes extension, case insensitive search -# shortname = re.sub('[.](fasta$|fas$|faa$|fsa$|fa$)','', query, re.I) - shortname = re.sub(query_pattern,'',query, re.I) +def extract_contigs(query, model, output, assembly_file): + # Removes extension, case insensitive search + hmmshortname = re.sub(hmm_pattern, '', model, re.I) + # Finds file format removes extension, case insensitive search + shortname = re.sub(query_pattern, '', query, re.I) input_file4 = output + "/" + shortname + "_" + hmmshortname +'.filtered.txt' hmm_filtered_table2 = open(input_file4, 'r') - print ' Extracting contigs for file=%s and HMM database=%s' %(query,model) + print ' Extracting contigs for file=%s and HMM database=%s' %(query, model) #Create dictionary with protein:[list of model it hits] protein_model_dictionary = {} for line3 in hmm_filtered_table2: line4 = line3.strip('\n').split('\t') protein_hit = line4[0] model_of_protein_hit = line4[1].rstrip(' ') - #Get list of proteins hits, if non existent create empty list + # Get list of proteins hits, if non existent create empty list models = protein_model_dictionary.get(protein_hit, []) - #Append current model hit to list + # Append current model hit to list models.append(model_of_protein_hit) - #Update dictionary entry + # Update dictionary entry protein_model_dictionary[protein_hit] = models - #Create protein-contig dictionary + # Create protein-contig dictionary contigs_list = [] - #parse through list and add to contigs_list + # Parse through list and add to contigs_list for protein in protein_model_dictionary.keys(): contig = protein.rsplit('_', 1) contigs_list.append(contig[0]) contigs_list = list(set(contigs_list)) - #Open original file, find if name is in hit list, - #Then get models hits and write to model result files - assembly_in = open(assembly_file,'r') - contigs_file = output+"/" + shortname + "_" + hmmshortname +'_contigs.fasta' + # Open original file, find if name is in 
hit list, + # Then get models hits and write to model result files + assembly_in = open(assembly_file, 'r') + contigs_file = output + "/" + shortname + "_" + hmmshortname + '_contigs.fasta' contigs_out = open(contigs_file, 'w') print ' Looking for %s contigs' %len(contigs_list) progress_counter = 0 - for record in SeqIO.parse(assembly_in,"fasta"): + for record in SeqIO.parse(assembly_in, "fasta"): name = record.name if name in contigs_list: progress_counter = progress_counter + 1 @@ -278,13 +268,11 @@ def extract_contigs(query,model,output,assembly_file): print 'Some contigs were not found' -#Function to extract all proteins from contig +# Function to extract all proteins from contig def extract_all_proteins_from_contigs(query, model, output): # Removes extension, case insensitive search - hmmshortname = re.sub(hmm_pattern,'',model, re.I) -# hmmshortname = re.sub('[.](hmm)','',model, re.I) + hmmshortname = re.sub(hmm_pattern, '', model, re.I) # Finds file format removes extension, case insensitive search -# shortname = re.sub('[.](fasta$|fas$|faa$|fsa$|fa$)','',query, re.I) shortname = re.sub(query_pattern, '', query, re.I) input_file4 = output + "/" + shortname + "_" + hmmshortname + '.filtered.txt' hmm_filtered_table2 = open(input_file4, 'r') @@ -311,7 +299,7 @@ def extract_all_proteins_from_contigs(query, model, output): # Open one output file per model # Generate list of output files files = [open(output + '/' + shortname + '_' + hmmshortname + '_' \ - + contigs + '.fasta','w') for contigs in (contigs_list)] + + contigs + '.fasta', 'w') for contigs in (contigs_list)] # Open original file, find if name is in hit list, # Then get models hits and write to model result files @@ -346,7 +334,7 @@ def main(argv): raise IOError,\ "Cannot open hmmscan-parser.sh. Please copy it to the local directory" - # initialize the input file and model, loading parameters + # Initialize the input file and model, loading parameters input_model = opts.input_model input_fp = opts.input_fp output_dir = opts.output_dir @@ -357,8 +345,7 @@ def main(argv): # Creates a model length dictionary print 'Checking model length...' -# hmmshortname = re.sub('[.](hmm)', '', input_model, re.I) - hmmshortname = re.sub(hmm_pattern,'',input_model, re.I) + hmmshortname = re.sub(hmm_pattern, '', input_model, re.I) hmm_leng_file = hmmshortname + ".length.txt" print ' Created %s file' % hmm_leng_file get_hmm_len(input_model) @@ -380,7 +367,6 @@ def main(argv): extract_protein_hits(input_fp, input_model, output_dir) elif extract_mode == 'contigs': extract_contigs(input_fp, input_model, output_dir, assembly_file) - elif extract_mode == 'all': extract_protein_hits(input_fp, input_model, output_dir) extract_contigs(input_fp, input_model, output_dir, assembly_file) diff --git a/add_coverage_to_fasta_contigs_2.py b/add_coverage_to_fasta_contigs_2.py new file mode 100644 index 0000000..7aa658c --- /dev/null +++ b/add_coverage_to_fasta_contigs_2.py @@ -0,0 +1,84 @@ +#!/usr/bin/python +# File created on 31 Jan 2014. 
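# Illustrative sketch, not part of the patch: the coverage table read below
# is assumed to be two tab-separated columns, sequence ID and coverage.
# Stripping the newline and keeping the default as a string avoids
# concatenating str + int later in "coverage=" + coverage; the file name
# here is hypothetical.
coverage_dictionary = {}
for line in open('example.cov'):
    fields = line.rstrip('\n').split('\t')
    if len(fields) >= 2:
        coverage_dictionary[fields[0]] = fields[1]
coverage = coverage_dictionary.get('contig_1', '0')  # default stays a string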
+ +__author__ = "Erick Cardenas Poire" +__copyright__ = "Copyright 2014" +__credits__ = [""] +__version__ = "1.0" +__maintainer__ = "Erick Cardenas Poire" +__status__ = "Release" + +from Bio import SeqIO +import sys +from os import makedirs, sys, listdir, environ, path +import re +import inspect +from commands import getstatusoutput +from optparse import OptionParser +import shutil + +#config = load_config() +script_info = {} +script_info['brief_description'] = """Adds coverage information from one file and modifies fasta header""" +script_info['script_description'] = """Adds coverage information from one file and modifies fasta header + REQUIRED: You must have a fasta and coverage file with same base name""" +script_info['script_usage'] = [] + +usage= """ +Need to run it like this: +./add.coverage.to.fasta.py -i input_file +For more options: ./add.coverage.to.fasta.py -h""" + +parser = OptionParser(usage) +parser.add_option("-i", "--input_file", dest = "input_fp", + help = 'the input fasta file/input dir [REQUIRED]') + + +#creates an input output pair if input is just an input file +def create_an_inputs_and_output(input_file): + input_output = [] + shortname = re.sub('[.](fasta$|fas$|fna$|faa$|fsa$|fa$)','',input_file, re.I) #finds file format removes extension, case insensitive search + coverage_input_file = shortname+".cov" + output_file = shortname + ".new.fasta" + input_output.append(input_file) + input_output.append(coverage_input_file) + input_output.append(output_file) + return input_output + +# checks if the supplied arguments are adequate +def valid_arguments(opts, args): + if opts.input_fp == None: + return True + else: + return False + +def main(argv): + (opts, args) = parser.parse_args() + if valid_arguments(opts, args): + print usage + sys.exit(0) + + # initialize the input directory or file + input_fp = opts.input_fp + list_of_files = create_an_inputs_and_output(input_fp) + + # Creates coverage dictionary + coverage_dictionary = {} + coverage_file_in = open(list_of_files[1],'r') + for line in coverage_file_in: + line = line.split('\t') + seq_ID = line[0] + seq_coverage = line[1] + coverage_dictionary[seq_ID] = seq_coverage + coverage_file_in.close() + + fileout = open(list_of_files[2], 'w') + for seq_record in SeqIO.parse(list_of_files[0], format = "fasta"): + seq_name = seq_record.id + coverage = coverage_dictionary.get(seq_name,0) + description = "coverage=" + coverage + fileout.write('>%s %s\n%s\n' %(seq_record.id, description, seq_record.seq)) + fileout.close() + +# the main function +main(sys.argv[1:]) \ No newline at end of file diff --git a/add_lineage_to_dictionary_2.py b/add_lineage_to_dictionary_2.py new file mode 100644 index 0000000..a37af93 --- /dev/null +++ b/add_lineage_to_dictionary_2.py @@ -0,0 +1,138 @@ +#!/usr/bin/python +import sys, os, re, glob, subprocess, numpy as np, pickle +from cogent.parse.ncbi_taxonomy import NcbiTaxonomyFromFiles +from optparse import OptionParser + +#config = load_config() +script_info={} +script_info['brief_description'] = """Adds lineage to dictionary""" +script_info['script_description'] = """Adds lineage to dictionary""" +script_info['script_usage'] = [] + +usage= """ +Need to run it like this: +./add_lineage_to_dictionary -i input_file""" + +parser = OptionParser(usage) +parser.add_option("-i", "--input_dictionary", dest="input_fp", + help='the input dictionary file [REQUIRED]') +parser.add_option("-o", "--destination_dictionary", dest="output_fp", + help='the output dictionary file [REQUIRED]') +parser.add_option("-t", 
"--tax_level", dest="tax_level", + help='the desired taxonomic levels [REQUIRED]') +parser.add_option("-d", "--ncbi_database", dest="ncbi_db", + help='ncbi database [REQUIRED]') + +## Define function for pulling lineage info from NCBI nodes and names files +def get_lineage(node, my_ranks): + ranks_lookup = dict([(r,idx) for idx, r in enumerate(my_ranks)]) + lineage = [None] * len(my_ranks) + curr = node + while curr.Parent is not None: + if curr.Rank in ranks_lookup: + lineage[ranks_lookup[curr.Rank]] = curr.Name + curr = curr.Parent + return lineage + + +#def get_lineage_from_taxid(gi): +# try: +# # Superkingdom search +# node = tree.ById[taxid] + +# tax_superkingdom = get_lineage(node, 'superkingdom') +# tax_superkingdom = str(tax_superkingdom[0]).lower() + +# tax_phylum = get_lineage(node, 'phylum') +# tax_phylum = str(tax_phylum[0]).lower() + +# tax_class = get_lineage(node, 'class') +# tax_class = str(tax_class[0]).lower() + +# tax_order = get_lineage(node, 'order') +# tax_order = str(tax_order[0]).lower() + + # tax_family = get_lineage(node, 'family') + # tax_family = str(tax_family[0]).lower() + + # tax_genus = get_lineage(node, 'genus') + # tax_genus = str(tax_genus[0]).lower() + + # tax_species = get_lineage(node, 'species') + # tax_species = str(tax_species[0]).lower() + + # tax = [tax_superkingdom, tax_phylum, tax_class, tax_order, tax_family, tax_genus, tax_species] + #except KeyError: + # tax = ['NA','NA','NA','NA','NA','NA','NA'] + #print tax + + + +def main(argv): + (opts, args) = parser.parse_args() + + #Initialize files + input_fp = opts.input_fp + input_dictionary_file = open(input_fp, "rb") + input_dictionary = pickle.load(input_dictionary_file) + + output_fp = opts.output_fp + output_file = open(output_fp, "w") +# output = {} + ncbi_db = opts.ncbi_db + + # Print loading dictionary +# test_dictionary = {'gi_63300aaa':['a','b','c'], 'gi|163862923|gb_ABY43982.1_': ['d','e','f']} + + # Print loading tree +# tree = NcbiTaxonomyFromFiles(open('nodes.dmp'), open('names.dmp')) +# root = tree.Root +# all_taxids = [] + + for key in input_dictionary.keys(): +# for key in test_dictionary.keys(): + #print key + if key.startswith('gi|'): + gi_location = key.split('|') + gi = gi_location[1] + else: + gi_location = key.split('_') + gi = gi_location[1] +# subprocess.call('grep --max-count=1 \"'+gi+'\" \"'+ncbi_db+'\" | tee -a blast_taxid.txt', shell = True) + grep = subprocess.Popen('grep --max-count=1 \"'+gi+'\" \"'+ncbi_db+'\"', shell = True, stdout = subprocess.PIPE) + node0 = grep.communicate()[0] + node1 = node0.strip('\n').split('\t') + try: + taxid = int(node1[1]) + except IndexError: + taxid = 'nope' + #print taxid + output_file.write ('%s\t%s\t%s\n' %(key, gi, taxid)) + #print 'end' + +# if taxid == None : +# print 'No taxid found for gi %s' %gi +# taxid = 'nope' +# else: +# continue + # all_taxids.append(taxid) + +# value = test_dictionary.get(key) +# print value +# print 'gi is %s' %gi +# value2 = value.append(str(gi)) +# print value2 +# value3 = value2.append(str(taxid)) +# print value + +# output[key] = value2 +# print len(set(all_taxids)) +# print all_taxids.count('none') +# input_dictionary_file.close() +# pickle.dump(output,output_file) + output_file.close() + +# Run main function +main(sys.argv[1:]) + + diff --git a/all_hmm_ps_2.len b/all_hmm_ps_2.len new file mode 100644 index 0000000..d78b143 --- /dev/null +++ b/all_hmm_ps_2.len @@ -0,0 +1,333 @@ +CBM10.hmm 28 +CBM11.hmm 163 +CBM12.hmm 34 +CBM13.hmm 188 +CBM14.hmm 54 +CBM15.hmm 146 +CBM16.hmm 116 +CBM17.hmm 203 
+CBM18.hmm 38 +CBM19.hmm 45 +CBM1.hmm 29 +CBM20.hmm 90 +CBM21.hmm 107 +CBM22.hmm 131 +CBM23.hmm 162 +CBM24.hmm 76 +CBM25.hmm 78 +CBM26.hmm 75 +CBM27.hmm 168 +CBM28.hmm 208 +CBM29.hmm 144 +CBM2.hmm 101 +CBM30.hmm 91 +CBM31.hmm 92 +CBM32.hmm 124 +CBM34.hmm 120 +CBM35.hmm 123 +CBM36.hmm 115 +CBM37.hmm 62 +CBM38.hmm 129 +CBM39.hmm 94 +CBM3.hmm 88 +CBM40.hmm 179 +CBM41.hmm 102 +CBM42.hmm 136 +CBM43.hmm 83 +CBM44.hmm 64 +CBM45.hmm 81 +CBM46.hmm 87 +CBM47.hmm 128 +CBM48.hmm 76 +CBM49.hmm 78 +CBM4.hmm 126 +CBM50.hmm 40 +CBM51.hmm 134 +CBM52.hmm 52 +CBM53.hmm 87 +CBM54.hmm 114 +CBM55.hmm 46 +CBM56.hmm 159 +CBM57.hmm 147 +CBM58.hmm 117 +CBM59.hmm 145 +CBM5.hmm 40 +CBM60.hmm 108 +CBM61.hmm 141 +CBM62.hmm 131 +CBM6.hmm 138 +CBM8.hmm 143 +CBM9.hmm 182 +CE10.hmm 341 +CE11.hmm 271 +CE12.hmm 210 +CE13.hmm 355 +CE14.hmm 124 +CE15.hmm 269 +CE16.hmm 267 +CE1.hmm 227 +CE2.hmm 209 +CE3.hmm 194 +CE4.hmm 130 +CE5.hmm 189 +CE6.hmm 99 +CE7.hmm 313 +CE8.hmm 288 +CE9.hmm 373 +cohesin.hmm 134 +dockerin.hmm 21 +GH100.hmm 458 +GH102.hmm 157 +GH103.hmm 295 +GH104.hmm 145 +GH105.hmm 332 +GH106.hmm 824 +GH107.hmm 329 +GH108.hmm 86 +GH109.hmm 126 +GH10.hmm 303 +GH110.hmm 548 +GH111.hmm 1032 +GH112.hmm 715 +GH113.hmm 306 +GH114.hmm 190 +GH115.hmm 697 +GH116.hmm 363 +GH117.hmm 211 +GH118.hmm 477 +GH119.hmm 1070 +GH11.hmm 177 +GH120.hmm 91 +GH121.hmm 1392 +GH122.hmm 337 +GH123.hmm 538 +GH124.hmm 332 +GH125.hmm 402 +GH12.hmm 156 +GH13.hmm 299 +GH14.hmm 412 +GH15.hmm 361 +GH16.hmm 189 +GH17.hmm 311 +GH18.hmm 296 +GH19.hmm 231 +GH1.hmm 429 +GH20.hmm 337 +GH22.hmm 122 +GH23.hmm 135 +GH24.hmm 137 +GH25.hmm 177 +GH26.hmm 303 +GH27.hmm 375 +GH28.hmm 325 +GH29.hmm 346 +GH2.hmm 752 +GH30.hmm 417 +GH31.hmm 427 +GH32.hmm 293 +GH33.hmm 342 +GH34.hmm 461 +GH35.hmm 307 +GH36.hmm 688 +GH37.hmm 491 +GH38.hmm 269 +GH39.hmm 431 +GH3.hmm 216 +GH42.hmm 371 +GH43.hmm 248 +GH44.hmm 514 +GH45.hmm 198 +GH46.hmm 222 +GH47.hmm 446 +GH48.hmm 617 +GH49.hmm 549 +GH4.hmm 179 +GH50.hmm 653 +GH51.hmm 630 +GH52.hmm 415 +GH53.hmm 342 +GH54.hmm 316 +GH55.hmm 740 +GH56.hmm 333 +GH57.hmm 383 +GH58.hmm 449 +GH59.hmm 631 +GH5.hmm 275 +GH62.hmm 278 +GH63.hmm 570 +GH64.hmm 367 +GH65.hmm 372 +GH66.hmm 556 +GH67.hmm 669 +GH68.hmm 417 +GH6.hmm 294 +GH70.hmm 803 +GH71.hmm 375 +GH72.hmm 312 +GH73.hmm 128 +GH74.hmm 233 +GH75.hmm 220 +GH76.hmm 358 +GH77.hmm 494 +GH78.hmm 504 +GH79.hmm 455 +GH7.hmm 415 +GH80.hmm 63 +GH81.hmm 622 +GH82.hmm 185 +GH83.hmm 542 +GH84.hmm 295 +GH85.hmm 315 +GH86.hmm 591 +GH87.hmm 597 +GH88.hmm 329 +GH89.hmm 663 +GH8.hmm 320 +GH90.hmm 551 +GH91.hmm 395 +GH92.hmm 491 +GH93.hmm 307 +GH94.hmm 1036 +GH95.hmm 722 +GH96.hmm 614 +GH97.hmm 631 +GH98.hmm 327 +GH99.hmm 334 +GH9.hmm 418 +GT10.hmm 347 +GT11.hmm 276 +GT12.hmm 134 +GT13.hmm 395 +GT14.hmm 250 +GT15.hmm 273 +GT16.hmm 350 +GT17.hmm 284 +GT18.hmm 686 +GT19.hmm 354 +GT1.hmm 382 +GT20.hmm 475 +GT21.hmm 233 +GT22.hmm 389 +GT23.hmm 318 +GT24.hmm 248 +GT25.hmm 181 +GT26.hmm 171 +GT27.hmm 295 +GT28.hmm 157 +GT29.hmm 247 +GT2.hmm 168 +GT30.hmm 177 +GT31.hmm 192 +GT32.hmm 90 +GT33.hmm 425 +GT34.hmm 246 +GT35.hmm 674 +GT37.hmm 459 +GT38.hmm 467 +GT39.hmm 223 +GT3.hmm 637 +GT40.hmm 211 +GT41.hmm 705 +GT42.hmm 288 +GT43.hmm 212 +GT44.hmm 100 +GT45.hmm 115 +GT46.hmm 356 +GT47.hmm 296 +GT48.hmm 739 +GT49.hmm 337 +GT4.hmm 160 +GT50.hmm 262 +GT51.hmm 177 +GT52.hmm 263 +GT53.hmm 1049 +GT54.hmm 290 +GT55.hmm 383 +GT56.hmm 357 +GT57.hmm 481 +GT58.hmm 364 +GT59.hmm 404 +GT5.hmm 472 +GT60.hmm 330 +GT61.hmm 242 +GT62.hmm 268 +GT63.hmm 340 +GT64.hmm 248 +GT65.hmm 340 +GT66.hmm 693 +GT67.hmm 315 +GT68.hmm 350 +GT69.hmm 
239
+GT6.hmm	280
+GT70.hmm	368
+GT71.hmm	264
+GT72.hmm	355
+GT73.hmm	245
+GT74.hmm	280
+GT75.hmm	343
+GT76.hmm	407
+GT77.hmm	216
+GT78.hmm	134
+GT79.hmm	879
+GT7.hmm	250
+GT80.hmm	379
+GT81.hmm	293
+GT82.hmm	311
+GT83.hmm	540
+GT84.hmm	215
+GT85.hmm	427
+GT87.hmm	231
+GT88.hmm	523
+GT89.hmm	556
+GT8.hmm	257
+GT90.hmm	250
+GT91.hmm	451
+GT92.hmm	279
+GT9.hmm	225
+PL10.hmm	287
+PL11.hmm	606
+PL12.hmm	138
+PL13.hmm	363
+PL14.hmm	200
+PL15.hmm	134
+PL16.hmm	278
+PL17.hmm	139
+PL18.hmm	188
+PL1.hmm	202
+PL20.hmm	229
+PL21.hmm	72
+PL22.hmm	265
+PL2.hmm	530
+PL3.hmm	197
+PL4.hmm	567
+PL5.hmm	317
+PL6.hmm	372
+PL7.hmm	231
+PL8.hmm	259
+PL9.hmm	374
+SLH.hmm	42
+CBM63.hmm	78
+CBM64.hmm	85
+GH101.hmm	707
+GH126.hmm	321
+GH127.hmm	524
+GH128.hmm	224
+GH129.hmm	618
+GH130.hmm	296
+GT93.hmm	309
+GT94.hmm	283
+AA10.hmm	178
+AA1.hmm	943
+AA2.hmm	255
+AA3.hmm	618
+AA4.hmm	522
+AA5.hmm	1281
+AA6.hmm	195
+AA7.hmm	458
+AA8.hmm	815
+AA9.hmm	220
+CBM65.hmm	114
+CBM66.hmm	155
+CBM67.hmm	176
+GH131.hmm	255
+GH132.hmm	303
diff --git a/correct_cazy_dictionary.py b/correct_cazy_dictionary.py
index 33ad558..6a99b28 100755
--- a/correct_cazy_dictionary.py
+++ b/correct_cazy_dictionary.py
@@ -88,7 +88,7 @@
 print len(good_dictionary.keys())
 
 #print bad_dictionary
-pickle.dump(good_dictionary,file_good_dictionary)
+pickle.dump(good_dictionary, file_good_dictionary)
 
 file_bad_dictionary.close()
diff --git a/count_hmmer_hits.py b/count_hmmer_hits.py
new file mode 100644
index 0000000..fae6149
--- /dev/null
+++ b/count_hmmer_hits.py
@@ -0,0 +1,19 @@
+#!/usr/bin/python
+
+# Usage:
+# python count_hmmer_hits.py <hmmer output file>
+
+import sys
+
+filein = open(sys.argv[1], 'r')
+counter = 0
+
+for line in filein:
+    if line.rstrip().endswith('[number of targets reported over threshold]'):
+        # The reported count is the last field before the bracketed label
+        counter = counter + int(line.split('[')[0].split()[-1])
+    else:
+        continue
+
+print '%s\t%s' %(sys.argv[1], str(counter))
+
diff --git a/create_newcazy_dictionary.py b/create_newcazy_dictionary.py
index 699ffe5..bc22dcb 100755
--- a/create_newcazy_dictionary.py
+++ b/create_newcazy_dictionary.py
@@ -1,12 +1,7 @@
 #!/usr/bin/python
 # File created on 13 Feb 2014.
 
-__author__ = "Erick Cardenas Poire"
-__copyright__ = "Copyright 2014"
-__credits__ = [""]
-__version__ = "1.0"
-__maintainer__ = "Erick Cardenas Poire"
-__status__ = "Release"
+# Author: Erick Cardenas Poire
 
 import pickle
diff --git a/create_newcazy_dictionary_with_subfamilies.py b/create_newcazy_dictionary_with_subfamilies.py
index f8d092e..a248ba9 100755
--- a/create_newcazy_dictionary_with_subfamilies.py
+++ b/create_newcazy_dictionary_with_subfamilies.py
@@ -1,13 +1,7 @@
 #!/usr/bin/python
 # File created on 13 Feb 2014.
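# Illustrative sketch, not part of the patch: the create_newcazy_dictionary*
# scripts build a subject -> annotation mapping and persist it with pickle,
# which score_blast2.py later loads and indexes as dict_entry[1] (subfamily),
# dict_entry[2] (family) and dict_entry[3] (self-hit score). The exact value
# layout and file name shown here are assumptions.
import pickle
example = {'gi|49642693|emb|CAH00655.1|': ['CAH00655.1', 'GH13_28', 'GH13', 612.0]}
with open('cazy_dict.pkl', 'wb') as fh:      # hypothetical file name
    pickle.dump(example, fh)
entry = pickle.load(open('cazy_dict.pkl', 'rb'))['gi|49642693|emb|CAH00655.1|']
assert entry[2] == 'GH13'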
-__author__ = "Erick Cardenas Poire" -__copyright__ = "Copyright 2014" -__credits__ = [""] -__version__ = "1.0" -__maintainer__ = "Erick Cardenas Poire" -__status__ = "Release" - +# Author = "Erick Cardenas Poire" import pickle import sys diff --git a/dereplicate_fasta.py b/dereplicate_fasta.py index 24977f1..725eddf 100644 --- a/dereplicate_fasta.py +++ b/dereplicate_fasta.py @@ -1,12 +1,7 @@ #!/usr/bin/python from __future__ import division -__author__ = "Erick Cardenas Poire" -__copyright__ = "Copyright 2014" -__credits__ = [""] -__version__ = "1.0" -__maintainer__ = "Erick Cardenas Poire" -__status__ = "Release" +# Author = "Erick Cardenas Poire" try: from Bio import SeqIO @@ -22,7 +17,7 @@ # config = load_config() -script_info={} +script_info = {} script_info['brief_description'] = """Dereplicates sequences based on name""""" script_info['script_description'] = """ REQUIRED: Fasta file diff --git a/fasta_removal.py b/fasta_removal.py index 56f474f..a423e00 100644 --- a/fasta_removal.py +++ b/fasta_removal.py @@ -1,6 +1,7 @@ # Removes sequences that are in a list provided # Requires fasta file and list of sequences to be removed (one name per line) # Requires screed module + #usage #python fasta.removal.py # 0 1 2 @@ -8,8 +9,8 @@ import sys, screed # Inputs -filein = open(sys.argv[1],'r') -filelist = open(sys.argv[2],'r') +filein = open(sys.argv[1], 'r') +filelist = open(sys.argv[2], 'r') # Outputs outy = sys.argv[1] @@ -52,4 +53,4 @@ fileout.close() fileout2.close() -filein.close() \ No newline at end of file +filein.close() diff --git a/fasta_to_stockholm.py b/fasta_to_stockholm.py index 6d29834..6abc356 100755 --- a/fasta_to_stockholm.py +++ b/fasta_to_stockholm.py @@ -6,11 +6,11 @@ from Bio import AlignIO #input -filein=open(sys.argv[1],"r") +filein = open(sys.argv[1], "r") #outputs -fileout=open(sys.argv[2],'w') +fileout = open(sys.argv[2], 'w') -AlignIO.convert(filein,"fasta",fileout,"stockholm") +AlignIO.convert(filein, "fasta", fileout, "stockholm") diff --git a/fastaselection_nucleotide.py b/fastaselection_nucleotide.py index 4f86345..77b5dde 100755 --- a/fastaselection_nucleotide.py +++ b/fastaselection_nucleotide.py @@ -5,46 +5,46 @@ import sys, screed #inputs -filein=open(sys.argv[1],'r') -filelist=open(sys.argv[2],'r') +filein = open(sys.argv[1], 'r') +filelist = open(sys.argv[2], 'r') #outputs -outy=sys.argv[1] -out1=outy+'.cleaned.sequences' -fileout1=open(out1,'w') +outy = sys.argv[1] +out1 = outy + '.cleaned.sequences' +fileout1 = open(out1, 'w') -outy=sys.argv[1] -out2=outy+'.removed.sequences' -fileout2=open(out2,'w') +outy = sys.argv[1] +out2 = outy + '.removed.sequences' +fileout2 = open(out2, 'w') #create a list with the names of the sequences requested -requestedsequences=[] +requestedsequences = [] for line in filelist: - line=line.strip('\n').strip('\r') + line = line.strip('\n').strip('\r') requestedsequences.append(line) #print requestedsequences -number_records=len(requestedsequences) +number_records = len(requestedsequences) print "%s records requested" % number_records #read file, read each record, if name is in list write it, otherwise continue -counter=1 +counter = 1 for record in screed.open(sys.argv[1]): - sequence_name=record.name #get sequence name + sequence_name = record.name #get sequence name if sequence_name in requestedsequences: print "%s of %s records found" %(counter, number_records) - sequence=record.sequence - sequence=sequence.strip('*') - description=record.description + sequence = record.sequence + sequence = sequence.strip('*') + 
description = record.description fileout2.write(">%s %s\n%s\n" %(sequence_name, description, sequence)) - counter=counter+1 + counter = counter + 1 else: - sequence=record.sequence - sequence=sequence.strip('*') - description=record.description + sequence = record.sequence + sequence = sequence.strip('*') + description = record.description fileout1.write(">%s %s\n%s\n" %(sequence_name, description, sequence)) fileout.close() fileout2.close() diff --git a/fastaselection_protein_v2.py b/fastaselection_protein_v2.py index fce0662..1cde036 100644 --- a/fastaselection_protein_v2.py +++ b/fastaselection_protein_v2.py @@ -5,22 +5,22 @@ import sys, screed #inputs -filein=open(sys.argv[1],'r') -name=sys.argv[2] +filein = open(sys.argv[1], 'r') +name = sys.argv[2] #outputs -outy=sys.argv[1] -out1=outy+'.'+name+'.fasta' +outy = sys.argv[1] +out1 = outy + '.' + name + '.fasta' -fileout=open(out1,'w') +fileout = open(out1, 'w') #read file, read each record, if name is in list write it, otherwise continue for record in screed.open(sys.argv[1]): - sequence_name=record.name #get sequence name + sequence_name = record.name #get sequence name if sequence_name == name: - description=record.description - sequence=record.sequence + description = record.description + sequence = record.sequence print "Records found" fileout.write(">%s %s\n%s\n" %(sequence_name, description, sequence)) else: diff --git a/fastq-to-fasta.py b/fastq-to-fasta.py index 4757691..e972b0a 100755 --- a/fastq-to-fasta.py +++ b/fastq-to-fasta.py @@ -27,14 +27,14 @@ def update_progress(progress): sys.stderr.write(text) sys.stderr.flush() -counter=0 +counter = 0 for n, record in enumerate(fastq_iter(open(sys.argv[1]))): - counter=counter+1 + counter = counter + 1 print ('%s reads found' %counter) for n, record in enumerate(fastq_iter(open(sys.argv[1]))): if n % 1 == 0: - progress=n/float(counter) + progress=n / float(counter) update_progress(progress) #print>>sys.stderr, '...', n sequence = record['sequence'] diff --git a/fastq_to_fasta.py b/fastq_to_fasta.py index 320f734..9b8adc9 100644 --- a/fastq_to_fasta.py +++ b/fastq_to_fasta.py @@ -19,7 +19,7 @@ from optparse import OptionParser #config = load_config() -script_info={} +script_info = {} script_info['brief_description'] = """Converts fastq to fasta""" script_info['script_description'] = """Read fastq with Biopython, writes fasta REQUIRED: You must have a fasta file""" @@ -30,8 +30,8 @@ ./fastq.to.fasta.py -i input_file""" parser = OptionParser(usage) -parser.add_option("-i", "--input_file", dest="input_fp", - help='the input fastq file [REQUIRED]') +parser.add_option("-i", "--input_file", dest = "input_fp", + help = 'the input fastq file [REQUIRED]') # Creates an input output pair if input is just an input file @@ -39,7 +39,7 @@ def create_an_inputs_and_output(input_file): input_output = [] # finds file format removes extension, case insensitive search shortname = re.sub('[.](fastq$|fq$)','',input_file, re.I) - output_file = shortname+".fasta" + output_file = shortname + ".fasta" input_output.append(input_file) input_output.append(output_file) return input_output diff --git a/filter_blast_by_query_coverage.py b/filter_blast_by_query_coverage.py index 3648578..3b96f6a 100755 --- a/filter_blast_by_query_coverage.py +++ b/filter_blast_by_query_coverage.py @@ -5,10 +5,10 @@ import sys -filein=open(sys.argv[1],'r') -out1=sys.argv[2] +filein = open(sys.argv[1], 'r') +out1 = sys.argv[2] -fileout=open(out1,'w') +fileout = open(out1, 'w') #blast output #HS6_179:1:1101:10145:166587/1 
gi|49642693|emb|CAH00655.1| 58.33 24 4e-04 35.0 79 @@ -17,15 +17,15 @@ for line in filein: - output=line - line=line.split('\t') #split line from blast output - query_len=float(line[3]) #get query lenght - alignment_len=float(line[5]) #get alignment length - query_coverage=alignment_len/query_len #get query coverage + output = line + line = line.split('\t') #split line from blast output + query_len = float(line[3]) #get query lenght + alignment_len = float(line[5]) #get alignment length + query_coverage = alignment_len / query_len #get query coverage print query_len print alignment_len print query_coverage - if query_coverage>=0.7: + if query_coverage >= 0.7: fileout.write ('%s' %output) else: continue diff --git a/filter_by_size.py b/filter_by_size.py index e264f5b..f7650a1 100755 --- a/filter_by_size.py +++ b/filter_by_size.py @@ -30,24 +30,24 @@ #config = load_config() -script_info={} +script_info = {} script_info['brief_description'] = """Filters sequence according to a minimum size parameter""" script_info['script_description'] = """Reads sequences, calculates size and writes to output if length is more or equal to size parameter REQUIRED: You must have a fasta and size parameter""" script_info['script_usage'] = [] -usage= """ +usage = """ Sorry to bother you, but you need to run it like this: python filter.by.size.py -i > -s """ parser = OptionParser(usage) -parser.add_option("-i", "--input_file", dest="input_fp", - help='the input fasta file [REQUIRED]') -parser.add_option("-s", "--input_size", dest="threshold_size", - help='the input threshold [REQUIRED]') +parser.add_option("-i", "--input_file", dest = "input_fp", + help = 'the input fasta file [REQUIRED]') +parser.add_option("-s", "--input_size", dest = "threshold_size", + help = 'the input threshold [REQUIRED]') @@ -56,7 +56,7 @@ def create_inputs_and_output(input_file): input_output = [] shortname = re.sub('[.](fasta$|fas$|fna$|faa$|fsa$|fa$)','',input_file, re.I) #finds file format removes extension, case insensitive search - output_file=shortname+".filtered.fasta" + output_file = shortname + ".filtered.fasta" input_output.append(input_file) input_output.append(output_file) return input_output @@ -79,12 +79,12 @@ def main(argv): input_fp = opts.input_fp list_of_files=create_inputs_and_output(input_fp) size = opts.threshold_size - fileout=open(list_of_files[1], 'w') + fileout = open(list_of_files[1], 'w') print ("Filtering out sequences smaller than %s bases" %size) #Read sequences and filter - for seq_record in SeqIO.parse(list_of_files[0], format="fasta"): - seq_size=(len(seq_record.seq)) + for seq_record in SeqIO.parse(list_of_files[0], format = "fasta"): + seq_size = (len(seq_record.seq)) if float(seq_size) >= float(size): fileout.write('>%s %s\n%s\n' %(seq_record.id, seq_record.description, seq_record.seq)) else: diff --git a/filter_fasta.py b/filter_fasta.py index 2c8da8e..b2963af 100644 --- a/filter_fasta.py +++ b/filter_fasta.py @@ -7,19 +7,19 @@ import Bio from Bio import SeqIO -filein=open(sys.argv[1],'rb') -fileout=open(sys.argv[2], 'w') +filein = open(sys.argv[1], 'rb') +fileout = open(sys.argv[2], 'w') -for seq_record in SeqIO.parse(filein, format="fasta"): - line=seq_record.description +for seq_record in SeqIO.parse(filein, format = "fasta"): + line = seq_record.description # print line - line=line.split('#') + line = line.split('#') # print line - partial_info=line[4] + partial_info = line[4] # print partial_info - partial=partial_info.split(';') + partial = partial_info.split(';') # print partial - if 
partial[1]=='partial=00': + if partial[1] == 'partial=00': fileout.write('>%s %s\n%s\n' %(seq_record.id, seq_record.description, seq_record.seq)) # print 'complete' else: diff --git a/find_unique_contigs.hit.py b/find_unique_contigs.hit.py index db7c6fe..002b227 100755 --- a/find_unique_contigs.hit.py +++ b/find_unique_contigs.hit.py @@ -3,18 +3,18 @@ import sys -out1=sys.argv[2] -fileout=open(out1,'w') +out1 = sys.argv[2] +fileout = open(out1,'w') -contiglist=[] +contiglist = [] for line in open (sys.argv[1]): - line=line.split('\t') - contig0=line[0] - contig1=contig0.split('_') - contig2=contig1[0] + line = line.split('\t') + contig0 = line[0] + contig1 = contig0.split('_') + contig2 = contig1[0] contiglist.append(contig2) -lista=set(contiglist) +lista = set(contiglist) #print lista contigdict={} @@ -22,8 +22,8 @@ contigdict[member]=contiglist.count(member) for key in contigdict: - firstcol=key - secondcol=contigdict.get(key) + firstcol = key + secondcol = contigdict.get(key) fileout.write ('%s\t%s\n' %(firstcol , secondcol)) #fileout.write('%s' %contiglist) diff --git a/fq_gz-first_100000.fa.py b/fq_gz-first_100000.fa.py index 8562363..1c96726 100644 --- a/fq_gz-first_100000.fa.py +++ b/fq_gz-first_100000.fa.py @@ -12,7 +12,7 @@ from screed.fastq import fastq_iter for n, record in enumerate(fastq_iter(gzip.open(filein,'rb'))): - if n <=100000: + if n <= 100000: sequence = record['sequence'] name = record['name'] fw.write('>%s\n%s\n' % (name, sequence)) diff --git a/genbank_to_fasta.py b/genbank_to_fasta.py index 16ac13d..99ef92f 100644 --- a/genbank_to_fasta.py +++ b/genbank_to_fasta.py @@ -41,7 +41,7 @@ """ parser = OptionParser(usage) -parser.add_option("-i", "--input_file", dest="input_fp", +parser.add_option("-i", "--input_file", dest = "input_fp", help='the input fasta file [REQUIRED]') @@ -50,7 +50,7 @@ def create_inputs_and_output(input_file): input_output = [] shortname = re.sub('[.](gbk$|.gen$|gb$)','',input_file, re.I) #finds file format removes extension, case insensitive search - output_file=shortname+".fasta" + output_file = shortname+".fasta" input_output.append(input_file) input_output.append(output_file) return input_output @@ -71,12 +71,12 @@ def main(argv): # initialize the inputs and outputs input_fp = opts.input_fp - list_of_files=create_inputs_and_output(input_fp) - fileout=open(list_of_files[1], 'w') + list_of_files = create_inputs_and_output(input_fp) + fileout = open(list_of_files[1], 'w') print ("Converting Genbank to Fasta") #Read sequences and filter - for seq_record in SeqIO.parse(list_of_files[0], format="genbank"): + for seq_record in SeqIO.parse(list_of_files[0], format = "genbank"): fileout.write('>%s %s\n%s\n' %(seq_record.id, seq_record.description, seq_record.seq)) fileout.close() diff --git a/get_full_name.py b/get_full_name.py index 3d7ad53..0da8539 100755 --- a/get_full_name.py +++ b/get_full_name.py @@ -2,5 +2,5 @@ import screed for record in screed.open(sys.argv[1]): - print '%s\t%s' %(record.name,record.description) + print '%s\t%s' %(record.name, record.description) diff --git a/get_hmm_len.py b/get_hmm_len.py index 1d6e71b..a60397b 100644 --- a/get_hmm_len.py +++ b/get_hmm_len.py @@ -15,21 +15,21 @@ #if line starts with LENG get name write name.hmm #read next line -filein=open(sys.argv[1],'r') -fileout=open(sys.argv[2],'w') +filein = open(sys.argv[1], 'r') +fileout = open(sys.argv[2], 'w') for line in filein: if line.startswith('NAME'): - line=line.strip('\n') - line=line.split(' ') - name=line[2] + line = line.strip('\n') + line = 
line.split(' ') + name = line[2] fileout.write('%s\t' %name) #print line[2] else: if line.startswith('LENG'): - line=line.strip('\n') - line=line.split(' ') - len=line[2] + line = line.strip('\n') + line = line.split(' ') + len = line[2] fileout.write('%s\n' %len) # print line[2] else: diff --git a/get_name_and_description.py b/get_name_and_description.py index efa8318..718a45f 100755 --- a/get_name_and_description.py +++ b/get_name_and_description.py @@ -1,13 +1,13 @@ import sys -filein=open(sys.argv[1]) -fp=open(sys.argv[2], 'w') +filein = open(sys.argv[1]) +fp = open(sys.argv[2], 'w') for line in filein: if line.startswith('>'): - line=line.split(" ",1) - name=line[0] - desc=line[1] - fp.write('%s\t%s' %(name,desc)) + line = line.split(" ", 1) + name = line[0] + desc = line[1] + fp.write('%s\t%s' %(name, desc)) else: continue fp.close() diff --git a/get_protein_from_nucleotide_accession.py b/get_protein_from_nucleotide_accession.py index 02c3733..b8dbd0d 100644 --- a/get_protein_from_nucleotide_accession.py +++ b/get_protein_from_nucleotide_accession.py @@ -1,14 +1,7 @@ - - #!/usr/bin/python # File created on 28 Feb 2014. -__author__ = "Erick Cardenas Poire" -__copyright__ = "Copyright 2014" -__credits__ = [""] -__version__ = "1.0" -__maintainer__ = "Erick Cardenas Poire" -__status__ = "Release" +# Author__ = "Erick Cardenas Poire" import sys from Bio import SeqIO @@ -28,13 +21,13 @@ """ script_info['script_usage'] = [] -usage= """ +usage = """ Need to run it like this: ./get_protein_from_nucleotide_accession.py -i input_file""" parser = OptionParser(usage) -parser.add_option("-i", "--input_file", dest="input_fp", - help='the input fastq file [REQUIRED]') +parser.add_option("-i", "--input_file", dest = "input_fp", + help = 'the input fastq file [REQUIRED]') # Creates an input output pair if input is just an input file diff --git a/get_protein_from_nucleotide_accession_v2.py b/get_protein_from_nucleotide_accession_v2.py index 36be1b7..89821d2 100644 --- a/get_protein_from_nucleotide_accession_v2.py +++ b/get_protein_from_nucleotide_accession_v2.py @@ -34,8 +34,8 @@ ./get_protein_from_nucleotide_accession.py -i input_file""" parser = OptionParser(usage) -parser.add_option("-i", "--input_file", dest="input_fp", - help='the input fastq file [REQUIRED]') +parser.add_option("-i", "--input_file", dest = "input_fp", + help = 'the input fastq file [REQUIRED]') # Creates an input output pair if input is just an input file @@ -79,7 +79,7 @@ def main(argv): dict_entry = dict_entry0[0] #print record_name #print dict_entry - handle = Entrez.efetch(db="nucleotide", id=record_name, rettype="gb", retmode="text") + handle = Entrez.efetch(db = "nucleotide", id = record_name, rettype = "gb", retmode = "text") records = SeqIO.parse(handle, "genbank") time.sleep(0.5) for record in records: diff --git a/get_protein_with_gi_number.py b/get_protein_with_gi_number.py index 8c15551..1099719 100644 --- a/get_protein_with_gi_number.py +++ b/get_protein_with_gi_number.py @@ -24,7 +24,7 @@ #handle = Entrez.efetch(db="protein", id=requestedsequences, rettype="gb", retmode="text") -handle = Entrez.efetch(db="protein", id=requestedsequences, rettype="fasta", retmode="text") +handle = Entrez.efetch(db = "protein", id = requestedsequences, rettype = "fasta", retmode = "text") records = SeqIO.parse(handle, "fasta") for record in records: diff --git a/len_stats_from.fasta.py b/len_stats_from.fasta.py index 7cedc59..ecacfc2 100755 --- a/len_stats_from.fasta.py +++ b/len_stats_from.fasta.py @@ -1,24 +1,24 @@ import sys 
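# Illustrative sketch, not part of the patch: the get_protein_* and
# retrieve_* scripts in this patch all follow the same Entrez pattern.
# NCBI asks for a contact email and rate-limited requests, which is why
# some of them call time.sleep(); the email and ID below are placeholders.
import time
from Bio import Entrez, SeqIO
Entrez.email = "you@example.org"                       # placeholder address
handle = Entrez.efetch(db="protein", id="CAH00655.1",  # hypothetical ID
                       rettype="fasta", retmode="text")
for record in SeqIO.parse(handle, "fasta"):
    print '%s\t%s' % (record.id, len(record.seq))      # Python 2 print
time.sleep(0.5)                                        # respect NCBI rate limits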
-filein=open(sys.argv[1],'r') +filein = open(sys.argv[1],'r') print 'Reading ', sys.argv[1] -out=sys.argv[1] +out = sys.argv[1] -fileout=out+'.stats.txt' +fileout = out + '.stats.txt' print fileout -fp=open(fileout, 'w') +fp = open(fileout, 'w') -big_table=[] +big_table = [] for line in filein: if line.startswith('>'): continue else: - length=int(len(line)) + length = int(len(line)) big_table.append(length) -count=len(big_table) -average=sum(big_table)/float(count) -mini=min(big_table) -maxi=max(big_table) +count = len(big_table) +average = sum(big_table)/ float(count) +mini = min(big_table) +maxi = max(big_table) print count , ' reads detected' print 'With an average read lenght of ' , average , 'bases' diff --git a/modify_dereplicated.py b/modify_dereplicated.py index 5627fe8..e912ff5 100644 --- a/modify_dereplicated.py +++ b/modify_dereplicated.py @@ -11,37 +11,36 @@ #Create dereplication dictionary -derep_dictionary={} +derep_dictionary = {} - -derep_table=open(sys.argv[2],'r') +derep_table = open(sys.argv[2],'r') for line in derep_table: if line.startswith('Representative'): continue else: - line=line.split("\t") - seq=line[0] #extract sequence name - seq_count=line[1] #extract sequence count - seq_count=seq_count.rstrip('\n') - derep_dictionary[seq]=seq_count + line = line.split("\t") + seq = line[0] #extract sequence name + seq_count = line[1] #extract sequence count + seq_count = seq_count.rstrip('\n') + derep_dictionary[seq] = seq_count derep_table.close() print derep_dictionary -filein=open(sys.argv[1],'r') +filein = open(sys.argv[1],'r') -out0=str(sys.argv[1]) -out=out0.rsplit( ".", 1 )[ 0 ] -out1=out+'.modified.fasta' -fileout1=open(out1,'w') +out0 = str(sys.argv[1]) +out = out0.rsplit( ".", 1 )[ 0 ] +out1 = out + '.modified.fasta' +fileout1 = open(out1,'w') -for seq_record in SeqIO.parse(filein, format="fasta"): - name=seq_record.id - name_count=derep_dictionary.get(name) #get info for read in dictionary - new_name=name+'size='+name_count+';' - sequence=seq_record.seq - fileout1.write('>%s\n%s\n' %(new_name,sequence)) +for seq_record in SeqIO.parse(filein, format = "fasta"): + name = seq_record.id + name_count = derep_dictionary.get(name) #get info for read in dictionary + new_name = name + 'size=' + name_count + ';' + sequence = seq_record.seq + fileout1.write('>%s\n%s\n' %(new_name, sequence)) fileout1.close() diff --git a/multiline_fasta_to_fasta.py b/multiline_fasta_to_fasta.py new file mode 100644 index 0000000..6fbd474 --- /dev/null +++ b/multiline_fasta_to_fasta.py @@ -0,0 +1,17 @@ +#usage +#python multiline_fasta_to_fasta.py + +import sys +import Bio +from Bio import SeqIO + +filein = open(sys.argv[1], 'r') +fileout_name = sys.argv[1] + '_new.fa' +fileout = open(fileout_name, 'w') + +for seq_record in SeqIO.parse(filein, format = "fasta"): + fileout.write('>%s %s\n%s\n' %(seq_record.id, seq_record.description, seq_record.seq)) + +filein.close() +fileout.close() + diff --git a/parse_taxonomy.py b/parse_taxonomy.py index a43608a..8799fad 100644 --- a/parse_taxonomy.py +++ b/parse_taxonomy.py @@ -18,8 +18,6 @@ # input_dictionary[0]['LineageEx'] if d['Rank'] in ['family', 'order']} - - for organism in input_list: # print organism lineage = {d['Rank']:d['ScientificName'] for d in organism['LineageEx'] if d['Rank'] in ['phylum', 'class', 'order', 'family', 'genus' ]} diff --git a/parse_xml_result.py b/parse_xml_result.py index 5c0f12e..b2c27de 100755 --- a/parse_xml_result.py +++ b/parse_xml_result.py @@ -10,19 +10,19 @@ from Bio.Blast import NCBIXML #input 
-result_handle=open(sys.argv[1],"r")
+result_handle = open(sys.argv[1], "r")
 #result_handle = open("test.xml")
 
-print ("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" %("Query","Alignment","Identity(%)","Similarity(%)","Alignment length","Expected value","Score","Length Query","Length Subject"))
+print ("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" %("Query", "Alignment", "Identity(%)", "Similarity(%)", "Alignment length", "Expected value", "Score", "Length Query", "Length Subject"))
 
 blast_records = NCBIXML.parse(result_handle)
 for blast_record in blast_records:
     for alignment in blast_record.alignments:
         for hsp in alignment.hsps:
-            pident=(hsp.identities)/float(hsp.align_length)*100
-            psimil=(hsp.positives)/float(hsp.align_length)*100
-            mygaps=str(hsp.gaps)
-            print "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" %(blast_record.query, alignment.title, pident, psimil, hsp.align_length, hsp.expect, hsp.score,len(hsp.query),len(hsp.sbjct),mygaps)
+            pident = (hsp.identities) / float(hsp.align_length) * 100
+            psimil = (hsp.positives) / float(hsp.align_length) * 100
+            mygaps = str(hsp.gaps)
+            print "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" %(blast_record.query, alignment.title, pident, psimil, hsp.align_length, hsp.expect, hsp.score, len(hsp.query), len(hsp.sbjct), mygaps)
 
 result_handle.close()
diff --git a/quality-trim.py b/quality-trim.py
index 2c2ac71..86cc176 100755
--- a/quality-trim.py
+++ b/quality-trim.py
@@ -1,3 +1,6 @@
 #!/usr/bin/env python
+# This script looks for 'B' as an indicator of bad quality in the qual
+# section of a FASTQ record. It does not check the quality encoding system;
+# use it carefully.
 
 import sys
diff --git a/quality-trim_to_gz.py b/quality-trim_to_gz.py
index 673b62c..14a226c 100755
--- a/quality-trim_to_gz.py
+++ b/quality-trim_to_gz.py
@@ -1,3 +1,6 @@
 #!/usr/bin/env python
+# Does not check the quality encoding, but looks for a 'B' as the
+# indicator of bad quality.
+# Use it with caution.
 
 import sys
diff --git a/remoteblastp.py b/remoteblastp.py
index 7d8336a..62ef9ff 100755
--- a/remoteblastp.py
+++ b/remoteblastp.py
@@ -1,12 +1,16 @@
-#usage
-#python remoteblastp.py
-# 0 1 2
+# This script will do a BLAST search against the nr database.
+# The BLAST program as well as the search parameters
+# can be specified in the lines below.
+# Usage
+# python remoteblastp.py
+# 0 1 2
 
-my_perc_ident='none'
-my_blast_program='blastp'
-my_evalue_treshold=0.00001
-my_hitlist_size=10
+
+my_perc_ident = 'none'
+my_blast_program = 'blastp'
+my_evalue_treshold = 0.00001
+my_hitlist_size = 10
 
 import sys
 import Bio
@@ -15,17 +19,17 @@
 from Bio.Blast import NCBIXML
 
 #input
-filein=open(sys.argv[1],"r")
+filein = open(sys.argv[1], "r")
 
 #outputs
-myout=sys.argv[2]
-fileout=open(myout,'w')
+myout = sys.argv[2]
+fileout = open(myout, 'w')
 
 for seq_record in SeqIO.parse(filein, format="fasta"):
 #    print seq_record
 #    print seq_record.format("fasta")
-    result_handle = NCBIWWW.qblast(my_blast_program, "nr", seq_record.format("fasta"), hitlist_size=my_hitlist_size,expect=my_evalue_treshold,perc_ident=my_perc_ident)
+    result_handle = NCBIWWW.qblast(my_blast_program, "nr", seq_record.format("fasta"), hitlist_size = my_hitlist_size, expect = my_evalue_treshold, perc_ident = my_perc_ident)
     fileout.write(result_handle.read())
 
 filein.close()
 fileout.close()
diff --git a/remoteblastp_vs_refsq.py b/remoteblastp_vs_refsq.py
index c490c9e..8abaa02 100644
--- a/remoteblastp_vs_refsq.py
+++ b/remoteblastp_vs_refsq.py
@@ -1,12 +1,17 @@
+# This script will do a BLAST search against the refseq database.
+# The BLAST program as well as the search parameters
+# can be specified in the lines below.
+
+
 #usage
 #python remoteblastp.py
 # 0 1 2
 
-my_perc_ident='none'
-my_blast_program='blastp'
-my_evalue_treshold=0.00001
-my_hitlist_size=100
+my_perc_ident = 'none'
+my_blast_program = 'blastp'
+my_evalue_treshold = 0.00001
+my_hitlist_size = 100
 
 import sys
 import Bio
@@ -15,14 +20,14 @@
 from Bio.Blast import NCBIXML
 
 #input
-filein=open(sys.argv[1],"r")
+filein = open(sys.argv[1], "r")
 
 #outputs
-myout=sys.argv[2]
-fileout=open(myout,'w')
+myout = sys.argv[2]
+fileout = open(myout, 'w')
 
-for seq_record in SeqIO.parse(filein, format="fasta"):
+for seq_record in SeqIO.parse(filein, format = "fasta"):
 #    print seq_record
 #    print seq_record.format("fasta")
     result_handle = NCBIWWW.qblast(my_blast_program, "refseq", seq_record.format("fasta"), hitlist_size=my_hitlist_size,expect=my_evalue_treshold,perc_ident=my_perc_ident)
diff --git a/rename_fasta-to-fasta.py b/rename_fasta-to-fasta.py
index e633969..4834d08 100644
--- a/rename_fasta-to-fasta.py
+++ b/rename_fasta-to-fasta.py
@@ -7,14 +7,14 @@
 import Bio
 from Bio import SeqIO
 
-filein=open(sys.argv[1],'rb')
+filein = open(sys.argv[1], 'rb')
 
-for seq_record in SeqIO.parse(filein, format="fasta"):
-    line=seq_record.id
+for seq_record in SeqIO.parse(filein, format = "fasta"):
+    line = seq_record.id
 #    print line
-    line=line.split('|')
+    line = line.split('|')
 #    print line
-    name=line[2]
+    name = line[2]
 #    print name
     print '>%s\n%s' % (name, seq_record.seq)
diff --git a/retrieve_genbank_annotation.py b/retrieve_genbank_annotation.py
index 068e4bf..95e9f8e 100755
--- a/retrieve_genbank_annotation.py
+++ b/retrieve_genbank_annotation.py
@@ -10,40 +10,40 @@
 Entrez.email = "carden24@mail.ubc.ca"
 
 #inputs
-filelist=open(sys.argv[1],'r')
+filelist = open(sys.argv[1], 'r')
 
 #output
-fileout=open(sys.argv[2],'w')
+fileout = open(sys.argv[2], 'w')
 
 #create a list with
the names of the sequences requested -requestedsequences=[] +requestedsequences = [] for line in filelist: - line=line.strip('\n') + line = line.strip('\n') requestedsequences.append(line) print "%d Sequences requested" % len(requestedsequences) print requestedsequences -handle = Entrez.efetch(db="protein", id=requestedsequences, rettype="gb", retmode="text") -records=SeqIO.parse(handle,"genbank") +handle = Entrez.efetch(db = "protein", id = requestedsequences, rettype = "gb", retmode = "text") +records = SeqIO.parse(handle, "genbank") for record in records: - feat=record.features + feat = record.features for f in feat: - if f.type=="CDS": - quali=f.qualifiers - gene=str(quali.get('gene','no_gene_name')) - gene=gene.strip('\'[]') - locus=str(quali.get('locus_tag','no_locus_tag')) - locus=locus.strip('\'[]') - old_locus=str(quali.get('old_locus_tag','no_old_locus_tag')) - old_locus=old_locus.strip('\'[]') - product=str(quali.get('product','no_product_name')) - product=product.strip('\'[]') - protein_id=str(quali.get('protein_id','no_protein_id')) - protein_id=protein_id.strip('\'[]') - fileout.write("%s\t%s\t%s\t%s\t%s\n" %(gene,locus,old_locus,product,protein_id)) + if f.type == "CDS": + quali = f.qualifiers + gene = str(quali.get('gene', 'no_gene_name')) + gene = gene.strip('\'[]') + locus = str(quali.get('locus_tag', 'no_locus_tag')) + locus = locus.strip('\'[]') + old_locus = str(quali.get('old_locus_tag', 'no_old_locus_tag')) + old_locus = old_locus.strip('\'[]') + product = str(quali.get('product', 'no_product_name')) + product = product.strip('\'[]') + protein_id = str(quali.get('protein_id', 'no_protein_id')) + protein_id = protein_id.strip('\'[]') + fileout.write("%s\t%s\t%s\t%s\t%s\n" %(gene, locus, old_locus, product, protein_id)) else: continue fileout.close() diff --git a/retrieve_genbank_record.to.fasta.py b/retrieve_genbank_record.to.fasta.py index 9a574b1..7233bd6 100755 --- a/retrieve_genbank_record.to.fasta.py +++ b/retrieve_genbank_record.to.fasta.py @@ -8,27 +8,27 @@ Entrez.email = "carden24@mail.ubc.ca" #inputs -filelist=open(sys.argv[1],'r') +filelist = open(sys.argv[1], 'r') #output -fileout=open(sys.argv[2],'w') +fileout = open(sys.argv[2], 'w') #create a list with the names of the sequences requested -requestedsequences=[] +requestedsequences = [] for line in filelist: - line=line.strip('\n') + line = line.strip('\n') requestedsequences.append(line) print "%d Sequences requested" % len(requestedsequences) print requestedsequences -handle = Entrez.efetch(db="protein", id=requestedsequences, rettype="fasta", retmode="text") -records=SeqIO.parse(handle,"fasta") +handle = Entrez.efetch(db = "protein", id = requestedsequences, rettype = "fasta", retmode = "text") +records = SeqIO.parse(handle, "fasta") for record in records: - seq_name=record.id - seq_description=record.description - seq_sequence =record.seq - fileout.write (">%s %s\n%s\n" %(seq_name,seq_description,seq_sequence)) + seq_name = record.id + seq_description = record.description + seq_sequence = record.seq + fileout.write (">%s %s\n%s\n" %(seq_name, seq_description, seq_sequence)) diff --git a/retrieve_genbank_record.to.fasta.v2.py b/retrieve_genbank_record.to.fasta.v2.py index 2a0f536..21ac2fb 100755 --- a/retrieve_genbank_record.to.fasta.v2.py +++ b/retrieve_genbank_record.to.fasta.v2.py @@ -8,46 +8,46 @@ Entrez.email = "carden24@mail.ubc.ca" #inputs -filelist=open(sys.argv[1],'r') +filelist = open(sys.argv[1], 'r') #output -fileout=open(sys.argv[2],'w') +fileout = open(sys.argv[2], 'w') #create a list 
diff --git a/retrieve_genbank_record.to.fasta.v2.py b/retrieve_genbank_record.to.fasta.v2.py
index 2a0f536..21ac2fb 100755
--- a/retrieve_genbank_record.to.fasta.v2.py
+++ b/retrieve_genbank_record.to.fasta.v2.py
@@ -8,46 +8,46 @@
 Entrez.email = "carden24@mail.ubc.ca"
 
 #inputs
-filelist=open(sys.argv[1],'r')
+filelist = open(sys.argv[1], 'r')
 
 #output
-fileout=open(sys.argv[2],'w')
+fileout = open(sys.argv[2], 'w')
 
 #create a list with the names of the sequences requested
-requestedsequences=[]
+requestedsequences = []
 for line in filelist:
-    line=line.strip('\n')
+    line = line.strip('\n')
     requestedsequences.append(line)
 
 print "%d Sequences requested" % len(requestedsequences)
 print requestedsequences
 
 handle = Entrez.efetch(db="protein", id=requestedsequences, rettype="gb", retmode="text")
-records=SeqIO.parse(handle,"genbank")
+records = SeqIO.parse(handle, "genbank")
 
 for record in records:
     feat=record.features
     for f in feat:
-        if f.type=="CDS":
-            quali=f.qualifiers
-            gene=str(quali.get('gene','no_gene_name'))
-            gene=gene.strip('\'[]')
+        if f.type == "CDS":
+            quali = f.qualifiers
+            gene = str(quali.get('gene', 'no_gene_name'))
+            gene = gene.strip('\'[]')
             print gene
-            product=str(quali.get('product','no_product_name'))
-            product=product.strip('\'[]')
+            product = str(quali.get('product', 'no_product_name'))
+            product = product.strip('\'[]')
 #            print product
 #            description=gene+'-'+product
-            protein_id=str(quali.get('protein_id','no_protein_id'))
+            protein_id = str(quali.get('protein_id', 'no_protein_id'))
 #            protein_id=str(f.qualifiers['protein_id'])
-            protein_id=protein_id.strip('\'[]')
-            translated_protein=str(quali.get('translation','no_translation'))
+            protein_id = protein_id.strip('\'[]')
+            translated_protein = str(quali.get('translation', 'no_translation'))
 #            translated_protein=str(f.qualifiers['translation'])
-            translated_protein=translated_protein.strip('\'[]')
-            if protein_id=='no_protein_id':
+            translated_protein = translated_protein.strip('\'[]')
+            if protein_id == 'no_protein_id':
                 continue
             else:
-                fileout.write(">%s %s\n%s\n" %(protein_id,gene,translated_protein))
+                fileout.write(">%s %s\n%s\n" %(protein_id, gene, translated_protein))
         else:
             continue
 
 fileout.close()
diff --git a/retrieve_taxonomy_from_accession_numbers_v2.py b/retrieve_taxonomy_from_accession_numbers_v2.py
index 3183b2b..d405009 100644
--- a/retrieve_taxonomy_from_accession_numbers_v2.py
+++ b/retrieve_taxonomy_from_accession_numbers_v2.py
@@ -22,8 +22,8 @@
 print "%d Sequence(s) requested" % len(requestedsequences)
 print ''
 
 handle = Entrez.efetch(db="nuccore", id=requestedsequences, rettype="gb", retmode="text")
-records = SeqIO.parse(handle,"genbank")
+records = SeqIO.parse(handle, "genbank")
 
 for record in records:
 #    print record.id
diff --git a/retrieve_taxonomy_from_gis.py b/retrieve_taxonomy_from_gis.py
index 90b9cc6..8ae8799 100644
--- a/retrieve_taxonomy_from_gis.py
+++ b/retrieve_taxonomy_from_gis.py
@@ -25,7 +25,7 @@
 handle = Entrez.efetch(db="protein", id=requestedsequences, rettype="gb", retmode="text")
-records = SeqIO.parse(handle,"genbank")
+records = SeqIO.parse(handle, "genbank")
 
 ##print records
 for record in records:
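
Only fragments of the two taxonomy scripts are touched by this patch, but once a GenBank record has been parsed, the lineage lives in record.annotations. A minimal standalone sketch of the lookup (the accession reused here is the one that appears in the score_blast2.py example comment below; any protein accession would do):

    from Bio import Entrez, SeqIO

    Entrez.email = "carden24@mail.ubc.ca"
    handle = Entrez.efetch(db="protein", id=["CAH00655.1"], rettype="gb", retmode="text")
    for record in SeqIO.parse(handle, "genbank"):
        organism = record.annotations.get("organism", "no_organism")
        lineage = record.annotations.get("taxonomy", [])  # list of ranks, most general first
        print("%s\t%s\t%s" % (record.id, organism, "; ".join(lineage)))
    handle.close()
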
diff --git a/score_blast2.py b/score_blast2.py
index b5fa6d7..6725a3b 100755
--- a/score_blast2.py
+++ b/score_blast2.py
@@ -4,20 +4,20 @@
 import pickle
 import sys
 
-threshold=float(0.4)
-filedict=open(sys.argv[1],'rb')
-filein=open(sys.argv[2],'r')
+threshold = float(0.4)
+filedict = open(sys.argv[1], 'rb')
+filein = open(sys.argv[2], 'r')
 
-outy=sys.argv[3]
-out1=outy+'family.out'
-out2=outy+'subfamily.out'
+outy = sys.argv[3]
+out1 = outy + 'family.out'
+out2 = outy + 'subfamily.out'
 
-fileout1=open(out1,'w')
-fileout2=open(out2,'w')
+fileout1 = open(out1, 'w')
+fileout2 = open(out2, 'w')
 
 cazy_or_foly_dict = pickle.load(filedict)
-familydict={}
-subfamilydict={}
+familydict = {}
+subfamilydict = {}
 
 #blast output
 #HS6_179:1:1101:10145:166587/1	gi|49642693|emb|CAH00655.1|	58.33	24	4e-04	35.0	79
@@ -27,29 +27,29 @@
 
 for line in filein:
-    line=line.split('\t') #split line from blast output
-    subject=line[1] #get subject
-    bits=float(line[6]) #get raw score
-    dict_entry=cazy_or_foly_dict.get(subject) #get info for subject in dictionary
-    maxbits=float(dict_entry[3]) #get maximum raw score ratio for the subject vs itself
-    scoreratio=bits/float(maxbits) #calculate bits score ratio
-    if scoreratio>=threshold: #if bits score ratio is higher than treshold
-        family=dict_entry[2] #obtain family
-        fff=familydict.get(family,0) #Entry family name in new dictionary, if absent use 0 if not get count
-        ggg=fff+1 #update count
-        familydict[family]=ggg #update entry in dictionary
-        subfamily=dict_entry[1] #do the same for subfamily
-        sss=subfamilydict.get(subfamily,0)
-        ttt=sss+1
-        subfamilydict[subfamily]=ttt
+    line = line.split('\t')  # split line from blast output
+    subject = line[1]  # get subject
+    bits = float(line[6])  # get raw score
+    dict_entry = cazy_or_foly_dict.get(subject)  # get info for subject in dictionary
+    maxbits = float(dict_entry[3])  # get maximum raw score for the subject vs itself
+    scoreratio = bits / float(maxbits)  # calculate bit score ratio
+    if scoreratio >= threshold:  # keep hit if bit score ratio is at least the threshold
+        family = dict_entry[2]  # obtain family
+        fff = familydict.get(family, 0)  # look up current family count, starting at 0 if absent
+        ggg = fff + 1  # update count
+        familydict[family] = ggg  # update entry in dictionary
+        subfamily = dict_entry[1]  # do the same for subfamily
+        sss = subfamilydict.get(subfamily, 0)
+        ttt = sss + 1
+        subfamilydict[subfamily] = ttt
     else:
        continue
 
 #write familydict dictionary
 for key1, value1 in familydict.iteritems():
-    fileout1.write("%s\t%s\n" %(key1,value1))
+    fileout1.write("%s\t%s\n" %(key1, value1))
 
 #write subfamily dictionary
 for key2, value2 in subfamilydict.iteritems():
-    fileout2.write("%s\t%s\n" %(key2,value2))
+    fileout2.write("%s\t%s\n" %(key2, value2))
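
The get()/add-one/store triple used above for familydict and subfamilydict is the classic counting idiom; collections.Counter expresses the same thing more compactly. A standalone sketch with invented family names and score ratios:

    from collections import Counter

    threshold = 0.4
    # (family, bit score ratio) pairs; values made up for illustration
    hits = [("GH5", 0.62), ("GH5", 0.48), ("CE1", 0.30)]

    familydict = Counter()
    for family, scoreratio in hits:
        if scoreratio >= threshold:
            familydict[family] += 1

    print(familydict)  # Counter({'GH5': 2})
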
diff --git a/score_blast_and_normalize.py b/score_blast_and_normalize.py
index c0efe96..3af3ea3 100755
--- a/score_blast_and_normalize.py
+++ b/score_blast_and_normalize.py
@@ -5,43 +5,43 @@
 import pickle
 import sys
 
-filedict=open(sys.argv[1],'rb')
-filein=open(sys.argv[3],'r')
-fileassembly_dict=open(sys.argv[2],'r')
+filedict = open(sys.argv[1], 'rb')
+filein = open(sys.argv[3], 'r')
+fileassembly_dict = open(sys.argv[2], 'r')
 
-outy=sys.argv[4]
-out1=outy+'family.out'
-out2=outy+'subfamily.out'
-fileout1=open(out1,'w')
-fileout2=open(out2,'w')
+outy = sys.argv[4]
+out1 = outy + 'family.out'
+out2 = outy + 'subfamily.out'
+fileout1 = open(out1, 'w')
+fileout2 = open(out2, 'w')
 
 cazy_or_foly_dict = pickle.load(filedict)
 assembly_dict = pickle.load(fileassembly_dict)
-familydict={}
-subfamilydict={}
+familydict = {}
+subfamilydict = {}
 
 for line in filein:
-    line=line.split('\t') #split line from fasta
-    query=line[0] #get query
-    coverage=assembly_dict.get(query) #get fold coverage from assembly dictionary
-    fold=float(coverage[9])
-    subject=line[1] #get subject
-    dict_entry=cazy_or_foly_dict.get(subject) #get info for subject in dictionary
+    line = line.split('\t')  # split line from blast output
+    query = line[0]  # get query
+    coverage = assembly_dict.get(query)  # get fold coverage from assembly dictionary
+    fold = float(coverage[9])
+    subject = line[1]  # get subject
+    dict_entry = cazy_or_foly_dict.get(subject)  # get info for subject in dictionary
 #    print subject
 #    print fold
-    family=dict_entry[2] #obtain family
-    fff=familydict.get(family,0) #Entry family name in new dictionary, if absent use 0 if not get count
-    ggg=fff+fold #update count
-    familydict[family]=ggg #update entry in dictionary
-    subfamily=dict_entry[1] #do the same for subfamily
-    sss=subfamilydict.get(subfamily,0)
-    ttt=sss+fold
-    subfamilydict[subfamily]=ttt
+    family = dict_entry[2]  # obtain family
+    fff = familydict.get(family, 0)  # look up current family count, starting at 0 if absent
+    ggg = fff + fold  # add this contig's fold coverage to the count
+    familydict[family] = ggg  # update entry in dictionary
+    subfamily = dict_entry[1]  # do the same for subfamily
+    sss = subfamilydict.get(subfamily, 0)
+    ttt = sss + fold
+    subfamilydict[subfamily] = ttt
 
 #write familydict dictionary
 for key1, value1 in familydict.iteritems():
-    fileout1.write("%s\t%s\n" %(key1,value1))
+    fileout1.write("%s\t%s\n" %(key1, value1))
 
 #write subfamily dictionary
 for key2, value2 in subfamilydict.iteritems():
-    fileout2.write("%s\t%s\n" %(key2,value2))
+    fileout2.write("%s\t%s\n" %(key2, value2))
diff --git a/score_blast_for_redundancy.py b/score_blast_for_redundancy.py
index c1ab3fa..d3be19c 100644
--- a/score_blast_for_redundancy.py
+++ b/score_blast_for_redundancy.py
@@ -8,28 +8,28 @@
 import sys
 
 #threshold=float(0.4)
-filedict=open(sys.argv[1],'rb')
-filein=open(sys.argv[2],'r')
-outy=sys.argv[3]
-out1=outy+'.family.out'
-fileout1=open(out1,'w')
+filedict = open(sys.argv[1], 'rb')
+filein = open(sys.argv[2], 'r')
+outy = sys.argv[3]
+out1 = outy + '.family.out'
+fileout1 = open(out1, 'w')
 
 cazy_or_foly_dict = pickle.load(filedict)
-subfamilydict={}
+subfamilydict = {}
 
 for line in filein:
-    line=line.lstrip(" ")
-    line=line.rstrip("\n ")
-    line=line.split(' ') #split line from fasta
-    subject_count=line[0] #get subject count
-    subject=line[1] #get subject
-    dict_entry=cazy_or_foly_dict.get(subject) #get info for subject in dictionary
-    family=dict_entry[1] #obtain family
+    line = line.lstrip(" ")
+    line = line.rstrip("\n ")
+    line = line.split(' ')  # split line into count and subject
+    subject_count = line[0]  # get subject count
+    subject = line[1]  # get subject
+    dict_entry = cazy_or_foly_dict.get(subject)  # get info for subject in dictionary
+    family = dict_entry[1]  # obtain subfamily
 #    print subject
 #    print subject_count
 #    print family
-    fileout1.write("%s\t%s\t%s\n" %(subject,subject_count,family))
+    fileout1.write("%s\t%s\t%s\n" %(subject, subject_count, family))
 
 fileout1.close()
 filein.close()
 filedict.close()
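
score_blast_and_normalize.py differs from score_blast2.py in one essential way: each hit contributes its contig's fold coverage rather than a flat count of 1, which normalizes family abundances by how deeply each contig was sequenced. The idea in isolation, with invented numbers:

    from collections import defaultdict

    fold_coverage = {"contig_1": 12.5, "contig_2": 3.0}  # assumed assembly statistics
    hits = [("contig_1", "GH5"), ("contig_2", "GH5")]    # (query, family) pairs, made up

    familydict = defaultdict(float)
    for query, family in hits:
        familydict[family] += fold_coverage[query]  # weight by coverage, not by 1

    print(dict(familydict))  # {'GH5': 15.5}
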
diff --git a/score_blast_for_redundancy.v2.py b/score_blast_for_redundancy.v2.py
index a580898..539bfe4 100644
--- a/score_blast_for_redundancy.v2.py
+++ b/score_blast_for_redundancy.v2.py
@@ -8,28 +8,28 @@
 import sys
 
 #threshold=float(0.4)
-filedict=open(sys.argv[1],'rb')
-filein=open(sys.argv[2],'r')
-outy=sys.argv[3]
-out1=outy+'.family.out'
-fileout1=open(out1,'w')
+filedict = open(sys.argv[1], 'rb')
+filein = open(sys.argv[2], 'r')
+outy = sys.argv[3]
+out1 = outy + '.family.out'
+fileout1 = open(out1, 'w')
 
 cazy_or_foly_dict = pickle.load(filedict)
-subfamilydict={}
+subfamilydict = {}
 
 for line in filein:
-    line=line.lstrip(" ")
-    line=line.rstrip("\n ")
-    line=line.split(' ') #split line from fasta
-    subject_count=line[0] #get subject count
-    subject=line[1] #get subject
-    dict_entry=cazy_or_foly_dict.get(subject) #get info for subject in dictionary
-    family=dict_entry[2] #obtain family
+    line = line.lstrip(" ")
+    line = line.rstrip("\n ")
+    line = line.split(' ')  # split line into count and subject
+    subject_count = line[0]  # get subject count
+    subject = line[1]  # get subject
+    dict_entry = cazy_or_foly_dict.get(subject)  # get info for subject in dictionary
+    family = dict_entry[2]  # obtain family
 #    print subject
 #    print subject_count
 #    print family
-    fileout1.write("%s\t%s\t%s\n" %(subject,subject_count,family))
+    fileout1.write("%s\t%s\t%s\n" %(subject, subject_count, family))
 
 fileout1.close()
 filein.close()
 filedict.close()
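
score_blast_for_redundancy.py and its v2 differ only in which dictionary column they report (dict_entry[1], the subfamily, versus dict_entry[2], the family). A hypothetical merged version that takes the column index as a fourth command-line argument would remove the duplication; this is a sketch of that idea, not part of the patch:

    import pickle
    import sys

    def report_counts(dict_path, counts_path, out_path, column):
        # column: 1 for the subfamily field, 2 for the family field
        with open(dict_path, 'rb') as filedict:
            cazy_or_foly_dict = pickle.load(filedict)
        fileout = open(out_path + '.family.out', 'w')
        for line in open(counts_path):
            fields = line.strip().split(' ')
            subject_count, subject = fields[0], fields[1]
            dict_entry = cazy_or_foly_dict.get(subject)
            fileout.write("%s\t%s\t%s\n" % (subject, subject_count, dict_entry[column]))
        fileout.close()

    if __name__ == '__main__':
        report_counts(sys.argv[1], sys.argv[2], sys.argv[3], int(sys.argv[4]))
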