diff --git a/mob_suite/mob_cluster.py b/mob_suite/mob_cluster.py index 7983ae0..3cb1d1a 100644 --- a/mob_suite/mob_cluster.py +++ b/mob_suite/mob_cluster.py @@ -533,7 +533,7 @@ def main(): new_seq_info[seq_id]['organism'] = organism ETE3DBTAXAFILE = os.path.abspath(database_dir + "/taxa.sqlite") - taxids = NamesToTaxIDs(organisms, ETE3DBTAXAFILE) + taxids = NamesToTaxIDs(organisms, ETE3DBTAXAFILE, database_dir) del(organisms) for seq_id in new_seq_info: diff --git a/mob_suite/mob_typer.py b/mob_suite/mob_typer.py index 7aaf164..163c88c 100644 --- a/mob_suite/mob_typer.py +++ b/mob_suite/mob_typer.py @@ -508,7 +508,7 @@ def main(): record['relaxase_type_accession(s)'] = ",".join(record['relaxase_type_accession(s)']) host_range = hostrange(record['rep_type(s)'].split(','), record['relaxase_type_accession(s)'].split(','), - mob_cluster_id, ncbi, lit,ETE3DBTAXAFILE) + mob_cluster_id, ncbi, lit, ETE3DBTAXAFILE, database_dir) for field in host_range: record[field] = host_range[field] diff --git a/mob_suite/utils.py b/mob_suite/utils.py index f62b65a..584ace0 100644 --- a/mob_suite/utils.py +++ b/mob_suite/utils.py @@ -79,10 +79,10 @@ def filter_invalid_taxids(taxids): return filtered -def getHeirarchy(taxid,ETE3DBTAXAFILE): +def getHeirarchy(taxid,ETE3DBTAXAFILE,database_directory): if not isETE3DBTAXAFILEexists(ETE3DBTAXAFILE): logging.info("Did not find taxa.sqlite in {}. Initializaing ete3 taxonomy database".format(ETE3DBTAXAFILE)) - initETE3Database() + initETE3Database(database_directory, ETE3DBTAXAFILE) ncbi = NCBITaxa(dbfile=ETE3DBTAXAFILE) if not isETE3DBTAXAFILEexists(ETE3DBTAXAFILE): @@ -103,10 +103,10 @@ def getHeirarchy(taxid,ETE3DBTAXAFILE): return {'names': names, 'ranks': names} -def getTaxid(taxon,ETE3DBTAXAFILE): +def getTaxid(taxon,ETE3DBTAXAFILE,database_directory): if not isETE3DBTAXAFILEexists(ETE3DBTAXAFILE): logging.info("Did not find taxa.sqlite in {}. Initializaing ete3 taxonomy database".format(ETE3DBTAXAFILE)) - initETE3Database() + initETE3Database(database_directory, ETE3DBTAXAFILE) ncbi = NCBITaxa(dbfile=ETE3DBTAXAFILE) if not isETE3DBTAXAFILEexists(ETE3DBTAXAFILE): @@ -121,10 +121,10 @@ def getTaxid(taxon,ETE3DBTAXAFILE): -def NamesToTaxIDs(names,ETE3DBTAXAFILE): +def NamesToTaxIDs(names,ETE3DBTAXAFILE,database_directory): if not isETE3DBTAXAFILEexists(ETE3DBTAXAFILE): logging.info("Did not find taxa.sqlite in {}. Initializaing ete3 taxonomy database".format(ETE3DBTAXAFILE)) - initETE3Database(ETE3DBTAXAFILE) + initETE3Database(database_directory, ETE3DBTAXAFILE) ncbi = NCBITaxa(dbfile=ETE3DBTAXAFILE) @@ -138,10 +138,10 @@ def NamesToTaxIDs(names,ETE3DBTAXAFILE): -def getTaxonConvergence(taxids,ETE3DBTAXAFILE): +def getTaxonConvergence(taxids,ETE3DBTAXAFILE,database_directory): if not isETE3DBTAXAFILEexists(ETE3DBTAXAFILE): logging.info("Did not find taxa.sqlite in {}. Initializaing ete3 taxonomy database".format(ETE3DBTAXAFILE)) - initETE3Database(ETE3DBTAXAFILE) + initETE3Database(database_directory, ETE3DBTAXAFILE) ncbi = NCBITaxa(dbfile=ETE3DBTAXAFILE) @@ -206,7 +206,7 @@ def getTaxonConvergence(taxids,ETE3DBTAXAFILE): return (['-', '-']) -def hostrange(replion_types, relaxase_types, mob_cluster_id, ncbi, lit,ETE3DBTAXAFILE): +def hostrange(replion_types, relaxase_types, mob_cluster_id, ncbi, lit, ETE3DBTAXAFILE, database_directory): host_range_predictions = { 'observed_host_range_ncbi_name': '', 'observed_host_range_ncbi_rank': '', @@ -251,25 +251,25 @@ def hostrange(replion_types, relaxase_types, mob_cluster_id, ncbi, lit,ETE3DBTAX ncbi_unique_taxids = filter_invalid_taxids( list(set(ncbi_replicon_taxids + ncbi_cluster_taxids + ncbi_relaxase_taxids))) host_range_predictions['observed_host_range_ncbi_rank'], host_range_predictions[ - 'observed_host_range_ncbi_name'] = getTaxonConvergence(ncbi_unique_taxids,ETE3DBTAXAFILE) + 'observed_host_range_ncbi_name'] = getTaxonConvergence(ncbi_unique_taxids,ETE3DBTAXAFILE,database_directory) # Determine taxids associated with literature lit_unique_taxids = filter_invalid_taxids(list(set(lit_replicon_taxids))) host_range_predictions['reported_host_range_lit_rank'], host_range_predictions[ - 'reported_host_range_lit_name'] = getTaxonConvergence(lit_unique_taxids,ETE3DBTAXAFILE) + 'reported_host_range_lit_name'] = getTaxonConvergence(lit_unique_taxids,ETE3DBTAXAFILE,database_directory) # determine overall host range overall_taxids = filter_invalid_taxids(list(set(ncbi_unique_taxids + lit_unique_taxids))) host_range_predictions['predicted_host_range_overall_rank'], host_range_predictions[ - 'predicted_host_range_overall_name'] = getTaxonConvergence(overall_taxids,ETE3DBTAXAFILE) + 'predicted_host_range_overall_name'] = getTaxonConvergence(overall_taxids,ETE3DBTAXAFILE,database_directory) # move host-range prediction up to family when it is at genus or species level if host_range_predictions['predicted_host_range_overall_rank'] == 'genus' or host_range_predictions[ 'predicted_host_range_overall_rank'] == 'species': - taxid = getTaxid(host_range_predictions['predicted_host_range_overall_name'],ETE3DBTAXAFILE) - heir = getHeirarchy(taxid,ETE3DBTAXAFILE) + taxid = getTaxid(host_range_predictions['predicted_host_range_overall_name'],ETE3DBTAXAFILE,database_directory) + heir = getHeirarchy(taxid,ETE3DBTAXAFILE,database_directory) names = heir['names'] ranks = heir['ranks'] @@ -369,7 +369,7 @@ def isETE3DBTAXAFILEexists(ETE3DBTAXAFILE): return True -def initETE3Database(database_directory, ETE3DBTAXAFILE, logging): +def initETE3Database(database_directory, ETE3DBTAXAFILE): lockfilepath = os.path.join(database_directory, ".lock") if os.path.exists(lockfilepath) == False: