From 557b26ce491888a81079d359e006ea4338509867 Mon Sep 17 00:00:00 2001 From: Dan Fornika Date: Tue, 20 Jun 2023 15:21:33 -0700 Subject: [PATCH 1/3] Add missing args to initETE3Database --- mob_suite/mob_cluster.py | 2 +- mob_suite/utils.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/mob_suite/mob_cluster.py b/mob_suite/mob_cluster.py index 7983ae0..3cb1d1a 100644 --- a/mob_suite/mob_cluster.py +++ b/mob_suite/mob_cluster.py @@ -533,7 +533,7 @@ def main(): new_seq_info[seq_id]['organism'] = organism ETE3DBTAXAFILE = os.path.abspath(database_dir + "/taxa.sqlite") - taxids = NamesToTaxIDs(organisms, ETE3DBTAXAFILE) + taxids = NamesToTaxIDs(organisms, ETE3DBTAXAFILE, database_dir) del(organisms) for seq_id in new_seq_info: diff --git a/mob_suite/utils.py b/mob_suite/utils.py index f62b65a..b6dc651 100644 --- a/mob_suite/utils.py +++ b/mob_suite/utils.py @@ -82,7 +82,7 @@ def filter_invalid_taxids(taxids): def getHeirarchy(taxid,ETE3DBTAXAFILE): if not isETE3DBTAXAFILEexists(ETE3DBTAXAFILE): logging.info("Did not find taxa.sqlite in {}. Initializaing ete3 taxonomy database".format(ETE3DBTAXAFILE)) - initETE3Database() + initETE3Database(database_directory, ETE3DBTAXAFILE) ncbi = NCBITaxa(dbfile=ETE3DBTAXAFILE) if not isETE3DBTAXAFILEexists(ETE3DBTAXAFILE): @@ -106,7 +106,7 @@ def getHeirarchy(taxid,ETE3DBTAXAFILE): def getTaxid(taxon,ETE3DBTAXAFILE): if not isETE3DBTAXAFILEexists(ETE3DBTAXAFILE): logging.info("Did not find taxa.sqlite in {}. Initializaing ete3 taxonomy database".format(ETE3DBTAXAFILE)) - initETE3Database() + initETE3Database(database_directory, ETE3DBTAXAFILE) ncbi = NCBITaxa(dbfile=ETE3DBTAXAFILE) if not isETE3DBTAXAFILEexists(ETE3DBTAXAFILE): @@ -121,10 +121,10 @@ def getTaxid(taxon,ETE3DBTAXAFILE): -def NamesToTaxIDs(names,ETE3DBTAXAFILE): +def NamesToTaxIDs(names,ETE3DBTAXAFILE,database_directory): if not isETE3DBTAXAFILEexists(ETE3DBTAXAFILE): logging.info("Did not find taxa.sqlite in {}. Initializaing ete3 taxonomy database".format(ETE3DBTAXAFILE)) - initETE3Database(ETE3DBTAXAFILE) + initETE3Database(database_directory, ETE3DBTAXAFILE) ncbi = NCBITaxa(dbfile=ETE3DBTAXAFILE) @@ -141,7 +141,7 @@ def NamesToTaxIDs(names,ETE3DBTAXAFILE): def getTaxonConvergence(taxids,ETE3DBTAXAFILE): if not isETE3DBTAXAFILEexists(ETE3DBTAXAFILE): logging.info("Did not find taxa.sqlite in {}. Initializaing ete3 taxonomy database".format(ETE3DBTAXAFILE)) - initETE3Database(ETE3DBTAXAFILE) + initETE3Database(database_directory, ETE3DBTAXAFILE) ncbi = NCBITaxa(dbfile=ETE3DBTAXAFILE) @@ -369,7 +369,7 @@ def isETE3DBTAXAFILEexists(ETE3DBTAXAFILE): return True -def initETE3Database(database_directory, ETE3DBTAXAFILE, logging): +def initETE3Database(database_directory, ETE3DBTAXAFILE): lockfilepath = os.path.join(database_directory, ".lock") if os.path.exists(lockfilepath) == False: From 74a4f35ff28ff6f808fec6c5d6034f427606613b Mon Sep 17 00:00:00 2001 From: Dan Fornika Date: Tue, 20 Jun 2023 16:09:38 -0700 Subject: [PATCH 2/3] Update signatures and call sites of other functions that need database_directory --- mob_suite/mob_typer.py | 2 +- mob_suite/utils.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/mob_suite/mob_typer.py b/mob_suite/mob_typer.py index 7aaf164..163c88c 100644 --- a/mob_suite/mob_typer.py +++ b/mob_suite/mob_typer.py @@ -508,7 +508,7 @@ def main(): record['relaxase_type_accession(s)'] = ",".join(record['relaxase_type_accession(s)']) host_range = hostrange(record['rep_type(s)'].split(','), record['relaxase_type_accession(s)'].split(','), - mob_cluster_id, ncbi, lit,ETE3DBTAXAFILE) + mob_cluster_id, ncbi, lit, ETE3DBTAXAFILE, database_dir) for field in host_range: record[field] = host_range[field] diff --git a/mob_suite/utils.py b/mob_suite/utils.py index b6dc651..6ecf303 100644 --- a/mob_suite/utils.py +++ b/mob_suite/utils.py @@ -79,7 +79,7 @@ def filter_invalid_taxids(taxids): return filtered -def getHeirarchy(taxid,ETE3DBTAXAFILE): +def getHeirarchy(taxid,ETE3DBTAXAFILE,database_directory): if not isETE3DBTAXAFILEexists(ETE3DBTAXAFILE): logging.info("Did not find taxa.sqlite in {}. Initializaing ete3 taxonomy database".format(ETE3DBTAXAFILE)) initETE3Database(database_directory, ETE3DBTAXAFILE) @@ -103,7 +103,7 @@ def getHeirarchy(taxid,ETE3DBTAXAFILE): return {'names': names, 'ranks': names} -def getTaxid(taxon,ETE3DBTAXAFILE): +def getTaxid(taxon,ETE3DBTAXAFILE,database_directory): if not isETE3DBTAXAFILEexists(ETE3DBTAXAFILE): logging.info("Did not find taxa.sqlite in {}. Initializaing ete3 taxonomy database".format(ETE3DBTAXAFILE)) initETE3Database(database_directory, ETE3DBTAXAFILE) @@ -138,7 +138,7 @@ def NamesToTaxIDs(names,ETE3DBTAXAFILE,database_directory): -def getTaxonConvergence(taxids,ETE3DBTAXAFILE): +def getTaxonConvergence(taxids,ETE3DBTAXAFILE,database_directory): if not isETE3DBTAXAFILEexists(ETE3DBTAXAFILE): logging.info("Did not find taxa.sqlite in {}. Initializaing ete3 taxonomy database".format(ETE3DBTAXAFILE)) initETE3Database(database_directory, ETE3DBTAXAFILE) @@ -206,7 +206,7 @@ def getTaxonConvergence(taxids,ETE3DBTAXAFILE): return (['-', '-']) -def hostrange(replion_types, relaxase_types, mob_cluster_id, ncbi, lit,ETE3DBTAXAFILE): +def hostrange(replion_types, relaxase_types, mob_cluster_id, ncbi, lit, ETE3DBTAXAFILE, database_directory): host_range_predictions = { 'observed_host_range_ncbi_name': '', 'observed_host_range_ncbi_rank': '', @@ -251,25 +251,25 @@ def hostrange(replion_types, relaxase_types, mob_cluster_id, ncbi, lit,ETE3DBTAX ncbi_unique_taxids = filter_invalid_taxids( list(set(ncbi_replicon_taxids + ncbi_cluster_taxids + ncbi_relaxase_taxids))) host_range_predictions['observed_host_range_ncbi_rank'], host_range_predictions[ - 'observed_host_range_ncbi_name'] = getTaxonConvergence(ncbi_unique_taxids,ETE3DBTAXAFILE) + 'observed_host_range_ncbi_name'] = getTaxonConvergence(ncbi_unique_taxids,ETE3DBTAXAFILE,database_directory) # Determine taxids associated with literature lit_unique_taxids = filter_invalid_taxids(list(set(lit_replicon_taxids))) host_range_predictions['reported_host_range_lit_rank'], host_range_predictions[ - 'reported_host_range_lit_name'] = getTaxonConvergence(lit_unique_taxids,ETE3DBTAXAFILE) + 'reported_host_range_lit_name'] = getTaxonConvergence(lit_unique_taxids,ETE3DBTAXAFILE,database_directory) # determine overall host range overall_taxids = filter_invalid_taxids(list(set(ncbi_unique_taxids + lit_unique_taxids))) host_range_predictions['predicted_host_range_overall_rank'], host_range_predictions[ - 'predicted_host_range_overall_name'] = getTaxonConvergence(overall_taxids,ETE3DBTAXAFILE) + 'predicted_host_range_overall_name'] = getTaxonConvergence(overall_taxids,ETE3DBTAXAFILE,database_directory) # move host-range prediction up to family when it is at genus or species level if host_range_predictions['predicted_host_range_overall_rank'] == 'genus' or host_range_predictions[ 'predicted_host_range_overall_rank'] == 'species': taxid = getTaxid(host_range_predictions['predicted_host_range_overall_name'],ETE3DBTAXAFILE) - heir = getHeirarchy(taxid,ETE3DBTAXAFILE) + heir = getHeirarchy(taxid,ETE3DBTAXAFILE,database_directory) names = heir['names'] ranks = heir['ranks'] From 5a5e8af2c188f8cbf3b129445cba0bf2c9bab688 Mon Sep 17 00:00:00 2001 From: Dan Fornika Date: Tue, 20 Jun 2023 16:14:15 -0700 Subject: [PATCH 3/3] Add database_directory to getTaxid() call site --- mob_suite/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mob_suite/utils.py b/mob_suite/utils.py index 6ecf303..584ace0 100644 --- a/mob_suite/utils.py +++ b/mob_suite/utils.py @@ -268,7 +268,7 @@ def hostrange(replion_types, relaxase_types, mob_cluster_id, ncbi, lit, ETE3DBTA # move host-range prediction up to family when it is at genus or species level if host_range_predictions['predicted_host_range_overall_rank'] == 'genus' or host_range_predictions[ 'predicted_host_range_overall_rank'] == 'species': - taxid = getTaxid(host_range_predictions['predicted_host_range_overall_name'],ETE3DBTAXAFILE) + taxid = getTaxid(host_range_predictions['predicted_host_range_overall_name'],ETE3DBTAXAFILE,database_directory) heir = getHeirarchy(taxid,ETE3DBTAXAFILE,database_directory) names = heir['names'] ranks = heir['ranks']