Skip to content

Commit

Permalink
update utils.run_ete_script() and remove ete_path/--ete-path
Browse files Browse the repository at this point in the history
don't need it since we're on python 3 now
  • Loading branch information
psathyrella committed Mar 9, 2024
1 parent e490d8b commit 68efc20
Show file tree
Hide file tree
Showing 13 changed files with 36 additions and 53 deletions.
7 changes: 3 additions & 4 deletions bin/bcr-phylo-run.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
from python.event import RecombinationEvent
import python.paircluster as paircluster

ete_path = os.getenv('HOME') + '/anaconda_ete/bin'
bcr_phylo_path = os.getenv('PWD') + '/packages/bcr-phylo-benchmark'
ig_or_tr = 'ig'

Expand Down Expand Up @@ -212,9 +211,9 @@ def run_bcr_phylo(naive_seq, outdir, ievent, uid_str_len=None, igcr=None):

cfo = None
if args.n_procs == 1:
utils.run_ete_script(cmd, ete_path, dryrun=args.dry_run)
utils.run_ete_script(cmd, dryrun=args.dry_run)
else:
cmd, _ = utils.run_ete_script(cmd, ete_path, return_for_cmdfos=True, tmpdir=outdir, dryrun=args.dry_run)
cmd = utils.run_ete_script(cmd, return_for_cmdfos=True, dryrun=args.dry_run)
cfo = {'cmd_str' : cmd, 'workdir' : outdir, 'outfname' : bcr_phylo_fasta_fname(outdir)}
sys.stdout.flush()
return cfo
Expand Down Expand Up @@ -348,7 +347,7 @@ def translate_duplicate_pids(mpair, dup_translations):
kdfname, nwkfname = '%s/kd-vals.csv' % outdir, '%s/simu.nwk' % outdir
if not utils.output_exists(args, kdfname, outlabel='kd/nwk conversion', offset=4): # eh, don't really need to check for both kd and nwk file, chances of only one being missing are really small, and it'll just crash when it looks for it a couple lines later
cmd = './bin/read-bcr-phylo-trees.py --pickle-tree-file %s/%s_lineage_tree.p --kdfile %s --newick-tree-file %s' % (outdir, args.extrastr, kdfname, nwkfname)
utils.run_ete_script(cmd, ete_path, debug=args.n_procs==1)
utils.run_ete_script(cmd, debug=args.n_procs==1)
nodefo = read_kdvals(kdfname)
dtree = treeutils.get_dendro_tree(treefname=nwkfname)
seqfos = utils.read_fastx(bcr_phylo_fasta_fname(outdir)) # output mutated sequences from bcr-phylo
Expand Down
3 changes: 2 additions & 1 deletion bin/gctree-run.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,8 @@ def parse_output():
def convert_pickle_tree():
assert False # doesn't work yet
cmd = '%s/bin/read-bcr-phylo-trees.py --pickle-tree-file %s --newick-tree-file %s/tree.nwk' % (utils.get_partis_dir(), args.infname, args.outdir)
utils.run_ete_script(cmd, None, conda_path=args.condapath, conda_env='ete3', pyversion='3')
# assert False # this also needs updating
# utils.run_ete_script(cmd, None, conda_path=args.condapath, conda_env='ete3', pyversion='3')

# ----------------------------------------------------------------------------------------
parser = argparse.ArgumentParser()
Expand Down
1 change: 0 additions & 1 deletion bin/partis
Original file line number Diff line number Diff line change
Expand Up @@ -1380,7 +1380,6 @@ parent_args.append({'name' : '--no-partition-plots', 'kwargs' : {'action' : 'sto
parent_args.append({'name' : '--only-csv-plots', 'kwargs' : {'action' : 'store_true', 'help' : 'skip writing actual image files, which can quite be slow, and only write the csv/yaml summaries (where implemented)'}})
parent_args.append({'name' : '--make-per-gene-plots', 'kwargs' : {'action' : 'store_true', 'help' : 'in addition to plots aggregating over genes, write plots displaying info for each gene of, e.g., per position shm rate, deletion frequencies'}})
parent_args.append({'name' : '--make-per-gene-per-base-plots', 'kwargs' : {'action' : 'store_true', 'help' : 'in addition to the plots made by --make-per-gene-plots, also make the per-gene, per-base plots (i.e. showing A->T vs A->G (this is quite slow, like a few seconds per gene plot).'}})
parent_args.append({'name' : '--ete-path', 'kwargs' : {'default' : ('%s/anaconda_ete/bin' % os.getenv('HOME')) if os.getenv('HOME') is not None else None, 'help' : 'Set to the string \'None\' to turn off.'}})
parent_args.append({'name' : '--linearham-dir', 'kwargs' : {'default' : ('%s/work/linearham' % os.getenv('HOME')) if os.getenv('HOME') is not None else None, 'help' : 'path to linearham main dir (necessary if you want to use linearham without docker)'}})
parent_args.append({'name' : '--meta-info-to-emphasize', 'kwargs' : {'help' : 'Input meta info (or regular annotation) key to emphasize (highlight in red) in various plots, similar to --queries-to-include. Specify as comma-separated key-value pair, for instance \'timepoints,+8d\' would highlight all sequences with timepoint \'+8d\'. Can be any annotation key or input meta key. For now only supports one key-val pair, but in future should support colon-separated list.'}})
parent_args.append({'name' : '--meta-info-key-to-color', 'kwargs' : {'help' : 'Like --meta-info-to-emphasize, except for this key we choose a different color for each value (only in the slug/joy plots at the moment).'}})
Expand Down
7 changes: 4 additions & 3 deletions bin/read-gctree-output.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,12 +113,13 @@ def run_cmd(action):
cmd += ' --min-selection-metric-cluster-size 3 --treefname %s/%s --plotdir %s --selection-metrics-to-calculate lbi:aa-lbi:cons-dist-aa:lbr:aa-lbr:lbf:aa-lbf' % (args.gctreedir, args.tree_basename, 'paired-outdir' if args.paired_loci else '%s/selection-metrics/plots'%args.outdir)
cmd += ' --extra-daffy-metrics lbi:aa-lbi'
cmd += ' --label-root-node'
cmd += ' --add-selection-metrics-to-outfname --use-droplet-id-for-combo-id --selection-metric-plot-cfg %s' % ':'.join(treeutils.default_plot_cfg + ['distr', 'tree-mut-stats'])
plt_cfg = treeutils.default_plot_cfg + ['distr', 'tree-mut-stats']
if args.no_tree_plots:
plt_cfg = [t for t in plt_cfg if t != 'tree']
cmd += ' --add-selection-metrics-to-outfname --use-droplet-id-for-combo-id --selection-metric-plot-cfg %s' % ':'.join(plt_cfg)
if args.slice_bin_fname is not None:
cmd += ' --slice-bin-fname %s' % args.slice_bin_fname
cmd += ' --choose-all-abs --chosen-ab-fname %s/chosen-abs.csv' % args.outdir # --debug 1
if args.no_tree_plots:
cmd += ' --ete-path None'
if args.n_procs is not None:
cmd += ' --n-procs %d' % args.n_procs
utils.simplerun(cmd, logfname='%s/%s.log'%(args.outdir, action), dryrun=args.dry)
Expand Down
7 changes: 3 additions & 4 deletions bin/smetric-run.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,8 @@
# NOTE extra required args are set in treeutils plot_tree_metrics()
args.cluster_indices = utils.get_arg_list(args.cluster_indices, intify_with_ranges=True)
args.selection_metric_plot_cfg = utils.get_arg_list(args.selection_metric_plot_cfg, choices=treeutils.all_plot_cfg)
ete_path, workdir = None, None
# Default to no scratch directory; one is chosen just below only when tree plots are requested.
# This must be a plain None: the old two-target form was `ete_path, workdir = None, None`, and
# leaving the right-hand side as `None, None` would bind workdir to the tuple (None, None).
workdir = None
if args.make_tree_plots or 'tree' in args.selection_metric_plot_cfg:
ete_path = '%s/anaconda_ete/bin' % os.getenv('HOME')
workdir = utils.choose_random_subdir('/tmp/%s/tree-metrics' % os.getenv('USER'))

if args.n_max_queries != -1:
Expand All @@ -56,8 +55,8 @@
if args.metric_method == 'dtr':
raise Exception('I think the [new] first arg here (metrics_to_calc) isn\'t right, but don\'t want to test cause i don\'t care about dtr')
treeutils.add_smetrics(args, ['lbi', 'lbr', 'dtr'], None, args.lb_tau, base_plotdir=args.base_plotdir,
train_dtr=args.action=='train', dtr_cfg=args.dtr_cfg, true_lines_to_use=true_lines, ete_path=ete_path, workdir=workdir) # NOTE if you need this in the future you may want to add tree_inference_method 'gctree' and tree_inference_outdir
train_dtr=args.action=='train', dtr_cfg=args.dtr_cfg, true_lines_to_use=true_lines, workdir=workdir) # NOTE if you need this in the future you may want to add tree_inference_method 'gctree' and tree_inference_outdir
else:
treeutils.calculate_individual_tree_metrics(args.metric_method, true_lines, base_plotdir=args.base_plotdir, lb_tau=args.lb_tau, only_csv=args.only_csv_plots,
min_cluster_size=args.min_selection_metric_cluster_size, include_relative_affy_plots=args.include_relative_affy_plots,
dont_normalize_lbi=args.dont_normalize_lbi, ete_path=ete_path, workdir=workdir, cluster_indices=args.cluster_indices, only_look_upwards=args.only_look_upwards, args=args) #, debug=True)
dont_normalize_lbi=args.dont_normalize_lbi, workdir=workdir, cluster_indices=args.cluster_indices, only_look_upwards=args.only_look_upwards, args=args) #, debug=True)
8 changes: 4 additions & 4 deletions python/lbplotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -1544,7 +1544,7 @@ def ctypetitle(ct):
add_fn(fnames, fn=fn)

# ----------------------------------------------------------------------------------------
def get_lb_tree_cmd(treestr, outfname, lb_metric, affy_key, ete_path, subworkdir, metafo=None, tree_style=None, queries_to_include=None, label_all_nodes=False, label_leaf_nodes=False, label_root_node=False, seq_len=None,
def get_lb_tree_cmd(treestr, outfname, lb_metric, affy_key, subworkdir, metafo=None, tree_style=None, queries_to_include=None, label_all_nodes=False, label_leaf_nodes=False, label_root_node=False, seq_len=None,
meta_info_key_to_color=None, meta_info_to_emphasize=None, node_size_key=None, branch_color_key=None, uid_translations=None, node_label_regex=None):
treefname = '%s/tree.nwk' % subworkdir
metafname = '%s/meta.yaml' % subworkdir
Expand Down Expand Up @@ -1586,12 +1586,12 @@ def get_lb_tree_cmd(treestr, outfname, lb_metric, affy_key, ete_path, subworkdir
cmdstr += ' --branch-color-key %s' % branch_color_key
if node_label_regex is not None:
cmdstr += ' --node-label-regex %s' % node_label_regex
cmdstr, _ = utils.run_ete_script(cmdstr, ete_path, return_for_cmdfos=True, tmpdir=subworkdir, extra_str=' ')
cmdstr = utils.run_ete_script(cmdstr, return_for_cmdfos=True, extra_str=' ')

return {'cmd_str' : cmdstr, 'workdir' : subworkdir, 'outfname' : outfname, 'workfnames' : [treefname, metafname]}

# ----------------------------------------------------------------------------------------
def plot_lb_trees(args, metric_methods, baseplotdir, lines, ete_path, base_workdir, is_true_line=False, tree_style=None, fnames=None):
def plot_lb_trees(args, metric_methods, baseplotdir, lines, base_workdir, is_true_line=False, tree_style=None, fnames=None):
add_fn(fnames, new_row=True)
workdir = '%s/ete3-plots' % base_workdir
plotdir = baseplotdir + '/trees'
Expand All @@ -1614,7 +1614,7 @@ def plot_lb_trees(args, metric_methods, baseplotdir, lines, ete_path, base_workd
if affy_key in line: # either 'affinities' or 'relative_affinities'
metafo[utils.reversed_input_metafile_keys[affy_key]] = {uid : affy for uid, affy in zip(line['unique_ids'], line[affy_key])}
outfname = '%s/%s-tree-iclust-%d%s.svg' % (plotdir, lb_metric, iclust, '-relative' if 'relative' in affy_key else '')
cmdfos += [get_lb_tree_cmd(treestr, outfname, lb_metric, affy_key, ete_path, '%s/sub-%d' % (workdir, len(cmdfos)), metafo=metafo, tree_style=tree_style, queries_to_include=qtis,
cmdfos += [get_lb_tree_cmd(treestr, outfname, lb_metric, affy_key, '%s/sub-%d' % (workdir, len(cmdfos)), metafo=metafo, tree_style=tree_style, queries_to_include=qtis,
label_all_nodes=args.label_tree_nodes, label_leaf_nodes=args.label_leaf_nodes, label_root_node=args.label_root_node, uid_translations=altids, node_label_regex=args.node_label_regex,
seq_len=float(numpy.mean([len(s) for s in line['seqs']])))]
add_fn(fnames, fn=outfname, n_per_row=4)
Expand Down
2 changes: 1 addition & 1 deletion python/partitiondriver.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ def calc_tree_metrics(self, annotation_dict, annotation_list=None, cpath=None):
print(' --seed-unique-id: restricting selection metric calculation to seed cluster in best partition (mostly to avoid fasttree crash on duplicate uids)')
annotation_dict = OrderedDict([(uidstr, line) for uidstr, line in annotation_dict.items() if self.args.seed_unique_id in line['unique_ids'] and line['unique_ids'] in cpath.partitions[cpath.i_best]])
treeutils.add_smetrics(self.args, self.args.selection_metrics_to_calculate, annotation_dict, self.args.lb_tau, reco_info=self.reco_info, # NOTE keys in <annotation_dict> may be out of sync with 'unique_ids' if we add inferred ancestral seqs here
use_true_clusters=self.reco_info is not None, base_plotdir=self.args.plotdir, ete_path=self.args.ete_path, workdir=self.args.workdir,
use_true_clusters=self.reco_info is not None, base_plotdir=self.args.plotdir, workdir=self.args.workdir,
outfname=self.args.selection_metric_fname, glfo=self.glfo, tree_inference_outdir=self.args.tree_inference_outdir, debug=self.args.debug)

# ----------------------------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion python/partitionplotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -798,7 +798,7 @@ def vmuts(vclass_muts, mutfo, uid):
qtis = None if self.args.queries_to_include is None else [q for q in self.args.queries_to_include if q in annotation['unique_ids']] # NOTE make sure to *not* modify args.queries_to_include
altids = [(u, au) for u, au in zip(annotation['unique_ids'], annotation['alternate-uids']) if au is not None] if 'alternate-uids' in annotation else None
mfo, cdr3fo = get_metafo(annotation, iclust)
cfo = lbplotting.get_lb_tree_cmd(self.get_treestr(iclust), '%s/%s.svg'%(plotdir, plotname), None, None, self.args.ete_path, '%s/sub-%d'%(workdir, len(cmdfos)), metafo=mfo,
cfo = lbplotting.get_lb_tree_cmd(self.get_treestr(iclust), '%s/%s.svg'%(plotdir, plotname), None, None, '%s/sub-%d'%(workdir, len(cmdfos)), metafo=mfo,
queries_to_include=qtis, meta_info_key_to_color=self.args.meta_info_key_to_color, meta_info_to_emphasize=self.args.meta_info_to_emphasize, uid_translations=altids,
label_all_nodes=self.args.label_tree_nodes, label_leaf_nodes=self.args.label_leaf_nodes, label_root_node=self.args.label_root_node, node_size_key=self.args.node_size_key, branch_color_key=self.args.branch_color_key, node_label_regex=self.args.node_label_regex)
cmdfos.append(cfo)
Expand Down
3 changes: 0 additions & 3 deletions python/processargs.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,3 @@ def process_corr_values(cvals, estr=''):
raise Exception('have to specify --sw-cachefname or --parameter-dir, since we need sw info to calculate linearham inputs')
if args.extra_annotation_columns is None or 'linearham-info' not in args.extra_annotation_columns:
args.extra_annotation_columns = utils.add_lists(args.extra_annotation_columns, ['linearham-info'])

if args.ete_path is not None and args.ete_path == 'None': # it's nice to be able to unset this from the command line (so we don't make the slow tree plots)
args.ete_path = None
Loading

0 comments on commit 68efc20

Please sign in to comment.