diff --git a/phylogenetic/defaults/clade-i/auspice_config.json b/phylogenetic/defaults/clade-i/auspice_config.json index a924efc..52d78f7 100644 --- a/phylogenetic/defaults/clade-i/auspice_config.json +++ b/phylogenetic/defaults/clade-i/auspice_config.json @@ -55,8 +55,12 @@ "geo_resolutions": [ "country" ], + "metadata_columns": [ + "strain" + ], "display_defaults": { "color_by": "country", + "tip_label": "strain", "map_triplicate": true, "distance_measure": "num_date", "transmission_lines": false diff --git a/phylogenetic/defaults/hmpxv1/auspice_config.json b/phylogenetic/defaults/hmpxv1/auspice_config.json index 89e32dc..a6adee5 100644 --- a/phylogenetic/defaults/hmpxv1/auspice_config.json +++ b/phylogenetic/defaults/hmpxv1/auspice_config.json @@ -60,8 +60,12 @@ "geo_resolutions": [ "country" ], + "metadata_columns": [ + "strain" + ], "display_defaults": { "color_by": "lineage", + "tip_label": "strain", "map_triplicate": true, "distance_measure": "num_date", "transmission_lines": false diff --git a/phylogenetic/defaults/hmpxv1_big/auspice_config.json b/phylogenetic/defaults/hmpxv1_big/auspice_config.json index 034350c..797892f 100644 --- a/phylogenetic/defaults/hmpxv1_big/auspice_config.json +++ b/phylogenetic/defaults/hmpxv1_big/auspice_config.json @@ -65,8 +65,12 @@ "geo_resolutions": [ "country" ], + "metadata_columns": [ + "strain" + ], "display_defaults": { "color_by": "lineage", + "tip_label": "strain", "map_triplicate": true, "distance_measure": "num_date", "transmission_lines": false diff --git a/phylogenetic/defaults/mpxv/auspice_config.json b/phylogenetic/defaults/mpxv/auspice_config.json index 046ac83..790a535 100644 --- a/phylogenetic/defaults/mpxv/auspice_config.json +++ b/phylogenetic/defaults/mpxv/auspice_config.json @@ -70,8 +70,12 @@ "geo_resolutions": [ "country" ], + "metadata_columns": [ + "strain" + ], "display_defaults": { "color_by": "clade_membership", + "tip_label": "strain", "map_triplicate": true, "distance_measure": "div", "transmission_lines": false diff --git a/phylogenetic/rules/export.smk b/phylogenetic/rules/export.smk index 1501eb2..0ee20d4 100644 --- a/phylogenetic/rules/export.smk +++ b/phylogenetic/rules/export.smk @@ -89,8 +89,8 @@ rule export: description=config["description"], auspice_config=config["auspice_config"], output: - auspice_json=build_dir + "/{build_name}/raw_tree.json", - root_sequence=build_dir + "/{build_name}/raw_tree_root-sequence.json", + auspice_json=build_dir + "/{build_name}/tree.json", + root_sequence=build_dir + "/{build_name}/tree_root-sequence.json", params: strain_id=config["strain_id_field"], shell: @@ -107,25 +107,3 @@ rule export: --include-root-sequence \ --output {output.auspice_json} """ - - -rule final_strain_name: - input: - auspice_json=build_dir + "/{build_name}/raw_tree.json", - metadata=build_dir + "/{build_name}/metadata.tsv", - root_sequence=build_dir + "/{build_name}/raw_tree_root-sequence.json", - output: - auspice_json=build_dir + "/{build_name}/tree.json", - root_sequence=build_dir + "/{build_name}/tree_root-sequence.json", - params: - strain_id=config["strain_id_field"], - display_strain_field=config.get("display_strain_field", "strain"), - shell: - """ - python3 scripts/set_final_strain_name.py --metadata {input.metadata} \ - --metadata-id-columns {params.strain_id} \ - --input-auspice-json {input.auspice_json} \ - --display-strain-name {params.display_strain_field} \ - --output {output.auspice_json} - cp {input.root_sequence} {output.root_sequence} - """ diff --git a/phylogenetic/scripts/set_final_strain_name.py b/phylogenetic/scripts/set_final_strain_name.py deleted file mode 100644 index 08ca935..0000000 --- a/phylogenetic/scripts/set_final_strain_name.py +++ /dev/null @@ -1,38 +0,0 @@ -import pandas as pd -import json, argparse -from augur.io import read_metadata - -def replace_name_recursive(node, lookup): - if node["name"] in lookup: - node["name"] = lookup[node["name"]] - - if "children" in node: - for child in node["children"]: - replace_name_recursive(child, lookup) - -if __name__=="__main__": - parser = argparse.ArgumentParser( - description="Swaps out the strain names in the Auspice JSON with the final strain name", - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - - parser.add_argument('--input-auspice-json', type=str, required=True, help="input auspice_json") - parser.add_argument('--metadata', type=str, required=True, help="input data") - parser.add_argument('--metadata-id-columns', nargs="+", help="names of possible metadata columns containing identifier information, ordered by priority. Only one ID column will be inferred.") - parser.add_argument('--display-strain-name', type=str, required=True, help="field to use as strain name in auspice") - parser.add_argument('--output', type=str, metavar="JSON", required=True, help="output Auspice JSON") - args = parser.parse_args() - - metadata = read_metadata(args.metadata, id_columns=args.metadata_id_columns) - name_lookup = {} - for ri, row in metadata.iterrows(): - strain_id = row.name - name_lookup[strain_id] = args.display_strain_name if pd.isna(row[args.display_strain_name]) else row[args.display_strain_name] - - with open(args.input_auspice_json, 'r') as fh: - data = json.load(fh) - - replace_name_recursive(data['tree'], name_lookup) - - with open(args.output, 'w') as fh: - json.dump(data, fh)