From d99c3e5defc5c9d3d73f705073f7a4ed3b4a24b7 Mon Sep 17 00:00:00 2001
From: Victor Lin <13424970+victorlin@users.noreply.github.com>
Date: Tue, 10 Sep 2024 14:59:38 -0700
Subject: [PATCH 1/5] Remove default values from parse_sequence function header

The only usage of this function supplies all paramters, so any defaults
here are unused and are either duplicate or inaccurate.
---
 augur/parse.py      | 2 +-
 tests/test_parse.py | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/augur/parse.py b/augur/parse.py
index 5d3cd34a9..795966cfb 100644
--- a/augur/parse.py
+++ b/augur/parse.py
@@ -88,7 +88,7 @@ def prettify(x, trim=0, camelCase=False, etal=None, removeComma=False):
     return res
 
 
-def parse_sequence(sequence, fields, strain_key="strain", separator="|", prettify_fields=None, fix_dates_format=None):
+def parse_sequence(sequence, fields, strain_key, separator, prettify_fields, fix_dates_format):
     """Parse a single sequence record into a sequence record and associated metadata.
 
     Parameters
diff --git a/tests/test_parse.py b/tests/test_parse.py
index 2c159d1a7..04b6f62a3 100644
--- a/tests/test_parse.py
+++ b/tests/test_parse.py
@@ -71,7 +71,9 @@ def test_parse_sequence(self):
             sequence_record,
             fields=fields,
             strain_key="strain",
-            prettify_fields=["region"]
+            separator="|",
+            prettify_fields=["region"],
+            fix_dates_format=None,
         )
 
         assert sequence.id == metadata["strain"]

From f9de7106d33b76779481570d5dcbf04786966750 Mon Sep 17 00:00:00 2001
From: Victor Lin <13424970+victorlin@users.noreply.github.com>
Date: Tue, 10 Sep 2024 15:06:02 -0700
Subject: [PATCH 2/5] Add types to parse_sequence

---
 augur/parse.py | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/augur/parse.py b/augur/parse.py
index 795966cfb..4e9f504f4 100644
--- a/augur/parse.py
+++ b/augur/parse.py
@@ -1,8 +1,10 @@
 """
 Parse delimited fields from FASTA sequence names into a TSV and FASTA file.
 """
+import Bio.SeqRecord
 import pandas as pd
 import sys
+from typing import Dict, Sequence, Tuple
 
 from .io.file import open_file
 from .io.sequences import read_sequences, write_sequences
@@ -88,27 +90,34 @@ def prettify(x, trim=0, camelCase=False, etal=None, removeComma=False):
     return res
 
 
-def parse_sequence(sequence, fields, strain_key, separator, prettify_fields, fix_dates_format):
+def parse_sequence(
+        sequence: Bio.SeqRecord.SeqRecord,
+        fields: Sequence[str],
+        strain_key: str,
+        separator: str,
+        prettify_fields: Sequence[str],
+        fix_dates_format: str,
+    ) -> Tuple[Bio.SeqRecord.SeqRecord, Dict[str, str]]:
     """Parse a single sequence record into a sequence record and associated metadata.
 
     Parameters
     ----------
-    sequence : Bio.SeqRecord.SeqRecord
+    sequence
         a BioPython sequence record to parse with metadata stored in its description field.
 
-    fields : list or tuple
+    fields
         a list of names for fields expected in the given record's description.
 
-    strain_key : str
+    strain_key
         name of the field to use as the given sequence's unique id
 
-    separator : str
+    separator
         delimiter to split record description by.
 
-    prettify_fields : list or tuple
+    prettify_fields
         a list of field names for which the values in those fields should be prettified.
 
-    fix_dates_format : str
+    fix_dates_format
         parse "date" field into the requested canonical format ("dayfirst" or "monthfirst").
 
     Returns

From 6064fa2e521c55fa91bedec15a59c80767beb6ae Mon Sep 17 00:00:00 2001
From: Victor Lin <13424970+victorlin@users.noreply.github.com>
Date: Tue, 10 Sep 2024 14:51:38 -0700
Subject: [PATCH 3/5] Prefer `strain` over `name` as sequence ID field

Following through with deprecation.
---
 augur/parse.py           | 4 +---
 tests/functional/parse.t | 3 +--
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/augur/parse.py b/augur/parse.py
index 4e9f504f4..80b909faa 100644
--- a/augur/parse.py
+++ b/augur/parse.py
@@ -11,7 +11,7 @@
 from .dates import get_numerical_date_from_value
 from .errors import AugurError
 
-PARSE_DEFAULT_ID_COLUMNS = ("name", "strain")
+PARSE_DEFAULT_ID_COLUMNS = ("strain", "name")
 
 forbidden_characters = str.maketrans(
     {' ': None,
@@ -187,8 +187,6 @@ def run(args):
         for possible_id in PARSE_DEFAULT_ID_COLUMNS:
             if possible_id in args.fields:
                 strain_key = possible_id
-                if possible_id == "name" and "strain" in args.fields:
-                    print("DEPRECATED: The default search order for the ID field will be changing from ('name', 'strain') to ('strain', 'name').\nUsers who prefer to keep using 'name' instead of 'strain' should use the parameter: --output-id-field 'name'", file=sys.stderr)
                 break
         if not strain_key:
             strain_key = args.fields[0]
diff --git a/tests/functional/parse.t b/tests/functional/parse.t
index 6b62bbb5a..963afeeb7 100644
--- a/tests/functional/parse.t
+++ b/tests/functional/parse.t
@@ -69,10 +69,9 @@ Parse Zika sequences into sequences and metadata, preferred default ids is 'name
   >   --output-sequences "$TMP/sequences.fasta" \
   >   --output-metadata "$TMP/metadata.tsv" \
   >   --fields strain virus name date region country division city db segment authors url title journal paper_url \
+  >   --output-id-field 'name' \
   >   --prettify-fields region country division city \
   >   --fix-dates monthfirst
-  DEPRECATED: The default search order for the ID field will be changing from ('name', 'strain') to ('strain', 'name').
-  Users who prefer to keep using 'name' instead of 'strain' should use the parameter: --output-id-field 'name'
 
   $ diff -u "parse/sequences_other.fasta" "$TMP/sequences.fasta"
   $ rm -f "$TMP/sequences.fasta" "$TMP/metadata.tsv"

From bec3b0fbc3b0c6f648a3f125c8488d8c96865e78 Mon Sep 17 00:00:00 2001
From: Victor Lin <13424970+victorlin@users.noreply.github.com>
Date: Tue, 10 Sep 2024 14:54:10 -0700
Subject: [PATCH 4/5] Update deprecation entry

---
 DEPRECATED.md | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/DEPRECATED.md b/DEPRECATED.md
index 149e60f12..3283d61c7 100644
--- a/DEPRECATED.md
+++ b/DEPRECATED.md
@@ -10,9 +10,7 @@ available for backwards compatibility, but should not be used in new code.
 
 ## `augur parse` preference of `name` over `strain` as the sequence ID field
 
-*Deprecated in version 24.2.0 (February 2024). Planned to be reordered June 2024 or after.*
-
-Currently, `augur parse` checks for a 'name' field and then a 'strain' field to use as a sequence ID. This order will be changed in favor of searching for a 'strain' and then a 'name' field to be more consistent with the rest of Augur.
+*Deprecated in version 24.2.0 (February 2024). Reordered in version __NEXT__ (September 2024).*
 
 Users who have both 'name' and 'strain' fields in their data, and want to favor using the 'name' field should add the following `augur parse` parameter `--output-id-field 'name'`.
 

From 5e0bc544c12725b91e27bdfc2e322a2b62a76f37 Mon Sep 17 00:00:00 2001
From: Victor Lin <13424970+victorlin@users.noreply.github.com>
Date: Tue, 10 Sep 2024 14:57:56 -0700
Subject: [PATCH 5/5] Update changelog

---
 CHANGES.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/CHANGES.md b/CHANGES.md
index 12c49c938..715a5c0b7 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -2,6 +2,10 @@
 
 ## __NEXT__
 
+### Major Changes
+
+* parse: When both `strain` and `name` fields are present, the `strain` field will now be used as the sequence ID field. [#1629][] (@victorlin)
+
 ### Features
 
 * merge: Generated source columns (e.g. `__source_metadata_{NAME}`) may now have their name template changed with `--source-columns=TEMPLATE` or may be omitted entirely with `--no-source-columns`. [#1625][] (@tsibley)
@@ -13,6 +17,7 @@
 [#1588]: https://github.com/nextstrain/augur/issues/1588
 [#1598]: https://github.com/nextstrain/augur/issues/1598
 [#1625]: https://github.com/nextstrain/augur/issues/1625
+[#1629]: https://github.com/nextstrain/augur/pull/1629
 
 ## 25.4.0 (3 September 2024)