From 5ef92236f5ae5fd53baa7d0aea1313e8bdb37234 Mon Sep 17 00:00:00 2001 From: Louis Bergelson Date: Tue, 29 Jan 2019 11:23:53 -0500 Subject: [PATCH] Add support for Sam Header Readgroup Barcode field (#1210) * adding support in SAMReadGroupRecord for the BC attribute --- .../htsjdk/samtools/SAMReadGroupRecord.java | 41 ++++++++++++++++--- .../htsjdk/samtools/util/SamConstants.java | 13 ++++-- .../samtools/SAMReadGroupRecordTest.java | 23 +++++++++++ 3 files changed, 68 insertions(+), 9 deletions(-) diff --git a/src/main/java/htsjdk/samtools/SAMReadGroupRecord.java b/src/main/java/htsjdk/samtools/SAMReadGroupRecord.java index 14f1c50e3d..60165628ad 100644 --- a/src/main/java/htsjdk/samtools/SAMReadGroupRecord.java +++ b/src/main/java/htsjdk/samtools/SAMReadGroupRecord.java @@ -25,12 +25,9 @@ import htsjdk.samtools.util.Iso8601Date; +import htsjdk.samtools.util.SamConstants; -import java.util.Arrays; -import java.util.Date; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; +import java.util.*; /** * Header information about a read group. 
@@ -51,6 +48,8 @@ public class SAMReadGroupRecord extends AbstractSAMHeaderRecord public static final String PLATFORM_MODEL_TAG = "PM"; public static final String PLATFORM_UNIT_TAG = "PU"; public static final String READ_GROUP_SAMPLE_TAG = "SM"; + public static final String BARCODE_TAG = "BC"; + /* Platform values for the @RG-PL tag */ public enum PlatformValue { @@ -63,7 +62,7 @@ public enum PlatformValue { new HashSet(Arrays.asList(READ_GROUP_ID_TAG, SEQUENCING_CENTER_TAG, DESCRIPTION_TAG, DATE_RUN_PRODUCED_TAG, FLOW_ORDER_TAG, KEY_SEQUENCE_TAG, LIBRARY_TAG, PROGRAM_GROUP_TAG, PREDICTED_MEDIAN_INSERT_SIZE_TAG, PLATFORM_TAG, PLATFORM_MODEL_TAG, - PLATFORM_UNIT_TAG, READ_GROUP_SAMPLE_TAG)); + PLATFORM_UNIT_TAG, READ_GROUP_SAMPLE_TAG, BARCODE_TAG)); public SAMReadGroupRecord(final String id) { mReadGroupId = id; } @@ -90,6 +89,36 @@ public SAMReadGroupRecord(final String id, final SAMReadGroupRecord srcProgramRe public String getPlatform() { return getAttribute(PLATFORM_TAG); } public void setPlatform(final String platform) { setAttribute(PLATFORM_TAG, platform); } + /** + * @return the List of barcodes associated with this read group or null + */ + public List<String> getBarcodes() { + final String barcodeString = getAttribute(BARCODE_TAG); + if (barcodeString == null) { + return null; + } else if (barcodeString.isEmpty()) { + return Collections.emptyList(); + } else { + return Arrays.asList(barcodeString.split(SamConstants.BARCODE_SEQUENCE_DELIMITER)); + } + } + + /** + * Set the barcodes associated with this ReadGroup. + * Note that an input of null results in unsetting the attribute while an empty list is set as a tag with an empty value. 
+ * @param barcodes a list of barcodes to associate with this read group + */ + public void setBarcodes(final List<String> barcodes) { + if (barcodes == null) { + setAttribute(BARCODE_TAG, null); + } else { + if (barcodes.stream().anyMatch(String::isEmpty)) { + throw new IllegalArgumentException("A barcode must not be an empty String"); + } + setAttribute(BARCODE_TAG, String.join(SamConstants.BARCODE_SEQUENCE_DELIMITER, barcodes)); + } + } + public Date getRunDate() { final String dt = getAttribute(DATE_RUN_PRODUCED_TAG); if (dt == null) return null; diff --git a/src/main/java/htsjdk/samtools/util/SamConstants.java b/src/main/java/htsjdk/samtools/util/SamConstants.java index 9a6f0456c9..ec5f6816fe 100644 --- a/src/main/java/htsjdk/samtools/util/SamConstants.java +++ b/src/main/java/htsjdk/samtools/util/SamConstants.java @@ -30,9 +30,16 @@ */ public final class SamConstants { - //No need to instantiate this class since all the onstants should be static + //No need to instantiate this class since all the constants should be static private SamConstants(){}; - final static String BARCODE_SEQUENCE_DELIMITER = "-"; - final static String BARCODE_QUALITY_DELIMITER = " "; + /** + * The recommended separator to use when specifying multiple barcodes together in the same tag. + */ + public static final String BARCODE_SEQUENCE_DELIMITER = "-"; + + /** + * The recommended separator to use when specifying multiple barcode quality scores together in the same tag. 
+ */ + public static final String BARCODE_QUALITY_DELIMITER = " "; } diff --git a/src/test/java/htsjdk/samtools/SAMReadGroupRecordTest.java b/src/test/java/htsjdk/samtools/SAMReadGroupRecordTest.java index 5dde03cd94..4833a51693 100644 --- a/src/test/java/htsjdk/samtools/SAMReadGroupRecordTest.java +++ b/src/test/java/htsjdk/samtools/SAMReadGroupRecordTest.java @@ -29,7 +29,10 @@ import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.util.Arrays; +import java.util.Collections; import java.util.Date; +import java.util.List; import java.util.function.BiConsumer; import java.util.function.Function; @@ -145,4 +148,24 @@ public void testEqualsAndHashcode(final SAMReadGroupRecord rg, final Object othe } } + @DataProvider + public Object[][] getBarcodes() { + return new Object[][] { + {null, null}, + {Collections.emptyList(), ""}, + {Collections.singletonList("aa"), "aa"}, + {Arrays.asList("aa", "ac"), "aa-ac"}, + {Arrays.asList("aa", "ca", "gg"), "aa-ca-gg"} + }; + } + + @Test(dataProvider = "getBarcodes") + public void testGetAndSetBarcodes(List barcodes, String encoded){ + final SAMReadGroupRecord readGroup = new SAMReadGroupRecord("ReadGroup"); + Assert.assertNull(readGroup.getBarcodes()); + Assert.assertNull(readGroup.getAttribute(SAMReadGroupRecord.BARCODE_TAG)); + readGroup.setBarcodes(barcodes); + Assert.assertEquals(readGroup.getBarcodes(), barcodes); + Assert.assertEquals(readGroup.getAttribute(SAMReadGroupRecord.BARCODE_TAG), encoded); + } }