From c48424196483ae4f0251e8dc0cc2735611c9abf9 Mon Sep 17 00:00:00 2001 From: Alexey Philippov Date: Mon, 29 Oct 2018 21:22:10 +0300 Subject: [PATCH] MergeSamFiles accept SO:UNKNOWN (#1069) - case checks for header's tags/values; - added implementation that treating non-conforming SO tag values as "unknown"; - corresponding tests reworked, few more tests added. --- .../java/htsjdk/samtools/SAMFileHeader.java | 10 +- .../htsjdk/samtools/SAMTextHeaderCodec.java | 30 +++++- .../htsjdk/samtools/SAMFileHeaderTest.java | 91 ++++++++++++++++++- .../htsjdk/samtools/ValidateSamFileTest.java | 7 -- 4 files changed, 124 insertions(+), 14 deletions(-) diff --git a/src/main/java/htsjdk/samtools/SAMFileHeader.java b/src/main/java/htsjdk/samtools/SAMFileHeader.java index 1d123684f1..935746043d 100644 --- a/src/main/java/htsjdk/samtools/SAMFileHeader.java +++ b/src/main/java/htsjdk/samtools/SAMFileHeader.java @@ -246,7 +246,7 @@ public SortOrder getSortOrder() { try { return SortOrder.valueOf(so); } catch (IllegalArgumentException e) { - log.warn("Found non conforming header SO tag: " + so + ". Treating as 'unknown'."); + log.warn("Found non-conforming header SO tag: " + so + ". Treating as 'unknown'."); sortOrder = SortOrder.unknown; } } @@ -308,12 +308,18 @@ public void setAttribute(final String key, final Object value) { */ @Override public void setAttribute(final String key, final String value) { + String tempVal = value; if (key.equals(SORT_ORDER_TAG)) { this.sortOrder = null; + try { + tempVal = SortOrder.valueOf(value).toString(); + } catch (IllegalArgumentException e) { + tempVal = SortOrder.unknown.toString(); + } } else if (key.equals(GROUP_ORDER_TAG)) { this.groupOrder = null; } - super.setAttribute(key, value); + super.setAttribute(key, tempVal); } /** diff --git a/src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java b/src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java index 908e8360b8..5e4f64cd34 100644 --- a/src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java +++ b/src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java @@ -23,10 +23,12 @@ */ package htsjdk.samtools; +import htsjdk.samtools.SAMFileHeader.SortOrder; import htsjdk.samtools.util.DateParser; import htsjdk.samtools.util.LineReader; import htsjdk.samtools.util.RuntimeIOException; import htsjdk.samtools.util.StringUtil; +import htsjdk.samtools.util.Log; import java.io.BufferedWriter; import java.io.IOException; @@ -69,6 +71,7 @@ public class SAMTextHeaderCodec { private static final Pattern FIELD_SEPARATOR_RE = Pattern.compile(FIELD_SEPARATOR); public static final String COMMENT_PREFIX = HEADER_LINE_START + HeaderRecordType.CO.name() + FIELD_SEPARATOR; + private static final Log log = Log.getInstance(SAMTextHeaderCodec.class); void setWriter(final BufferedWriter writer) { this.writer = writer; @@ -231,10 +234,10 @@ private void parseHDLine(final ParsedHeaderLine parsedHeaderLine) { final String soString = parsedHeaderLine.getValue(SAMFileHeader.SORT_ORDER_TAG); try { - if (soString != null) SAMFileHeader.SortOrder.valueOf(soString); + if (soString != null) SortOrder.valueOf(soString); } catch (IllegalArgumentException e) { reportErrorParsingLine(HEADER_LINE_START + parsedHeaderLine.getHeaderRecordType() + - " line has non-conforming SO tag value: "+ soString + ".", + " line has non-conforming SO tag value: " + soString + ".", SAMValidationError.Type.HEADER_TAG_NON_CONFORMING_VALUE, null); } @@ -323,11 +326,30 @@ private class ParsedHeaderLine { SAMValidationError.Type.HEADER_TAG_MULTIPLY_DEFINED, null); continue; } + validateSortOrderValue(keyAndValue); mKeyValuePairs.put(keyAndValue[0], keyAndValue[1]); } lineValid = true; } + private void validateSortOrderValue(String[] value) { + if (SAMFileHeader.SORT_ORDER_TAG.equals(value[0])) { + try { + SortOrder.valueOf(value[1]); + } catch (IllegalArgumentException e) { + if (validationStringency == ValidationStringency.STRICT) { + throw new SAMFormatException("Found non-conforming header SO tag: " + + value[1] + + ", exiting because VALIDATION_STRINGENCY=STRICT"); + } else if (validationStringency == ValidationStringency.LENIENT) { + log.warn("Found non-conforming header SO tag: " + + value[1] + ". Treating as 'unknown'."); + } + value[1] = SortOrder.unknown.toString(); + } + } + } + /** * True if the line is recognized as one of the valid HeaderRecordTypes. */ @@ -462,7 +484,7 @@ protected String getPGLine(final SAMProgramRecord programRecord) { private void writeRGLine(final SAMReadGroupRecord readGroup) { println(getRGLine(readGroup)); } - + protected String getRGLine(final SAMReadGroupRecord readGroup) { final String[] fields = new String[2 + readGroup.getAttributes().size()]; fields[0] = HEADER_LINE_START + HeaderRecordType.RG; @@ -525,4 +547,4 @@ public void setValidationStringency(final ValidationStringency validationStringe } this.validationStringency = validationStringency; } -} +} \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/SAMFileHeaderTest.java b/src/test/java/htsjdk/samtools/SAMFileHeaderTest.java index 86946d0f5d..911a7bd24c 100644 --- a/src/test/java/htsjdk/samtools/SAMFileHeaderTest.java +++ b/src/test/java/htsjdk/samtools/SAMFileHeaderTest.java @@ -23,7 +23,9 @@ package htsjdk.samtools; import htsjdk.HtsjdkTest; +import htsjdk.samtools.util.BufferedLineReader; import org.testng.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.util.Arrays; @@ -31,7 +33,7 @@ public class SAMFileHeaderTest extends HtsjdkTest { @Test - public void testSortOrder() { + public void testSortOrderManualSetting() { final SAMFileHeader header = new SAMFileHeader(); header.setSortOrder(SAMFileHeader.SortOrder.coordinate); @@ -45,6 +47,21 @@ public void testSortOrder() { header.setAttribute(SAMFileHeader.SORT_ORDER_TAG, SAMFileHeader.SortOrder.coordinate); Assert.assertEquals(header.getSortOrder(), SAMFileHeader.SortOrder.coordinate); Assert.assertEquals(header.getAttribute(SAMFileHeader.SORT_ORDER_TAG), SAMFileHeader.SortOrder.coordinate.name()); + + header.setAttribute(SAMFileHeader.SORT_ORDER_TAG, "UNKNOWN"); + Assert.assertEquals(header.getSortOrder(), SAMFileHeader.SortOrder.unknown); + Assert.assertEquals(header.getAttribute(SAMFileHeader.SORT_ORDER_TAG), + SAMFileHeader.SortOrder.unknown.name()); + + header.setAttribute(SAMFileHeader.SORT_ORDER_TAG, "uNknOWn"); + Assert.assertEquals(header.getSortOrder(), SAMFileHeader.SortOrder.unknown); + Assert.assertEquals(header.getAttribute(SAMFileHeader.SORT_ORDER_TAG), + SAMFileHeader.SortOrder.unknown.name()); + + header.setAttribute(SAMFileHeader.SORT_ORDER_TAG, "cOoRdinate"); + Assert.assertEquals(header.getSortOrder(), SAMFileHeader.SortOrder.unknown); + Assert.assertEquals(header.getAttribute(SAMFileHeader.SORT_ORDER_TAG), + SAMFileHeader.SortOrder.unknown.name()); } @Test @@ -81,4 +98,76 @@ public void testGetSequenceIfNameIsNotFound() { Assert.assertNull(header.getSequence("chr2")); } + + @Test + public void testWrongTag() { + String[] testData = new String[]{ + "@hd\tVN:1.0\tSO:unsorted\n", + "@sq\tSN:chrM\tLN:16571\n", + "@rg\tID:1\tSM:sample1\n", + "@pg\tID:1\tPN:A\n", + "@co\tVN:1.0\tSO:unsorted\n" + }; + for (String stringHeader : testData) { + SAMTextHeaderCodec codec = new SAMTextHeaderCodec(); + SAMFileHeader header = codec.decode(BufferedLineReader.fromString(stringHeader), null); + String validationErrors = header.getValidationErrors().toString(); + Assert.assertTrue(validationErrors.contains("Unrecognized header record type")); + } + + } + + @DataProvider(name = "DataForWrongTagTests") + public Object[][] dataForWrongTagTests() { + return new Object[][] { + {"@HD\tVN:1.0\tSO:UNSORTED\n"}, + {"@HD\tVN:1.0\tSO:FALSE\n"}, + {"@HD\tVN:1.0\tSO:COORDINATE\n"}, + {"@HD\tVN:1.0\tSO:uNknOWn\n"}, + {"@HD\tVN:1.0\tSO:cOoRdinate\n"}, + }; + } + + @Test(dataProvider = "DataForWrongTagTests") + public void testSortOrderCodecSetting(String hdr) { + String validString = "@HD\tVN:1.0\tSO:unknown\n"; + + SAMTextHeaderCodec codec = new SAMTextHeaderCodec(); + SAMFileHeader header = codec.decode(BufferedLineReader.fromString(validString), null); + + header.setSortOrder(SAMFileHeader.SortOrder.coordinate); + Assert.assertEquals(header.getSortOrder(), SAMFileHeader.SortOrder.coordinate); + Assert.assertEquals(header.getAttribute(SAMFileHeader.SORT_ORDER_TAG), SAMFileHeader.SortOrder.coordinate.name()); + + header.setSortOrder(SAMFileHeader.SortOrder.unsorted); + Assert.assertEquals(header.getSortOrder(), SAMFileHeader.SortOrder.unsorted); + Assert.assertEquals(header.getAttribute(SAMFileHeader.SORT_ORDER_TAG), SAMFileHeader.SortOrder.unsorted.name()); + + header.setAttribute(SAMFileHeader.SORT_ORDER_TAG, "badname"); + Assert.assertEquals(header.getSortOrder(), SAMFileHeader.SortOrder.unknown); + Assert.assertEquals(header.getAttribute(SAMFileHeader.SORT_ORDER_TAG), SAMFileHeader.SortOrder.unknown.name()); + + header = codec.decode(BufferedLineReader.fromString(hdr), null); + Assert.assertTrue(header.getSortOrder().toString().equals("unknown")); + } + + @Test(dataProvider = "DataForWrongTagTests", expectedExceptions = SAMFormatException.class) + public void testValidationStringencyStrict(String stringHeader) { + SAMTextHeaderCodec codec = new SAMTextHeaderCodec(); + codec.setValidationStringency(ValidationStringency.STRICT); + codec.decode(BufferedLineReader.fromString(stringHeader), null); + } + + @Test(dataProvider = "DataForWrongTagTests") + public void testValidationStringencyLenientAndSilent(String stringHeader) { + SAMTextHeaderCodec codec = new SAMTextHeaderCodec(); + + codec.setValidationStringency(ValidationStringency.LENIENT); + SAMFileHeader headerLenient = codec.decode(BufferedLineReader.fromString(stringHeader), null); + Assert.assertTrue(headerLenient.getSortOrder().equals(SAMFileHeader.SortOrder.unknown)); + + codec.setValidationStringency(ValidationStringency.SILENT); + SAMFileHeader headerSilent = codec.decode(BufferedLineReader.fromString(stringHeader), null); + Assert.assertTrue(headerSilent.getSortOrder().equals(SAMFileHeader.SortOrder.unknown)); + } } diff --git a/src/test/java/htsjdk/samtools/ValidateSamFileTest.java b/src/test/java/htsjdk/samtools/ValidateSamFileTest.java index a7a218c802..190a6d5073 100644 --- a/src/test/java/htsjdk/samtools/ValidateSamFileTest.java +++ b/src/test/java/htsjdk/samtools/ValidateSamFileTest.java @@ -562,12 +562,6 @@ public Object[][] tagCorrectlyProcessData() throws IOException { "@RG\tID:0\tSM:Hi,Mom!\n" + "E\t147\tchr1\t15\t255\t10M\t=\t2\t-30\tCAACAGAAGC\t)'.*.+2,))\tU2:Z:CAA"; - final String SOTagCorrectlyProcessTestData = - "@HD\tVN:1.0\tSO:NOTKNOWN\n" + - "@SQ\tSN:chr1\tLN:101\n" + - "@RG\tID:0\tSM:Hi,Mom!\n" + - "E\t147\tchr1\t15\t255\t10M\t=\t2\t-30\tCAACAGAAGC\t)'.*.+2,))\tU2:Z:CAA"; - final String GOTagCorrectlyProcessTestData = "@HD\tVN:1.0\tGO:NOTKNOWN\n" + "@SQ\tSN:chr1\tLN:101\n" + @@ -578,7 +572,6 @@ public Object[][] tagCorrectlyProcessData() throws IOException { {E2TagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.E2_BASE_EQUALS_PRIMARY_BASE}, {E2TagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.MISMATCH_READ_LENGTH_AND_E2_LENGTH}, {U2TagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.MISMATCH_READ_LENGTH_AND_U2_LENGTH}, - {SOTagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.HEADER_TAG_NON_CONFORMING_VALUE}, {GOTagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.HEADER_TAG_NON_CONFORMING_VALUE} }; }