Skip to content

Commit

Permalink
MergeSamFiles accept SO:UNKNOWN (#1069)
Browse files Browse the repository at this point in the history
- case checks for header's tags/values;
- added implementation that treating non-conforming SO tag values as "unknown";
- corresponding tests reworked, few more tests added.
  • Loading branch information
a-filipp0v authored and Yossi Farjoun committed Oct 29, 2018
1 parent f00a754 commit c484241
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 14 deletions.
10 changes: 8 additions & 2 deletions src/main/java/htsjdk/samtools/SAMFileHeader.java
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ public SortOrder getSortOrder() {
try {
return SortOrder.valueOf(so);
} catch (IllegalArgumentException e) {
log.warn("Found non conforming header SO tag: " + so + ". Treating as 'unknown'.");
log.warn("Found non-conforming header SO tag: " + so + ". Treating as 'unknown'.");
sortOrder = SortOrder.unknown;
}
}
Expand Down Expand Up @@ -308,12 +308,18 @@ public void setAttribute(final String key, final Object value) {
*/
@Override
public void setAttribute(final String key, final String value) {
String tempVal = value;
if (key.equals(SORT_ORDER_TAG)) {
this.sortOrder = null;
try {
tempVal = SortOrder.valueOf(value).toString();
} catch (IllegalArgumentException e) {
tempVal = SortOrder.unknown.toString();
}
} else if (key.equals(GROUP_ORDER_TAG)) {
this.groupOrder = null;
}
super.setAttribute(key, value);
super.setAttribute(key, tempVal);
}

/**
Expand Down
30 changes: 26 additions & 4 deletions src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@
*/
package htsjdk.samtools;

import htsjdk.samtools.SAMFileHeader.SortOrder;
import htsjdk.samtools.util.DateParser;
import htsjdk.samtools.util.LineReader;
import htsjdk.samtools.util.RuntimeIOException;
import htsjdk.samtools.util.StringUtil;
import htsjdk.samtools.util.Log;

import java.io.BufferedWriter;
import java.io.IOException;
Expand Down Expand Up @@ -69,6 +71,7 @@ public class SAMTextHeaderCodec {
private static final Pattern FIELD_SEPARATOR_RE = Pattern.compile(FIELD_SEPARATOR);

public static final String COMMENT_PREFIX = HEADER_LINE_START + HeaderRecordType.CO.name() + FIELD_SEPARATOR;
private static final Log log = Log.getInstance(SAMTextHeaderCodec.class);

void setWriter(final BufferedWriter writer) {
this.writer = writer;
Expand Down Expand Up @@ -231,10 +234,10 @@ private void parseHDLine(final ParsedHeaderLine parsedHeaderLine) {

final String soString = parsedHeaderLine.getValue(SAMFileHeader.SORT_ORDER_TAG);
try {
if (soString != null) SAMFileHeader.SortOrder.valueOf(soString);
if (soString != null) SortOrder.valueOf(soString);
} catch (IllegalArgumentException e) {
reportErrorParsingLine(HEADER_LINE_START + parsedHeaderLine.getHeaderRecordType() +
" line has non-conforming SO tag value: "+ soString + ".",
" line has non-conforming SO tag value: " + soString + ".",
SAMValidationError.Type.HEADER_TAG_NON_CONFORMING_VALUE, null);
}

Expand Down Expand Up @@ -323,11 +326,30 @@ private class ParsedHeaderLine {
SAMValidationError.Type.HEADER_TAG_MULTIPLY_DEFINED, null);
continue;
}
validateSortOrderValue(keyAndValue);
mKeyValuePairs.put(keyAndValue[0], keyAndValue[1]);
}
lineValid = true;
}

private void validateSortOrderValue(String[] value) {
if (SAMFileHeader.SORT_ORDER_TAG.equals(value[0])) {
try {
SortOrder.valueOf(value[1]);
} catch (IllegalArgumentException e) {
if (validationStringency == ValidationStringency.STRICT) {
throw new SAMFormatException("Found non-conforming header SO tag: "
+ value[1]
+ ", exiting because VALIDATION_STRINGENCY=STRICT");
} else if (validationStringency == ValidationStringency.LENIENT) {
log.warn("Found non-conforming header SO tag: "
+ value[1] + ". Treating as 'unknown'.");
}
value[1] = SortOrder.unknown.toString();
}
}
}

/**
* True if the line is recognized as one of the valid HeaderRecordTypes.
*/
Expand Down Expand Up @@ -462,7 +484,7 @@ protected String getPGLine(final SAMProgramRecord programRecord) {
private void writeRGLine(final SAMReadGroupRecord readGroup) {
println(getRGLine(readGroup));
}

protected String getRGLine(final SAMReadGroupRecord readGroup) {
final String[] fields = new String[2 + readGroup.getAttributes().size()];
fields[0] = HEADER_LINE_START + HeaderRecordType.RG;
Expand Down Expand Up @@ -525,4 +547,4 @@ public void setValidationStringency(final ValidationStringency validationStringe
}
this.validationStringency = validationStringency;
}
}
}
91 changes: 90 additions & 1 deletion src/test/java/htsjdk/samtools/SAMFileHeaderTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,17 @@
package htsjdk.samtools;

import htsjdk.HtsjdkTest;
import htsjdk.samtools.util.BufferedLineReader;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.util.Arrays;

public class SAMFileHeaderTest extends HtsjdkTest {

@Test
public void testSortOrder() {
public void testSortOrderManualSetting() {
final SAMFileHeader header = new SAMFileHeader();

header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
Expand All @@ -45,6 +47,21 @@ public void testSortOrder() {
header.setAttribute(SAMFileHeader.SORT_ORDER_TAG, SAMFileHeader.SortOrder.coordinate);
Assert.assertEquals(header.getSortOrder(), SAMFileHeader.SortOrder.coordinate);
Assert.assertEquals(header.getAttribute(SAMFileHeader.SORT_ORDER_TAG), SAMFileHeader.SortOrder.coordinate.name());

header.setAttribute(SAMFileHeader.SORT_ORDER_TAG, "UNKNOWN");
Assert.assertEquals(header.getSortOrder(), SAMFileHeader.SortOrder.unknown);
Assert.assertEquals(header.getAttribute(SAMFileHeader.SORT_ORDER_TAG),
SAMFileHeader.SortOrder.unknown.name());

header.setAttribute(SAMFileHeader.SORT_ORDER_TAG, "uNknOWn");
Assert.assertEquals(header.getSortOrder(), SAMFileHeader.SortOrder.unknown);
Assert.assertEquals(header.getAttribute(SAMFileHeader.SORT_ORDER_TAG),
SAMFileHeader.SortOrder.unknown.name());

header.setAttribute(SAMFileHeader.SORT_ORDER_TAG, "cOoRdinate");
Assert.assertEquals(header.getSortOrder(), SAMFileHeader.SortOrder.unknown);
Assert.assertEquals(header.getAttribute(SAMFileHeader.SORT_ORDER_TAG),
SAMFileHeader.SortOrder.unknown.name());
}

@Test
Expand Down Expand Up @@ -81,4 +98,76 @@ public void testGetSequenceIfNameIsNotFound() {

Assert.assertNull(header.getSequence("chr2"));
}

@Test
public void testWrongTag() {
String[] testData = new String[]{
"@hd\tVN:1.0\tSO:unsorted\n",
"@sq\tSN:chrM\tLN:16571\n",
"@rg\tID:1\tSM:sample1\n",
"@pg\tID:1\tPN:A\n",
"@co\tVN:1.0\tSO:unsorted\n"
};
for (String stringHeader : testData) {
SAMTextHeaderCodec codec = new SAMTextHeaderCodec();
SAMFileHeader header = codec.decode(BufferedLineReader.fromString(stringHeader), null);
String validationErrors = header.getValidationErrors().toString();
Assert.assertTrue(validationErrors.contains("Unrecognized header record type"));
}

}

@DataProvider(name = "DataForWrongTagTests")
public Object[][] dataForWrongTagTests() {
return new Object[][] {
{"@HD\tVN:1.0\tSO:UNSORTED\n"},
{"@HD\tVN:1.0\tSO:FALSE\n"},
{"@HD\tVN:1.0\tSO:COORDINATE\n"},
{"@HD\tVN:1.0\tSO:uNknOWn\n"},
{"@HD\tVN:1.0\tSO:cOoRdinate\n"},
};
}

@Test(dataProvider = "DataForWrongTagTests")
public void testSortOrderCodecSetting(String hdr) {
String validString = "@HD\tVN:1.0\tSO:unknown\n";

SAMTextHeaderCodec codec = new SAMTextHeaderCodec();
SAMFileHeader header = codec.decode(BufferedLineReader.fromString(validString), null);

header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
Assert.assertEquals(header.getSortOrder(), SAMFileHeader.SortOrder.coordinate);
Assert.assertEquals(header.getAttribute(SAMFileHeader.SORT_ORDER_TAG), SAMFileHeader.SortOrder.coordinate.name());

header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
Assert.assertEquals(header.getSortOrder(), SAMFileHeader.SortOrder.unsorted);
Assert.assertEquals(header.getAttribute(SAMFileHeader.SORT_ORDER_TAG), SAMFileHeader.SortOrder.unsorted.name());

header.setAttribute(SAMFileHeader.SORT_ORDER_TAG, "badname");
Assert.assertEquals(header.getSortOrder(), SAMFileHeader.SortOrder.unknown);
Assert.assertEquals(header.getAttribute(SAMFileHeader.SORT_ORDER_TAG), SAMFileHeader.SortOrder.unknown.name());

header = codec.decode(BufferedLineReader.fromString(hdr), null);
Assert.assertTrue(header.getSortOrder().toString().equals("unknown"));
}

@Test(dataProvider = "DataForWrongTagTests", expectedExceptions = SAMFormatException.class)
public void testValidationStringencyStrict(String stringHeader) {
SAMTextHeaderCodec codec = new SAMTextHeaderCodec();
codec.setValidationStringency(ValidationStringency.STRICT);
codec.decode(BufferedLineReader.fromString(stringHeader), null);
}

@Test(dataProvider = "DataForWrongTagTests")
public void testValidationStringencyLenientAndSilent(String stringHeader) {
SAMTextHeaderCodec codec = new SAMTextHeaderCodec();

codec.setValidationStringency(ValidationStringency.LENIENT);
SAMFileHeader headerLenient = codec.decode(BufferedLineReader.fromString(stringHeader), null);
Assert.assertTrue(headerLenient.getSortOrder().equals(SAMFileHeader.SortOrder.unknown));

codec.setValidationStringency(ValidationStringency.SILENT);
SAMFileHeader headerSilent = codec.decode(BufferedLineReader.fromString(stringHeader), null);
Assert.assertTrue(headerSilent.getSortOrder().equals(SAMFileHeader.SortOrder.unknown));
}
}
7 changes: 0 additions & 7 deletions src/test/java/htsjdk/samtools/ValidateSamFileTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -562,12 +562,6 @@ public Object[][] tagCorrectlyProcessData() throws IOException {
"@RG\tID:0\tSM:Hi,Mom!\n" +
"E\t147\tchr1\t15\t255\t10M\t=\t2\t-30\tCAACAGAAGC\t)'.*.+2,))\tU2:Z:CAA";

final String SOTagCorrectlyProcessTestData =
"@HD\tVN:1.0\tSO:NOTKNOWN\n" +
"@SQ\tSN:chr1\tLN:101\n" +
"@RG\tID:0\tSM:Hi,Mom!\n" +
"E\t147\tchr1\t15\t255\t10M\t=\t2\t-30\tCAACAGAAGC\t)'.*.+2,))\tU2:Z:CAA";

final String GOTagCorrectlyProcessTestData =
"@HD\tVN:1.0\tGO:NOTKNOWN\n" +
"@SQ\tSN:chr1\tLN:101\n" +
Expand All @@ -578,7 +572,6 @@ public Object[][] tagCorrectlyProcessData() throws IOException {
{E2TagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.E2_BASE_EQUALS_PRIMARY_BASE},
{E2TagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.MISMATCH_READ_LENGTH_AND_E2_LENGTH},
{U2TagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.MISMATCH_READ_LENGTH_AND_U2_LENGTH},
{SOTagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.HEADER_TAG_NON_CONFORMING_VALUE},
{GOTagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.HEADER_TAG_NON_CONFORMING_VALUE}
};
}
Expand Down

0 comments on commit c484241

Please sign in to comment.