Skip to content

Commit

Permalink
Changed IntervalList fromFiles() so that it doesn't call .unique() on… (
Browse files Browse the repository at this point in the history
#1273)

* Changed IntervalList fromFiles() so that it doesn't call .unique() on the interval list before returning it. While this is a change in API, it is more consistent with fromFile() that similarly doesn't call .unique() before returning the result.

This should fix an issue in picard where CollectHsMetrics (incorrectly) merges abutting intervals prior to merging them.

**NOTA BENE** BREAKING CHANGE!

Users who would like to keep the old behaviour should call uniqued() on the resulting IntervalList prior to using.
  • Loading branch information
Yossi Farjoun authored Feb 22, 2019
1 parent 3b3d107 commit 0cc762f
Show file tree
Hide file tree
Showing 6 changed files with 119 additions and 15 deletions.
3 changes: 2 additions & 1 deletion src/main/java/htsjdk/samtools/metrics/MetricsFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,8 @@ private void printBeanMetrics(final BufferedWriter out, final FormatUtil formatt
}

// Write out a header row with the type of the metric class
out.append(METRIC_HEADER + getBeanType().getName());
out.append(METRIC_HEADER);
out.append(getBeanType().getName());
out.newLine();

// Write out the column headers
Expand Down
12 changes: 7 additions & 5 deletions src/main/java/htsjdk/samtools/util/IntervalList.java
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ public IntervalList uniqued(final boolean concatenateNames) {
return value;
}


/**
* Sorts and uniques the list of intervals held within this interval list.
*
Expand Down Expand Up @@ -248,18 +249,19 @@ public List<Interval> getUniqueIntervals() {
* @param concatenateNames If false, the merged interval has the name of the earlier interval. This keeps name shorter.
*/
public static List<Interval> getUniqueIntervals(final IntervalList list, final boolean concatenateNames) {
return getUniqueIntervals(list, concatenateNames, false);
return getUniqueIntervals(list, true, concatenateNames, false);
}

//NO SIDE EFFECTS HERE!

/**
* Merges list of intervals and reduces them like htsjdk.samtools.util.IntervalList#getUniqueIntervals()
*
* @param combineAbuttingIntervals If true, intervals that are abutting will be combined into one interval.
* @param concatenateNames If false, the merged interval has the name of the earlier interval. This keeps name shorter.
* @param enforceSameStrands enforce that merged intervals have the same strand, otherwise ignore.
*/
public static List<Interval> getUniqueIntervals(final IntervalList list, final boolean concatenateNames, final boolean enforceSameStrands) {
public static List<Interval> getUniqueIntervals(final IntervalList list, final boolean combineAbuttingIntervals, final boolean concatenateNames, final boolean enforceSameStrands) {

final List<Interval> intervals;
if (list.getHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
Expand All @@ -276,7 +278,7 @@ public static List<Interval> getUniqueIntervals(final IntervalList list, final b
if (current == null) {
toBeMerged.add(next);
current = next;
} else if (current.intersects(next) || current.abuts(next)) {
} else if (current.intersects(next) || (combineAbuttingIntervals && current.abuts(next))) {
if (enforceSameStrands && current.isNegativeStrand() != next.isNegativeStrand()) {
throw new SAMException("Strands were not equal for: " + current.toString() + " and " + next.toString());
}
Expand Down Expand Up @@ -476,14 +478,14 @@ public static IntervalList fromName(final SAMFileHeader header, final String seq
}

/**
* Calls {@link #fromFile(java.io.File)} on the provided files, and returns their {@link #union(java.util.Collection)}.
* Calls {@link #fromFile(java.io.File)} on the provided files, and returns their {@link #concatenate(Collection)}
*/
public static IntervalList fromFiles(final Collection<File> intervalListFiles) {
final Collection<IntervalList> intervalLists = new ArrayList<>();
for (final File file : intervalListFiles) {
intervalLists.add(IntervalList.fromFile(file));
}
return IntervalList.union(intervalLists);
return IntervalList.concatenate(intervalLists);
}

/**
Expand Down
40 changes: 40 additions & 0 deletions src/main/java/htsjdk/samtools/util/IntervalUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -73,4 +73,44 @@ public static void assertOrderedNonOverlapping(final Iterator<Interval> interval
prevSequenceIndex = thisSequenceIndex;
}
}

public static class IntervalCombiner {
private boolean combineAbutting = true;
private boolean concatenateNames = true;

public boolean isCombineAbutting() {
return combineAbutting;
}

public IntervalCombiner setCombineAbutting(final boolean combineAbutting) {
this.combineAbutting = combineAbutting;
return this;
}

public boolean isConcatenateNames() {
return concatenateNames;
}

public IntervalCombiner setConcatenateNames(final boolean concatenateNames) {
this.concatenateNames = concatenateNames;
return this;
}

public boolean isEnforceSameStrand() {
return enforceSameStrand;
}

public IntervalCombiner setEnforceSameStrand(final boolean enforceSameStrand) {
this.enforceSameStrand = enforceSameStrand;
return this;
}

private boolean enforceSameStrand = false;

public IntervalList combine(final IntervalList intervalList){
IntervalList retVal = new IntervalList(intervalList.getHeader());
retVal.addall(IntervalList.getUniqueIntervals(intervalList, combineAbutting, concatenateNames, enforceSameStrand));
return retVal;
}
}
}
15 changes: 6 additions & 9 deletions src/test/java/htsjdk/samtools/metrics/MetricsFileTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public class MetricsFileTest extends HtsjdkTest {
public enum TestEnum {One, Two, Three}

public static class TestMetric extends MetricBase implements Cloneable, Serializable {
private static final long serialVersionUID = 1l;
private static final long serialVersionUID = 1L;

public String STRING_PROP;
public Date DATE_PROP;
Expand Down Expand Up @@ -97,9 +97,6 @@ public void testFloatingPointEquality() throws IOException {

MetricsFile<FloatingPointMetric,Integer> file2 = writeThenReadBack(file);
Assert.assertEquals(file, file2);



}

@Test
Expand Down Expand Up @@ -186,10 +183,13 @@ public void areMetricsFilesEqualTest(){
final File TEST_DIR = new File("src/test/resources/htsjdk/samtools/metrics/");
final File file1 = new File(TEST_DIR,"metricsOne.metrics");
final File file2 = new File(TEST_DIR,"metricsOneCopy.metrics");
final File file3 = new File(TEST_DIR,"metricsOneCopyReordered.metrics");

final File fileModifiedHist = new File(TEST_DIR,"metricsOneModifiedHistogram.metrics");
final File fileModifiedMet = new File(TEST_DIR,"metricsOneModifiedMetrics.metrics");

Assert.assertTrue(MetricsFile.areMetricsEqual(file1, file2));
Assert.assertTrue(MetricsFile.areMetricsEqual(file1, file3));
Assert.assertTrue(MetricsFile.areMetricsEqual(file1, fileModifiedHist));

Assert.assertFalse(MetricsFile.areMetricsAndHistogramsEqual(file1, fileModifiedHist));
Expand All @@ -198,17 +198,14 @@ public void areMetricsFilesEqualTest(){
}

/** Helper method to persist metrics to file and read them back again. */
private <METRIC extends MetricBase> MetricsFile<METRIC, Integer> writeThenReadBack(MetricsFile<METRIC,Integer> in) throws IOException {
private <METRIC extends MetricBase> MetricsFile<METRIC, Integer> writeThenReadBack(MetricsFile<METRIC, Integer> in) throws IOException {
File f = File.createTempFile("test", ".metrics");
f.deleteOnExit();
FileWriter out = new FileWriter(f);
in.write(out);

MetricsFile<METRIC,Integer> retval = new MetricsFile<METRIC,Integer>();
MetricsFile<METRIC, Integer> retval = new MetricsFile<METRIC,Integer>();
retval.read(new FileReader(f));
return retval;
}



}
51 changes: 51 additions & 0 deletions src/test/java/htsjdk/samtools/util/IntervalUtilTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package htsjdk.samtools.util;

import htsjdk.HtsjdkTest;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.variant.utils.SAMSequenceDictionaryExtractor;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

public class IntervalUtilTest extends HtsjdkTest {

final SAMFileHeader header = new SAMFileHeader(SAMSequenceDictionaryExtractor.extractDictionary(
IOUtil.getPath("src/test/resources/htsjdk/samtools/intervallist/IntervalListchr123_empty.interval_list")));
final IntervalList intervalList = new IntervalList(header);

public IntervalUtilTest() throws IOException {}

@BeforeClass
public void initialize() throws IOException {

intervalList.add(new Interval("1", 1, 2, true, "tiny_at_1"));
intervalList.add(new Interval("1", 2, 3, true, "tiny_at_2"));
intervalList.add(new Interval("1", 4, 5, true, "tiny_at_4"));
}

@DataProvider
public Iterator<Object[]> testMergeDifferentlyData() {
final List<Object[]> tests = new ArrayList<>();

IntervalList result1 = new IntervalList(header);
result1.add(new Interval("1", 1, 5, true, "tiny_at_1|tiny_at_2|tiny_at_4"));
tests.add(new Object[]{true, true, true, result1});

return tests.iterator();
}

@Test(dataProvider = "testMergeDifferentlyData")
public void testMergeDifferently(final boolean mergeAbutting, final boolean concatNames, final boolean requireSameStrand, final IntervalList expectedResult) {
IntervalUtil.IntervalCombiner combiner = new IntervalUtil.IntervalCombiner();

combiner.setCombineAbutting(mergeAbutting).setConcatenateNames(concatNames).setEnforceSameStrand(requireSameStrand);
Assert.assertEquals(expectedResult, combiner.combine(intervalList));

}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
## htsjdk.samtools.metrics.StringHeader
# picard.illumina.MarkIlluminaAdapters INPUT=testdata/picard/illumina/MarkIlluminaAdaptersTest/unevenReads.sam OUTPUT=/var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/uneven5946421709712534555.sam METRICS=/var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/uneven4591996041776878558.metrics MIN_MATCH_BASES_SE=12 MIN_MATCH_BASES_PE=6 MAX_ERROR_RATE_SE=0.1 MAX_ERROR_RATE_PE=0.1 ADAPTERS=[INDEXED, DUAL_INDEXED, PAIRED_END] ADAPTER_TRUNCATION_LENGTH=30 PRUNE_ADAPTER_LIST_AFTER_THIS_MANY_ADAPTERS_SEEN=100 NUM_ADAPTERS_TO_KEEP=1 VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false GA4GH_CLIENT_SECRETS=client_secrets.json
## htsjdk.samtools.metrics.StringHeader
# Started on: Mon Aug 24 13:31:51 EDT 2015

## METRICS CLASS htsjdk.samtools.metrics.MetricsFileTest$TestMetric
DATE_PROP SHORT_PROP INTEGER_PROP LONG_PROP FLOAT_PROP DOUBLE_PROP ENUM_PROP BOOLEAN_PROP CHARACTER_PROP SHORT_PRIMITIVE INT_PRIMITIVE LONG_PRIMITIVE FLOAT_PRIMITIVE DOUBLE_PRIMITIVE BOOLEAN_PRIMITIVE CHAR_PRIMITIVE STRING_PROP
2008-12-31 123 9223372036854775807 456.789001 0.713487 Two N A 123 919834781 9223372034707292160 0.55694 0.229233 Y B Hello World

## HISTOGRAM java.lang.Integer
clipped_bases read_count
6 1
7 1

0 comments on commit 0cc762f

Please sign in to comment.