diff --git a/src/main/java/htsjdk/samtools/metrics/MetricsFile.java b/src/main/java/htsjdk/samtools/metrics/MetricsFile.java index ac0628abe1..980a88e966 100644 --- a/src/main/java/htsjdk/samtools/metrics/MetricsFile.java +++ b/src/main/java/htsjdk/samtools/metrics/MetricsFile.java @@ -194,7 +194,8 @@ private void printBeanMetrics(final BufferedWriter out, final FormatUtil formatt } // Write out a header row with the type of the metric class - out.append(METRIC_HEADER + getBeanType().getName()); + out.append(METRIC_HEADER); + out.append(getBeanType().getName()); out.newLine(); // Write out the column headers diff --git a/src/main/java/htsjdk/samtools/util/IntervalList.java b/src/main/java/htsjdk/samtools/util/IntervalList.java index d92b2b6edf..31c0d552d1 100644 --- a/src/main/java/htsjdk/samtools/util/IntervalList.java +++ b/src/main/java/htsjdk/samtools/util/IntervalList.java @@ -188,6 +188,7 @@ public IntervalList uniqued(final boolean concatenateNames) { return value; } + /** * Sorts and uniques the list of intervals held within this interval list. * @@ -248,7 +249,7 @@ public List getUniqueIntervals() { * @param concatenateNames If false, the merged interval has the name of the earlier interval. This keeps name shorter. */ public static List getUniqueIntervals(final IntervalList list, final boolean concatenateNames) { - return getUniqueIntervals(list, concatenateNames, false); + return getUniqueIntervals(list, true, concatenateNames, false); } //NO SIDE EFFECTS HERE! @@ -256,10 +257,11 @@ public static List getUniqueIntervals(final IntervalList list, final b /** * Merges list of intervals and reduces them like htsjdk.samtools.util.IntervalList#getUniqueIntervals() * + * @param combineAbuttingIntervals If true, intervals that are abutting will be combined into one interval. * @param concatenateNames If false, the merged interval has the name of the earlier interval. This keeps name shorter. * @param enforceSameStrands enforce that merged intervals have the same strand, otherwise ignore. */ - public static List getUniqueIntervals(final IntervalList list, final boolean concatenateNames, final boolean enforceSameStrands) { + public static List getUniqueIntervals(final IntervalList list, final boolean combineAbuttingIntervals, final boolean concatenateNames, final boolean enforceSameStrands) { final List intervals; if (list.getHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) { @@ -276,7 +278,7 @@ public static List getUniqueIntervals(final IntervalList list, final b if (current == null) { toBeMerged.add(next); current = next; - } else if (current.intersects(next) || current.abuts(next)) { + } else if (current.intersects(next) || (combineAbuttingIntervals && current.abuts(next))) { if (enforceSameStrands && current.isNegativeStrand() != next.isNegativeStrand()) { throw new SAMException("Strands were not equal for: " + current.toString() + " and " + next.toString()); } @@ -476,14 +478,14 @@ public static IntervalList fromName(final SAMFileHeader header, final String seq } /** - * Calls {@link #fromFile(java.io.File)} on the provided files, and returns their {@link #union(java.util.Collection)}. + * Calls {@link #fromFile(java.io.File)} on the provided files, and returns their {@link #concatenate(Collection)} */ public static IntervalList fromFiles(final Collection intervalListFiles) { final Collection intervalLists = new ArrayList<>(); for (final File file : intervalListFiles) { intervalLists.add(IntervalList.fromFile(file)); } - return IntervalList.union(intervalLists); + return IntervalList.concatenate(intervalLists); } /** diff --git a/src/main/java/htsjdk/samtools/util/IntervalUtil.java b/src/main/java/htsjdk/samtools/util/IntervalUtil.java index d761707fb0..e06b0d9c9d 100644 --- a/src/main/java/htsjdk/samtools/util/IntervalUtil.java +++ b/src/main/java/htsjdk/samtools/util/IntervalUtil.java @@ -73,4 +73,44 @@ public static void assertOrderedNonOverlapping(final Iterator interval prevSequenceIndex = thisSequenceIndex; } } + + public static class IntervalCombiner { + private boolean combineAbutting = true; + private boolean concatenateNames = true; + + public boolean isCombineAbutting() { + return combineAbutting; + } + + public IntervalCombiner setCombineAbutting(final boolean combineAbutting) { + this.combineAbutting = combineAbutting; + return this; + } + + public boolean isConcatenateNames() { + return concatenateNames; + } + + public IntervalCombiner setConcatenateNames(final boolean concatenateNames) { + this.concatenateNames = concatenateNames; + return this; + } + + public boolean isEnforceSameStrand() { + return enforceSameStrand; + } + + public IntervalCombiner setEnforceSameStrand(final boolean enforceSameStrand) { + this.enforceSameStrand = enforceSameStrand; + return this; + } + + private boolean enforceSameStrand = false; + + public IntervalList combine(final IntervalList intervalList){ + IntervalList retVal = new IntervalList(intervalList.getHeader()); + retVal.addall(IntervalList.getUniqueIntervals(intervalList, combineAbutting, concatenateNames, enforceSameStrand)); + return retVal; + } + } } diff --git a/src/test/java/htsjdk/samtools/metrics/MetricsFileTest.java b/src/test/java/htsjdk/samtools/metrics/MetricsFileTest.java index 2d8bf6f10c..65b6fba603 100644 --- a/src/test/java/htsjdk/samtools/metrics/MetricsFileTest.java +++ b/src/test/java/htsjdk/samtools/metrics/MetricsFileTest.java @@ -50,7 +50,7 @@ public class MetricsFileTest extends HtsjdkTest { public enum TestEnum {One, Two, Three} public static class TestMetric extends MetricBase implements Cloneable, Serializable { - private static final long serialVersionUID = 1l; + private static final long serialVersionUID = 1L; public String STRING_PROP; public Date DATE_PROP; @@ -97,9 +97,6 @@ public void testFloatingPointEquality() throws IOException { MetricsFile file2 = writeThenReadBack(file); Assert.assertEquals(file, file2); - - - } @Test @@ -186,10 +183,13 @@ public void areMetricsFilesEqualTest(){ final File TEST_DIR = new File("src/test/resources/htsjdk/samtools/metrics/"); final File file1 = new File(TEST_DIR,"metricsOne.metrics"); final File file2 = new File(TEST_DIR,"metricsOneCopy.metrics"); + final File file3 = new File(TEST_DIR,"metricsOneCopyReordered.metrics"); + final File fileModifiedHist = new File(TEST_DIR,"metricsOneModifiedHistogram.metrics"); final File fileModifiedMet = new File(TEST_DIR,"metricsOneModifiedMetrics.metrics"); Assert.assertTrue(MetricsFile.areMetricsEqual(file1, file2)); + Assert.assertTrue(MetricsFile.areMetricsEqual(file1, file3)); Assert.assertTrue(MetricsFile.areMetricsEqual(file1, fileModifiedHist)); Assert.assertFalse(MetricsFile.areMetricsAndHistogramsEqual(file1, fileModifiedHist)); @@ -198,17 +198,14 @@ public void areMetricsFilesEqualTest(){ } /** Helper method to persist metrics to file and read them back again. */ - private MetricsFile writeThenReadBack(MetricsFile in) throws IOException { + private MetricsFile writeThenReadBack(MetricsFile in) throws IOException { File f = File.createTempFile("test", ".metrics"); f.deleteOnExit(); FileWriter out = new FileWriter(f); in.write(out); - MetricsFile retval = new MetricsFile(); + MetricsFile retval = new MetricsFile(); retval.read(new FileReader(f)); return retval; } - - - } diff --git a/src/test/java/htsjdk/samtools/util/IntervalUtilTest.java b/src/test/java/htsjdk/samtools/util/IntervalUtilTest.java new file mode 100644 index 0000000000..d3a17b8d15 --- /dev/null +++ b/src/test/java/htsjdk/samtools/util/IntervalUtilTest.java @@ -0,0 +1,51 @@ +package htsjdk.samtools.util; + +import htsjdk.HtsjdkTest; +import htsjdk.samtools.SAMFileHeader; +import htsjdk.variant.utils.SAMSequenceDictionaryExtractor; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +public class IntervalUtilTest extends HtsjdkTest { + + final SAMFileHeader header = new SAMFileHeader(SAMSequenceDictionaryExtractor.extractDictionary( + IOUtil.getPath("src/test/resources/htsjdk/samtools/intervallist/IntervalListchr123_empty.interval_list"))); + final IntervalList intervalList = new IntervalList(header); + + public IntervalUtilTest() throws IOException {} + + @BeforeClass + public void initialize() throws IOException { + + intervalList.add(new Interval("1", 1, 2, true, "tiny_at_1")); + intervalList.add(new Interval("1", 2, 3, true, "tiny_at_2")); + intervalList.add(new Interval("1", 4, 5, true, "tiny_at_4")); + } + + @DataProvider + public Iterator testMergeDifferentlyData() { + final List tests = new ArrayList<>(); + + IntervalList result1 = new IntervalList(header); + result1.add(new Interval("1", 1, 5, true, "tiny_at_1|tiny_at_2|tiny_at_4")); + tests.add(new Object[]{true, true, true, result1}); + + return tests.iterator(); + } + + @Test(dataProvider = "testMergeDifferentlyData") + public void testMergeDifferently(final boolean mergeAbutting, final boolean concatNames, final boolean requireSameStrand, final IntervalList expectedResult) { + IntervalUtil.IntervalCombiner combiner = new IntervalUtil.IntervalCombiner(); + + combiner.setCombineAbutting(mergeAbutting).setConcatenateNames(concatNames).setEnforceSameStrand(requireSameStrand); + Assert.assertEquals(expectedResult, combiner.combine(intervalList)); + + } +} \ No newline at end of file diff --git a/src/test/resources/htsjdk/samtools/metrics/metricsOneCopyReordered.metrics b/src/test/resources/htsjdk/samtools/metrics/metricsOneCopyReordered.metrics new file mode 100644 index 0000000000..56ad7ed02f --- /dev/null +++ b/src/test/resources/htsjdk/samtools/metrics/metricsOneCopyReordered.metrics @@ -0,0 +1,13 @@ +## htsjdk.samtools.metrics.StringHeader +# picard.illumina.MarkIlluminaAdapters INPUT=testdata/picard/illumina/MarkIlluminaAdaptersTest/unevenReads.sam OUTPUT=/var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/uneven5946421709712534555.sam METRICS=/var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/uneven4591996041776878558.metrics MIN_MATCH_BASES_SE=12 MIN_MATCH_BASES_PE=6 MAX_ERROR_RATE_SE=0.1 MAX_ERROR_RATE_PE=0.1 ADAPTERS=[INDEXED, DUAL_INDEXED, PAIRED_END] ADAPTER_TRUNCATION_LENGTH=30 PRUNE_ADAPTER_LIST_AFTER_THIS_MANY_ADAPTERS_SEEN=100 NUM_ADAPTERS_TO_KEEP=1 VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false GA4GH_CLIENT_SECRETS=client_secrets.json +## htsjdk.samtools.metrics.StringHeader +# Started on: Mon Aug 24 13:31:51 EDT 2015 + +## METRICS CLASS htsjdk.samtools.metrics.MetricsFileTest$TestMetric +DATE_PROP SHORT_PROP INTEGER_PROP LONG_PROP FLOAT_PROP DOUBLE_PROP ENUM_PROP BOOLEAN_PROP CHARACTER_PROP SHORT_PRIMITIVE INT_PRIMITIVE LONG_PRIMITIVE FLOAT_PRIMITIVE DOUBLE_PRIMITIVE BOOLEAN_PRIMITIVE CHAR_PRIMITIVE STRING_PROP +2008-12-31 123 9223372036854775807 456.789001 0.713487 Two N A 123 919834781 9223372034707292160 0.55694 0.229233 Y B Hello World + +## HISTOGRAM java.lang.Integer +clipped_bases read_count +6 1 +7 1