-
Notifications
You must be signed in to change notification settings - Fork 371
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add a new interval list scatter mode (#1786)
* Add a new interval list scatter mode to avoid issue of giant final list in large joint genotyping scatters
- Loading branch information
1 parent
b1e01c2
commit 9eafe4e
Showing
153 changed files
with
1,349 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
23 changes: 23 additions & 0 deletions
23
...icard/util/IntervalList/IntervalListScattererByIntervalCountWithDistributedRemainder.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
package picard.util.IntervalList; | ||
|
||
import htsjdk.samtools.util.Interval; | ||
import htsjdk.samtools.util.IntervalList; | ||
|
||
import java.util.Arrays; | ||
import java.util.List; | ||
|
||
/** | ||
* Scatters an {@link IntervalList} into `interval count` shards so that the resulting {@link IntervalList}s have | ||
* approximately the same number of intervals in them. The "remainder" intervals are distributed over the last lists. | ||
*/ | ||
public class IntervalListScattererByIntervalCountWithDistributedRemainder extends IntervalListScattererByIntervalCount { | ||
|
||
/** | ||
* Decides which of two result slots receives {@code interval}; the interval is never split. | ||
* <p> | ||
* NOTE(review): the meaning of the two slots is defined by the parent scatterer, which is not visible here; | ||
* presumably the first slot is "add to the current list" and the second is "carry over to the next list" - confirm. | ||
* | ||
* @param interval the interval to place; returned as-is in exactly one slot | ||
* @param idealSplitWeight not used by this implementation | ||
* @param currentSize compared against {@code projectSizeOfRemaining} to pick the slot | ||
* @param projectSizeOfRemaining compared against {@code currentSize} to pick the slot | ||
* @return a fixed-size two-element list: {@code (interval, null)} when {@code projectSizeOfRemaining > currentSize}, | ||
* otherwise {@code (null, interval)} | ||
*/ | ||
@Override | ||
public List<Interval> takeSome(final Interval interval, final long idealSplitWeight, final long currentSize, final double projectSizeOfRemaining) { | ||
// The whole interval goes into exactly one slot; Arrays.asList permits the null placeholder in the other. | ||
if (projectSizeOfRemaining > currentSize) { | ||
return Arrays.asList(interval, null); | ||
} else { | ||
return Arrays.asList(null, interval); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
7 changes: 7 additions & 0 deletions
7
testdata/picard/util/largeScatters/temp_0001_of_60/scattered.interval_list
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
@HD VN:1.6 SO:coordinate | ||
@SQ SN:chr1 LN:248956422 M5:6aef897c3d6ff0c78aff06ac189178dd AS:38 UR:/seq/references/Homo_sapiens_assembly38/v0/Homo_sapiens_assembly38.fasta SP:Homo sapiens | ||
@PG ID:1 CL:IntervalListTools --INPUT testdata/picard/util/test.hg38.200.interval_list --OUTPUT testdata/picard/util/largeScatters --SCATTER_COUNT 60 --SUBDIVISION_MODE INTERVAL_COUNT_WITH_DISTRIBUTED_REMAINDER --PADDING 0 --UNIQUE false --DONT_MERGE_ABUTTING false --SORT true --ACTION CONCAT --INCLUDE_FILTERED false --BREAK_BANDS_AT_MULTIPLES_OF 0 --INVERT false --OUTPUT_VALUE NONE --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --GA4GH_CLIENT_SECRETS client_secrets.json --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false PN:IntervalListTools | ||
chr1 1 195878 + target_1 | ||
chr1 195879 391754 + target_2 | ||
chr1 391755 606302 + target_3 | ||
chr1 606303 820848 + target_4 |
7 changes: 7 additions & 0 deletions
7
testdata/picard/util/largeScatters/temp_0002_of_60/scattered.interval_list
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
@HD VN:1.6 SO:coordinate | ||
@SQ SN:chr1 LN:248956422 M5:6aef897c3d6ff0c78aff06ac189178dd AS:38 UR:/seq/references/Homo_sapiens_assembly38/v0/Homo_sapiens_assembly38.fasta SP:Homo sapiens | ||
@PG ID:1 CL:IntervalListTools --INPUT testdata/picard/util/test.hg38.200.interval_list --OUTPUT testdata/picard/util/largeScatters --SCATTER_COUNT 60 --SUBDIVISION_MODE INTERVAL_COUNT_WITH_DISTRIBUTED_REMAINDER --PADDING 0 --UNIQUE false --DONT_MERGE_ABUTTING false --SORT true --ACTION CONCAT --INCLUDE_FILTERED false --BREAK_BANDS_AT_MULTIPLES_OF 0 --INVERT false --OUTPUT_VALUE NONE --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --GA4GH_CLIENT_SECRETS client_secrets.json --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false PN:IntervalListTools | ||
chr1 820849 910849 + target_5 | ||
chr1 910850 1000848 + target_6 | ||
chr1 1000849 1112599 + target_7 | ||
chr1 1112600 1224349 + target_8 |
7 changes: 7 additions & 0 deletions
7
testdata/picard/util/largeScatters/temp_0003_of_60/scattered.interval_list
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
@HD VN:1.6 SO:coordinate | ||
@SQ SN:chr1 LN:248956422 M5:6aef897c3d6ff0c78aff06ac189178dd AS:38 UR:/seq/references/Homo_sapiens_assembly38/v0/Homo_sapiens_assembly38.fasta SP:Homo sapiens | ||
@PG ID:1 CL:IntervalListTools --INPUT testdata/picard/util/test.hg38.200.interval_list --OUTPUT testdata/picard/util/largeScatters --SCATTER_COUNT 60 --SUBDIVISION_MODE INTERVAL_COUNT_WITH_DISTRIBUTED_REMAINDER --PADDING 0 --UNIQUE false --DONT_MERGE_ABUTTING false --SORT true --ACTION CONCAT --INCLUDE_FILTERED false --BREAK_BANDS_AT_MULTIPLES_OF 0 --INVERT false --OUTPUT_VALUE NONE --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --GA4GH_CLIENT_SECRETS client_secrets.json --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false PN:IntervalListTools | ||
chr1 1224350 1348642 + target_9 | ||
chr1 1348643 1472934 + target_10 | ||
chr1 1472935 1584197 + target_11 | ||
chr1 1584198 1695459 + target_12 |
7 changes: 7 additions & 0 deletions
7
testdata/picard/util/largeScatters/temp_0004_of_60/scattered.interval_list
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
@HD VN:1.6 SO:coordinate | ||
@SQ SN:chr1 LN:248956422 M5:6aef897c3d6ff0c78aff06ac189178dd AS:38 UR:/seq/references/Homo_sapiens_assembly38/v0/Homo_sapiens_assembly38.fasta SP:Homo sapiens | ||
@PG ID:1 CL:IntervalListTools --INPUT testdata/picard/util/test.hg38.200.interval_list --OUTPUT testdata/picard/util/largeScatters --SCATTER_COUNT 60 --SUBDIVISION_MODE INTERVAL_COUNT_WITH_DISTRIBUTED_REMAINDER --PADDING 0 --UNIQUE false --DONT_MERGE_ABUTTING false --SORT true --ACTION CONCAT --INCLUDE_FILTERED false --BREAK_BANDS_AT_MULTIPLES_OF 0 --INVERT false --OUTPUT_VALUE NONE --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --GA4GH_CLIENT_SECRETS client_secrets.json --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false PN:IntervalListTools | ||
chr1 1695460 1815724 + target_13 | ||
chr1 1815725 1935987 + target_14 | ||
chr1 1935988 2030188 + target_15 | ||
chr1 2030189 2124387 + target_16 |
7 changes: 7 additions & 0 deletions
7
testdata/picard/util/largeScatters/temp_0005_of_60/scattered.interval_list
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
@HD VN:1.6 SO:coordinate | ||
@SQ SN:chr1 LN:248956422 M5:6aef897c3d6ff0c78aff06ac189178dd AS:38 UR:/seq/references/Homo_sapiens_assembly38/v0/Homo_sapiens_assembly38.fasta SP:Homo sapiens | ||
@PG ID:1 CL:IntervalListTools --INPUT testdata/picard/util/test.hg38.200.interval_list --OUTPUT testdata/picard/util/largeScatters --SCATTER_COUNT 60 --SUBDIVISION_MODE INTERVAL_COUNT_WITH_DISTRIBUTED_REMAINDER --PADDING 0 --UNIQUE false --DONT_MERGE_ABUTTING false --SORT true --ACTION CONCAT --INCLUDE_FILTERED false --BREAK_BANDS_AT_MULTIPLES_OF 0 --INVERT false --OUTPUT_VALUE NONE --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --GA4GH_CLIENT_SECRETS client_secrets.json --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false PN:IntervalListTools | ||
chr1 2124388 2261379 + target_17 | ||
chr1 2261380 2398369 + target_18 | ||
chr1 2398370 2662724 + target_19 | ||
chr1 2662725 2927077 + target_20 |
7 changes: 7 additions & 0 deletions
7
testdata/picard/util/largeScatters/temp_0006_of_60/scattered.interval_list
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
@HD VN:1.6 SO:coordinate | ||
@SQ SN:chr1 LN:248956422 M5:6aef897c3d6ff0c78aff06ac189178dd AS:38 UR:/seq/references/Homo_sapiens_assembly38/v0/Homo_sapiens_assembly38.fasta SP:Homo sapiens | ||
@PG ID:1 CL:IntervalListTools --INPUT testdata/picard/util/test.hg38.200.interval_list --OUTPUT testdata/picard/util/largeScatters --SCATTER_COUNT 60 --SUBDIVISION_MODE INTERVAL_COUNT_WITH_DISTRIBUTED_REMAINDER --PADDING 0 --UNIQUE false --DONT_MERGE_ABUTTING false --SORT true --ACTION CONCAT --INCLUDE_FILTERED false --BREAK_BANDS_AT_MULTIPLES_OF 0 --INVERT false --OUTPUT_VALUE NONE --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --GA4GH_CLIENT_SECRETS client_secrets.json --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false PN:IntervalListTools | ||
chr1 2927078 3044965 + target_21 | ||
chr1 3044966 3162852 + target_22 | ||
chr1 3162853 3261512 + target_23 | ||
chr1 3261513 3360170 + target_24 |
7 changes: 7 additions & 0 deletions
7
testdata/picard/util/largeScatters/temp_0007_of_60/scattered.interval_list
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
@HD VN:1.6 SO:coordinate | ||
@SQ SN:chr1 LN:248956422 M5:6aef897c3d6ff0c78aff06ac189178dd AS:38 UR:/seq/references/Homo_sapiens_assembly38/v0/Homo_sapiens_assembly38.fasta SP:Homo sapiens | ||
@PG ID:1 CL:IntervalListTools --INPUT testdata/picard/util/test.hg38.200.interval_list --OUTPUT testdata/picard/util/largeScatters --SCATTER_COUNT 60 --SUBDIVISION_MODE INTERVAL_COUNT_WITH_DISTRIBUTED_REMAINDER --PADDING 0 --UNIQUE false --DONT_MERGE_ABUTTING false --SORT true --ACTION CONCAT --INCLUDE_FILTERED false --BREAK_BANDS_AT_MULTIPLES_OF 0 --INVERT false --OUTPUT_VALUE NONE --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --GA4GH_CLIENT_SECRETS client_secrets.json --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false PN:IntervalListTools | ||
chr1 3360171 3481821 + target_25 | ||
chr1 3481822 3603470 + target_26 | ||
chr1 3603471 3720976 + target_27 | ||
chr1 3720977 3838480 + target_28 |
7 changes: 7 additions & 0 deletions
7
testdata/picard/util/largeScatters/temp_0008_of_60/scattered.interval_list
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
@HD VN:1.6 SO:coordinate | ||
@SQ SN:chr1 LN:248956422 M5:6aef897c3d6ff0c78aff06ac189178dd AS:38 UR:/seq/references/Homo_sapiens_assembly38/v0/Homo_sapiens_assembly38.fasta SP:Homo sapiens | ||
@PG ID:1 CL:IntervalListTools --INPUT testdata/picard/util/test.hg38.200.interval_list --OUTPUT testdata/picard/util/largeScatters --SCATTER_COUNT 60 --SUBDIVISION_MODE INTERVAL_COUNT_WITH_DISTRIBUTED_REMAINDER --PADDING 0 --UNIQUE false --DONT_MERGE_ABUTTING false --SORT true --ACTION CONCAT --INCLUDE_FILTERED false --BREAK_BANDS_AT_MULTIPLES_OF 0 --INVERT false --OUTPUT_VALUE NONE --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --GA4GH_CLIENT_SECRETS client_secrets.json --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false PN:IntervalListTools | ||
chr1 3838481 3931034 + target_29 | ||
chr1 3931035 4023586 + target_30 | ||
chr1 4023587 4132844 + target_31 | ||
chr1 4132845 4242101 + target_32 |
Oops, something went wrong.