From 2e44cea2c4e6048e545b0663efadf95e20bac4ab Mon Sep 17 00:00:00 2001 From: Ethan Nelson-Moore Date: Tue, 17 Dec 2024 15:05:54 -0800 Subject: [PATCH] Fix failures to allocate arrays with a very large Java heap size The maximum length of a Java array is not exactly Integer.MAX_VALUE, but slightly less due to the space taken up by the object header. The exact maximum differs depending on the platform and Java version. This was already accounted for in one instance, but not others. This commit fixes the other instances and changes the maximum size in the existing instance to Integer.MAX_VALUE - 32 instead of Integer.MAX_VALUE - 5 to decrease the likelihood of allocation failures on different Java versions and platforms. For the MAX_RECORDS_IN_RAM computations the limit is applied before the value is narrowed to int, because casting to int first can wrap around on very large heaps and would leave the limit without effect. --- .../sam/markduplicates/EstimateLibraryComplexity.java | 4 ++-- src/main/java/picard/sam/markduplicates/MarkDuplicates.java | 6 +++--- src/main/java/picard/util/SequenceDictionaryUtils.java | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/java/picard/sam/markduplicates/EstimateLibraryComplexity.java b/src/main/java/picard/sam/markduplicates/EstimateLibraryComplexity.java index 6f32f33d8c..69fee46db3 100644 --- a/src/main/java/picard/sam/markduplicates/EstimateLibraryComplexity.java +++ b/src/main/java/picard/sam/markduplicates/EstimateLibraryComplexity.java @@ -425,7 +425,7 @@ public EstimateLibraryComplexity() { } else { sizeInBytes = PairedReadSequence.getSizeInBytes(); } - MAX_RECORDS_IN_RAM = (int) (Runtime.getRuntime().maxMemory() / sizeInBytes) / 2; + MAX_RECORDS_IN_RAM = (int) Math.min(Runtime.getRuntime().maxMemory() / sizeInBytes / 2, Integer.MAX_VALUE - 32); } /** @@ -673,4 +673,4 @@ boolean passesQualityCheck(final byte[] bases, final byte[] quals, final int see for (int i = 0; i < readLength; i++) total += quals[i]; return total / readLength >= minQuality; } -} \ No newline at end of file +} diff --git a/src/main/java/picard/sam/markduplicates/MarkDuplicates.java 
b/src/main/java/picard/sam/markduplicates/MarkDuplicates.java index 669fb8cbca..7d1b84c051 100644 --- a/src/main/java/picard/sam/markduplicates/MarkDuplicates.java +++ b/src/main/java/picard/sam/markduplicates/MarkDuplicates.java @@ -479,8 +479,8 @@ private void buildSortedReadEndLists(final boolean useBarcodes) { } else { sizeInBytes = ReadEndsForMarkDuplicates.getSizeOf(); } - MAX_RECORDS_IN_RAM = (int) (Runtime.getRuntime().maxMemory() / sizeInBytes) / 2; - final int maxInMemory = (int) ((Runtime.getRuntime().maxMemory() * SORTING_COLLECTION_SIZE_RATIO) / sizeInBytes); + MAX_RECORDS_IN_RAM = (int) Math.min(Runtime.getRuntime().maxMemory() / sizeInBytes / 2, Integer.MAX_VALUE - 32); + final int maxInMemory = Math.min((int) ((Runtime.getRuntime().maxMemory() * SORTING_COLLECTION_SIZE_RATIO) / sizeInBytes), Integer.MAX_VALUE - 32); log.info("Will retain up to " + maxInMemory + " data points before spilling to disk."); final ReadEndsForMarkDuplicatesCodec fragCodec, pairCodec, diskCodec; @@ -719,7 +719,7 @@ protected void sortIndicesForDuplicates(final boolean indexOpticalDuplicates){ entryOverhead = SortingLongCollection.SIZEOF; } // Keep this number from getting too large even if there is a huge heap. 
- int maxInMemory = (int) Math.min((Runtime.getRuntime().maxMemory() * 0.25) / entryOverhead, (double) (Integer.MAX_VALUE - 5)); + int maxInMemory = (int) Math.min((Runtime.getRuntime().maxMemory() * 0.25) / entryOverhead, (double) (Integer.MAX_VALUE - 32)); // If we're also tracking optical duplicates, reduce maxInMemory, since we'll need two sorting collections if (indexOpticalDuplicates) { maxInMemory /= ((entryOverhead + SortingLongCollection.SIZEOF) / entryOverhead); diff --git a/src/main/java/picard/util/SequenceDictionaryUtils.java b/src/main/java/picard/util/SequenceDictionaryUtils.java index 178935fb52..972f90a674 100644 --- a/src/main/java/picard/util/SequenceDictionaryUtils.java +++ b/src/main/java/picard/util/SequenceDictionaryUtils.java @@ -190,7 +190,7 @@ public static SortingCollection makeSortingCollection() { String.class, new StringCodec(), String::compareTo, - (int) Math.min(maxNamesInRam, Integer.MAX_VALUE), + (int) Math.min(maxNamesInRam, Integer.MAX_VALUE - 32), tmpDir.toPath() ); }