From 560963bc92256d57ec4e500534c829cafbe31a19 Mon Sep 17 00:00:00 2001 From: Tom White Date: Mon, 2 Sep 2019 17:23:01 +0100 Subject: [PATCH] Use adam-core-spark2_2.12:0.28.0 --- build.gradle | 2 +- .../engine/spark/datasources/ReadsSparkSink.java | 4 ++-- .../datasources/ReferenceTwoBitSparkSource.java | 11 +++++++++-- .../read/BDGAlignmentRecordToGATKReadAdapter.java | 7 +++---- .../read/GATKReadToBDGAlignmentRecordConverter.java | 12 ++++++------ .../utils/read/GATKReadAdaptersUnitTest.java | 2 -- 6 files changed, 21 insertions(+), 17 deletions(-) diff --git a/build.gradle b/build.gradle index a76be61590c..f33e676fcfb 100644 --- a/build.gradle +++ b/build.gradle @@ -281,7 +281,7 @@ dependencies { compile 'com.thoughtworks.paranamer:paranamer:2.8' compile 'org.bdgenomics.bdg-formats:bdg-formats:0.5.0' - compile('org.bdgenomics.adam:adam-core-spark2_2.11:0.20.0') { + compile('org.bdgenomics.adam:adam-core-spark2_2.12:0.28.0') { exclude group: 'org.slf4j' exclude group: 'org.apache.hadoop' exclude group: 'org.scala-lang' diff --git a/src/main/java/org/broadinstitute/hellbender/engine/spark/datasources/ReadsSparkSink.java b/src/main/java/org/broadinstitute/hellbender/engine/spark/datasources/ReadsSparkSink.java index d971b3a039f..ddeb9eb913a 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/spark/datasources/ReadsSparkSink.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/spark/datasources/ReadsSparkSink.java @@ -13,7 +13,7 @@ import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.broadcast.Broadcast; -import org.bdgenomics.adam.models.RecordGroupDictionary; +import org.bdgenomics.adam.models.ReadGroupDictionary; import org.bdgenomics.adam.models.SequenceDictionary; import org.bdgenomics.formats.avro.AlignmentRecord; import org.broadinstitute.hellbender.exceptions.GATKException; @@ -157,7 +157,7 @@ private static void writeReadsADAM( final JavaSparkContext ctx, final String outputFile, final JavaRDD reads, final SAMFileHeader header) throws IOException { final SequenceDictionary seqDict = SequenceDictionary.fromSAMSequenceDictionary(header.getSequenceDictionary()); - final RecordGroupDictionary readGroups = RecordGroupDictionary.fromSAMHeader(header); + final ReadGroupDictionary readGroups = ReadGroupDictionary.fromSAMHeader(header); final JavaPairRDD rddAlignmentRecords = reads.map(read -> { read.setHeaderStrict(header); diff --git a/src/main/java/org/broadinstitute/hellbender/engine/spark/datasources/ReferenceTwoBitSparkSource.java b/src/main/java/org/broadinstitute/hellbender/engine/spark/datasources/ReferenceTwoBitSparkSource.java index d5d4949723e..30f51785747 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/spark/datasources/ReferenceTwoBitSparkSource.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/spark/datasources/ReferenceTwoBitSparkSource.java @@ -6,15 +6,19 @@ import org.bdgenomics.adam.models.ReferenceRegion; import org.bdgenomics.adam.util.TwoBitFile; import org.bdgenomics.adam.util.TwoBitRecord; +import org.bdgenomics.formats.avro.Strand; import org.bdgenomics.utils.io.ByteAccess; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.gcs.BucketUtils; import org.broadinstitute.hellbender.utils.reference.ReferenceBases; +import scala.Tuple2; import scala.collection.JavaConversions; +import scala.collection.immutable.IndexedSeq; import java.io.IOException; import java.io.Serializable; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -39,7 +43,10 @@ public ReferenceTwoBitSparkSource( String referenceURL) throws IOException { byte[] bytes = ByteStreams.toByteArray(BucketUtils.openFile(this.referenceURL)); ByteAccess byteAccess = new DirectFullByteArrayByteAccess(bytes); this.twoBitFile = new TwoBitFile(byteAccess); - this.twoBitSeqEntries = JavaConversions.mapAsJavaMap(twoBitFile.seqRecords()); + this.twoBitSeqEntries = new LinkedHashMap<>(); + for (Tuple2 pair: JavaConversions.seqAsJavaList(twoBitFile.seqRecords())) { + twoBitSeqEntries.put(pair._1, pair._2); + } } /** @@ -74,7 +81,7 @@ private static ReferenceRegion simpleIntervalToReferenceRegion(SimpleInterval in String contig = interval.getContig(); long start = interval.getGA4GHStart(); long end = interval.getGA4GHEnd(); - return new ReferenceRegion(contig, start, end, null); + return new ReferenceRegion(contig, start, end, Strand.UNKNOWN); } private SimpleInterval cropIntervalAtContigEnd( final SimpleInterval interval ) { diff --git a/src/main/java/org/broadinstitute/hellbender/utils/read/BDGAlignmentRecordToGATKReadAdapter.java b/src/main/java/org/broadinstitute/hellbender/utils/read/BDGAlignmentRecordToGATKReadAdapter.java index ee40ccceb7e..1c71944eed1 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/read/BDGAlignmentRecordToGATKReadAdapter.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/read/BDGAlignmentRecordToGATKReadAdapter.java @@ -3,8 +3,7 @@ import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; import org.bdgenomics.adam.converters.AlignmentRecordConverter; -import org.bdgenomics.adam.models.RecordGroupDictionary; -import org.bdgenomics.adam.models.SAMFileHeaderWritable; +import org.bdgenomics.adam.models.ReadGroupDictionary; import org.bdgenomics.formats.avro.AlignmentRecord; /** @@ -30,8 +29,8 @@ public final class BDGAlignmentRecordToGATKReadAdapter extends SAMRecordToGATKRe private final AlignmentRecord alignmentRecord; public BDGAlignmentRecordToGATKReadAdapter(final AlignmentRecord alignmentRecord, final SAMFileHeader header) { - super(new AlignmentRecordConverter().convert(alignmentRecord, SAMFileHeaderWritable.apply(header), - RecordGroupDictionary.fromSAMHeader(header))); + super(new AlignmentRecordConverter().convert(alignmentRecord, header, + ReadGroupDictionary.fromSAMHeader(header))); this.alignmentRecord = alignmentRecord; } diff --git a/src/main/java/org/broadinstitute/hellbender/utils/read/GATKReadToBDGAlignmentRecordConverter.java b/src/main/java/org/broadinstitute/hellbender/utils/read/GATKReadToBDGAlignmentRecordConverter.java index eced1c5d26a..d56e72b7fa5 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/read/GATKReadToBDGAlignmentRecordConverter.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/read/GATKReadToBDGAlignmentRecordConverter.java @@ -5,7 +5,7 @@ import org.bdgenomics.formats.avro.AlignmentRecord; import org.bdgenomics.adam.converters.SAMRecordConverter; import org.bdgenomics.adam.models.SequenceDictionary; -import org.bdgenomics.adam.models.RecordGroupDictionary; +import org.bdgenomics.adam.models.ReadGroupDictionary; /** * Converts a GATKRead to a BDG AlignmentRecord @@ -15,27 +15,27 @@ public class GATKReadToBDGAlignmentRecordConverter { private SAMFileHeader header; private SequenceDictionary dict; - private RecordGroupDictionary readGroups; + private ReadGroupDictionary readGroups; public GATKReadToBDGAlignmentRecordConverter(SAMFileHeader header) { this.header = header; this.dict = SequenceDictionary.fromSAMSequenceDictionary(header.getSequenceDictionary()); - this.readGroups = RecordGroupDictionary.fromSAMHeader(header); + this.readGroups = ReadGroupDictionary.fromSAMHeader(header); } public static AlignmentRecord convert( final GATKRead gatkRead, final SAMFileHeader header ) { SequenceDictionary dict = SequenceDictionary.fromSAMSequenceDictionary(header.getSequenceDictionary()); - RecordGroupDictionary readGroups = RecordGroupDictionary.fromSAMHeader(header); + ReadGroupDictionary readGroups = ReadGroupDictionary.fromSAMHeader(header); return GATKReadToBDGAlignmentRecordConverter.convert(gatkRead, header, dict, readGroups); } public static AlignmentRecord convert( - final GATKRead gatkRead, final SAMFileHeader header, final SequenceDictionary dict, final RecordGroupDictionary readGroups ) { + final GATKRead gatkRead, final SAMFileHeader header, final SequenceDictionary dict, final ReadGroupDictionary readGroups ) { return converter.convert(gatkRead.convertToSAMRecord(header)); } public static AlignmentRecord convert( - final SAMRecord sam, final SequenceDictionary dict, final RecordGroupDictionary readGroups ) { + final SAMRecord sam, final SequenceDictionary dict, final ReadGroupDictionary readGroups ) { return converter.convert(sam); } } diff --git a/src/test/java/org/broadinstitute/hellbender/utils/read/GATKReadAdaptersUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/read/GATKReadAdaptersUnitTest.java index ce1e29537e1..7463bdba781 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/read/GATKReadAdaptersUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/read/GATKReadAdaptersUnitTest.java @@ -55,8 +55,6 @@ public Object[][] readPairsForToString() { private static GATKRead basicReadBackedByADAMRecord(final SAMRecord sam) { final AlignmentRecord record = new AlignmentRecord(); - record.setContigName(sam.getContig()); - record.setRecordGroupSample(sam.getReadGroup().getSample()); record.setReadName(sam.getReadName()); record.setSequence(new String(sam.getReadBases())); record.setStart((long)sam.getAlignmentStart()-1); //ADAM records are 0-based