From 7abb3d7f0fe34356abd6846d144953f9a15a253a Mon Sep 17 00:00:00 2001
From: Charles Shale
Date: Wed, 3 Aug 2022 09:09:04 +1000
Subject: [PATCH] SvPrep: added discordant junctions

---
 .../svprep/reads/DiscordantGroups.java        | 237 ++++++++++++++++++
 .../hmftools/svprep/reads/JunctionData.java   |   5 +
 .../svprep/reads/JunctionTracker.java         |  46 +++++
 .../hmftools/svprep/reads/ReadRecord.java     |   6 +-
 4 files changed, 292 insertions(+), 2 deletions(-)
 create mode 100644 sv-prep/src/main/java/com/hartwig/hmftools/svprep/reads/DiscordantGroups.java

diff --git a/sv-prep/src/main/java/com/hartwig/hmftools/svprep/reads/DiscordantGroups.java b/sv-prep/src/main/java/com/hartwig/hmftools/svprep/reads/DiscordantGroups.java
new file mode 100644
index 0000000000..9412ad4f16
--- /dev/null
+++ b/sv-prep/src/main/java/com/hartwig/hmftools/svprep/reads/DiscordantGroups.java
@@ -0,0 +1,237 @@
+package com.hartwig.hmftools.svprep.reads;
+
+import static java.lang.Math.max;
+import static java.lang.Math.min;
+
+import static com.hartwig.hmftools.common.utils.sv.BaseRegion.positionWithin;
+import static com.hartwig.hmftools.common.utils.sv.StartEndIterator.SE_END;
+import static com.hartwig.hmftools.common.utils.sv.StartEndIterator.SE_START;
+import static com.hartwig.hmftools.common.utils.sv.SvCommonUtils.NEG_ORIENT;
+import static com.hartwig.hmftools.common.utils.sv.SvCommonUtils.POS_ORIENT;
+import static com.hartwig.hmftools.svprep.SvCommon.SV_LOGGER;
+import static com.hartwig.hmftools.svprep.reads.ReadRecord.UNMAPPED_CHR;
+
+import java.util.List;
+import java.util.Set;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+import com.hartwig.hmftools.common.genome.chromosome.HumanChromosome;
+import com.hartwig.hmftools.common.utils.sv.ChrBaseRegion;
+
+/**
+ * Forms junctions from clusters of discordant read groups, being those whose first read has a fragment longer than the
+ * permitted maximum, a mate on another chromosome or a mate on the same strand.
+ */
+public final class DiscordantGroups
+{
+    public static final int MIN_FRAGMENT_COUNT = 5;
+    public static final int MAX_START_DISTANCE = 500;
+    public static final int MAX_END_DISTANCE = 1000;
+
+    private DiscordantGroups() {}
+
+    /**
+     * Clusters read groups whose lower and upper positions both lie close to those of a seed group, and creates
+     * junctions for each cluster of at least MIN_FRAGMENT_COUNT groups.
+     *
+     * @param readGroups candidate discordant read groups, see isDiscordantGroup
+     * @return the junctions created from the clusters, empty if no cluster was large enough
+     */
+    public static List<JunctionData> formDiscordantJunctions(final List<ReadGroup> readGroups)
+    {
+        List<JunctionData> discordantJunctions = Lists.newArrayList();
+
+        // NOTE(review): the element type assumes ReadGroup.id() returns a String - confirm
+        Set<String> assignedGroups = Sets.newHashSet();
+
+        // a seed needs MIN_FRAGMENT_COUNT - 1 later groups to form a cluster, so the last viable seed index is
+        // size - MIN_FRAGMENT_COUNT inclusive
+        for(int i = 0; i <= readGroups.size() - MIN_FRAGMENT_COUNT; ++i)
+        {
+            ReadGroup group1 = readGroups.get(i);
+
+            // a group already placed in an earlier cluster cannot seed another one
+            if(assignedGroups.contains(group1.id()))
+                continue;
+
+            ChrBaseRegion[] regions1 = groupRegions(group1);
+            List<ReadGroup> closeGroups = null;
+
+            for(int j = i + 1; j < readGroups.size(); ++j)
+            {
+                ReadGroup group2 = readGroups.get(j);
+
+                if(assignedGroups.contains(group2.id()))
+                    continue;
+
+                ChrBaseRegion[] regions2 = groupRegions(group2);
+
+                if(regionsWithinRange(regions1, regions2))
+                {
+                    if(closeGroups == null)
+                        closeGroups = Lists.newArrayList(group1);
+
+                    closeGroups.add(group2);
+
+                    // widen the seed regions to span the new group
+                    regions1[SE_START].setStart(min(regions1[SE_START].start(), regions2[SE_START].start()));
+                    regions1[SE_START].setEnd(max(regions1[SE_START].end(), regions2[SE_START].end()));
+                    regions1[SE_END].setStart(min(regions1[SE_END].start(), regions2[SE_END].start()));
+                    regions1[SE_END].setEnd(max(regions1[SE_END].end(), regions2[SE_END].end()));
+                }
+            }
+
+            if(closeGroups != null && closeGroups.size() >= MIN_FRAGMENT_COUNT)
+            {
+                closeGroups.forEach(x -> assignedGroups.add(x.id()));
+                addJunctions(closeGroups, regions1, discordantJunctions);
+            }
+        }
+
+        return discordantJunctions;
+    }
+
+    /**
+     * Creates a junction for each of the cluster's two regions where a read lies exactly on the region boundary: a read
+     * with negative orientation starting at the region start, otherwise a read with positive orientation ending at the
+     * region end. Every group in the cluster is added as support for each junction created.
+     *
+     * @param readGroups the clustered read groups
+     * @param regions the lower and upper regions spanned by the cluster
+     * @param discordantJunctions collection to which new junctions are added
+     */
+    private static void addJunctions(
+            final List<ReadGroup> readGroups, final ChrBaseRegion[] regions, final List<JunctionData> discordantJunctions)
+    {
+        for(int se = SE_START; se <= SE_END; ++se)
+        {
+            // find a read matching the region boundary and orientation
+            ChrBaseRegion region = regions[se];
+            int junctionPosition = 0;
+            byte junctionOrientation = 0;
+            ReadRecord boundaryRead = null;
+
+            for(ReadGroup readGroup : readGroups)
+            {
+                ReadRecord read = readGroup.reads().stream()
+                        .filter(x -> x.start() == region.start() && x.orientation() == NEG_ORIENT).findFirst().orElse(null);
+
+                if(read != null)
+                {
+                    junctionPosition = read.start();
+                    junctionOrientation = read.orientation();
+                    boundaryRead = read;
+                    break;
+                }
+
+                // positive-orientation reads contribute their end position to the region (see groupRegions) and to the
+                // junction position, so the boundary match must also be made on the read end
+                read = readGroup.reads().stream()
+                        .filter(x -> x.end() == region.end() && x.orientation() == POS_ORIENT).findFirst().orElse(null);
+
+                if(read != null)
+                {
+                    junctionPosition = read.end();
+                    junctionOrientation = read.orientation();
+                    boundaryRead = read;
+                    break;
+                }
+            }
+
+            if(boundaryRead != null)
+            {
+                JunctionData junctionData = new JunctionData(junctionPosition, junctionOrientation, boundaryRead);
+                discordantJunctions.add(junctionData);
+
+                junctionData.markDiscordantGroup();
+
+                for(ReadGroup readGroup : readGroups)
+                {
+                    junctionData.SupportingGroups.add(readGroup);
+                    readGroup.addJunctionPosition(junctionData.Position);
+                }
+            }
+        }
+    }
+
+    /**
+     * Tests whether a read group is discordant. Only the first read is checked since only that read is used to
+     * position the group, see groupRegions.
+     *
+     * @param readGroup group to test, which must hold at least one read
+     * @param maxFragmentLength fragment insert sizes above this value are discordant
+     * @return true if the group's first read is discordant
+     */
+    public static boolean isDiscordantGroup(final ReadGroup readGroup, final int maxFragmentLength)
+    {
+        return isDiscordantRead(readGroup.reads().get(0), maxFragmentLength);
+    }
+
+    /**
+     * A read is discordant if it and its mate are both mapped and the fragment is longer than the permitted maximum,
+     * spans two chromosomes, or has both reads on the same strand.
+     */
+    private static boolean isDiscordantRead(final ReadRecord read, final int maxFragmentLength)
+    {
+        if(read.Chromosome.equals(UNMAPPED_CHR) || read.MateChromosome.equals(UNMAPPED_CHR))
+            return false;
+
+        if(read.fragmentInsertSize() > maxFragmentLength)
+            return true;
+
+        if(!read.Chromosome.equals(read.MateChromosome))
+            return true;
+
+        return read.record().getReadNegativeStrandFlag() == read.record().getMateNegativeStrandFlag();
+    }
+
+    /**
+     * Tests that each region of the second group is on the same chromosome as the corresponding region of the first
+     * group and starts within MAX_START_DISTANCE of its start.
+     */
+    private static boolean regionsWithinRange(final ChrBaseRegion[] regions1, final ChrBaseRegion[] regions2)
+    {
+        for(int se = SE_START; se <= SE_END; ++se)
+        {
+            if(!regions1[se].Chromosome.equals(regions2[se].Chromosome))
+                return false;
+
+            // NOTE(review): MAX_START_DISTANCE is applied to both regions and MAX_END_DISTANCE is unused - confirm
+            // whether the upper region was meant to use the wider limit
+            if(!positionWithin(
+                    regions2[se].start(),
+                    regions1[se].start() - MAX_START_DISTANCE,
+                    regions1[se].start() + MAX_START_DISTANCE))
+            {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    /**
+     * Builds single-base regions for the first read of the group and for its mate, ordered with the lower chromosome
+     * rank first and then the lower position first. The read's position is its end if it has positive orientation,
+     * otherwise its start; the mate's position is its start.
+     */
+    private static ChrBaseRegion[] groupRegions(final ReadGroup readGroup)
+    {
+        ReadRecord read = readGroup.reads().get(0);
+        String chr1 = read.Chromosome;
+        String chr2 = read.MateChromosome;
+        int pos1 = read.orientation() == POS_ORIENT ? read.end() : read.start();
+        int pos2 = read.MatePosStart;
+
+        int rank1 = HumanChromosome.chromosomeRank(chr1);
+        int rank2 = HumanChromosome.chromosomeRank(chr2);
+
+        boolean firstIsLower = rank1 != rank2 ? rank1 < rank2 : pos1 < pos2;
+
+        ChrBaseRegion region1 = new ChrBaseRegion(chr1, pos1, pos1);
+        ChrBaseRegion region2 = new ChrBaseRegion(chr2, pos2, pos2);
+
+        return firstIsLower ? new ChrBaseRegion[] { region1, region2 } : new ChrBaseRegion[] { region2, region1 };
+    }
+}
diff --git a/sv-prep/src/main/java/com/hartwig/hmftools/svprep/reads/JunctionData.java b/sv-prep/src/main/java/com/hartwig/hmftools/svprep/reads/JunctionData.java index 73bd2e2b10..7bb6090a15 100644 --- a/sv-prep/src/main/java/com/hartwig/hmftools/svprep/reads/JunctionData.java +++ b/sv-prep/src/main/java/com/hartwig/hmftools/svprep/reads/JunctionData.java @@ -17,6 +17,7 @@ public class JunctionData public final List RemoteJunctions; private boolean mInternalIndel; + private boolean mDiscordantGroup; private boolean mHotspot; private int mDepth; @@ -32,6 +33,7 @@ public JunctionData(final int position, final byte orientation, final ReadRecord mHotspot = false; mInternalIndel = false; + mDiscordantGroup = false; mDepth = 0; } @@ -46,6 +48,9 @@ public JunctionData(final int position, final byte orientation, final ReadRecord public boolean internalIndel() { return mInternalIndel; } public void markInternalIndel() { mInternalIndel = true; } + public boolean discordantGroup() { return mDiscordantGroup; } + public void markDiscordantGroup() { mDiscordantGroup = true; } + public void setDepth(int depth) { mDepth = depth; 
} public int depth() { return mDepth; } diff --git a/sv-prep/src/main/java/com/hartwig/hmftools/svprep/reads/JunctionTracker.java b/sv-prep/src/main/java/com/hartwig/hmftools/svprep/reads/JunctionTracker.java index 777d66b329..329e423fb1 100644 --- a/sv-prep/src/main/java/com/hartwig/hmftools/svprep/reads/JunctionTracker.java +++ b/sv-prep/src/main/java/com/hartwig/hmftools/svprep/reads/JunctionTracker.java @@ -9,10 +9,13 @@ import static com.hartwig.hmftools.common.utils.sv.StartEndIterator.SE_START; import static com.hartwig.hmftools.common.utils.sv.SvCommonUtils.NEG_ORIENT; import static com.hartwig.hmftools.common.utils.sv.SvCommonUtils.POS_ORIENT; +import static com.hartwig.hmftools.svprep.SvCommon.SV_LOGGER; import static com.hartwig.hmftools.svprep.SvConstants.LOW_BASE_QUALITY; import static com.hartwig.hmftools.svprep.SvConstants.MIN_HOTSPOT_JUNCTION_SUPPORT; import static com.hartwig.hmftools.svprep.SvConstants.MIN_INDEL_SUPPORT_LENGTH; import static com.hartwig.hmftools.svprep.SvConstants.MIN_MAP_QUALITY; +import static com.hartwig.hmftools.svprep.reads.DiscordantGroups.formDiscordantJunctions; +import static com.hartwig.hmftools.svprep.reads.DiscordantGroups.isDiscordantGroup; import static com.hartwig.hmftools.svprep.reads.ReadFilterType.INSERT_MAP_OVERLAP; import static com.hartwig.hmftools.svprep.reads.ReadFilterType.POLY_G_SC; import static com.hartwig.hmftools.svprep.reads.ReadFilters.isChimericRead; @@ -189,6 +192,7 @@ public void assignFragments() mInitialSupportingFrags = candidateSupportGroups.size(); + List nonJunctionGroups = Lists.newArrayList(); Set supportedJunctions = Sets.newHashSet(); for(ReadGroup readGroup : candidateSupportGroups) { @@ -209,8 +213,23 @@ public void assignFragments() supportedJunctions.forEach(x -> x.SupportingGroups.add(readGroup)); supportedJunctions.forEach(x -> readGroup.addJunctionPosition(x.Position)); } + else if(!groupInBlacklist(readGroup) && isDiscordantGroup(readGroup, mFilterConfig.fragmentLengthMax())) + { 
+ nonJunctionGroups.add(readGroup); + } } + if(nonJunctionGroups.size() > 1000) + { + SV_LOGGER.info("region({}) checking discordant groups from {} read groups", mRegion, nonJunctionGroups.size()); + } + + List discordantJunctions = formDiscordantJunctions(nonJunctionGroups); + discordantJunctions.forEach(x -> addJunction(x)); + + // no obvious need to re-check support at these junctions since all proximate facing read groups have already been tested + // and allocated to these groups + if(mBaseDepth != null) { mJunctions.forEach(x -> x.setDepth(getBaseDepth(x.Position))); @@ -397,6 +416,30 @@ else if(junctionData.Position > junctionPosition) return junctionData; } + private void addJunction(final JunctionData newJunction) + { + int index = 0; + + while(index < mJunctions.size()) + { + JunctionData junctionData = mJunctions.get(index); + + if(junctionData.Position == newJunction.Position) + { + if(junctionData.Orientation == newJunction.Orientation) + return; + } + else if(junctionData.Position > newJunction.Position) + { + break; + } + + ++index; + } + + mJunctions.add(index, newJunction); + } + private void checkJunctionSupport(final ReadRecord read, final Set supportedJunctions) { // first check indel support @@ -704,6 +747,9 @@ private boolean junctionHasSupport(final JunctionData junctionData) if(junctionData.isExisting()) return !junctionData.JunctionGroups.isEmpty() || !junctionData.SupportingGroups.isEmpty(); + if(junctionData.discordantGroup()) + return true; + // 1 junction read, 3 exact supporting reads altogether and 1 map-qual read int junctionFrags = junctionData.JunctionGroups.size(); diff --git a/sv-prep/src/main/java/com/hartwig/hmftools/svprep/reads/ReadRecord.java b/sv-prep/src/main/java/com/hartwig/hmftools/svprep/reads/ReadRecord.java index 82bc998ff9..30869acc39 100644 --- a/sv-prep/src/main/java/com/hartwig/hmftools/svprep/reads/ReadRecord.java +++ b/sv-prep/src/main/java/com/hartwig/hmftools/svprep/reads/ReadRecord.java @@ -44,6 +44,8 @@ 
public class ReadRecord public static ReadRecord from(final SAMRecord record) { return new ReadRecord(record); } + public static final String UNMAPPED_CHR = "-1"; + public ReadRecord(final SAMRecord record) { mRecord = record; @@ -55,7 +57,7 @@ public ReadRecord(final SAMRecord record) } else { - Chromosome = "-1"; + Chromosome = UNMAPPED_CHR; Positions = new int[] { 0, 0 }; } @@ -66,7 +68,7 @@ public ReadRecord(final SAMRecord record) } else { - MateChromosome = "-1"; + MateChromosome = UNMAPPED_CHR; MatePosStart = 0; }