Skip to content

Commit

Permalink
SvPrep: added discordant junctions
Browse files Browse the repository at this point in the history
  • Loading branch information
charlesshale committed Aug 2, 2022
1 parent b4b75ca commit 7abb3d7
Show file tree
Hide file tree
Showing 4 changed files with 249 additions and 2 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
package com.hartwig.hmftools.svprep.reads;

import static java.lang.Math.max;
import static java.lang.Math.min;

import static com.hartwig.hmftools.common.utils.sv.BaseRegion.positionWithin;
import static com.hartwig.hmftools.common.utils.sv.StartEndIterator.SE_END;
import static com.hartwig.hmftools.common.utils.sv.StartEndIterator.SE_START;
import static com.hartwig.hmftools.common.utils.sv.SvCommonUtils.NEG_ORIENT;
import static com.hartwig.hmftools.common.utils.sv.SvCommonUtils.POS_ORIENT;
import static com.hartwig.hmftools.svprep.SvCommon.SV_LOGGER;
import static com.hartwig.hmftools.svprep.reads.ReadRecord.UNMAPPED_CHR;

import java.util.List;
import java.util.Set;

import com.beust.jcommander.internal.Sets;
import com.google.common.collect.Lists;
import com.hartwig.hmftools.common.genome.chromosome.HumanChromosome;
import com.hartwig.hmftools.common.utils.sv.ChrBaseRegion;

/**
 * Static helpers which cluster discordant read groups and turn each sufficiently large cluster into
 * {@link JunctionData} entries.
 *
 * A read group is reduced to two single-base regions: the boundary of its first read and the start of that
 * read's mate, ordered lower-first (see {@code groupRegions}). Groups whose regions start close together on
 * both sides are collected into a cluster, and clusters with at least {@link #MIN_FRAGMENT_COUNT} groups
 * produce junctions.
 */
public final class DiscordantGroups
{
    // minimum number of read groups in a cluster before junctions are created from it
    public static final int MIN_FRAGMENT_COUNT = 5;

    // maximum distance permitted between the region starts of two groups, applied on both sides of the pair
    public static final int MAX_START_DISTANCE = 500;

    // not referenced anywhere in this class; retained since it is public and may be used elsewhere
    public static final int MAX_END_DISTANCE = 1000;

    /**
     * Clusters the supplied read groups by proximity of both of their regions and creates junctions for every
     * cluster containing at least {@link #MIN_FRAGMENT_COUNT} groups.
     *
     * Each group is used as a cluster anchor in list order. The anchor's regions are widened as further groups
     * join, and a group which has been placed in a successful cluster is not considered again.
     *
     * @param readGroups candidate discordant read groups; each is expected to contain at least one read
     * @return the junctions created, empty if no cluster is large enough
     */
    public static List<JunctionData> formDiscordantJunctions(final List<ReadGroup> readGroups)
    {
        List<JunctionData> discordantJunctions = Lists.newArrayList();
        int groupCount = readGroups.size();

        if(groupCount < MIN_FRAGMENT_COUNT)
            return discordantJunctions;

        Set<String> assignedGroups = Sets.newHashSet();

        // regions of non-anchor groups are read-only, so compute them at most once per group rather than once per pair
        ChrBaseRegion[][] regionCache = new ChrBaseRegion[groupCount][];

        // the last anchor which can still gather MIN_FRAGMENT_COUNT groups (itself included) is at
        // index groupCount - MIN_FRAGMENT_COUNT, hence the inclusive bound
        for(int i = 0; i <= groupCount - MIN_FRAGMENT_COUNT; ++i)
        {
            ReadGroup group1 = readGroups.get(i);

            // a group already consumed by an earlier cluster must not seed another one
            if(assignedGroups.contains(group1.id()))
                continue;

            // always a fresh array for the anchor since it is widened in place below
            ChrBaseRegion[] regions1 = groupRegions(group1);
            List<ReadGroup> closeGroups = null;

            for(int j = i + 1; j < groupCount; ++j)
            {
                ReadGroup group2 = readGroups.get(j);

                if(assignedGroups.contains(group2.id()))
                    continue;

                ChrBaseRegion[] regions2 = regionCache[j];

                if(regions2 == null)
                {
                    regions2 = groupRegions(group2);
                    regionCache[j] = regions2;
                }

                if(!regionsWithinRange(regions1, regions2))
                    continue;

                if(closeGroups == null)
                    closeGroups = Lists.newArrayList(group1);

                closeGroups.add(group2);

                // widen the anchor's regions to cover the new group
                for(int se = SE_START; se <= SE_END; ++se)
                {
                    regions1[se].setStart(min(regions1[se].start(), regions2[se].start()));
                    regions1[se].setEnd(max(regions1[se].end(), regions2[se].end()));
                }
            }

            if(closeGroups != null && closeGroups.size() >= MIN_FRAGMENT_COUNT)
            {
                closeGroups.forEach(x -> assignedGroups.add(x.id()));
                addJunctions(closeGroups, regions1, discordantJunctions);
            }
        }

        return discordantJunctions;
    }

    /**
     * Creates up to one junction per side (start and end region) of a cluster. A junction is only created when
     * one of the cluster's reads sits exactly on the outer edge of that region with the matching orientation.
     *
     * @param readGroups the groups forming the cluster, all of which are registered as support for each junction
     * @param regions the cluster's widened start and end regions
     * @param discordantJunctions list to which new junctions are appended
     */
    private static void addJunctions(
            final List<ReadGroup> readGroups, final ChrBaseRegion[] regions, final List<JunctionData> discordantJunctions)
    {
        // determine orientation and then create junctions for any local regions
        for(int se = SE_START; se <= SE_END; ++se)
        {
            ReadRecord boundaryRead = findBoundaryRead(readGroups, regions[se]);

            if(boundaryRead == null)
                continue;

            byte junctionOrientation = boundaryRead.orientation();
            int junctionPosition = junctionOrientation == NEG_ORIENT ? boundaryRead.start() : boundaryRead.end();

            JunctionData junctionData = new JunctionData(junctionPosition, junctionOrientation, boundaryRead);
            discordantJunctions.add(junctionData);

            junctionData.markDiscordantGroup();
            readGroups.forEach(x -> junctionData.SupportingGroups.add(x));
            readGroups.forEach(x -> x.addJunctionPosition(junctionData.Position));
        }
    }

    /**
     * Finds a read lying on the edge of the region with the orientation expected at that edge, checking the
     * groups in order and returning the first match, or null if there is none.
     *
     * Within each group the checks are:
     * 1. a negative-orientation read starting at the region start
     * 2. a positive-orientation read ending at the region end, matching how groupRegions records forward reads
     * 3. a positive-orientation read starting at the region end, which covers boundaries taken from a mate's
     *    start position
     *
     * Only reads on the region's chromosome are considered so that a coinciding coordinate on another
     * chromosome cannot be picked up.
     */
    private static ReadRecord findBoundaryRead(final List<ReadGroup> readGroups, final ChrBaseRegion region)
    {
        for(ReadGroup readGroup : readGroups)
        {
            ReadRecord read = readGroup.reads().stream()
                    .filter(x -> x.Chromosome.equals(region.Chromosome))
                    .filter(x -> x.orientation() == NEG_ORIENT && x.start() == region.start())
                    .findFirst().orElse(null);

            if(read != null)
                return read;

            read = readGroup.reads().stream()
                    .filter(x -> x.Chromosome.equals(region.Chromosome))
                    .filter(x -> x.orientation() == POS_ORIENT && x.end() == region.end())
                    .findFirst().orElse(null);

            if(read != null)
                return read;

            read = readGroup.reads().stream()
                    .filter(x -> x.Chromosome.equals(region.Chromosome))
                    .filter(x -> x.orientation() == POS_ORIENT && x.start() == region.end())
                    .findFirst().orElse(null);

            if(read != null)
                return read;
        }

        return null;
    }

    /**
     * Tests whether a read group is discordant.
     *
     * @param readGroup the group to test; must contain at least one read
     * @param maxFragmentLength insert sizes above this value are treated as discordant
     * @return true if the group's first read is discordant
     */
    public static boolean isDiscordantGroup(final ReadGroup readGroup, final int maxFragmentLength)
    {
        // only the first read is used when forming regions and so only that read is checked
        return isDiscordantRead(readGroup.reads().get(0), maxFragmentLength);
    }

    /**
     * A read is discordant if both it and its mate are mapped and any of the following holds: the insert size
     * exceeds the maximum, the mate is on a different chromosome, or read and mate are on the same strand.
     */
    private static boolean isDiscordantRead(final ReadRecord read, final int maxFragmentLength)
    {
        if(read.Chromosome.equals(UNMAPPED_CHR) || read.MateChromosome.equals(UNMAPPED_CHR))
            return false;

        if(read.fragmentInsertSize() > maxFragmentLength)
            return true;

        if(!read.Chromosome.equals(read.MateChromosome))
            return true;

        return read.record().getReadNegativeStrandFlag() == read.record().getMateNegativeStrandFlag();
    }

    /**
     * Returns true if, for both the start and end regions, the two groups are on the same chromosome and the
     * second group's region start lies within {@link #MAX_START_DISTANCE} of the first group's region start.
     */
    private static boolean regionsWithinRange(final ChrBaseRegion[] regions1, final ChrBaseRegion[] regions2)
    {
        for(int se = SE_START; se <= SE_END; ++se)
        {
            if(!regions1[se].Chromosome.equals(regions2[se].Chromosome))
                return false;

            int anchorStart = regions1[se].start();

            if(!positionWithin(regions2[se].start(), anchorStart - MAX_START_DISTANCE, anchorStart + MAX_START_DISTANCE))
                return false;
        }

        return true;
    }

    /**
     * Builds the pair of single-base regions for a group from its first read: the read's boundary (its end if
     * positive orientation, otherwise its start) and the mate's start position. The pair is ordered by
     * chromosome rank and then position, with the read's region placed second when the two are equal.
     */
    private static ChrBaseRegion[] groupRegions(final ReadGroup readGroup)
    {
        ReadRecord read = readGroup.reads().get(0);

        String chr1 = read.Chromosome;
        String chr2 = read.MateChromosome;
        int pos1 = read.orientation() == POS_ORIENT ? read.end() : read.start();
        int pos2 = read.MatePosStart;

        int rank1 = HumanChromosome.chromosomeRank(chr1);
        int rank2 = HumanChromosome.chromosomeRank(chr2);

        boolean firstIsLower = rank1 != rank2 ? rank1 < rank2 : pos1 < pos2;

        ChrBaseRegion readRegion = new ChrBaseRegion(chr1, pos1, pos1);
        ChrBaseRegion mateRegion = new ChrBaseRegion(chr2, pos2, pos2);

        return firstIsLower ?
                new ChrBaseRegion[] { readRegion, mateRegion } :
                new ChrBaseRegion[] { mateRegion, readRegion };
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ public class JunctionData
public final List<RemoteJunction> RemoteJunctions;

private boolean mInternalIndel;
private boolean mDiscordantGroup;
private boolean mHotspot;
private int mDepth;

Expand All @@ -32,6 +33,7 @@ public JunctionData(final int position, final byte orientation, final ReadRecord

mHotspot = false;
mInternalIndel = false;
mDiscordantGroup = false;
mDepth = 0;
}

Expand All @@ -46,6 +48,9 @@ public JunctionData(final int position, final byte orientation, final ReadRecord
public boolean internalIndel() { return mInternalIndel; }
public void markInternalIndel() { mInternalIndel = true; }

// whether this junction has been flagged by markDiscordantGroup(); false on construction
public boolean discordantGroup() { return mDiscordantGroup; }
// flags this junction as a discordant-group junction; there is no method here to clear the flag again
public void markDiscordantGroup() { mDiscordantGroup = true; }

public void setDepth(int depth) { mDepth = depth; }
public int depth() { return mDepth; }

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,13 @@
import static com.hartwig.hmftools.common.utils.sv.StartEndIterator.SE_START;
import static com.hartwig.hmftools.common.utils.sv.SvCommonUtils.NEG_ORIENT;
import static com.hartwig.hmftools.common.utils.sv.SvCommonUtils.POS_ORIENT;
import static com.hartwig.hmftools.svprep.SvCommon.SV_LOGGER;
import static com.hartwig.hmftools.svprep.SvConstants.LOW_BASE_QUALITY;
import static com.hartwig.hmftools.svprep.SvConstants.MIN_HOTSPOT_JUNCTION_SUPPORT;
import static com.hartwig.hmftools.svprep.SvConstants.MIN_INDEL_SUPPORT_LENGTH;
import static com.hartwig.hmftools.svprep.SvConstants.MIN_MAP_QUALITY;
import static com.hartwig.hmftools.svprep.reads.DiscordantGroups.formDiscordantJunctions;
import static com.hartwig.hmftools.svprep.reads.DiscordantGroups.isDiscordantGroup;
import static com.hartwig.hmftools.svprep.reads.ReadFilterType.INSERT_MAP_OVERLAP;
import static com.hartwig.hmftools.svprep.reads.ReadFilterType.POLY_G_SC;
import static com.hartwig.hmftools.svprep.reads.ReadFilters.isChimericRead;
Expand Down Expand Up @@ -189,6 +192,7 @@ public void assignFragments()

mInitialSupportingFrags = candidateSupportGroups.size();

List<ReadGroup> nonJunctionGroups = Lists.newArrayList();
Set<JunctionData> supportedJunctions = Sets.newHashSet();
for(ReadGroup readGroup : candidateSupportGroups)
{
Expand All @@ -209,8 +213,23 @@ public void assignFragments()
supportedJunctions.forEach(x -> x.SupportingGroups.add(readGroup));
supportedJunctions.forEach(x -> readGroup.addJunctionPosition(x.Position));
}
else if(!groupInBlacklist(readGroup) && isDiscordantGroup(readGroup, mFilterConfig.fragmentLengthMax()))
{
nonJunctionGroups.add(readGroup);
}
}

if(nonJunctionGroups.size() > 1000)
{
SV_LOGGER.info("region({}) checking discordant groups from {} read groups", mRegion, nonJunctionGroups.size());
}

List<JunctionData> discordantJunctions = formDiscordantJunctions(nonJunctionGroups);
discordantJunctions.forEach(x -> addJunction(x));

// no obvious need to re-check support at these junctions since all proximate facing read groups have already been tested
// and allocated to these groups

if(mBaseDepth != null)
{
mJunctions.forEach(x -> x.setDepth(getBaseDepth(x.Position)));
Expand Down Expand Up @@ -397,6 +416,30 @@ else if(junctionData.Position > junctionPosition)
return junctionData;
}

/**
 * Inserts the junction into mJunctions ahead of the first existing entry with a greater position, or at the
 * end if there is none. Nothing is added if an entry with the same position and orientation is encountered
 * before that point.
 * NOTE(review): this relies on mJunctions already being ordered by ascending position - confirm with the
 * other code which adds to it.
 */
private void addJunction(final JunctionData newJunction)
{
    // default to appending when no later-positioned junction is found
    int insertAt = mJunctions.size();

    for(int i = 0; i < mJunctions.size(); ++i)
    {
        JunctionData existing = mJunctions.get(i);

        if(existing.Position > newJunction.Position)
        {
            insertAt = i;
            break;
        }

        // same location and direction means the junction is already registered
        if(existing.Position == newJunction.Position && existing.Orientation == newJunction.Orientation)
            return;
    }

    mJunctions.add(insertAt, newJunction);
}

private void checkJunctionSupport(final ReadRecord read, final Set<JunctionData> supportedJunctions)
{
// first check indel support
Expand Down Expand Up @@ -704,6 +747,9 @@ private boolean junctionHasSupport(final JunctionData junctionData)
if(junctionData.isExisting())
return !junctionData.JunctionGroups.isEmpty() || !junctionData.SupportingGroups.isEmpty();

if(junctionData.discordantGroup())
return true;

// 1 junction read, 3 exact supporting reads altogether and 1 map-qual read
int junctionFrags = junctionData.JunctionGroups.size();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ public class ReadRecord

public static ReadRecord from(final SAMRecord record) { return new ReadRecord(record); }

public static final String UNMAPPED_CHR = "-1";

public ReadRecord(final SAMRecord record)
{
mRecord = record;
Expand All @@ -55,7 +57,7 @@ public ReadRecord(final SAMRecord record)
}
else
{
Chromosome = "-1";
Chromosome = UNMAPPED_CHR;
Positions = new int[] { 0, 0 };
}

Expand All @@ -66,7 +68,7 @@ public ReadRecord(final SAMRecord record)
}
else
{
MateChromosome = "-1";
MateChromosome = UNMAPPED_CHR;
MatePosStart = 0;
}

Expand Down

0 comments on commit 7abb3d7

Please sign in to comment.