Skip to content

Commit

Permalink
Cobalt: removed off-target logic
Browse files Browse the repository at this point in the history
  • Loading branch information
charlesshale committed Dec 9, 2024
1 parent 41b652e commit eae22d1
Show file tree
Hide file tree
Showing 5 changed files with 4 additions and 259 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ static class SampleRatios
if(targetRegionEnrichment != null)
{
CB_LOGGER.info("using targeted ratio");
readRatios = new TargetedRatioMapper(targetRegionEnrichment, chromosomePosCodec).mapRatios(readRatios);
readRatios = new TargetedRatioMapper(targetRegionEnrichment).mapRatios(readRatios);
}

gcNormalizedRatioMapper = new GcNormalizedRatioMapper();
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,28 +1,13 @@
package com.hartwig.hmftools.cobalt.targeted;

import static java.lang.Math.round;
import static java.lang.String.format;

import static com.hartwig.hmftools.cobalt.CobaltConfig.CB_LOGGER;
import static com.hartwig.hmftools.cobalt.CobaltUtils.replaceColumn;

import java.util.List;
import java.util.stream.Collectors;

import com.hartwig.hmftools.cobalt.ChromosomePositionCodec;
import com.hartwig.hmftools.cobalt.CobaltColumns;
import com.hartwig.hmftools.cobalt.CobaltConstants;
import com.hartwig.hmftools.cobalt.lowcov.LowCoverageRatioMapper;
import com.hartwig.hmftools.cobalt.ratio.GcNormalizedRatioMapper;
import com.hartwig.hmftools.cobalt.ratio.RatioMapper;
import com.hartwig.hmftools.common.cobalt.ImmutableReadRatio;
import com.hartwig.hmftools.common.cobalt.ReadRatio;
import com.hartwig.hmftools.common.genome.position.GenomePosition;
import com.hartwig.hmftools.common.utils.Doubles;

import org.apache.commons.lang3.Validate;
import org.apache.logging.log4j.Level;
import org.jetbrains.annotations.Nullable;

import tech.tablesaw.api.DoubleColumn;
import tech.tablesaw.api.Table;
Expand All @@ -31,28 +16,15 @@
public class TargetedRatioMapper implements RatioMapper
{
private final Table mTargetRegionEnrichment;
private final ChromosomePositionCodec mChromosomePosCodec;

public TargetedRatioMapper(final Table targetRegionEnrichment,
ChromosomePositionCodec chromosomePosCodec)
public TargetedRatioMapper(final Table targetRegionEnrichment)
{
mTargetRegionEnrichment = targetRegionEnrichment;
mChromosomePosCodec = chromosomePosCodec;
}

// we use on target ratios only for now
@Override
public Table mapRatios(final Table inputRatios)
{
    // delegates straight to the on-target path; only on-target ratios are produced by this mapper
    return onTargetRatios(inputRatios);
}

// currently a no-op stub: the combined-ratio accumulation is disabled (body commented out)
// NOTE(review): if no caller depends on this hook, consider deleting it outright
private void populateCombinedRatios(final Table ratios1, final Table ratios2)
{
    // mCombinedRatios = ratios1.append(ratios2);
}

Table onTargetRatios(final Table inputRatios)
{
// find all the ratios that are inside the target enriched regions
// we filter out all the regions with 0 gc normalised ratios, as they do not actually
Expand Down Expand Up @@ -82,153 +54,4 @@ Table onTargetRatios(final Table inputRatios)

return onTargetRatios;
}

// we create a pan window ratio by taking the median count of super windows that combine multiple windows
/**
 * Builds consolidated off-target ratios from the raw per-window ratios.
 *
 * Pipeline: join the input ratios against the target-region enrichment table, keep only
 * off-target rows with a non-negative ratio and a present relative enrichment, normalise
 * each ratio by its relative enrichment, consolidate windows via the low-coverage mapper,
 * then apply GC normalisation.
 *
 * NOTE(review): the super-window size is hard-coded to 1000 below — confirm whether this
 * should come from configuration.
 *
 * @param inputRatios table of raw window ratios keyed by encoded chromosome/position
 * @return the enrichment- and GC-normalised off-target ratio table
 */
Table offTargetRatios(final Table inputRatios)
{
    // merge in the targeted region columns
    Table offTargetRatios = inputRatios.joinOn(CobaltColumns.ENCODED_CHROMOSOME_POS).inner(mTargetRegionEnrichment);

    // resort it, the join messes up with the ordering
    offTargetRatios = offTargetRatios.sortAscendingOn(CobaltColumns.ENCODED_CHROMOSOME_POS);

    // keep only off-target rows with a usable (non-negative) ratio and a known relative enrichment
    offTargetRatios = offTargetRatios.where(
        offTargetRatios.booleanColumn("offTarget").asSelection()
            .and(offTargetRatios.doubleColumn("ratio").isNonNegative())
            .and(offTargetRatios.doubleColumn("relativeEnrichment").isNotMissing()));

    // double median = offTargetRatios.doubleColumn("ratio").median();

    // normalise the ratio by relative enrichment
    replaceColumn(offTargetRatios, "ratio",
        offTargetRatios.doubleColumn("ratio")
            .divide(offTargetRatios.doubleColumn("relativeEnrichment")));

    CB_LOGGER.info("off target after enrichment normalisation: \n{}", offTargetRatios);

    // next we do low coverage
    offTargetRatios = new LowCoverageRatioMapper(1000, mChromosomePosCodec).mapRatios(offTargetRatios);

    CB_LOGGER.info("off target after consolidation: \n{}", offTargetRatios);

    // apply gc normalisation
    GcNormalizedRatioMapper gcNormalizedRatioMapper = new GcNormalizedRatioMapper();
    offTargetRatios = gcNormalizedRatioMapper.mapRatios(offTargetRatios);

    CB_LOGGER.info("off target gc normalisation: \n{}", gcNormalizedRatioMapper.gcMedianReadDepthTable());
    CB_LOGGER.info("off target after gc normalisation: \n{}", offTargetRatios);

    return offTargetRatios;

    // remove any with invalid ratios
    // mOffTargetRatios = offTargetRatios.where(offTargetRatios.doubleColumn(CobaltColumns.RATIO).)

    // legacy pre-tablesaw implementation retained below for reference
    /*
    Window window = new Window(offTargetWindowSize);
    for(String chromosome : rawRatios.stringColumn("chromosome").unique())
    {
        int currentWindowStart = 1;
        List<ReadRatio> windowGcRatios = new ArrayList<>();
        // we need this to make sure we get consistent chromosome name (1 vs chr1)
        for(ReadRatio readRatio : rawRatios.get(chromosome))
        {
            // todo: make sure this is sorted
            int windowStart = window.start(readRatio.position());
            if(windowStart != currentWindowStart)
            {
                if(currentWindowStart != -1)
                {
                    ReadRatio unnormalizedRatio = unnormalizedOffTargetRatio(
                        offTargetWindowSize, chromosomeStr, currentWindowStart, windowGcRatios, targetRegions);
                    if(unnormalizedRatio != null)
                    {
                        unnormalizedRatios.put(chromosome, unnormalizedRatio);
                    }
                }
                currentWindowStart = windowStart;
                windowGcRatios.clear();
            }
            if(readRatio.ratio() >= 0)
                windowGcRatios.add(readRatio);
        }
        ReadRatio unnormalizedRatio = unnormalizedOffTargetRatio(
            offTargetWindowSize, chromosomeStr, currentWindowStart, windowGcRatios, targetRegions);
        if(unnormalizedRatio != null)
            unnormalizedRatios.put(chromosome, unnormalizedRatio);
    }
    // now we want to normalise all of those off target gc ratios by the median
    List<Double> values = new ArrayList<>();
    unnormalizedRatios.values().forEach(x -> values.add(x.ratio()));
    double median = Doubles.median(values);
    CB_LOGGER.debug("normalizing {} off target windows ratio by median: {}", unnormalizedRatios.size(), median);
    mOffTargetRatios.clear();
    //for ((key, value) in unnormalizedRatios.entries())
    for(Map.Entry<Chromosome,ReadRatio> entry : unnormalizedRatios.entries())
    {
        ReadRatio readRatio = entry.getValue();
        double normalizedRatio = readRatio.ratio() / median;
        mOffTargetRatios.put(entry.getKey(), ImmutableReadRatio.builder().from(readRatio).ratio(normalizedRatio).build());
    }
    */
}

/**
 * Computes the consolidated (unnormalised) off-target ratio for one super-window as the
 * median of the per-sub-window GC ratios inside it.
 *
 * Sub-window ratios lying within two standard windows of any targeted position on this
 * chromosome are removed from {@code windowGcRatios} first; the super-window is discarded
 * when too few valid sub-windows remain or the median is NaN.
 *
 * @param offTargetWindowSize size in bases of the consolidated super-window
 * @param chromosome          chromosome of this super-window
 * @param windowStart         start position of the super-window
 * @param windowGcRatios      mutable list of sub-window ratios; entries near target regions are removed
 * @param targetRegions       positions of the targeted panel regions
 * @return a ratio positioned at the super-window midpoint, or null if there is insufficient data
 */
@Nullable
private static ReadRatio unnormalizedOffTargetRatio(
    int offTargetWindowSize, final String chromosome, int windowStart, final List<ReadRatio> windowGcRatios,
    final List<GenomePosition> targetRegions)
{
    // minimum fraction of valid sub-windows required before the median is trusted
    final int requiredSubWindows =
        (int) round((double) offTargetWindowSize / CobaltConstants.WINDOW_SIZE * CobaltConstants.MIN_OFF_TARGET_WINDOW_RATIO);

    final int windowEnd = windowStart + offTargetWindowSize - 1;

    // the window position is the middle
    final int windowMid = windowStart + offTargetWindowSize / 2;

    // drop sub-windows sitting within 2 standard windows of any contained target position
    for(GenomePosition targetRegion : targetRegions)
    {
        if(!targetRegion.chromosome().equals(chromosome))
            continue;

        if(windowStart > targetRegion.position() || windowEnd <= targetRegion.position())
            continue;

        // this window contains a target region
        final int excludeStart = targetRegion.position() - 2 * CobaltConstants.WINDOW_SIZE;
        final int excludeEnd = targetRegion.position() + 2 * CobaltConstants.WINDOW_SIZE;

        CB_LOGGER.trace("off target window: {}:{} ({} - {}), contains target region",
            chromosome, windowMid, windowStart, windowEnd);

        windowGcRatios.removeIf(r -> r.position() >= excludeStart && r.position() <= excludeEnd);
    }

    if(windowGcRatios.size() < requiredSubWindows)
    {
        // if we don't have enough sub windows with valid values then we skip this
        CB_LOGGER.trace( "off target window: {}:{} ({} - {}), not enough sub window",
            chromosome, windowMid, windowStart, windowEnd);
        return null;
    }

    final List<Double> ratioValues = windowGcRatios.stream().map(ReadRatio::ratio).collect(Collectors.toList());
    final double median = Doubles.median(ratioValues);

    CB_LOGGER.debug("off target window: {}:{} ({} - {}), num sub windows: {}, median: {}",
        chromosome, windowMid, windowStart, windowEnd, windowGcRatios.size(), format("%.4f", median));

    if(Double.isNaN(median))
        return null;

    return ImmutableReadRatio.builder().chromosome(chromosome).position(windowMid).ratio(median).build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,9 @@ public void testOnTargetRatio()

chromosomePositionCodec.addEncodedChrPosColumn(targetEnrichmentRatios, true);

TargetedRatioMapper ratioMapper = new TargetedRatioMapper(targetEnrichmentRatios, chromosomePositionCodec);
TargetedRatioMapper ratioMapper = new TargetedRatioMapper(targetEnrichmentRatios);

Table onTargetRatios = ratioMapper.onTargetRatios(ratios);
Table onTargetRatios = ratioMapper.mapRatios(ratios);

assertEquals(2, onTargetRatios.rowCount());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,11 @@ public final class GCMedianReadDepthFile
private static final String EXTENSION = ".cobalt.gc.median.tsv";
private static final int ASSUMED_READ_LENGTH = 151;

@NotNull
/**
 * Forms the GC median read depth file path for a sample: {basePath}/{sample}{EXTENSION}.
 *
 * @param basePath directory containing the file (no trailing separator expected)
 * @param sample   sample ID used as the file name stem
 * @return full platform-specific path to the sample's GC median file
 */
public static String generateFilename(final String basePath, final String sample)
{
    final String fileName = sample + EXTENSION;
    return basePath + File.separator + fileName;
}

@NotNull
public static GCMedianReadDepth read(final String filename) throws IOException
{
return fromLines(Files.readAllLines(new File(filename).toPath()));
Expand All @@ -35,7 +33,6 @@ public static void write(final String fileName, final GCMedianReadDepth gcMedian
Files.write(new File(fileName).toPath(), toLines(gcMedianReadDepth));
}

@NotNull
private static GCMedianReadDepth fromLines(final List<String> lines)
{
boolean useReadDepth = true;
Expand Down Expand Up @@ -91,7 +88,6 @@ private static GCMedianReadDepth fromLines(final List<String> lines)
return new GCMedianReadDepth(mean, median, medianPerBucket);
}

@NotNull
private static List<String> toLines(final GCMedianReadDepth gcMedianReadDepth)
{
final List<String> lines = new ArrayList<>();
Expand Down

0 comments on commit eae22d1

Please sign in to comment.