Skip to content

Commit

Permalink
resolving issues from the rebase
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesemery committed Sep 18, 2019
1 parent 07045c6 commit fe36a8f
Show file tree
Hide file tree
Showing 27 changed files with 4,771 additions and 1,224 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,23 @@
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.BaseGraph;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.SeqGraph;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingGraph;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingGraphInterface;
import org.broadinstitute.hellbender.utils.Utils;

/**
* Result of assembling, with the resulting graph and status
*/
public final class AssemblyResult {
private final Status status;
private final ReadThreadingGraph threadingGraph;
private final ReadThreadingGraphInterface threadingGraph;
private final SeqGraph graph;

/**
* Create a new assembly result
* @param status the status, cannot be null
* @param graph the resulting graph of the assembly, can only be null if result is failed
*/
public AssemblyResult(final Status status, final SeqGraph graph, final ReadThreadingGraph threadingGraph) {
public AssemblyResult(final Status status, final SeqGraph graph, final ReadThreadingGraphInterface threadingGraph) {
Utils.nonNull(status, "status cannot be null");
Utils.validateArg( status == Status.FAILED || (graph != null || threadingGraph != null) , "graph is null but status is " + status);

Expand All @@ -27,7 +28,7 @@ public AssemblyResult(final Status status, final SeqGraph graph, final ReadThrea
this.threadingGraph = threadingGraph;
}

public ReadThreadingGraph getThreadingGraph() {
public ReadThreadingGraphInterface getThreadingGraph() {
return threadingGraph;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import org.apache.logging.log4j.Logger;
import org.broadinstitute.hellbender.engine.AssemblyRegion;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingGraph;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingGraphInterface;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.haplotype.EventMap;
Expand Down Expand Up @@ -433,7 +434,7 @@ public int getMinimumKmerSize() {
*
* @return {@code null} if there is no read-threading-graph amongst assembly results with that kmerSize.
*/
public ReadThreadingGraph getUniqueReadThreadingGraph(final int kmerSize) {
public ReadThreadingGraphInterface getUniqueReadThreadingGraph(final int kmerSize) {
final AssemblyResult assemblyResult = assemblyResultByKmerSize.get(kmerSize);
if (assemblyResult == null) {
return null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -242,8 +242,8 @@ private VariantContext removeAltAllelesIfTooManyGenotypes(final int ploidy, fina
if (originalAlleleCount > practicalAlleleCount) {
final List<Allele> allelesToKeep = whichAllelesToKeepBasedonHapScores(alleleMapper, practicalAlleleCount);
alleleMapper.keySet().retainAll(allelesToKeep);
logger.warn(String.format("Removed alt alleles where ploidy is %d and original allele count is %d, whereas after trimming the allele count becomes %d. Alleles kept are:%s",
ploidy, originalAlleleCount, practicalAlleleCount, allelesToKeep));
logger.warn(String.format("At position %s removed alt alleles where ploidy is %d and original allele count is %d, whereas after trimming the allele count becomes %d. Alleles kept are:%s",
new SimpleInterval(mergedVC).toString(), ploidy, originalAlleleCount, practicalAlleleCount, allelesToKeep));
return removeExcessAltAllelesFromVC(mergedVC, allelesToKeep);
} else {
return mergedVC;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,14 +149,14 @@ public abstract class ReadThreadingAssemblerArgumentCollection implements Serial
public int maxUnprunedVariants = 100;

/**
* Disables graph simplification into a seq graph. This is experimental and may cause performance issues for the KBestHaplotypeFinder
* Disables graph simplification into a seq graph. This is experimental and may cause performance issues for the GraphBasedKBestHaplotypeFinder
*
* NOTE: --disable-sequence-graph-simplification is currently an experimental feature that does not directly match with
* the regular HaplotypeCaller. Specifically the haplotype finding code does not perform correctly at complicated
* sites. Use this mode at your own risk.
*/
@Hidden
@Argument(fullName="disable-sequence-graph-simplification", doc = "If enabled, haplotype caller will detect haplotypes on the unmodified debrujin graph", optional = true)
@Argument(fullName="disable-sequence-graph-simplification", doc = "If enabled, haplotype caller will detect haplotypes on the unmodified de Bruijn graph", optional = true)
public boolean disableSeqGraphConstruciton = false;

@Advanced
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,10 +133,10 @@ public final byte[] getAdditionalSequence( final V v ) {
/**
* Pull out the additional sequence implied by traversing this node in the graph
* @param v the vertex from which to pull out the additional base sequence
* @param isSource if true, treat v as a source vertex regardless of graph in degree
* @param isSource if true, treat v as a source vertex regardless of in-degree
* @return non-null byte array
*/
public final byte[] getAdditionalSequence( final V v, final boolean isSource) {
public static final byte[] getAdditionalSequence( final BaseVertex v, final boolean isSource) {
Utils.nonNull(v, "Attempting to pull sequence from a null vertex.");
return v.getAdditionalSequence(isSource);
}
Expand Down Expand Up @@ -186,14 +186,14 @@ public final boolean isRefSink( final V v ) {
/**
* @return the reference source vertex pulled from the graph, can be null if it doesn't exist in the graph
*/
public final V getReferenceSourceVertex( ) {
public V getReferenceSourceVertex( ) {
return vertexSet().stream().filter(v -> isRefSource(v)).findFirst().orElse(null);
}

/**
* @return the reference sink vertex pulled from the graph, can be null if it doesn't exist in the graph
*/
public final V getReferenceSinkVertex( ) {
public V getReferenceSinkVertex( ) {
return vertexSet().stream().filter(v -> isRefSink(v)).findFirst().orElse(null);
}

Expand Down Expand Up @@ -258,7 +258,7 @@ public final V getPrevReferenceVertex( final V v ) {
* @param includeStop should the ending vertex be included in the path
* @return byte[] array holding the reference bases, this can be null if there are no nodes between the starting and ending vertex (insertions for example)
*/
public final byte[] getReferenceBytes( final V fromVertex, final V toVertex, final boolean includeStart, final boolean includeStop ) {
public byte[] getReferenceBytes( final V fromVertex, final V toVertex, final boolean includeStart, final boolean includeStop ) {
Utils.nonNull(fromVertex, "Starting vertex in requested path cannot be null.");
Utils.nonNull(toVertex, "From vertex in requested path cannot be null.");

Expand Down Expand Up @@ -401,11 +401,18 @@ public final void printGraph(final PrintStream graphWriter, final boolean writeH
graphWriter.println(String.format("\t%s [label=\"%s\",shape=box]", v.toString(), new String(getAdditionalSequence(v)) + v.getAdditionalInfo()));
}

getExtraGraphFileLines().forEach(graphWriter::println);

if ( writeHeader ) {
graphWriter.println("}");
}
}

/**
 * Extension hook for adding extra material to the printed graph file.
 * This base implementation contributes nothing.
 *
 * @return an empty list of extra lines
 */
public List<String> getExtraGraphFileLines() {
    return Collections.<String>emptyList();
}

/**
* Remove edges that are connected before the reference source and after the reference sink
*
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs;

import org.apache.commons.lang3.mutable.MutableInt;
import org.broadinstitute.hellbender.utils.BaseUtils;
import org.broadinstitute.hellbender.utils.Utils;
import org.jgrapht.alg.CycleDetector;

import java.util.*;
import java.util.stream.Collectors;

/**
 * Finds the best-scoring haplotypes (paths from a source vertex to a sink vertex) in a
 * {@link BaseGraph} instance using a priority-queue driven search.
 *
 * @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
 */
public class GraphBasedKBestHaplotypeFinder<V extends BaseVertex, E extends BaseEdge> extends KBestHaplotypeFinder<V, E> {

    /**
     * Orders haplotypes by descending score. Ties are resolved by comparing the haplotype bases with the
     * reversed {@link BaseUtils#BASES_COMPARATOR}, which is an arbitrary but deterministic tie breaker.
     */
    public final Comparator<KBestHaplotype<V, E>> K_BEST_HAPLOTYPE_COMPARATOR = Comparator.comparingDouble(KBestHaplotype<V, E>::score)
            .reversed()
            .thenComparing(KBestHaplotype<V, E>::getBases, BaseUtils.BASES_COMPARATOR.reversed());

    /**
     * Creates a finder that searches {@code graph} for haplotypes running from any of {@code sources}
     * to any of {@code sinks}.
     *
     * @param graph the graph to search.
     * @param sources vertices at which every haplotype must start.
     * @param sinks vertices at which every haplotype must end.
     *
     * @throws IllegalArgumentException if <ul>
     *     <li>any of {@code graph}, {@code sources} or {@code sinks} is {@code null} or</li>
     *     <li>any member of {@code sources} or {@code sinks} is not a vertex in {@code graph}.</li>
     * </ul>
     */
    public GraphBasedKBestHaplotypeFinder(final BaseGraph<V, E> graph, final Set<V> sources, final Set<V> sinks) {
        // The parent constructor takes its arguments in (sinks, sources, graph) order.
        super(sinks, sources, graph);
    }

    /**
     * Convenience constructor for a graph with exactly one source and one sink of interest.
     *
     * @param graph the graph to search.
     * @param source the only start vertex.
     * @param sink the only end vertex.
     */
    public GraphBasedKBestHaplotypeFinder(final BaseGraph<V, E> graph, final V source, final V sink) {
        this(graph, Collections.singleton(source), Collections.singleton(sink));
    }

    /**
     * Convenience constructor that uses every source and every sink reported by the graph itself.
     *
     * @param graph the graph to search.
     */
    public GraphBasedKBestHaplotypeFinder(final BaseGraph<V, E> graph) {
        this(graph, graph.getSources(), graph.getSinks());
    }

    /**
     * @return always {@code false} for this finder.
     */
    @Override
    public boolean keepCycles() {
        // NOTE(review): presumably consulted by the parent class when preparing the graph -- confirm there.
        return false;
    }

    /**
     * Collects up to {@code maxNumberOfHaplotypes} source-to-sink paths, taking candidates from a priority
     * queue ordered by {@link #K_BEST_HAPLOTYPE_COMPARATOR}. The approach follows the Dijkstra-style
     * algorithm described in https://en.wikipedia.org/wiki/K_shortest_path_routing
     *
     * @param maxNumberOfHaplotypes upper bound on the number of haplotypes returned; it also caps how many
     *                              times any single non-sink vertex is extended.
     * @return the completed haplotypes in the order in which they were taken from the queue.
     */
    @Override
    public List<KBestHaplotype<V, E>> findBestHaplotypes(final int maxNumberOfHaplotypes) {
        // Seed the search with one trivial path per source vertex.
        final PriorityQueue<KBestHaplotype<V, E>> candidates = new PriorityQueue<>(K_BEST_HAPLOTYPE_COMPARATOR);
        for (final V source : sources) {
            candidates.add(new KBestHaplotype<>(source, graph));
        }

        // Per-vertex tally of how many times a path ending at that vertex has been extended.
        final Map<V, MutableInt> expansionCounts = graph.vertexSet().stream()
                .collect(Collectors.toMap(v -> v, v -> new MutableInt(0)));

        final List<KBestHaplotype<V, E>> completed = new ArrayList<>();
        while (!candidates.isEmpty() && completed.size() < maxNumberOfHaplotypes) {
            final KBestHaplotype<V, E> best = candidates.poll();
            final V tail = best.getLastVertex();

            // A path that has reached a sink is finished and is never extended further.
            if (sinks.contains(tail)) {
                completed.add(best);
                continue;
            }

            // The visit is counted even when the cap has already been reached for this vertex.
            if (expansionCounts.get(tail).getAndIncrement() >= maxNumberOfHaplotypes) {
                continue;
            }

            final Set<E> outgoing = graph.outgoingEdgesOf(tail);
            final int totalOutgoingMultiplicity = outgoing.stream()
                    .mapToInt(BaseEdge::getMultiplicity)
                    .sum();

            for (final E edge : outgoing) {
                candidates.add(new KBestHaplotype<>(best, edge, totalOutgoingMultiplicity));
            }
        }
        return completed;
    }

}
Loading

0 comments on commit fe36a8f

Please sign in to comment.