Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BaseQualityHistogramAnnotation #5986

Merged
merged 1 commit into from
Jun 6, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ protected int aggregate(final List<Integer> values) {

/**
 * Supplies the per-read value used by this annotation.
 *
 * <p>This is a pure delegate: the result is whatever
 * {@code getBaseQuality(read, vc)} produces for the same arguments.
 *
 * @param read the read being examined
 * @param vc   the variant context the read is evaluated against
 * @return the value reported by {@code getBaseQuality}, possibly empty
 */
@Override
protected OptionalInt getValueForRead(final GATKRead read, final VariantContext vc) {
    final OptionalInt baseQuality = getBaseQuality(read, vc);
    return baseQuality;
}

public static OptionalInt getBaseQuality(final GATKRead read, final VariantContext vc) {
if (vc.getStart() < read.getStart() || read.getEnd() < vc.getStart()) {
return OptionalInt.empty();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package org.broadinstitute.hellbender.tools.walkers.annotator;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Multiset;
import com.google.common.collect.TreeMultiset;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFHeaderLineCount;
import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import org.apache.commons.lang.mutable.MutableInt;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.utils.QualityUtils;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import scala.Int;

import java.util.*;
import java.util.stream.Collectors;

/**
 * INFO-field annotation that reports a sparse, per-allele histogram of base qualities.
 *
 * <p>The emitted value is a flat list of integers. For every distinct base quality observed
 * (in ascending order) the list contains the quality itself followed by one count per allele,
 * in the order returned by {@code likelihoods.alleles()}. With two alleles,
 * {@code [10,1,0,20,0,1]} therefore means: quality 10 seen once for the first allele, and
 * quality 20 seen once for the second allele.
 *
 * <p>Because the list length depends on how many distinct qualities were observed, the header
 * line is declared with an unbounded count.
 */
public class BaseQualityHistogram extends InfoFieldAnnotation {

    public static final String KEY = "BQHIST";

    /**
     * Builds the histogram for one variant.
     *
     * @param ref         reference context; not used by this annotation
     * @param vc          the variant being annotated; must not be null
     * @param likelihoods read likelihoods for the site; may be null
     * @return an empty map when {@code likelihoods} is null, otherwise a single-entry map
     *         from {@link #KEY} to the flattened histogram list
     */
    @Override
    public Map<String, Object> annotate(final ReferenceContext ref,
                                        final VariantContext vc,
                                        final ReadLikelihoods<Allele> likelihoods) {
        Utils.nonNull(vc);
        if (likelihoods == null) {
            return Collections.emptyMap();
        }

        final List<Allele> alleles = likelihoods.alleles();

        // One multiset of observed base qualities per allele.
        final Map<Allele, TreeMultiset<Integer>> values = alleles.stream()
                .collect(Collectors.toMap(a -> a, a -> TreeMultiset.create()));

        // Assign each informative, usable read to its best allele and record its base quality;
        // reads for which BaseQuality.getBaseQuality yields no value are skipped.
        Utils.stream(likelihoods.bestAllelesBreakingTies())
                .filter(ba -> ba.isInformative() && isUsableRead(ba.read))
                .forEach(ba -> BaseQuality.getBaseQuality(ba.read, vc)
                        .ifPresent(v -> values.get(ba.allele).add(v)));

        // Sorted set of every quality seen for any allele. elementSet() exposes each distinct
        // value once, so duplicates are never iterated.
        final SortedSet<Integer> distinctBaseQualities = new TreeSet<>();
        for (final Multiset<Integer> qualities : values.values()) {
            distinctBaseQualities.addAll(qualities.elementSet());
        }

        // Flatten to: quality, count(allele 0), count(allele 1), ..., next quality, ...
        final List<Integer> output = new ArrayList<>();
        for (final int qual : distinctBaseQualities) {
            output.add(qual);
            alleles.forEach(allele -> output.add(values.get(allele).count(qual)));
        }

        return ImmutableMap.of(KEY, output);
    }

    /**
     * Describes the {@link #KEY} INFO field.
     *
     * <p>The count is UNBOUNDED because the number of emitted integers depends on the number
     * of distinct base qualities observed, not on the number of alternate alleles.
     */
    @Override
    public List<VCFInfoHeaderLine> getDescriptions() {
        return Arrays.asList(new VCFInfoHeaderLine(KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer,
                "Base quality counts for each allele represented sparsely as alternating entries of qualities and counts for each allele. " +
                "For example [10,1,0,20,0,1] means one ref base with quality 10 and one alt base with quality 20."));
    }

    /** Returns the single INFO key produced by this annotation. */
    @Override
    public List<String> getKeyNames() {
        return Arrays.asList(KEY);
    }

    /**
     * A read is usable when its mapping quality is neither zero nor the
     * {@code QualityUtils.MAPPING_QUALITY_UNAVAILABLE} sentinel.
     */
    private static boolean isUsableRead(final GATKRead read) {
        final int mappingQuality = read.getMappingQuality();
        return mappingQuality != 0 && mappingQuality != QualityUtils.MAPPING_QUALITY_UNAVAILABLE;
    }
}
Loading