Skip to content

Commit

Permalink
switch from ExcessHet back to HWE (#6848)
Browse files Browse the repository at this point in the history
  • Loading branch information
meganshand authored and kcibul committed Mar 9, 2021
1 parent 1e5418c commit b6cd5a5
Show file tree
Hide file tree
Showing 7 changed files with 39 additions and 40 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@
public class CommonCode {
public static String CALL_RATE = "CALL_RATE";
public static String INVARIANT = "INVARIANT";
public static String HWE = "HWE";

public static String EXCESS_HET_FILTER = "EXCESS_HET";
public static String HWE_FILTER = "HWE";
public static String CALL_RATE_FILTER = "CALL_RATE";
public static String INVARIANT_FILTER = "INVARIANT";

Expand Down Expand Up @@ -58,10 +59,11 @@ public static VCFHeader generateRawArrayVcfHeader(Set<String> sampleNames, final
// TODO: are there official headers for this?
lines.add(new VCFInfoHeaderLine(CALL_RATE, 1, VCFHeaderLineType.Float, "Call Rate"));
lines.add(new VCFInfoHeaderLine(INVARIANT, 1, VCFHeaderLineType.Flag, "Invariant"));
lines.add(new VCFInfoHeaderLine(HWE, 1, VCFHeaderLineType.Float, "Phred-scaled HWE p-value"));

lines.add(new VCFFilterHeaderLine(INVARIANT_FILTER, "No variant samples in reference QC panel"));
lines.add(new VCFFilterHeaderLine(CALL_RATE_FILTER, "Inadequate call rate in reference QC panel"));
lines.add(new VCFFilterHeaderLine(EXCESS_HET_FILTER, "Excess Hets in reference QC panel"));
lines.add(new VCFFilterHeaderLine(HWE_FILTER, "HWE is violated in reference QC panel"));

final VCFHeader header = new VCFHeader(lines, sampleNames);
header.setSequenceDictionary(sequenceDictionary);
Expand Down Expand Up @@ -151,8 +153,6 @@ public static Set<VCFHeaderLine> getEvoquerVcfHeaderLines() {
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AS_QUAL_BY_DEPTH_KEY));
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.QUAL_BY_DEPTH_KEY));

headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.EXCESS_HET_KEY));

headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.SB_TABLE_KEY));

// TODO: There must be a more appropriate constant to use for these
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.broadinstitute.hellbender.tools.variantdb.arrays;

import htsjdk.tribble.util.popgen.HardyWeinbergCalculation;
import org.apache.avro.generic.GenericRecord;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
Expand All @@ -10,7 +11,6 @@
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.variantdb.IngestConstants;
import org.broadinstitute.hellbender.tools.variantdb.arrays.tables.GenotypeCountsSchema;
import org.broadinstitute.hellbender.tools.walkers.annotator.ExcessHet;
import org.broadinstitute.hellbender.utils.GenotypeCounts;
import org.broadinstitute.hellbender.utils.bigquery.StorageAPIAvroReader;
import org.broadinstitute.hellbender.utils.bigquery.TableReference;
Expand Down Expand Up @@ -44,7 +44,7 @@ public class ArrayCalculateMetrics extends GATKTool {

public enum HeaderFieldEnum {
probe_id,
excess_het,
hwe_pval,
call_rate,
invariant
}
Expand Down Expand Up @@ -72,19 +72,26 @@ public void traverse() {
for ( final GenericRecord row : reader ) {
List<String> thisRow = new ArrayList<>();
// data in row should never be null
long probeId = (Long) row.get(GenotypeCountsSchema.PROBE_ID_INDEX);
long probeId = (Long) row.get(GenotypeCountsSchema.PROBE_ID);
thisRow.add(String.valueOf(probeId));

long combined_hom_var = (Long) row.get(GenotypeCountsSchema.HOM_VAR_INDEX) +
(Long) row.get(GenotypeCountsSchema.HET_1_2_INDEX) +
(Long) row.get(GenotypeCountsSchema.HOM_VAR_2_2_INDEX);
long combined_hom_var = (Long) row.get(GenotypeCountsSchema.HOM_VAR_COUNT) +
(Long) row.get(GenotypeCountsSchema.HET_1_2_COUNT) +
(Long) row.get(GenotypeCountsSchema.HOM_VAR_COUNT);

GenotypeCounts genotypeCounts = new GenotypeCounts((Long) row.get(GenotypeCountsSchema.HOM_REF_INDEX),
(Long) row.get(GenotypeCountsSchema.HET_INDEX), combined_hom_var);
long noCalls = (Long) row.get(GenotypeCountsSchema.NO_CALL_INDEX);
GenotypeCounts genotypeCounts = new GenotypeCounts((Long) row.get(GenotypeCountsSchema.HOM_REF_COUNT),
(Long) row.get(GenotypeCountsSchema.HET_COUNT), combined_hom_var);
long noCalls = (Long) row.get(GenotypeCountsSchema.NO_CALL_COUNT);
int sampleCount = (int) genotypeCounts.getRefs() + (int) genotypeCounts.getHets() + (int) genotypeCounts.getHoms() + (int) noCalls;
double excessHet = ExcessHet.calculateEH(genotypeCounts, sampleCount).getRight();
thisRow.add(String.format("%.0f", excessHet));
double hwe;
// If there are no called genotypes (refs + hets + homs <= 0), set the p-value to 1.
if (genotypeCounts.getRefs() + genotypeCounts.getHets() + genotypeCounts.getHoms() <= 0) {
hwe = 1;
} else {
hwe = HardyWeinbergCalculation.hwCalculate((int) genotypeCounts.getRefs(), (int) genotypeCounts.getHets(), (int) genotypeCounts.getHoms());
}
double phredHwe = hwe == 1 ? 0 : Math.floor(-10.0 * Math.log10(hwe));
thisRow.add(String.format("%.0f", phredHwe));

double callRate = 1.0 - ((double) noCalls / sampleCount);
thisRow.add(String.format("%.3f", callRate));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,10 +119,10 @@ public enum QueryMode {
private boolean removeFilteredVariants = false;

@Argument(
fullName = "excess-het-threshold",
doc = "Filter variants with excess het greater than this value",
fullName = "hwe-phred-scaled-threshold",
doc = "Filter variants with HWE phred-scaled p-value greater than this value",
optional = true)
private float excessHetThreshold = 60.0f;
private float hwePvalThreshold = 60.0f;

@Argument(
fullName = "call-rate-threshold",
Expand Down Expand Up @@ -230,7 +230,7 @@ protected void onStartup() {
progressMeter,
useLegacyGTEncoding,
removeFilteredVariants,
excessHetThreshold,
hwePvalThreshold,
callRateThreshold,
filterInvariants);
vcfWriter.writeHeader(header);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ public class ArrayExtractCohortEngine {
private final boolean useLegacyGTEncoding; //TODO remove

final boolean removeFilteredVariants;
final float excessHetThreshold;
final float hwePvalThreshold;
final float callRateThreshold;
final boolean filterInvariants;

Expand All @@ -83,7 +83,7 @@ public ArrayExtractCohortEngine(final String projectID,
final ProgressMeter progressMeter,
final boolean useLegacyGTEncoding,
final boolean removeFilteredVariants,
final float excessHetThreshold,
final float hwePvalThreshold,
final float callRateThreshold,
final boolean filterInvariants) {

Expand Down Expand Up @@ -115,7 +115,7 @@ public ArrayExtractCohortEngine(final String projectID,
this.useLegacyGTEncoding = useLegacyGTEncoding;

this.removeFilteredVariants = removeFilteredVariants;
this.excessHetThreshold = excessHetThreshold;
this.hwePvalThreshold = hwePvalThreshold;
this.callRateThreshold = callRateThreshold;
this.filterInvariants = filterInvariants;
}
Expand Down Expand Up @@ -268,9 +268,9 @@ private void finalizeCurrentVariant(final List<VariantContext> unmergedCalls, fi

final VariantContextBuilder builder = new VariantContextBuilder(mergedVC);

builder.attribute(GATKVCFConstants.EXCESS_HET_KEY, probeQcMetrics.excess_het);
if (probeQcMetrics.excess_het > excessHetThreshold) {
builder.filter("EXCESS_HET");
builder.attribute(CommonCode.HWE, probeQcMetrics.hwe_pval);
if (probeQcMetrics.hwe_pval > hwePvalThreshold) {
builder.filter("HWE_PVAL");
}

builder.attribute(CommonCode.CALL_RATE, probeQcMetrics.call_rate);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,6 @@ public class GenotypeCountsSchema {
public static final String HOM_VAR_2_2_COUNT = "hom_var_2_2";
public static final String NO_CALL_COUNT = "no_call";

public static final int PROBE_ID_INDEX = 0;
public static final int HOM_REF_INDEX = 1;
public static final int HET_INDEX = 2;
public static final int HOM_VAR_INDEX = 3;
public static final int HET_1_2_INDEX = 4;
public static final int HOM_VAR_2_2_INDEX = 5;
public static final int NO_CALL_INDEX = 6;


public static final List<String> GENOTYPE_COUNTS_FIELDS = Arrays.asList(PROBE_ID, HOM_REF_COUNT, HET_COUNT, HOM_VAR_COUNT, HET_1_2_COUNT, HOM_VAR_2_2_COUNT, NO_CALL_COUNT);
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@

public class ProbeQcMetrics {
public final long probeId;
public final Double excess_het;
public final Double hwe_pval;
public final Double call_rate;
public final Boolean invariant;

public ProbeQcMetrics(final long probeId, final Double excess_het, final Double call_rate, final Boolean invariant) {
public ProbeQcMetrics(final long probeId, final Double hwe_pval, final Double call_rate, final Boolean invariant) {
this.probeId = probeId;
this.excess_het = excess_het;
this.hwe_pval = hwe_pval;
this.call_rate = call_rate;
this.invariant = invariant;
}
Expand All @@ -32,7 +32,7 @@ public static Map<Long, ProbeQcMetrics> getProbeQcMetricsWithStorageAPI(String f
for ( final GenericRecord row : reader ) {
ProbeQcMetrics p = new ProbeQcMetrics(
(Long) row.get(ProbeQcMetricsSchema.PROBE_ID),
getOptionalDouble(row, ProbeQcMetricsSchema.EXCESS_HET),
getOptionalDouble(row, ProbeQcMetricsSchema.HWE_PVAL),
getOptionalDouble(row, ProbeQcMetricsSchema.CALL_RATE),
getOptionalBoolean(row, ProbeQcMetricsSchema.INVARIANT)
);
Expand All @@ -58,7 +58,7 @@ private static Boolean getOptionalBoolean(GenericRecord rec, String fieldName) {

@Override
public String toString() {
return "ProbeQcMetric [probeId=" + probeId + ", excess_het=" + excess_het + ", call_rate="
return "ProbeQcMetric [probeId=" + probeId + ", hwe_pval=" + hwe_pval + ", call_rate="
+ call_rate + ", invariant=" + invariant + "]";
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
public class ProbeQcMetricsSchema {

public static final String PROBE_ID = "probe_id";
public static final String EXCESS_HET = "excess_het";
public static final String HWE_PVAL = "hwe_pval";
public static final String CALL_RATE = "call_rate";
public static final String INVARIANT = "invariant";

public static final List<String> PROBE_QC_METRIC_FIELDS = Arrays.asList(PROBE_ID, EXCESS_HET, CALL_RATE, INVARIANT);
public static final List<String> PROBE_QC_METRIC_FIELDS = Arrays.asList(PROBE_ID, HWE_PVAL, CALL_RATE, INVARIANT);

}

0 comments on commit b6cd5a5

Please sign in to comment.