Skip to content

Commit

Permalink
Created a VariantAnnotationArgumentCollection to reduce code duplicat…
Browse files Browse the repository at this point in the history
…ion and added a StandardM2Annotation group
  • Loading branch information
jamesemery committed Sep 27, 2017
1 parent 847048d commit 999a1a7
Show file tree
Hide file tree
Showing 21 changed files with 135 additions and 104 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ private StandardArgumentDefinitions(){}
public static final String ADD_OUTPUT_SAM_PROGRAM_RECORD = "addOutputSAMProgramRecord";
public static final String ADD_OUTPUT_VCF_COMMANDLINE = "addOutputVCFCommandLine";
public static final String SEQUENCE_DICTIONARY_NAME = "sequenceDictionary";
public static final String ANNOTATION_LONG_NAME = "annotation";
public static final String ANNOTATION_GROUP_LONG_NAME = "annotationGroup";
public static final String ANNOTATIONS_TO_EXCLUDE_LONG_NAME = "annotationsToExclude";

public static final String INPUT_SHORT_NAME = "I";
public static final String OUTPUT_SHORT_NAME = "O";
Expand Down Expand Up @@ -65,6 +68,9 @@ private StandardArgumentDefinitions(){}
public static final String CLOUD_PREFETCH_BUFFER_SHORT_NAME = "CPB";
public static final String CLOUD_INDEX_PREFETCH_BUFFER_SHORT_NAME = "CIPB";
public static final String DISABLE_BAM_INDEX_CACHING_SHORT_NAME = "DBIC";
public static final String ANNOTATION_SHORT_NAME = "A";
public static final String ANNOTATION_GROUP_SHORT_NAME = "G";
public static final String ANNOTATIONS_TO_EXCLUDE_SHORT_NAME = "AX";

public static final String SPARK_PROPERTY_NAME = "conf";

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package org.broadinstitute.hellbender.cmdline.argumentcollections;

import org.broadinstitute.barclay.argparser.Advanced;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.utils.Utils;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;

/**
* Arguments for requesting VariantContext annotations to be processed by {@link org.broadinstitute.hellbender.tools.walkers.annotator.VariantAnnotatorEngine}
* for tools that process variants objects.
*/
public class VariantAnnotationArgumentCollection implements Serializable {
private static final long serialVersionUID = 1L;

/**
* Argument collection constructor which defines tool default annotation arguments when they are not overridden by the user
*
* @param defaultGroups List annotation group names to be used by default
* @param defaultAnnotations List of annotation class names to be used by default
* @param defaultAnnotationsToExclude List of annotation class names to exclude by default. These override the default annotations and annotation groups.
*/
public VariantAnnotationArgumentCollection(List<String> defaultGroups, List<String> defaultAnnotations, List<String> defaultAnnotationsToExclude) {
Utils.nonNull(defaultGroups);
Utils.nonNull(defaultAnnotations);
Utils.nonNull(defaultAnnotationsToExclude);

annotationGroupsToUse = new ArrayList<>(defaultGroups);
annotationsToUse = new ArrayList<>(defaultAnnotations);
annotationsToExclude = new ArrayList<>(defaultAnnotationsToExclude);
}

/**
* Which annotations to include in variant calls in the output. These supplement annotations provided by annotation groups.
*/
@Argument(fullName=StandardArgumentDefinitions.ANNOTATION_LONG_NAME, shortName=StandardArgumentDefinitions.ANNOTATION_SHORT_NAME, doc="One or more specific annotations to add to variant calls", optional=true)
public List<String> annotationsToUse = new ArrayList<>();

/**
* Which annotations to exclude from output in the variant calls. Note that this argument has higher priority than the
* -A or -G arguments, so these annotations will be excluded even if they are explicitly included with the other
* options.
*/
@Argument(fullName=StandardArgumentDefinitions.ANNOTATIONS_TO_EXCLUDE_LONG_NAME, shortName=StandardArgumentDefinitions.ANNOTATIONS_TO_EXCLUDE_SHORT_NAME, doc="One or more specific annotations to exclude from variant calls", optional=true)
public List<String> annotationsToExclude = new ArrayList<>();

/**
* Which groups of annotations to add to the output variant calls.
* Any requirements that are not met (e.g. failing to provide a pedigree file for a pedigree-based annotation) may cause the run to fail.
*/
@Argument(fullName= StandardArgumentDefinitions.ANNOTATION_GROUP_LONG_NAME, shortName=StandardArgumentDefinitions.ANNOTATION_GROUP_SHORT_NAME, doc="One or more groups of annotations to apply to variant calls", optional=true)
public List<String> annotationGroupsToUse = new ArrayList<>();

}
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ public static JavaRDD<VariantContext> callVariantsWithHaplotypeCaller(
final Broadcast<ReferenceMultiSource> referenceBroadcast = ctx.broadcast(reference);
final Broadcast<HaplotypeCallerArgumentCollection> hcArgsBroadcast = ctx.broadcast(hcArgs);

final VariantAnnotatorEngine variantAnnotatorEngine = VariantAnnotatorEngine.ofSelectedMinusExcluded(hcArgs.annotationGroupsToUse, hcArgs.annotationsToUse, hcArgs.annotationsToExclude, hcArgs.dbsnp.dbsnp, hcArgs.comps);
final VariantAnnotatorEngine variantAnnotatorEngine = VariantAnnotatorEngine.ofSelectedMinusExcluded(hcArgs.variantAnnotationArgumentCollection, hcArgs.dbsnp.dbsnp, hcArgs.comps);
final Broadcast<VariantAnnotatorEngine> annotatorEngineBroadcast = ctx.broadcast(variantAnnotatorEngine);

final List<ShardBoundary> shardBoundaries = getShardBoundaries(header, intervals, shardingArgs.readShardSize, shardingArgs.readShardPadding);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.*;
import org.broadinstitute.hellbender.cmdline.argumentcollections.DbsnpArgumentCollection;
import org.broadinstitute.hellbender.cmdline.argumentcollections.VariantAnnotationArgumentCollection;
import org.broadinstitute.hellbender.cmdline.programgroups.VariantProgramGroup;
import org.broadinstitute.hellbender.engine.*;
import org.broadinstitute.hellbender.tools.walkers.annotator.*;
Expand Down Expand Up @@ -94,26 +95,11 @@ public final class GenotypeGVCFs extends VariantWalker {
@ArgumentCollection
private GenotypeCalculationArgumentCollection genotypeArgs = new GenotypeCalculationArgumentCollection();

/**
* Which annotations to recompute for the combined output VCF file.
*/
@Advanced
@Argument(fullName="annotation", shortName="A", doc="One or more specific annotations to recompute", optional=true)
private List<String> annotationsToUse = new ArrayList<>();


@Advanced
@Argument(fullName="annotationsToExclude", shortName="AX", doc="One or more specific annotations to exclude from recomputation", optional=true)
private List<String> annotationsToExclude = new ArrayList<>();

/**
* This argument controls which groups of annotations to add to the output VCF file. Not recommended for normal
* operation of the tool because it obscures the specific requirements of individual annotations. Any requirements
* that are not met (e.g. failing to provide a pedigree file for a pedigree-based annotation) may cause the run to fail.
*/
@Advanced
@Argument(fullName="group", shortName="G", doc="One or more classes/groups of annotations to apply to variant calls", optional=true)
private List<String> annotationGroupsToUse = new ArrayList<>(Arrays.asList(new String[]{StandardAnnotation.class.getSimpleName()}));
@ArgumentCollection
private final VariantAnnotationArgumentCollection variantAnnotationArgumentCollection = new VariantAnnotationArgumentCollection(
Arrays.asList(StandardAnnotation.class.getSimpleName()),
Collections.emptyList(),
Collections.emptyList());

/**
* This option can only be activated if intervals are specified.
Expand Down Expand Up @@ -164,7 +150,7 @@ public void onTraversalStart() {

genotypingEngine = new MinimalGenotypingEngine(createUAC(), samples, new GeneralPloidyFailOverAFCalculatorProvider(genotypeArgs));

annotationEngine = VariantAnnotatorEngine.ofSelectedMinusExcluded(annotationGroupsToUse, annotationsToUse, annotationsToExclude, dbsnp.dbsnp, Collections.emptyList());
annotationEngine = VariantAnnotatorEngine.ofSelectedMinusExcluded(variantAnnotationArgumentCollection, dbsnp.dbsnp, Collections.emptyList());

merger = new ReferenceConfidenceVariantContextMerger();

Expand Down
Original file line number Diff line number Diff line change
@@ -1,25 +1,22 @@
package org.broadinstitute.hellbender.tools.walkers.annotator;

import com.google.common.primitives.Doubles;
import com.google.common.primitives.Ints;
import htsjdk.variant.variantcontext.VariantContext;
import org.apache.commons.math3.stat.descriptive.rank.Median;
import org.broadinstitute.hellbender.utils.GATKProtectedMathUtils;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.read.AlignmentUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.read.ReadUtils;

import java.util.List;
import java.util.OptionalDouble;
import java.util.OptionalInt;

/**
* Median base quality of bases supporting each allele.
*
* Created by David Benjamin on 3/20/17.
*/
public class BaseQuality extends PerAlleleAnnotation {
public class BaseQuality extends PerAlleleAnnotation implements StandardMutectAnnotation {
public static final String KEY = "MBQ";

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
* <li><b><a href="https://www.broadinstitute.org/gatk/guide/tooldocs/org_broadinstitute_gatk_tools_walkers_annotator_DepthPerSampleHC.php">DepthPerSampleHC</a></b> calculates depth of coverage after filtering by HaplotypeCaller.</li>
* </ul>
*/
public final class Coverage extends InfoFieldAnnotation implements StandardAnnotation {
public final class Coverage extends InfoFieldAnnotation implements StandardAnnotation, StandardMutectAnnotation {

@Override
public Map<String, Object> annotate(final ReferenceContext ref,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
* <li><b><a href="https://www.broadinstitute.org/gatk/guide/tooldocs/org_broadinstitute_gatk_tools_walkers_annotator_AlleleBalanceBySample.php">AlleleBalanceBySample</a></b> calculates allele balance for each individual sample.</li>
* </ul>
*/
public final class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
public final class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation, StandardMutectAnnotation {

@Override
public void annotate(final ReferenceContext ref,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,23 +1,20 @@
package org.broadinstitute.hellbender.tools.walkers.annotator;

import com.google.common.primitives.Doubles;
import com.google.common.primitives.Ints;
import htsjdk.variant.variantcontext.VariantContext;
import org.apache.commons.math3.stat.descriptive.rank.Median;
import org.broadinstitute.hellbender.utils.GATKProtectedMathUtils;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.read.GATKRead;

import java.util.List;
import java.util.OptionalDouble;
import java.util.OptionalInt;

/**
* Median fragment length of reads supporting each allele.
*
* Created by David Benjamin on 3/20/17.
*/
public class FragmentLength extends PerAlleleAnnotation {
public class FragmentLength extends PerAlleleAnnotation implements StandardMutectAnnotation {
public static final String KEY = "MFRL";

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -1,25 +1,20 @@
package org.broadinstitute.hellbender.tools.walkers.annotator;

import com.google.common.primitives.Doubles;
import com.google.common.primitives.Ints;
import htsjdk.variant.variantcontext.VariantContext;
import org.apache.commons.math3.stat.descriptive.rank.Median;
import org.broadinstitute.hellbender.utils.GATKProtectedMathUtils;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.read.AlignmentUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.read.ReadUtils;

import java.util.List;
import java.util.OptionalDouble;
import java.util.OptionalInt;

/**
* Median mapping quality of reads supporting each allele.
*
* Created by David Benjamin on 3/20/17.
*/
public class MappingQuality extends PerAlleleAnnotation {
public class MappingQuality extends PerAlleleAnnotation implements StandardMutectAnnotation {
public static final String KEY = "MMQ";

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,11 @@
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import static org.broadinstitute.hellbender.utils.BaseUtils.Base.A;
import static org.broadinstitute.hellbender.utils.BaseUtils.Base.C;


/**
* Count of read pairs in the F1R2 and F2R1 configurations supporting the reference and alternate alleles
Expand All @@ -36,7 +32,7 @@
* "Discovery and characterization of artefactual mutations in deep coverage targeted capture sequencing data due to oxidative DNA damage during sample preparation."
* by Costello et al.</a></p>
*/
public final class OxoGReadCounts extends GenotypeAnnotation {
public final class OxoGReadCounts extends GenotypeAnnotation implements StandardMutectAnnotation {

@Override
public List<String> getKeyNames() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
*
* Created by David Benjamin on 3/20/17.
*/
public class ReadPosition extends PerAlleleAnnotation {
public class ReadPosition extends PerAlleleAnnotation implements StandardMutectAnnotation {
public static final String KEY = "MPOS";

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package org.broadinstitute.hellbender.tools.walkers.annotator;

/**
* This is a marker interface used to indicate which annotations are "Standard" for MuTect only.
* This is a marker interface used to indicate which annotations are "Standard" for Mutect2 only.
*/
public interface StandardSomaticAnnotation {
public interface StandardMutectAnnotation extends Annotation {
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
*
* Created by tsato on 4/19/17.
*/
public class StrandArtifact extends GenotypeAnnotation implements StandardSomaticAnnotation {
public class StrandArtifact extends GenotypeAnnotation implements StandardMutectAnnotation {
public static final String POSTERIOR_PROBABILITIES_KEY = "SA_POST_PROB";
public static final String MAP_ALLELE_FRACTIONS_KEY = "SA_MAP_AF";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
* </ul>
*
*/
public final class TandemRepeat extends InfoFieldAnnotation {
public final class TandemRepeat extends InfoFieldAnnotation implements StandardMutectAnnotation {

@Override
public Map<String, Object> annotate(final ReferenceContext ref,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
* We filter the variant if the count is lower than a user-specified threshold.
* Mutect2FilteringEngine::applyDuplicatedAltReadFilter is the accompanying filter.
*/
public class UniqueAltReadCount extends GenotypeAnnotation implements StandardSomaticAnnotation {
public class UniqueAltReadCount extends GenotypeAnnotation {
public static final String UNIQUE_ALT_READ_SET_COUNT_KEY = "UNIQ_ALT_READ_COUNT";

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import htsjdk.variant.variantcontext.*;
import htsjdk.variant.vcf.*;
import org.broadinstitute.barclay.argparser.CommandLineException;
import org.broadinstitute.hellbender.cmdline.argumentcollections.VariantAnnotationArgumentCollection;
import org.broadinstitute.hellbender.engine.FeatureContext;
import org.broadinstitute.hellbender.engine.FeatureInput;
import org.broadinstitute.hellbender.engine.ReferenceContext;
Expand Down Expand Up @@ -79,6 +80,26 @@ public static VariantAnnotatorEngine ofSelectedMinusExcluded(final List<String>
return new VariantAnnotatorEngine(AnnotationManager.ofSelectedMinusExcluded(annotationGroupsToUse, annotationsToUse, annotationsToExclude), dbSNPInput, comparisonFeatureInputs);
}

/**
* An overload of {@link org.broadinstitute.hellbender.tools.walkers.annotator.VariantAnnotatorEngine#ofSelectedMinusExcluded ofSelectedMinusExcluded}
* except that it accepts a {@link org.broadinstitute.hellbender.cmdline.argumentcollections.VariantAnnotationArgumentCollection} as input.
* @param argumentCollection VariantAnnotationArgumentCollection containing requested annotations.
* @param dbSNPInput input for variants from a known set from DbSNP or null if not provided.
* The annotation engine will mark variants overlapping anything in this set using {@link htsjdk.variant.vcf.VCFConstants#DBSNP_KEY}.
* @param comparisonFeatureInputs list of inputs with known variants.
* The annotation engine will mark variants overlapping anything in those sets using the name given by {@link FeatureInput#getName()}.
* Note: the DBSNP FeatureInput should be passed in separately, and not as part of this List - an GATKException will be thrown otherwise.
* Note: there are no non-DBSNP comparison FeatureInputs an empty List should be passed in here, rather than null.
* @return a VariantAnnotatorEngine initialized with the requested annotations
*/
public static VariantAnnotatorEngine ofSelectedMinusExcluded(final VariantAnnotationArgumentCollection argumentCollection,
final FeatureInput<VariantContext> dbSNPInput,
final List<FeatureInput<VariantContext>> comparisonFeatureInputs) {
return ofSelectedMinusExcluded(argumentCollection.annotationGroupsToUse,
argumentCollection.annotationsToUse,
argumentCollection.annotationsToExclude,
dbSNPInput, comparisonFeatureInputs);
}
private VariantOverlapAnnotator initializeOverlapAnnotator(final FeatureInput<VariantContext> dbSNPInput, final List<FeatureInput<VariantContext>> featureInputs) {
final Map<FeatureInput<VariantContext>, String> overlaps = new LinkedHashMap<>();
for ( final FeatureInput<VariantContext> fi : featureInputs) {
Expand All @@ -94,6 +115,7 @@ private VariantOverlapAnnotator initializeOverlapAnnotator(final FeatureInput<Va
return new VariantOverlapAnnotator(dbSNPInput, overlaps);
}


/**
* Returns the list of genotype annotations that will be applied.
* Note: The returned list is unmodifiable.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -172,19 +172,6 @@ public abstract class AssemblyBasedCallerArgumentCollection extends StandardCall

//Annotations


/**
* Which annotations to exclude from output in the VCF file. Note that this argument has higher priority than the
* -A or -G arguments, so these annotations will be excluded even if they are explicitly included with the other
* options. When HaplotypeCaller is run with -ERC GVCF or -ERC BP_RESOLUTION, some annotations are excluded from the
* output by default because they will only be meaningful once they have been recalculated by GenotypeGVCFs. As
* of version 3.3 this concerns ChromosomeCounts, FisherStrand, StrandOddsRatio and QualByDepth.
*
*/
@Advanced
@Argument(fullName = "excludeAnnotation", shortName = "XA", doc = "One or more specific annotations to exclude", optional = true)
public List<String> annotationsToExclude = new ArrayList<>();

@Advanced
@Argument(fullName = "smithWaterman", shortName = "smithWaterman", doc = "Which Smith-Waterman implementation to use, generally FASTEST_AVAILABLE is the right choice", optional = true)
public SmithWatermanAligner.Implementation smithWatermanImplementation = SmithWatermanAligner.Implementation.FASTEST_AVAILABLE;
Expand Down
Loading

0 comments on commit 999a1a7

Please sign in to comment.