Skip to content

Commit

Permalink
Sage: append can optionally only process variants with genes with config 'require_gene'
Browse files Browse the repository at this point in the history
  • Loading branch information
charlesshale committed May 24, 2023
1 parent c5524d1 commit e607b11
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,19 @@ else if(vcfSampleNames.get(i).equals(tumorId))

// returns the stored validity flag for this VCF file
public boolean fileValid()
{
    return mFileValid;
}

// closes the underlying reader if there is one; a failure to close is logged and not propagated
public void close()
{
    // nothing to release when no reader is held
    if(mReader == null)
        return;

    try
    {
        mReader.close();
    }
    catch(IOException e)
    {
        LOGGER.error("failed to close VCF({}): {}", mFilename, e.toString());
    }
}

// returns the VCF header from the underlying reader, or null when no reader is available
@Nullable
public VCFHeader vcfHeader()
{
    // close() treats mReader as possibly null, so guard here as well: this honours the @Nullable contract
    // instead of throwing a NullPointerException
    return mReader != null ? (VCFHeader)mReader.getHeader() : null;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
import com.hartwig.hmftools.common.utils.TaskExecutor;
import com.hartwig.hmftools.common.utils.sv.ChrBaseRegion;
import com.hartwig.hmftools.common.utils.version.VersionInfo;
import com.hartwig.hmftools.common.variant.VcfFileReader;
import com.hartwig.hmftools.common.variant.impact.VariantImpact;
import com.hartwig.hmftools.common.variant.impact.VariantImpactSerialiser;
import com.hartwig.hmftools.sage.SageConfig;
import com.hartwig.hmftools.sage.pipeline.ChromosomePartition;
import com.hartwig.hmftools.sage.quality.BaseQualityRecalibration;
Expand Down Expand Up @@ -52,9 +55,11 @@ public class SageAppendApplication
private final SageConfig mConfig;
private final String mInputVcf;
private final IndexedFastaSequenceFile mRefGenome;
private final boolean mFilterToGenes;

private static final double MIN_PRIOR_VERSION = 2.8;
private static final String INPUT_VCF = "input_vcf";
private static final String FILTER_TO_GENES = "require_gene";

public SageAppendApplication(final Options options, final String... args) throws ParseException, IOException
{
Expand All @@ -67,6 +72,7 @@ public SageAppendApplication(final Options options, final String... args) throws

mConfig = new SageConfig(true, version.version(), cmd);
mInputVcf = mConfig.SampleDataDir + cmd.getOptionValue(INPUT_VCF);
mFilterToGenes = cmd.hasOption(FILTER_TO_GENES);

mRefGenome = new IndexedFastaSequenceFile(new File(mConfig.RefGenomeFile));
}
Expand Down Expand Up @@ -96,43 +102,54 @@ public void run() throws IOException, ExecutionException, InterruptedException

long startTime = System.currentTimeMillis();

final List<VariantContext> existingVariants = Lists.newArrayList();
// mFilterToGenes

VCFHeader inputHeader = null;
VcfFileReader vcfFileReader = new VcfFileReader(mInputVcf);

try
if(!vcfFileReader.fileValid())
{
AbstractFeatureReader<VariantContext, LineIterator> vcfReader = AbstractFeatureReader.getFeatureReader(
mInputVcf, new VCFCodec(), false);
SG_LOGGER.error("invalid input VCF({})", mInputVcf);
System.exit(1);
}

inputHeader = (VCFHeader)vcfReader.getHeader();
if(!validateInputHeader(inputHeader))
{
System.exit(1);
}
VCFHeader inputHeader = vcfFileReader.vcfHeader();

existingVariants.addAll(verifyAndReadExisting(vcfReader));
vcfReader.close();
}
catch(Exception e)
if(!validateInputHeader(inputHeader))
{
SG_LOGGER.error("failed to read input VCF({})", mInputVcf, e.toString());
e.printStackTrace();
System.exit(1);
}

final List<VariantContext> existingVariants = Lists.newArrayList();

for(VariantContext variantContext : vcfFileReader.iterator())
{
VariantContext variant = variantContext.fullyDecode(inputHeader, false);

if(mFilterToGenes)
{
VariantImpact variantImpact = VariantImpactSerialiser.fromVariantContext(variant);

if(variantImpact == null || variantImpact.CanonicalGeneName.isEmpty())
continue;
}

existingVariants.add(variant);
}

vcfFileReader.close();

SG_LOGGER.info("loaded {} variants", existingVariants.size());

SG_LOGGER.info("writing to file: {}", mConfig.OutputFile);
final VariantVCF outputVCF = new VariantVCF(mRefGenome, mConfig, inputHeader);

if(existingVariants.isEmpty())
{
outputVCF.close();
SG_LOGGER.info("input VCF empty", existingVariants.size());
SG_LOGGER.info("writing empty output VCF", existingVariants.size());
return;
}

SG_LOGGER.info("loaded {} variants", existingVariants.size());

final SAMSequenceDictionary dictionary = dictionary();

BaseQualityRecalibration baseQualityRecalibration = new BaseQualityRecalibration(mConfig, mRefGenome);
Expand Down Expand Up @@ -193,20 +210,6 @@ public void run() throws IOException, ExecutionException, InterruptedException
SG_LOGGER.info("completed in {} seconds", String.format("%.1f",timeTaken / 1000.0));
}

// reads every record from the supplied reader, fully decodes each against the reader's header
// (second argument to fullyDecode is false) and returns them in iteration order
private List<VariantContext> verifyAndReadExisting(final AbstractFeatureReader<VariantContext, LineIterator> vcfReader) throws IOException
{
    final VCFHeader vcfHeader = (VCFHeader) vcfReader.getHeader();
    final List<VariantContext> decodedVariants = Lists.newArrayList();

    vcfReader.iterator().forEach(rawVariant -> decodedVariants.add(rawVariant.fullyDecode(vcfHeader, false)));

    return decodedVariants;
}

private boolean validateInputHeader(VCFHeader header)
{
double oldVersion = sageVersion(header);
Expand Down Expand Up @@ -280,6 +283,7 @@ public static Options createOptions()
final Options options = new Options();
SageConfig.commonOptions().getOptions().forEach(options::addOption);
options.addOption(INPUT_VCF, true, "Path to input vcf");
options.addOption(FILTER_TO_GENES, false, "Only process variants with gene annotations");
return options;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -333,10 +333,12 @@ public ReadMatchType processRead(

logReadEvidence(sampleId, record, match.toString(), readIndex);

/*
if(SG_LOGGER.isTraceEnabled() && sampleId != null)
{
qualityCalc.logReadQualCalcs(this, readIndex, record, adjustedNumOfEvents);
}
*/

countStrandedness(record);

Expand Down Expand Up @@ -421,12 +423,14 @@ else if(rawContext.AltSupport)
mShortened++;
}

/*
if(rawContext.RefSupport)
matchStr = "REF";
else if(rawContext.AltSupport)
matchStr = "ALT";
logReadEvidence(sampleId, record, matchStr, readIndex);
*/

return matchType;
}
Expand Down

0 comments on commit e607b11

Please sign in to comment.