-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add annotation + viola + join checks + cleanup (#29)
* add join checks * fix delly mismatches * Added viola * fix header issues in single caller runs * header fix? * appease eclint * add stub to reverse_bed * fix for gridss? * appease eclint again! * typo * fix permissions * fix shebang * fix shebang again * disable gridss * fix tests * remove old code from gatk-sv * Add VEP to the pipeline * add some QOL parameters * update tests * QOL changes (variables and joins) * disable scattering for delly * remove unused gatk modules * update modules * made BED files optional * add metro map and update readme * forgot to save svg * fix linting * fix vep test * free up CI space? * use another cache for vep
- Loading branch information
Showing
93 changed files
with
2,356 additions
and
3,995 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,5 @@ | ||
sample,family,cram,crai,bed | ||
PosCon1,family1,s3://test-data/genomics/homo_sapiens/illumina/cram/SVcontrol/small/PosCon1.cram,s3://test-data/genomics/homo_sapiens/illumina/cram/SVcontrol/small/PosCon1.cram.crai,s3://test-data/genomics/homo_sapiens/illumina/regions/SVcontrol/PosCon1and2.roi.bed | ||
PosCon2,family1,s3://test-data/genomics/homo_sapiens/illumina/cram/SVcontrol/small/PosCon2.cram,s3://test-data/genomics/homo_sapiens/illumina/cram/SVcontrol/small/PosCon2.cram.crai,s3://test-data/genomics/homo_sapiens/illumina/regions/SVcontrol/PosCon1and2.roi.bed | ||
PosCon3,,s3://test-data/genomics/homo_sapiens/illumina/cram/SVcontrol/small/PosCon3.cram,s3://test-data/genomics/homo_sapiens/illumina/cram/SVcontrol/small/PosCon3.cram.crai,s3://test-data/genomics/homo_sapiens/illumina/regions/SVcontrol/PosCon3.roi.bed | ||
PosCon3,,s3://test-data/genomics/homo_sapiens/illumina/cram/SVcontrol/small/PosCon3.cram,s3://test-data/genomics/homo_sapiens/illumina/cram/SVcontrol/small/PosCon3.cram.crai, | ||
PosCon4,,s3://test-data/genomics/homo_sapiens/illumina/cram/SVcontrol/small/PosCon4.cram,s3://test-data/genomics/homo_sapiens/illumina/cram/SVcontrol/small/PosCon4.cram.crai,s3://test-data/genomics/homo_sapiens/illumina/regions/SVcontrol/PosCon4.roi.bed | ||
PosCon5,,s3://test-data/genomics/homo_sapiens/illumina/cram/SVcontrol/small/PosCon5.cram,s3://test-data/genomics/homo_sapiens/illumina/cram/SVcontrol/small/PosCon5.cram.crai,s3://test-data/genomics/homo_sapiens/illumina/regions/SVcontrol/PosCon5.roi.bed |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
#!/usr/local/bin/Rscript | ||
# Fetched from https://github.com/PapenfussLab/gridss/blob/master/example/simple-event-annotation.R | ||
# Although it's been slightly adjusted | ||
|
||
# if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") | ||
#BiocManager::install("StructuralVariantAnnotation") | ||
#install.packages("stringr") | ||
library(VariantAnnotation) | ||
library(StructuralVariantAnnotation) | ||
library(stringr) | ||
|
||
# Positional command-line arguments: <input_vcf> <output_vcf>.
# trailingOnly = TRUE keeps only the arguments supplied after the script name;
# without it the vector starts with the R executable and interpreter flags, so
# args[1] / args[2] would not be the VCF paths.
args <- commandArgs(trailingOnly = TRUE)
input_vcf <- args[1]
output_vcf <- args[2]
|
||
#' Classify breakends into simple structural variant types
#'
#' Each range is compared with its mate (as returned by `partner()`) and the
#' first matching rule wins, in this order: CTX, INV, INS, DEL, otherwise DUP.
#'
#' @param gr Breakpoint ranges carrying `insLen` and `svLen` columns.
#' @return The result of the nested `ifelse()` calls: one of "CTX", "INV",
#'   "INS", "DEL" or "DUP" for every range whose tests are not `NA`.
simpleEventType <- function(gr) {
  mate <- partner(gr)

  # Mates on different sequences: inter-chromosomal event
  different_chromosome <- seqnames(gr) != seqnames(mate)
  # Both breakends share the same strand
  same_strand <- strand(gr) == strand(mate)
  # Inserted sequence is at least 70% of the absolute event length
  # TODO: improve classification of complex events
  mostly_inserted <- gr$insLen >= abs(gr$svLen) * 0.7
  # Exactly one of "starts before its mate" / "is on the minus strand" holds
  deletion_like <- xor(start(gr) < start(mate), strand(gr) == "-")

  ifelse(different_chromosome, "CTX",
    ifelse(same_strand, "INV",
      ifelse(mostly_inserted, "INS",
        ifelse(deletion_like, "DEL", "DUP"))))
}
|
||
# Load the calls; "hg38" is the genome identifier handed to readVcf().
vcf <- readVcf(input_vcf, "hg38")

# Register the SIMPLE_TYPE INFO field in the header. The new row is appended to
# the existing INFO definitions and unique() drops rows that are exact
# duplicates.
simple_type_field <- data.frame(
  row.names = c("SIMPLE_TYPE"),
  Number = c("1"),
  Type = c("String"),
  Description = c("Simple event type annotation based purely on breakend position and orientation.")
)
info_fields <- rbind(as.data.frame(info(header(vcf))), simple_type_field)
info(header(vcf)) <- unique(as(info_fields, "DataFrame"))

# Classify every breakend that breakpointRanges() extracts from the VCF.
breakends <- breakpointRanges(vcf)
simple_types <- simpleEventType(breakends)
annotated_ids <- breakends$sourceId

# Default every record to NA, then fill in the records that were classified.
info(vcf)$SIMPLE_TYPE <- NA_character_
info(vcf[annotated_ids])$SIMPLE_TYPE <- simple_types
# NOTE(review): unlike SIMPLE_TYPE, SVLEN is neither initialised for all
# records nor declared in the header here - confirm the input provides it.
info(vcf[annotated_ids])$SVLEN <- breakends$svLen

writeVcf(vcf, output_vcf)
|
||
# # TODO: perform event filtering here | ||
# # By default, GRIDSS is very sensitive but this comes at the cost of a high false discovery rate | ||
# gr <- gr[gr$FILTER == "PASS" & partner(gr)$FILTER == "PASS"] # Remove low confidence calls | ||
|
||
# simplegr <- gr[simpleEventType(gr) %in% c("INS", "INV", "DEL", "DUP")] | ||
# simplebed <- data.frame( | ||
# chrom=seqnames(simplegr), | ||
# # call the centre of the homology/inexact interval | ||
# start=as.integer((start(simplegr) + end(simplegr)) / 2), | ||
# end=as.integer((start(partner(simplegr)) + end(partner(simplegr))) / 2), | ||
# name=simpleEventType(simplegr), | ||
# score=simplegr$QUAL, | ||
# strand="." | ||
# ) | ||
# # Just the lower of the two breakends so we don't output everything twice | ||
# simplebed <- simplebed[simplebed$start < simplebed$end,] | ||
# write.table(simplebed, "chr12.1527326.DEL1024.simple.bed", quote=FALSE, sep='\t', row.names=FALSE, col.names=FALSE) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#!/usr/local/bin/python | ||
|
||
import argparse | ||
import os | ||
|
||
import viola | ||
|
||
if __name__ == "__main__":
    # Command-line interface: four positional string arguments.
    parser = argparse.ArgumentParser(description="A script to standardize VCFs using Viola-SV")
    parser.add_argument('vcf', metavar='FILE', type=str, help="The called VCF")
    parser.add_argument('caller', metavar='STRING', type=str, help="The caller used to call the VCF")
    parser.add_argument('out_file', metavar='FILE', type=str, help="The standardized VCF")
    parser.add_argument('patient_name', metavar='STRING', type=str, help="The name of the patient in the VCF file")

    args = parser.parse_args()

    vcf = args.vcf
    caller = args.caller
    out_file = args.out_file
    patient_name = args.patient_name

    # viola is given "lumpy" as the variant caller for smoove input.
    if caller == "smoove":
        caller = "lumpy"

    if caller == "gridss":
        # Rewrite the GRIDSS VCF in place before parsing: keep the original as
        # "old_<name>" next to it, declare an SVLEN INFO field ahead of the
        # first ##INFO line and replace every "CIRPOS" with "CIEND".
        # Only the file name is prefixed, so a VCF given with a directory
        # component keeps its backup in that same directory.
        vcf_dir, vcf_name = os.path.split(vcf)
        old_vcf = os.path.join(vcf_dir, f'old_{vcf_name}')
        os.rename(vcf, old_vcf)

        svlen_not_added = True
        with open(old_vcf, 'r') as old, open(vcf, 'w') as new:
            # Stream line by line instead of loading the whole VCF in memory.
            for line in old:
                if line.startswith("##INFO") and svlen_not_added:
                    svlen_not_added = False
                    new.write("##INFO=<ID=SVLEN,Number=1,Type=Integer,Description=\"The length of the structural variant.\">\n")
                new.write(line.replace("CIRPOS", "CIEND"))

    # Parse the VCF, convert breakends to breakpoints and write the result.
    viola.read_vcf(vcf, variant_caller=caller, patient_name=patient_name).breakend2breakpoint().to_vcf(out_file)
Oops, something went wrong.