Skip to content

Commit

Permalink
removing indels first from vcf to avoid issues with phASER then proce…
Browse files Browse the repository at this point in the history
…ss VCF as before
  • Loading branch information
ChristopheLegendre committed Jan 26, 2023
1 parent 8ed9a05 commit d5fdf66
Showing 1 changed file with 19 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ echo -e "USAGE: $0 \$VCF.gz \$TBAM \$SNAME_T \$CPUS \n1) compressed_VCF\n2) BAM
}

## checkings section
## Exactly four positional arguments are required; otherwise print the usage
## text and abort (bash reports `exit -1` as status 255).
if [[ $# -ne 4 ]] ; then usage ; exit -1 ; fi
if [[ $# -ne 4 ]] ; then usage ; exit -1 ; fi
## Run checkFile on the helper script and both input files, and
## check_exe_in_path on every external tool used (both helpers are defined
## outside this section).
for F in ${SCRIPT_GET_CONSPOS} ${VCF} ${TBAM} ; do checkFile ${F} ; done
for EXE in samtools bcftools phaser.py ; do check_exe_in_path ${EXE} ; done
## If the requested CPU count reaches MAX_CPUS_IN_CPUINFO, lower it to one less
## than that maximum.
if [[ ${CPUS} -ge ${MAX_CPUS_IN_CPUINFO} ]] ; then CPUS=$((${MAX_CPUS_IN_CPUINFO}-1)) ; fi
Expand Down Expand Up @@ -86,6 +86,22 @@ VCF=${VCF_ORIGINAL_INPUT}

if [[ 1 == 1 ]] ;then

## Split the input VCF into indel and non-indel records before phasing.
## phASER excludes indels from phasing anyway, and leaving them in triggered
## the edge case described in GitHub issue #27. The indel-only file is kept in
## VCF_INDELS_ONLY so it can be merged back by the final bcftools concat.
echo -e "Keeping out the Indels as phASER exclude them from phasing anyway" 1>&2

## Derived file names, computed once (same substitution pattern as before).
VCF_INDELS_ONLY="${VCF/vcf.gz/indels.vcf.gz}"
VCF_NO_INDELS="${VCF/vcf.gz/noindels.vcf.gz}"

## 1) Indel-only records (-i keeps the sites matching the expression).
bcftools filter -O z -i 'TYPE="indel"' -o "${VCF_INDELS_ONLY}" "${VCF}"
check_ev $? "bcftools filter keep indels only"
bcftools index --tbi "${VCF_INDELS_ONLY}"
check_ev $? "bcftools index indels calls"

## 2) Everything except indels (-e drops the sites matching the expression);
##    this is the VCF the rest of the pipeline continues with.
bcftools filter -O z -e 'TYPE="indel"' -o "${VCF_NO_INDELS}" "${VCF}"
check_ev $? "bcftools filter out indels"
bcftools index --tbi "${VCF_NO_INDELS}"
check_ev $? "bcftools index no-indels calls"

## 3) Back up the untouched input, then replace it in place with the
##    indel-free version. Both copies are now checked, since a silent failure
##    here would let the pipeline continue on the wrong content.
## NOTE(review): the backup keeps a ".vcf" suffix although the bytes are still
## the compressed input; re-running on the same path would overwrite the backup
## with an already-filtered file; and any pre-existing index of ${VCF} no longer
## matches the replaced content -- confirm downstream steps re-index it.
cp "${VCF}" "${VCF}.original_input.vcf"
check_ev $? "cp backup of original input VCF"
cp "${VCF_NO_INDELS}" "${VCF}"
check_ev $? "cp no-indels VCF over input VCF"

## Invoke the SCRIPT_GET_CONSPOS helper with the VCF path as its only argument;
## its exit status is handed to check_ev, labelled with the script's basename.
echo -e "get consecutive positions ... as tabulated text file for bcftools ..."
python3 ${SCRIPT_GET_CONSPOS} ${VCF}
check_ev $? "$(basename ${SCRIPT_GET_CONSPOS})"
Expand Down Expand Up @@ -269,7 +285,7 @@ VCF_IN_UNPHASED=${VCF_ORIGINAL_INPUT/vcf.gz/TempNoConsPos.vcf.gz}
## Remember the current VCF_OUT (presumably the phased VCF produced earlier)
## before VCF_OUT is re-pointed to the final output name below.
VCF_IN_PHASED=${VCF_OUT}
## The final output is an uncompressed VCF (-O v), named <input>.blocs.vcf.
VCF_OUT=${VCF_ORIGINAL_INPUT/.vcf.gz/.blocs.vcf}
## This first assignment is immediately overwritten by the next line, which
## additionally feeds the indel-only VCF into the concat.
mycmd="bcftools concat -a -O v ${VCF_IN_UNPHASED} ${VCF_IN_PHASED} | bcftools sort -O v -T ${PWD} --max-mem 2G - > ${VCF_OUT}"
## Concatenate the indel-only, unphased and phased VCFs (-a), then sort the
## stream using the current directory for temporary files and at most 2G of
## memory, writing the result to VCF_OUT.
mycmd="bcftools concat -a -O v ${VCF_INDELS_ONLY} ${VCF_IN_UNPHASED} ${VCF_IN_PHASED} | bcftools sort -O v -T ${PWD} --max-mem 2G - > ${VCF_OUT}"
## Log the command, then run it through eval so that the pipe and the
## redirection contained in the string take effect.
echo ${mycmd}
eval ${mycmd}
## NOTE(review): unless pipefail is enabled elsewhere in the script, $? here is
## the status of the last pipeline stage (bcftools sort) only, so a failing
## bcftools concat may go unnoticed -- confirm.
check_ev $? "bcftools concat ${VCF_OUT}"
Expand All @@ -282,4 +298,4 @@ touch Completed_Recomposition_Strelka2_for_VCF_$(basename ${VCF_ORIGINAL_INPUT})

## Print the path of the final VCF on stdout.
## NOTE(review): `2>&1` only folds echo's own stderr into stdout; if the intent
## was to print the path on stderr it would have to be `1>&2` -- confirm.
echo "${VCF_OUT}" 2>&1

## Normal termination; the second exit is never reached.
exit 0
exit 0

0 comments on commit d5fdf66

Please sign in to comment.