Skip to content

Commit

Permalink
Optional mpileup params, increase default max depth
Browse files Browse the repository at this point in the history
Set default mpileup parameters to:

--ignore-RG --min-MQ 1 --max-depth 250000 --max-idepth 250000

Add mpileup_parameters as an option. Update the example files as well, since
they are outdated. Not sure why the coverage goes down by 2: the bam file is
still the same and the code seems similar as well. Perhaps a change in mpileup version.
  • Loading branch information
inodb committed Dec 24, 2015
1 parent fd0d284 commit 3a25f23
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 19 deletions.
20 changes: 18 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,20 @@ Run
::

usage: sufam [-h] [--sample_name SAMPLE_NAME] [--format {matrix,sufam}]
[--mpileup-parameters MPILEUP_PARAMETERS] [--version]
reffa vcf bam

So U Found A Mutation? (SUFAM)

Found a mutation in one or more samples? Now you want to check if they are in
another sample. Unfortunately mutect, varscan or whatever other variant caller
is not calling them. Use SUFAM. The super sensitive validation caller that
calls everything on a given position. All you need is a vcf with the mutations
that you are interested in and the sam/bam file of the sample where you want to
find the same inconspicuous mutation.

Author: inodb

positional arguments:
reffa Reference genome (fasta)
vcf VCF with mutations to be validated
Expand All @@ -35,9 +47,13 @@ Run
optional arguments:
-h, --help show this help message and exit
--sample_name SAMPLE_NAME
Set name of sample, used in output.
Set name of sample, used in output [name of bam].
--format {matrix,sufam}
Set output format
Set output format [sufam]
--mpileup-parameters MPILEUP_PARAMETERS
Set options for mpileup [--ignore-RG --min-MQ 1 --max-
depth 250000 --max-idepth 250000]
--version show program's version number and exit

Example
~~~~~~~
Expand Down
6 changes: 2 additions & 4 deletions example/sufam.log
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
Running:
samtools view -bh tests/test_data/subset1.bam 17:7574012-7574012 | samtools mpileup -R -q 1 -f tests/test_data/human_g1k_v37_chr17.fa - | grep -P '^17\t7574012\t'
samtools view -bh tests/test_data/subset1.bam 17:7574012-7574012 | samtools mpileup --ignore-RG --min-MQ 1 --max-depth 250000 --max-idepth 250000 -f tests/test_data/human_g1k_v37_chr17.fa - | awk '$2 == 7574012'
[mpileup] 1 samples in 1 input files
<mpileup> Set max per-file depth to 8000
Running:
samtools view -bh tests/test_data/subset1.bam 17:7574012-7574012 | samtools mpileup -R -q 1 -f tests/test_data/human_g1k_v37_chr17.fa - | grep -P '^17\t7574012\t'
samtools view -bh tests/test_data/subset1.bam 17:7574012-7574012 | samtools mpileup --ignore-RG --min-MQ 1 --max-depth 250000 --max-idepth 250000 -f tests/test_data/human_g1k_v37_chr17.fa - | awk '$2 == 7574012'
[mpileup] 1 samples in 1 input files
<mpileup> Set max per-file depth to 8000
4 changes: 2 additions & 2 deletions example/sufam.tsv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
sample chrom pos ref cov A C G T * - + val_ref val_alt val_al_type val_al_count val_maf most_common_indel most_common_indel_count most_common_indel_maf most_common_indel_type most_common_al most_common_al_count most_common_al_maf most_common_count most_common_maf
tests/test_data/subset1.bam 17 7574012 C 675 225 450 0 0 0 nan nan C G snv 0 0.0 nan nan nan nan A 225 0.333333333333 225 0.333333333333
tests/test_data/subset1.bam 17 7574012 C 675 225 450 0 0 0 nan nan C A snv 225 0.333333333333 nan nan nan nan A 225 0.333333333333 225 0.333333333333
tests/test_data/subset1.bam 17 7574012 C 673 223 450 0 0 0 nan nan C G snv 0 0.0 nan nan nan nan A 223 0.331352154532 223 0.331352154532
tests/test_data/subset1.bam 17 7574012 C 673 223 450 0 0 0 nan nan C A snv 223 0.331352154532 nan nan nan nan A 223 0.331352154532 223 0.331352154532
15 changes: 10 additions & 5 deletions sufam/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,8 @@ def _write_bp(outfile, bp, header, output_format):
raise(Exception("Unrecognized output format"))


def validate_mutations(vcffile, bam, reffa, sample, output_format, outfile):
def validate_mutations(vcffile, bam, reffa, sample, output_format, outfile,
mpileup_parameters=mpileup_parser.MPILEUP_DEFAULT_PARAMS):
"""Check if mutations in vcf are in bam"""
header = []
output_header = "sample chrom pos ref cov A C G T * - + " \
Expand Down Expand Up @@ -209,7 +210,7 @@ def validate_mutations(vcffile, bam, reffa, sample, output_format, outfile):
"cov": 0, "A": 0, "C": 0, "G": 0, "T": 0,
"val_ref": record["REF"], "val_alt": record["ALT"],
"val_al_type": record_type, "val_al_count": 0, "val_maf": 0})
bp_lines = mpileup_parser.run_and_parse(bam, record["CHROM"], record["POS"], record["POS"], reffa)
bp_lines = mpileup_parser.run_and_parse(bam, record["CHROM"], record["POS"], record["POS"], reffa, mpileup_parameters)
bpdf = get_baseparser_extended_df(sample, bp_lines, record["REF"], record["ALT"])
if bpdf is None:
bp = no_cov
Expand All @@ -225,13 +226,17 @@ def main():
parser.add_argument("vcf", type=str, help="VCF with mutations to be validated")
parser.add_argument("bam", type=str, help="BAM to find mutations in")
parser.add_argument("--sample_name", type=str, default=None, help="Set name "
"of sample, used in output.")
parser.add_argument("--format", type=str, choices=["matrix", "sufam"], default="sufam", help="Set output format")
"of sample, used in output [name of bam].")
parser.add_argument("--format", type=str, choices=["matrix", "sufam"], default="sufam",
help="Set output format [sufam]")
parser.add_argument("--mpileup-parameters", type=str, default=mpileup_parser.MPILEUP_DEFAULT_PARAMS,
help="Set options for mpileup [{}]".format(mpileup_parser.MPILEUP_DEFAULT_PARAMS))
parser.add_argument("--version", action='version', version=sufam.__version__)
args = parser.parse_args()
if args.sample_name is None:
args.sample_name = args.bam
validate_mutations(args.vcf, args.bam, args.reffa, args.sample_name, args.format, sys.stdout)
validate_mutations(args.vcf, args.bam, args.reffa, args.sample_name,
args.format, sys.stdout, mpileup_parameters=args.mpileup_parameters)


if __name__ == "__main__":
Expand Down
16 changes: 10 additions & 6 deletions sufam/mpileup_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from sufam.mutation import Mutation, MutationsAtSinglePosition
from collections import Counter

MPILEUP_DEFAULT_PARAMS = '--ignore-RG --min-MQ 1 --max-depth 250000 --max-idepth 250000'


class ParseString(object):

Expand Down Expand Up @@ -92,12 +94,14 @@ def parse(line):
return '\t'.join([toks[0], toks[1], ref, cov]) + '\t' + str(ParseString(ref, toks[4]))


def run(bam, chrom, pos1, pos2, reffa):
def run(bam, chrom, pos1, pos2, reffa, parameters):
"""Run mpileup on given chrom and pos"""
posmin = min(pos1, pos2)
posmax = max(pos1, pos2)
cmd = "samtools view -bh {bam} {chrom}:{pos1}-{pos2} " \
"| samtools mpileup -R -q 1 -f {reffa} -".format(bam=bam, chrom=chrom, pos1=posmin, pos2=posmax, reffa=reffa)
"| samtools mpileup {parameters} -f {reffa} -".format(bam=bam, chrom=chrom,
pos1=posmin, pos2=posmax,
reffa=reffa, parameters=parameters)
if pos1 == pos2:
cmd += " | awk '$2 == {pos}'".format(pos=pos1)
else:
Expand All @@ -116,12 +120,12 @@ def run(bam, chrom, pos1, pos2, reffa):
return stdout


def run_and_parse(bam, chrom, pos1, pos2, reffa):
return [parse(line) for line in run(bam, chrom, pos1, pos2, reffa).split("\n")[:-1]]
def run_and_parse(bam, chrom, pos1, pos2, reffa, mpileup_parameters=MPILEUP_DEFAULT_PARAMS):
return [parse(line) for line in run(bam, chrom, pos1, pos2, reffa, mpileup_parameters).split("\n")[:-1]]


def run_and_get_mutations(bam, chrom, pos1, pos2, reffa):
return [get_mutations(line) for line in run(bam, chrom, pos1, pos2, reffa).split("\n")[:-1]]
def run_and_get_mutations(bam, chrom, pos1, pos2, reffa, mpileup_parameters=MPILEUP_DEFAULT_PARAMS):
return [get_mutations(line) for line in run(bam, chrom, pos1, pos2, reffa, mpileup_parameters).split("\n")[:-1]]


def main():
Expand Down

0 comments on commit 3a25f23

Please sign in to comment.