Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve docs re chimeric/unmapped/unpaired read pairs #629

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 11 additions & 9 deletions tests/count_help
Original file line number Diff line number Diff line change
Expand Up @@ -71,15 +71,6 @@ Options:
--mapping-quality=MAPPING_QUALITY
Minimum mapping quality for a read to be retained
[default=0]
--unmapped-reads=UNMAPPED_READS
How to handle unmapped reads. Options are 'discard',
'use' or 'correct' [default=discard]
--chimeric-pairs=CHIMERIC_PAIRS
How to handle chimeric read pairs. Options are
'discard', 'use' or 'correct' [default=use]
--unpaired-reads=UNPAIRED_READS
How to handle unpaired reads. Options are 'discard',
'use' or 'correct' [default=use]
--ignore-umi Ignore UMI and dedup only on position
--ignore-tlen Option to dedup paired end reads based solely on
read1, whether or not the template length is the same
Expand All @@ -90,6 +81,17 @@ Options:
-o, --out-sam Output alignments in sam format [default=False]
--no-sort-output Don't Sort the output

Dedup and Count SAM/BAM options:
--unmapped-reads=UNMAPPED_READS
How to handle unmapped reads. Options are 'discard' or
'use' [default=discard]
--chimeric-pairs=CHIMERIC_PAIRS
How to handle chimeric read pairs. Options are
'discard' or 'use' [default=use]
--unpaired-reads=UNPAIRED_READS
How to handle unpaired reads. Options are 'discard' or
'use' [default=use]

input/output options:
-I FILE, --stdin=FILE
file to read stdin from [default = stdin].
Expand Down
20 changes: 11 additions & 9 deletions tests/dedup_help
Original file line number Diff line number Diff line change
Expand Up @@ -81,15 +81,6 @@ Options:
--mapping-quality=MAPPING_QUALITY
Minimum mapping quality for a read to be retained
[default=0]
--unmapped-reads=UNMAPPED_READS
How to handle unmapped reads. Options are 'discard',
'use' or 'correct' [default=discard]
--chimeric-pairs=CHIMERIC_PAIRS
How to handle chimeric read pairs. Options are
'discard', 'use' or 'correct' [default=use]
--unpaired-reads=UNPAIRED_READS
How to handle unpaired reads. Options are 'discard',
'use' or 'correct' [default=use]
--ignore-umi Ignore UMI and dedup only on position
--ignore-tlen Option to dedup paired end reads based solely on
read1, whether or not the template length is the same
Expand All @@ -100,6 +91,17 @@ Options:
-o, --out-sam Output alignments in sam format [default=False]
--no-sort-output Don't Sort the output

Dedup and Count SAM/BAM options:
--unmapped-reads=UNMAPPED_READS
How to handle unmapped reads. Options are 'discard' or
'use' [default=discard]
--chimeric-pairs=CHIMERIC_PAIRS
How to handle chimeric read pairs. Options are
'discard' or 'use' [default=use]
--unpaired-reads=UNPAIRED_READS
How to handle unpaired reads. Options are 'discard' or
'use' [default=use]

input/output options:
-I FILE, --stdin=FILE
file to read stdin from [default = stdin].
Expand Down
20 changes: 11 additions & 9 deletions tests/group_help
Original file line number Diff line number Diff line change
Expand Up @@ -84,15 +84,6 @@ Options:
--mapping-quality=MAPPING_QUALITY
Minimum mapping quality for a read to be retained
[default=0]
--unmapped-reads=UNMAPPED_READS
How to handle unmapped reads. Options are 'discard',
'use' or 'correct' [default=discard]
--chimeric-pairs=CHIMERIC_PAIRS
How to handle chimeric read pairs. Options are
'discard', 'use' or 'correct' [default=use]
--unpaired-reads=UNPAIRED_READS
How to handle unpaired reads. Options are 'discard',
'use' or 'correct' [default=use]
--ignore-umi Ignore UMI and dedup only on position
--ignore-tlen Option to dedup paired end reads based solely on
read1, whether or not the template length is the same
Expand All @@ -103,6 +94,17 @@ Options:
-o, --out-sam Output alignments in sam format [default=False]
--no-sort-output Don't Sort the output

Group SAM/BAM options:
--unmapped-reads=UNMAPPED_READS
How to handle unmapped reads. Options are 'discard',
'use' or 'output' [default=discard]
--chimeric-pairs=CHIMERIC_PAIRS
How to handle chimeric read pairs. Options are
'discard', 'use' or 'output' [default=use]
--unpaired-reads=UNPAIRED_READS
How to handle unpaired reads. Options are 'discard',
'use' or 'output' [default=use]

input/output options:
-I FILE, --stdin=FILE
file to read stdin from [default = stdin].
Expand Down
77 changes: 53 additions & 24 deletions umi_tools/Utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,8 +258,6 @@ class method (:func:`cachedmethod`) calls.
import regex
from umi_tools import __version__

from builtins import bytes, chr


class DefaultOptions:
stdlog = sys.stdout
Expand Down Expand Up @@ -599,6 +597,8 @@ def Start(parser=None,
add_extract_options=False,
add_group_dedup_options=True,
add_sam_options=True,
add_dedup_count_sam_options=False,
add_group_sam_options=False,
add_umi_grouping_options=True,
return_parser=False):
"""set up an experiment.
Expand Down Expand Up @@ -885,27 +885,6 @@ def Start(parser=None,
group.add_option("--output-unmapped", dest="output_unmapped", action="store_true",
default=False, help=optparse.SUPPRESS_HELP)

group.add_option("--unmapped-reads", dest="unmapped_reads",
type="choice",
choices=("discard", "use", "output"),
default="discard",
help=("How to handle unmapped reads. Options are "
"'discard', 'use' or 'correct' [default=%default]"))

group.add_option("--chimeric-pairs", dest="chimeric_pairs",
type="choice",
choices=("discard", "use", "output"),
default="use",
help=("How to handle chimeric read pairs. Options are "
"'discard', 'use' or 'correct' [default=%default]"))

group.add_option("--unpaired-reads", dest="unpaired_reads",
type="choice",
choices=("discard", "use", "output"),
default="use",
help=("How to handle unpaired reads. Options are "
"'discard', 'use' or 'correct' [default=%default]"))

group.add_option("--ignore-umi", dest="ignore_umi",
action="store_true", help="Ignore UMI and dedup"
" only on position", default=False)
Expand Down Expand Up @@ -941,6 +920,56 @@ def Start(parser=None,

parser.add_option_group(group)

if add_dedup_count_sam_options:
group = OptionGroup(parser, "Dedup and Count SAM/BAM options")

group.add_option("--unmapped-reads", dest="unmapped_reads",
type="choice",
choices=("discard", "use"),
default="discard",
help=("How to handle unmapped reads. Options are "
"'discard' or 'use' [default=%default]"))

group.add_option("--chimeric-pairs", dest="chimeric_pairs",
type="choice",
choices=("discard", "use"),
default="use",
help=("How to handle chimeric read pairs. Options are "
"'discard' or 'use' [default=%default]"))

group.add_option("--unpaired-reads", dest="unpaired_reads",
type="choice",
choices=("discard", "use"),
default="use",
help=("How to handle unpaired reads. Options are "
"'discard' or 'use' [default=%default]"))
parser.add_option_group(group)

if add_group_sam_options:
group = OptionGroup(parser, "Group SAM/BAM options")

group.add_option("--unmapped-reads", dest="unmapped_reads",
type="choice",
choices=("discard", "use", "output"),
default="discard",
help=("How to handle unmapped reads. Options are "
"'discard', 'use' or 'output' [default=%default]"))

group.add_option("--chimeric-pairs", dest="chimeric_pairs",
type="choice",
choices=("discard", "use", "output"),
default="use",
help=("How to handle chimeric read pairs. Options are "
"'discard', 'use' or 'output' [default=%default]"))

group.add_option("--unpaired-reads", dest="unpaired_reads",
type="choice",
choices=("discard", "use", "output"),
default="use",
help=("How to handle unpaired reads. Options are "
"'discard', 'use' or 'output' [default=%default]"))
parser.add_option_group(group)

if add_pipe_options:
group = OptionGroup(parser, "input/output options")
group.add_option("-I", "--stdin", dest="stdin", type="string",
Expand Down Expand Up @@ -1178,7 +1207,7 @@ def validateExtractOptions(options):
"(starting with 'umi_') %s, %s" % (
options.pattern, options.pattern2))

return(extract_cell, extract_umi)
return (extract_cell, extract_umi)


def validateSamOptions(options, group=False):
Expand Down
2 changes: 1 addition & 1 deletion umi_tools/count.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def main(argv=None):
parser.add_option_group(group)

# add common options (-h/--help, ...) and parse command line
(options, args) = U.Start(parser, argv=argv, add_group_dedup_options=False)
(options, args) = U.Start(parser, argv=argv, add_group_dedup_options=False, add_dedup_count_sam_options=True)

options.per_gene = True # hardcodes counting to per-gene only

Expand Down
2 changes: 1 addition & 1 deletion umi_tools/dedup.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ def main(argv=None):
parser.add_option_group(group)

# add common options (-h/--help, ...) and parse command line
(options, args) = U.Start(parser, argv=argv)
(options, args) = U.Start(parser, argv=argv, add_dedup_count_sam_options=True)

U.validateSamOptions(options, group=False)

Expand Down
2 changes: 1 addition & 1 deletion umi_tools/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def main(argv=None):
parser.add_option_group(group)

# add common options (-h/--help, ...) and parse command line
(options, args) = U.Start(parser, argv=argv)
(options, args) = U.Start(parser, argv=argv, add_group_sam_options=True)

U.validateSamOptions(options, group=True)

Expand Down
Loading