From 6ffd0ad62ea180f4830b7f7404cd62ebdb99b64a Mon Sep 17 00:00:00 2001 From: Seung-been Lee Date: Tue, 1 Mar 2022 14:18:58 +0900 Subject: [PATCH 01/10] Bump up version number --- fuc/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fuc/version.py b/fuc/version.py index c3d10d7..2ef0c52 100644 --- a/fuc/version.py +++ b/fuc/version.py @@ -1 +1 @@ -__version__ = '0.31.0' +__version__ = '0.32.0' From 4d62b4ca1ed9b9fc60ffe0a0ea9cd7323a5a4ae3 Mon Sep 17 00:00:00 2001 From: "Seung-been \"Steven\" Lee" Date: Wed, 2 Mar 2022 11:15:31 +0900 Subject: [PATCH 02/10] Update CHANGELOG.rst --- CHANGELOG.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 45de339..cb5fbf8 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,9 @@ Changelog ********* +0.32.0 (in development) +----------------------- + 0.31.0 (2022-03-01) ------------------- From e5577bf9cf748862f9592a92b42a1eecfaa65d62 Mon Sep 17 00:00:00 2001 From: "Seung-been \"Steven\" Lee" Date: Fri, 4 Mar 2022 15:48:58 +0900 Subject: [PATCH 03/10] Update `pykallisto.KallistoFrame`: * Add optional argument ``filter_off`` for :class:`pykallisto.KallistoFrame` constructor, which is useful for generating a simple count or tpm matrix. --- fuc/api/pykallisto.py | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/fuc/api/pykallisto.py b/fuc/api/pykallisto.py index 1e8ed63..723fed6 100644 --- a/fuc/api/pykallisto.py +++ b/fuc/api/pykallisto.py @@ -28,10 +28,16 @@ class KallistoFrame: default, the :meth:`pykallisto.basic_filter` method will be used. filter_target_id : list, optional Transcripts to filter using methods that can't be implemented using - ``filter_fun``. If provided, this will override ``filter_fun``. + ``filter_func``. If provided, this will override ``filter_func``. + filter_off : bool, default: False + If True, do not apply any filtering. Useful for generating a simple + count or tpm matrix. """ - def _import_data(self, metadata, filter_func=None, filter_target_id=None): + def _import_data( + self, metadata, filter_func=None, filter_target_id=None, + filter_off=False + ): dfs = {} for i, r in metadata.iterrows(): df = pd.read_table(r['path'] + '/abundance.tsv', index_col=0) @@ -40,23 +46,29 @@ def _import_data(self, metadata, filter_func=None, filter_target_id=None): df_tx_count.columns = metadata.index df_tx_tpm = pd.concat([v['tpm'] for k, v in dfs.items()], axis=1) df_tx_tpm.columns = metadata.index - if filter_target_id is None: - if filter_func is None: - filter_func = basic_filter - filtered_ids = df_tx_count.apply(filter_func, axis=1) + + if filter_off: + filtered_ids = None else: - filtered_ids = pd.Series(df_tx_count.index.isin(filter_target_id), - index=df_tx_count.index) + if filter_target_id is None: + if filter_func is None: + filter_func = basic_filter + filtered_ids = df_tx_count.apply(filter_func, axis=1) + else: + filtered_ids = pd.Series(df_tx_count.index.isin(filter_target_id), + index=df_tx_count.index) + return df_tx_count, df_tx_tpm, filtered_ids def __init__( self, metadata, tx2gene, aggregation_column, filter_func=None, - filter_target_id=None + filter_target_id=None, filter_off=False ): self.metadata = metadata self.tx2gene = tx2gene self.aggregation_column = aggregation_column - results = self._import_data(metadata, filter_func, filter_target_id) + results = self._import_data(metadata, filter_func, + filter_target_id, filter_off) self.df_tx_count = results[0] self.df_tx_tpm = results[1] self.filtered_ids = results[2] @@ -80,7 +92,8 @@ def aggregate(self, filter=True): gene_s = self.tx2gene[self.aggregation_column] df = pd.concat([tx_df, gene_s], axis=1) if filter: - df = df[self.filtered_ids] + if self.filtered_ids is not None: + df = df[self.filtered_ids] df = df.groupby(['gene_symbol']).sum() setattr(self, f'df_gene_{unit}', df) @@ -131,7 +144,8 @@ def plot_differential_abundance( else: df = getattr(self, f'df_tx_{unit}') if filter: - df = df[self.filtered_ids] + if self.filtered_ids is not None: + df = df[self.filtered_ids] s = self.tx2gene[self.tx2gene[self.aggregation_column] == gene] df = df[df.index.isin(s.index.to_list())] if name != 'target_id': From eb4756ba90c537b57ebd2aa31e74b5e31973d9b2 Mon Sep 17 00:00:00 2001 From: "Seung-been \"Steven\" Lee" Date: Fri, 4 Mar 2022 16:26:07 +0900 Subject: [PATCH 04/10] Update `vcf-call` command: * Add new optional argument ``--dir-path`` to :command:`vcf-call` command for storing intermediate files. --- CHANGELOG.rst | 3 ++ docs/cli.rst | 7 +++++ fuc/api/pyvcf.py | 77 ++++++++++++++++++++++++++------------------- fuc/cli/vcf_call.py | 13 +++++++- 4 files changed, 67 insertions(+), 33 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index cb5fbf8..62085bf 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,6 +4,9 @@ Changelog 0.32.0 (in development) ----------------------- +* Add new optional argument ``--dir-path`` to :command:`vcf-call` command for storing intermediate files. +* Add optional argument ``filter_off`` for :class:`pykallisto.KallistoFrame` constructor, which is useful for generating a simple count or tpm matrix. + 0.31.0 (2022-03-01) ------------------- diff --git a/docs/cli.rst b/docs/cli.rst index e815334..576da8e 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -1227,6 +1227,7 @@ vcf-call $ fuc vcf-call -h usage: fuc vcf-call [-h] [-r TEXT [TEXT ...]] [--min-mq INT] [--max-depth INT] + [--dir-path PATH] fasta bams [bams ...] Call SNVs and indels from BAM files. @@ -1259,6 +1260,12 @@ vcf-call (default: 1). --max-depth INT At a position, read maximally this number of reads per input file (default: 250). + --dir-path PATH By default, intermediate files (likelihoods.bcf, + calls.bcf, and calls.normalized.bcf) will be stored + in a temporary directory, which is automatically + deleted after creating final VCF. If you provide a + directory path, intermediate files will be stored + there. [Example] Specify regions manually: $ fuc vcf-call ref.fa 1.bam 2.bam \ diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py index 1d4f1a3..8fdd2f0 100644 --- a/fuc/api/pyvcf.py +++ b/fuc/api/pyvcf.py @@ -216,7 +216,8 @@ } def call( - fasta, bams, regions=None, path=None, min_mq=1, max_depth=250 + fasta, bams, regions=None, path=None, min_mq=1, max_depth=250, + dir_path=None ): """ Call SNVs and indels from BAM files. @@ -249,6 +250,11 @@ def call( Minimum mapping quality for an alignment to be used. max_depth : int, default: 250 At a position, read maximally this number of reads per input file. + dir_path : str, optional + By default, intermediate files (likelihoods.bcf, calls.bcf, and + calls.normalized.bcf) will be stored in a temporary directory, which + is automatically deleted after creating final VCF. If you provide a + directory path, intermediate files will be stored there. Returns ------- @@ -281,41 +287,48 @@ def call( regions = common.sort_regions(regions) regions = [chr_prefix + x.replace('chr', '') for x in regions] - with tempfile.TemporaryDirectory() as t: - # Step 1: Get genotype likelihoods. - args = ['-Ou', '-a', 'AD'] - args += ['-q', str(min_mq)] - args += ['--max-depth', str(max_depth)] - args += ['-f', fasta] - if regions is not None: - args += ['-r', ','.join(regions)] - results = bcftools.mpileup(*(args + bams)) - with open(f'{t}/likelihoods.bcf', 'wb') as f: - f.write(results) + if dir_path is None: + temp_dir = tempfile.TemporaryDirectory() + else: + temp_dir = dir_path - # Step 2: Call variants. - args = [f'{t}/likelihoods.bcf', '-Oz', '-mv'] - results = bcftools.call(*args) - with open(f'{t}/calls.bcf', 'wb') as f: - f.write(results) + # Step 1: Get genotype likelihoods. + args = ['-Ou', '-a', 'AD'] + args += ['-q', str(min_mq)] + args += ['--max-depth', str(max_depth)] + args += ['-f', fasta] + if regions is not None: + args += ['-r', ','.join(regions)] + results = bcftools.mpileup(*(args + bams)) + with open(f'{temp_dir}/likelihoods.bcf', 'wb') as f: + f.write(results) + + # Step 2: Call variants. + args = [f'{temp_dir}/likelihoods.bcf', '-Oz', '-mv'] + results = bcftools.call(*args) + with open(f'{temp_dir}/calls.bcf', 'wb') as f: + f.write(results) + + # Step 3: Normalize indels. + args = [f'{temp_dir}/calls.bcf', '-Ob', '-f', fasta] + results = bcftools.norm(*args) + with open(f'{temp_dir}/calls.normalized.bcf', 'wb') as f: + f.write(results) + + # Step 4: Filter variant. + args = [f'{temp_dir}/calls.normalized.bcf', '-Ov', '--IndelGap', '5'] + results = bcftools.filter(*args) - # Step 3: Normalize indels. - args = [f'{t}/calls.bcf', '-Ob', '-f', fasta] - results = bcftools.norm(*args) - with open(f'{t}/calls.normalized.bcf', 'wb') as f: + if path is None: + return results + elif path == '-': + sys.stdout.write(results) + else: + with open(path, 'w') as f: f.write(results) - # Step 4: Filter variant. - args = [f'{t}/calls.normalized.bcf', '-Ov', '--IndelGap', '5'] - results = bcftools.filter(*args) - - if path is None: - return results - elif path == '-': - sys.stdout.write(results) - else: - with open(path, 'w') as f: - f.write(results) + if dir_path is None: + temp_dir.cleanup() def rescue_filtered_variants(vfs, format='GT'): """ diff --git a/fuc/cli/vcf_call.py b/fuc/cli/vcf_call.py index b200783..a47393a 100644 --- a/fuc/cli/vcf_call.py +++ b/fuc/cli/vcf_call.py @@ -78,9 +78,20 @@ def create_parser(subparsers): """At a position, read maximally this number of reads per input file (default: 250).""" ) + parser.add_argument( + '--dir-path', + metavar='PATH', + help= +"""By default, intermediate files (likelihoods.bcf, +calls.bcf, and calls.normalized.bcf) will be stored +in a temporary directory, which is automatically +deleted after creating final VCF. If you provide a +directory path, intermediate files will be stored +there.""" + ) def main(args): api.pyvcf.call( args.fasta, args.bams, regions=args.regions, path='-', - min_mq=args.min_mq, max_depth=args.max_depth + min_mq=args.min_mq, max_depth=args.max_depth, dir_path=args.dir_path ) From ded2cc9d35425f22e028d1ec5ffd05e85075f0fe Mon Sep 17 00:00:00 2001 From: "Seung-been \"Steven\" Lee" Date: Fri, 4 Mar 2022 17:03:44 +0900 Subject: [PATCH 05/10] Update `vcf-call` --- fuc/api/pyvcf.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py index 8fdd2f0..93811b7 100644 --- a/fuc/api/pyvcf.py +++ b/fuc/api/pyvcf.py @@ -288,7 +288,8 @@ def call( regions = [chr_prefix + x.replace('chr', '') for x in regions] if dir_path is None: - temp_dir = tempfile.TemporaryDirectory() + t = tempfile.TemporaryDirectory() + temp_dir = t.name else: temp_dir = dir_path @@ -328,7 +329,7 @@ def call( f.write(results) if dir_path is None: - temp_dir.cleanup() + t.cleanup() def rescue_filtered_variants(vfs, format='GT'): """ From a74f7b07ff5b831830884dc4565f456d2425e4ac Mon Sep 17 00:00:00 2001 From: "Seung-been \"Steven\" Lee" Date: Wed, 30 Mar 2022 13:24:56 +0900 Subject: [PATCH 06/10] Update `vcf-call` command: * Add new optional argument ``gap_frac`` to :command:`vcf-call` command so that users can control indel calling sensitivity. --- CHANGELOG.rst | 3 ++- docs/cli.rst | 4 +++- fuc/api/pyvcf.py | 5 ++++- fuc/cli/vcf_call.py | 12 +++++++++++- 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 62085bf..31e5c52 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -5,7 +5,8 @@ Changelog ----------------------- * Add new optional argument ``--dir-path`` to :command:`vcf-call` command for storing intermediate files. -* Add optional argument ``filter_off`` for :class:`pykallisto.KallistoFrame` constructor, which is useful for generating a simple count or tpm matrix. +* Add new optional argument ``filter_off`` for :class:`pykallisto.KallistoFrame` constructor, which is useful for generating a simple count or tpm matrix. +* Add new optional argument ``gap_frac`` to :command:`vcf-call` command so that users can control indel calling sensitivity. 0.31.0 (2022-03-01) ------------------- diff --git a/docs/cli.rst b/docs/cli.rst index 576da8e..f3403e7 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -1227,7 +1227,7 @@ vcf-call $ fuc vcf-call -h usage: fuc vcf-call [-h] [-r TEXT [TEXT ...]] [--min-mq INT] [--max-depth INT] - [--dir-path PATH] + [--dir-path PATH] [--gap_frac FLOAT] fasta bams [bams ...] Call SNVs and indels from BAM files. @@ -1266,6 +1266,8 @@ vcf-call deleted after creating final VCF. If you provide a directory path, intermediate files will be stored there. + --gap_frac FLOAT Minimum fraction of gapped reads for calling indels + (default: 0.002). [Example] Specify regions manually: $ fuc vcf-call ref.fa 1.bam 2.bam \ diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py index 93811b7..ab5c2da 100644 --- a/fuc/api/pyvcf.py +++ b/fuc/api/pyvcf.py @@ -217,7 +217,7 @@ def call( fasta, bams, regions=None, path=None, min_mq=1, max_depth=250, - dir_path=None + dir_path=None, gap_frac=0.002 ): """ Call SNVs and indels from BAM files. @@ -255,6 +255,8 @@ def call( calls.normalized.bcf) will be stored in a temporary directory, which is automatically deleted after creating final VCF. If you provide a directory path, intermediate files will be stored there. + gap_frac : float, default: 0.002 + Minimum fraction of gapped reads for calling indels. Returns ------- @@ -298,6 +300,7 @@ def call( args += ['-q', str(min_mq)] args += ['--max-depth', str(max_depth)] args += ['-f', fasta] + args += ['-F', str(gap_frac)] if regions is not None: args += ['-r', ','.join(regions)] results = bcftools.mpileup(*(args + bams)) diff --git a/fuc/cli/vcf_call.py b/fuc/cli/vcf_call.py index a47393a..6cc58fa 100644 --- a/fuc/cli/vcf_call.py +++ b/fuc/cli/vcf_call.py @@ -89,9 +89,19 @@ def create_parser(subparsers): directory path, intermediate files will be stored there.""" ) + parser.add_argument( + '--gap_frac', + metavar='FLOAT', + type=float, + default=0.002, + help= +"""Minimum fraction of gapped reads for calling indels +(default: 0.002).""" + ) def main(args): api.pyvcf.call( args.fasta, args.bams, regions=args.regions, path='-', - min_mq=args.min_mq, max_depth=args.max_depth, dir_path=args.dir_path + min_mq=args.min_mq, max_depth=args.max_depth, dir_path=args.dir_path, + gap_frac=args.gap_frac ) From 430a46f30123f3d8867538e94e50ce1a880edebc Mon Sep 17 00:00:00 2001 From: "Seung-been \"Steven\" Lee" Date: Wed, 30 Mar 2022 14:20:51 +0900 Subject: [PATCH 07/10] Fix bug in `pyvcf.call`: * Fix minor bug in :meth:`pyvcf.call` method when ``pybed.BedFrame`` object is given as ``regions``. --- CHANGELOG.rst | 5 +++-- fuc/api/pyvcf.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 31e5c52..f612dc2 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,9 +4,10 @@ Changelog 0.32.0 (in development) ----------------------- -* Add new optional argument ``--dir-path`` to :command:`vcf-call` command for storing intermediate files. * Add new optional argument ``filter_off`` for :class:`pykallisto.KallistoFrame` constructor, which is useful for generating a simple count or tpm matrix. -* Add new optional argument ``gap_frac`` to :command:`vcf-call` command so that users can control indel calling sensitivity. +* Add new optional argument ``--dir-path`` to :command:`vcf-call` command for storing intermediate files. +* Add new optional argument ``--gap_frac`` to :command:`vcf-call` command so that users can control indel calling sensitivity. +* Fix minor bug in :meth:`pyvcf.call` method when ``pybed.BedFrame`` object is given as ``regions``. 0.31.0 (2022-03-01) ------------------- diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py index ab5c2da..c2ba9e6 100644 --- a/fuc/api/pyvcf.py +++ b/fuc/api/pyvcf.py @@ -279,7 +279,7 @@ def call( elif isinstance(regions, list): pass elif isinstance(regions, pybed.BedFrame): - regions = bf.to_regions() + regions = regions.to_regions() else: raise TypeError("Incorrect type of argument 'regions'") if '.bed' in regions[0]: From 17fba04ae7642ae47d474699be0763a3cf9e0850 Mon Sep 17 00:00:00 2001 From: "Seung-been \"Steven\" Lee" Date: Thu, 31 Mar 2022 15:41:47 +0900 Subject: [PATCH 08/10] Update `vcf-call`: * Add new optional argument ``--group-samples`` to :command:`vcf-call` command so that users can group samples into populations and apply the HWE assumption within but not across the populations. --- CHANGELOG.rst | 1 + docs/cli.rst | 12 ++++++++++++ fuc/api/pyvcf.py | 13 ++++++++++++- fuc/cli/vcf_call.py | 18 +++++++++++++++++- 4 files changed, 42 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f612dc2..863c68d 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -7,6 +7,7 @@ Changelog * Add new optional argument ``filter_off`` for :class:`pykallisto.KallistoFrame` constructor, which is useful for generating a simple count or tpm matrix. * Add new optional argument ``--dir-path`` to :command:`vcf-call` command for storing intermediate files. * Add new optional argument ``--gap_frac`` to :command:`vcf-call` command so that users can control indel calling sensitivity. +* Add new optional argument ``--group-samples`` to :command:`vcf-call` command so that users can group samples into populations and apply the HWE assumption within but not across the populations. * Fix minor bug in :meth:`pyvcf.call` method when ``pybed.BedFrame`` object is given as ``regions``. 0.31.0 (2022-03-01) diff --git a/docs/cli.rst b/docs/cli.rst index f3403e7..734b9e3 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -1228,6 +1228,7 @@ vcf-call $ fuc vcf-call -h usage: fuc vcf-call [-h] [-r TEXT [TEXT ...]] [--min-mq INT] [--max-depth INT] [--dir-path PATH] [--gap_frac FLOAT] + [--group-samples PATH] fasta bams [bams ...] Call SNVs and indels from BAM files. @@ -1268,6 +1269,17 @@ vcf-call there. --gap_frac FLOAT Minimum fraction of gapped reads for calling indels (default: 0.002). + --group-samples PATH By default, all samples are assumed to come from a + single population. This option allows to group + samples into populations and apply the HWE assumption + within but not across the populations. To use this + option, provide a tab-delimited text file with sample + names in the first column and group names in the + second column. If '-' is given instead, no HWE + assumption is made at all and single-sample calling + is performed. Note that in low coverage data this + inflates the rate of false positives. Therefore, make + sure you know what you are doing. [Example] Specify regions manually: $ fuc vcf-call ref.fa 1.bam 2.bam \ diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py index c2ba9e6..f455855 100644 --- a/fuc/api/pyvcf.py +++ b/fuc/api/pyvcf.py @@ -217,7 +217,7 @@ def call( fasta, bams, regions=None, path=None, min_mq=1, max_depth=250, - dir_path=None, gap_frac=0.002 + dir_path=None, gap_frac=0.002, group_samples=None ): """ Call SNVs and indels from BAM files. @@ -257,6 +257,15 @@ def call( directory path, intermediate files will be stored there. gap_frac : float, default: 0.002 Minimum fraction of gapped reads for calling indels. + group_samples : str, optional + By default, all samples are assumed to come from a single population. + This option allows to group samples into populations and apply the + HWE assumption within but not across the populations. To use this + option, provide a tab-delimited text file with sample names in the + first column and group names in the second column. If '-' is given + instead, no HWE assumption is made at all and single-sample calling + is performed. Note that in low coverage data this inflates the rate + of false positives. Therefore, make sure you know what you are doing. Returns ------- @@ -309,6 +318,8 @@ def call( # Step 2: Call variants. args = [f'{temp_dir}/likelihoods.bcf', '-Oz', '-mv'] + if group_samples is not None: + args += ['-G', group_samples] results = bcftools.call(*args) with open(f'{temp_dir}/calls.bcf', 'wb') as f: f.write(results) diff --git a/fuc/cli/vcf_call.py b/fuc/cli/vcf_call.py index 6cc58fa..0d05fa5 100644 --- a/fuc/cli/vcf_call.py +++ b/fuc/cli/vcf_call.py @@ -98,10 +98,26 @@ def create_parser(subparsers): """Minimum fraction of gapped reads for calling indels (default: 0.002).""" ) + parser.add_argument( + '--group-samples', + metavar='PATH', + help= +"""By default, all samples are assumed to come from a +single population. This option allows to group +samples into populations and apply the HWE assumption +within but not across the populations. To use this +option, provide a tab-delimited text file with sample +names in the first column and group names in the +second column. If '-' is given instead, no HWE +assumption is made at all and single-sample calling +is performed. Note that in low coverage data this +inflates the rate of false positives. Therefore, make +sure you know what you are doing.""" + ) def main(args): api.pyvcf.call( args.fasta, args.bams, regions=args.regions, path='-', min_mq=args.min_mq, max_depth=args.max_depth, dir_path=args.dir_path, - gap_frac=args.gap_frac + gap_frac=args.gap_frac, group_samples=args.group_samples ) From f094e807c6bb165d603628d219cf09c1435642ba Mon Sep 17 00:00:00 2001 From: "Seung-been \"Steven\" Lee" Date: Fri, 1 Apr 2022 10:39:39 +0900 Subject: [PATCH 09/10] Update `vcf-call` to create uncompressed BCF as temp files --- fuc/api/pyvcf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py index f455855..662f8fd 100644 --- a/fuc/api/pyvcf.py +++ b/fuc/api/pyvcf.py @@ -317,7 +317,7 @@ def call( f.write(results) # Step 2: Call variants. - args = [f'{temp_dir}/likelihoods.bcf', '-Oz', '-mv'] + args = [f'{temp_dir}/likelihoods.bcf', '-Ou', '-mv'] if group_samples is not None: args += ['-G', group_samples] results = bcftools.call(*args) @@ -325,7 +325,7 @@ def call( f.write(results) # Step 3: Normalize indels. - args = [f'{temp_dir}/calls.bcf', '-Ob', '-f', fasta] + args = [f'{temp_dir}/calls.bcf', '-Ou', '-f', fasta] results = bcftools.norm(*args) with open(f'{temp_dir}/calls.normalized.bcf', 'wb') as f: f.write(results) From 8312f58b405a7d66f3233d3019ae8d5322fba250 Mon Sep 17 00:00:00 2001 From: Seung-been Lee Date: Sat, 2 Apr 2022 13:35:31 +0900 Subject: [PATCH 10/10] Update docs --- CHANGELOG.rst | 4 ++-- docs/cli.rst | 11 ++++++----- fuc/api/pyvcf.py | 9 +++++---- fuc/cli/vcf_call.py | 11 ++++++----- 4 files changed, 19 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 863c68d..fb7c24b 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,8 +1,8 @@ Changelog ********* -0.32.0 (in development) ------------------------ +0.32.0 (2022-04-02) +------------------- * Add new optional argument ``filter_off`` for :class:`pykallisto.KallistoFrame` constructor, which is useful for generating a simple count or tpm matrix. * Add new optional argument ``--dir-path`` to :command:`vcf-call` command for storing intermediate files. diff --git a/docs/cli.rst b/docs/cli.rst index 734b9e3..6f3ad11 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -1275,11 +1275,12 @@ vcf-call within but not across the populations. To use this option, provide a tab-delimited text file with sample names in the first column and group names in the - second column. If '-' is given instead, no HWE - assumption is made at all and single-sample calling - is performed. Note that in low coverage data this - inflates the rate of false positives. Therefore, make - sure you know what you are doing. + second column. If '--group-samples -' is given + instead, no HWE assumption is made at all and + single-sample calling is performed. Note that in low + coverage data this inflates the rate of false + positives. Therefore, make sure you know what you are + doing. [Example] Specify regions manually: $ fuc vcf-call ref.fa 1.bam 2.bam \ diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py index 662f8fd..341ce5e 100644 --- a/fuc/api/pyvcf.py +++ b/fuc/api/pyvcf.py @@ -262,10 +262,11 @@ def call( This option allows to group samples into populations and apply the HWE assumption within but not across the populations. To use this option, provide a tab-delimited text file with sample names in the - first column and group names in the second column. If '-' is given - instead, no HWE assumption is made at all and single-sample calling - is performed. Note that in low coverage data this inflates the rate - of false positives. Therefore, make sure you know what you are doing. + first column and group names in the second column. If + ``group_samples='-'`` is given instead, no HWE assumption is made at + all and single-sample calling is performed. Note that in low coverage + data this inflates the rate of false positives. Therefore, make sure + you know what you are doing. Returns ------- diff --git a/fuc/cli/vcf_call.py b/fuc/cli/vcf_call.py index 0d05fa5..b08a6e8 100644 --- a/fuc/cli/vcf_call.py +++ b/fuc/cli/vcf_call.py @@ -108,11 +108,12 @@ def create_parser(subparsers): within but not across the populations. To use this option, provide a tab-delimited text file with sample names in the first column and group names in the -second column. If '-' is given instead, no HWE -assumption is made at all and single-sample calling -is performed. Note that in low coverage data this -inflates the rate of false positives. Therefore, make -sure you know what you are doing.""" +second column. If '--group-samples -' is given +instead, no HWE assumption is made at all and +single-sample calling is performed. Note that in low +coverage data this inflates the rate of false +positives. Therefore, make sure you know what you are +doing.""" ) def main(args):