From 6ffd0ad62ea180f4830b7f7404cd62ebdb99b64a Mon Sep 17 00:00:00 2001
From: Seung-been Lee <sbstevenlee@gmail.com>
Date: Tue, 1 Mar 2022 14:18:58 +0900
Subject: [PATCH 01/10] Bump up version number

---
 fuc/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fuc/version.py b/fuc/version.py
index c3d10d7..2ef0c52 100644
--- a/fuc/version.py
+++ b/fuc/version.py
@@ -1 +1 @@
-__version__ = '0.31.0'
+__version__ = '0.32.0'

From 4d62b4ca1ed9b9fc60ffe0a0ea9cd7323a5a4ae3 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Wed, 2 Mar 2022 11:15:31 +0900
Subject: [PATCH 02/10] Update CHANGELOG.rst

---
 CHANGELOG.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 45de339..cb5fbf8 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,6 +1,9 @@
 Changelog
 *********
 
+0.32.0 (in development)
+-----------------------
+
 0.31.0 (2022-03-01)
 -------------------
 

From e5577bf9cf748862f9592a92b42a1eecfaa65d62 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Fri, 4 Mar 2022 15:48:58 +0900
Subject: [PATCH 03/10] Update `pykallisto.KallistoFrame`:

* Add optional argument ``filter_off`` for
:class:`pykallisto.KallistoFrame` constructor, which is useful for
generating a simple count or tpm matrix.
---
 fuc/api/pykallisto.py | 38 ++++++++++++++++++++++++++------------
 1 file changed, 26 insertions(+), 12 deletions(-)

diff --git a/fuc/api/pykallisto.py b/fuc/api/pykallisto.py
index 1e8ed63..723fed6 100644
--- a/fuc/api/pykallisto.py
+++ b/fuc/api/pykallisto.py
@@ -28,10 +28,16 @@ class KallistoFrame:
         default, the :meth:`pykallisto.basic_filter` method will be used.
     filter_target_id : list, optional
         Transcripts to filter using methods that can't be implemented using
-        ``filter_fun``. If provided, this will override ``filter_fun``.
+        ``filter_func``. If provided, this will override ``filter_func``.
+    filter_off : bool, default: False
+        If True, do not apply any filtering. Useful for generating a simple
+        count or tpm matrix.
     """
 
-    def _import_data(self, metadata, filter_func=None, filter_target_id=None):
+    def _import_data(
+        self, metadata, filter_func=None, filter_target_id=None,
+        filter_off=False
+    ):
         dfs = {}
         for i, r in metadata.iterrows():
             df = pd.read_table(r['path'] + '/abundance.tsv', index_col=0)
@@ -40,23 +46,29 @@ def _import_data(self, metadata, filter_func=None, filter_target_id=None):
         df_tx_count.columns = metadata.index
         df_tx_tpm = pd.concat([v['tpm'] for k, v in dfs.items()], axis=1)
         df_tx_tpm.columns = metadata.index
-        if filter_target_id is None:
-            if filter_func is None:
-                filter_func = basic_filter
-            filtered_ids = df_tx_count.apply(filter_func, axis=1)
+
+        if filter_off:
+            filtered_ids = None
         else:
-            filtered_ids = pd.Series(df_tx_count.index.isin(filter_target_id),
-                index=df_tx_count.index)
+            if filter_target_id is None:
+                if filter_func is None:
+                    filter_func = basic_filter
+                filtered_ids = df_tx_count.apply(filter_func, axis=1)
+            else:
+                filtered_ids = pd.Series(df_tx_count.index.isin(filter_target_id),
+                    index=df_tx_count.index)
+
         return df_tx_count, df_tx_tpm, filtered_ids
 
     def __init__(
         self, metadata, tx2gene, aggregation_column, filter_func=None,
-        filter_target_id=None
+        filter_target_id=None, filter_off=False
     ):
         self.metadata = metadata
         self.tx2gene = tx2gene
         self.aggregation_column = aggregation_column
-        results = self._import_data(metadata, filter_func, filter_target_id)
+        results = self._import_data(metadata, filter_func,
+            filter_target_id, filter_off)
         self.df_tx_count = results[0]
         self.df_tx_tpm = results[1]
         self.filtered_ids = results[2]
@@ -80,7 +92,8 @@ def aggregate(self, filter=True):
             gene_s = self.tx2gene[self.aggregation_column]
             df = pd.concat([tx_df, gene_s], axis=1)
             if filter:
-                df = df[self.filtered_ids]
+                if self.filtered_ids is not None:
+                    df = df[self.filtered_ids]
             df = df.groupby(['gene_symbol']).sum()
             setattr(self, f'df_gene_{unit}', df)
 
@@ -131,7 +144,8 @@ def plot_differential_abundance(
         else:
             df = getattr(self, f'df_tx_{unit}')
             if filter:
-                df = df[self.filtered_ids]
+                if self.filtered_ids is not None:
+                    df = df[self.filtered_ids]
             s = self.tx2gene[self.tx2gene[self.aggregation_column] == gene]
             df = df[df.index.isin(s.index.to_list())]
             if name != 'target_id':

From eb4756ba90c537b57ebd2aa31e74b5e31973d9b2 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Fri, 4 Mar 2022 16:26:07 +0900
Subject: [PATCH 04/10] Update `vcf-call` command:

* Add new optional argument ``--dir-path`` to :command:`vcf-call`
command for storing intermediate files.
---
 CHANGELOG.rst       |  3 ++
 docs/cli.rst        |  7 +++++
 fuc/api/pyvcf.py    | 77 ++++++++++++++++++++++++++-------------------
 fuc/cli/vcf_call.py | 13 +++++++-
 4 files changed, 67 insertions(+), 33 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index cb5fbf8..62085bf 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -4,6 +4,9 @@ Changelog
 0.32.0 (in development)
 -----------------------
 
+* Add new optional argument ``--dir-path`` to :command:`vcf-call` command for storing intermediate files.
+* Add optional argument ``filter_off`` for :class:`pykallisto.KallistoFrame` constructor, which is useful for generating a simple count or tpm matrix.
+
 0.31.0 (2022-03-01)
 -------------------
 
diff --git a/docs/cli.rst b/docs/cli.rst
index e815334..576da8e 100644
--- a/docs/cli.rst
+++ b/docs/cli.rst
@@ -1227,6 +1227,7 @@ vcf-call
 
    $ fuc vcf-call -h
    usage: fuc vcf-call [-h] [-r TEXT [TEXT ...]] [--min-mq INT] [--max-depth INT]
+                       [--dir-path PATH]
                        fasta bams [bams ...]
    
    Call SNVs and indels from BAM files.
@@ -1259,6 +1260,12 @@ vcf-call
                            (default: 1).
      --max-depth INT       At a position, read maximally this number of reads
                            per input file (default: 250).
+     --dir-path PATH       By default, intermediate files (likelihoods.bcf,
+                           calls.bcf, and calls.normalized.bcf) will be stored
+                           in a temporary directory, which is automatically
+                           deleted after creating final VCF. If you provide a
+                           directory path, intermediate files will be stored
+                           there.
    
    [Example] Specify regions manually:
      $ fuc vcf-call ref.fa 1.bam 2.bam \
diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py
index 1d4f1a3..8fdd2f0 100644
--- a/fuc/api/pyvcf.py
+++ b/fuc/api/pyvcf.py
@@ -216,7 +216,8 @@
 }
 
 def call(
-    fasta, bams, regions=None, path=None, min_mq=1, max_depth=250
+    fasta, bams, regions=None, path=None, min_mq=1, max_depth=250,
+    dir_path=None
 ):
     """
     Call SNVs and indels from BAM files.
@@ -249,6 +250,11 @@ def call(
         Minimum mapping quality for an alignment to be used.
     max_depth : int, default: 250
         At a position, read maximally this number of reads per input file.
+    dir_path : str, optional
+        By default, intermediate files (likelihoods.bcf, calls.bcf, and
+        calls.normalized.bcf) will be stored in a temporary directory, which
+        is automatically deleted after creating final VCF. If you provide a
+        directory path, intermediate files will be stored there.
 
     Returns
     -------
@@ -281,41 +287,48 @@ def call(
             regions = common.sort_regions(regions)
         regions = [chr_prefix + x.replace('chr', '') for x in regions]
 
-    with tempfile.TemporaryDirectory() as t:
-        # Step 1: Get genotype likelihoods.
-        args = ['-Ou', '-a', 'AD']
-        args += ['-q', str(min_mq)]
-        args += ['--max-depth', str(max_depth)]
-        args += ['-f', fasta]
-        if regions is not None:
-            args += ['-r', ','.join(regions)]
-        results = bcftools.mpileup(*(args + bams))
-        with open(f'{t}/likelihoods.bcf', 'wb') as f:
-            f.write(results)
+    if dir_path is None:
+        temp_dir = tempfile.TemporaryDirectory()
+    else:
+        temp_dir = dir_path
 
-        # Step 2: Call variants.
-        args = [f'{t}/likelihoods.bcf', '-Oz', '-mv']
-        results = bcftools.call(*args)
-        with open(f'{t}/calls.bcf', 'wb') as f:
-            f.write(results)
+    # Step 1: Get genotype likelihoods.
+    args = ['-Ou', '-a', 'AD']
+    args += ['-q', str(min_mq)]
+    args += ['--max-depth', str(max_depth)]
+    args += ['-f', fasta]
+    if regions is not None:
+        args += ['-r', ','.join(regions)]
+    results = bcftools.mpileup(*(args + bams))
+    with open(f'{temp_dir}/likelihoods.bcf', 'wb') as f:
+        f.write(results)
+
+    # Step 2: Call variants.
+    args = [f'{temp_dir}/likelihoods.bcf', '-Oz', '-mv']
+    results = bcftools.call(*args)
+    with open(f'{temp_dir}/calls.bcf', 'wb') as f:
+        f.write(results)
+
+    # Step 3: Normalize indels.
+    args = [f'{temp_dir}/calls.bcf', '-Ob', '-f', fasta]
+    results = bcftools.norm(*args)
+    with open(f'{temp_dir}/calls.normalized.bcf', 'wb') as f:
+        f.write(results)
+
+    # Step 4: Filter variant.
+    args = [f'{temp_dir}/calls.normalized.bcf', '-Ov', '--IndelGap', '5']
+    results = bcftools.filter(*args)
 
-        # Step 3: Normalize indels.
-        args = [f'{t}/calls.bcf', '-Ob', '-f', fasta]
-        results = bcftools.norm(*args)
-        with open(f'{t}/calls.normalized.bcf', 'wb') as f:
+    if path is None:
+        return results
+    elif path == '-':
+        sys.stdout.write(results)
+    else:
+        with open(path, 'w') as f:
             f.write(results)
 
-        # Step 4: Filter variant.
-        args = [f'{t}/calls.normalized.bcf', '-Ov', '--IndelGap', '5']
-        results = bcftools.filter(*args)
-
-        if path is None:
-            return results
-        elif path == '-':
-            sys.stdout.write(results)
-        else:
-            with open(path, 'w') as f:
-                f.write(results)
+    if dir_path is None:
+        temp_dir.cleanup()
 
 def rescue_filtered_variants(vfs, format='GT'):
     """
diff --git a/fuc/cli/vcf_call.py b/fuc/cli/vcf_call.py
index b200783..a47393a 100644
--- a/fuc/cli/vcf_call.py
+++ b/fuc/cli/vcf_call.py
@@ -78,9 +78,20 @@ def create_parser(subparsers):
 """At a position, read maximally this number of reads
 per input file (default: 250)."""
     )
+    parser.add_argument(
+        '--dir-path',
+        metavar='PATH',
+        help=
+"""By default, intermediate files (likelihoods.bcf,
+calls.bcf, and calls.normalized.bcf) will be stored
+in a temporary directory, which is automatically
+deleted after creating final VCF. If you provide a
+directory path, intermediate files will be stored
+there."""
+    )
 
 def main(args):
     api.pyvcf.call(
         args.fasta, args.bams, regions=args.regions, path='-',
-        min_mq=args.min_mq, max_depth=args.max_depth
+        min_mq=args.min_mq, max_depth=args.max_depth, dir_path=args.dir_path
     )

From ded2cc9d35425f22e028d1ec5ffd05e85075f0fe Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Fri, 4 Mar 2022 17:03:44 +0900
Subject: [PATCH 05/10] Fix temporary directory path in `vcf-call`

---
 fuc/api/pyvcf.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py
index 8fdd2f0..93811b7 100644
--- a/fuc/api/pyvcf.py
+++ b/fuc/api/pyvcf.py
@@ -288,7 +288,8 @@ def call(
         regions = [chr_prefix + x.replace('chr', '') for x in regions]
 
     if dir_path is None:
-        temp_dir = tempfile.TemporaryDirectory()
+        t = tempfile.TemporaryDirectory()
+        temp_dir = t.name
     else:
         temp_dir = dir_path
 
@@ -328,7 +329,7 @@ def call(
             f.write(results)
 
     if dir_path is None:
-        temp_dir.cleanup()
+        t.cleanup()
 
 def rescue_filtered_variants(vfs, format='GT'):
     """

From a74f7b07ff5b831830884dc4565f456d2425e4ac Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Wed, 30 Mar 2022 13:24:56 +0900
Subject: [PATCH 06/10] Update `vcf-call` command:

* Add new optional argument ``--gap_frac`` to :command:`vcf-call` command
so that users can control indel calling sensitivity.
---
 CHANGELOG.rst       |  3 ++-
 docs/cli.rst        |  4 +++-
 fuc/api/pyvcf.py    |  5 ++++-
 fuc/cli/vcf_call.py | 12 +++++++++++-
 4 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 62085bf..31e5c52 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -5,7 +5,8 @@ Changelog
 -----------------------
 
 * Add new optional argument ``--dir-path`` to :command:`vcf-call` command for storing intermediate files.
-* Add optional argument ``filter_off`` for :class:`pykallisto.KallistoFrame` constructor, which is useful for generating a simple count or tpm matrix.
+* Add new optional argument ``filter_off`` for :class:`pykallisto.KallistoFrame` constructor, which is useful for generating a simple count or tpm matrix.
+* Add new optional argument ``gap_frac`` to :command:`vcf-call` command so that users can control indel calling sensitivity.
 
 0.31.0 (2022-03-01)
 -------------------
diff --git a/docs/cli.rst b/docs/cli.rst
index 576da8e..f3403e7 100644
--- a/docs/cli.rst
+++ b/docs/cli.rst
@@ -1227,7 +1227,7 @@ vcf-call
 
    $ fuc vcf-call -h
    usage: fuc vcf-call [-h] [-r TEXT [TEXT ...]] [--min-mq INT] [--max-depth INT]
-                       [--dir-path PATH]
+                       [--dir-path PATH] [--gap_frac FLOAT]
                        fasta bams [bams ...]
    
    Call SNVs and indels from BAM files.
@@ -1266,6 +1266,8 @@ vcf-call
                            deleted after creating final VCF. If you provide a
                            directory path, intermediate files will be stored
                            there.
+     --gap_frac FLOAT      Minimum fraction of gapped reads for calling indels
+                           (default: 0.002).
    
    [Example] Specify regions manually:
      $ fuc vcf-call ref.fa 1.bam 2.bam \
diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py
index 93811b7..ab5c2da 100644
--- a/fuc/api/pyvcf.py
+++ b/fuc/api/pyvcf.py
@@ -217,7 +217,7 @@
 
 def call(
     fasta, bams, regions=None, path=None, min_mq=1, max_depth=250,
-    dir_path=None
+    dir_path=None, gap_frac=0.002
 ):
     """
     Call SNVs and indels from BAM files.
@@ -255,6 +255,8 @@ def call(
         calls.normalized.bcf) will be stored in a temporary directory, which
         is automatically deleted after creating final VCF. If you provide a
         directory path, intermediate files will be stored there.
+    gap_frac : float, default: 0.002
+        Minimum fraction of gapped reads for calling indels.
 
     Returns
     -------
@@ -298,6 +300,7 @@ def call(
     args += ['-q', str(min_mq)]
     args += ['--max-depth', str(max_depth)]
     args += ['-f', fasta]
+    args += ['-F', str(gap_frac)]
     if regions is not None:
         args += ['-r', ','.join(regions)]
     results = bcftools.mpileup(*(args + bams))
diff --git a/fuc/cli/vcf_call.py b/fuc/cli/vcf_call.py
index a47393a..6cc58fa 100644
--- a/fuc/cli/vcf_call.py
+++ b/fuc/cli/vcf_call.py
@@ -89,9 +89,19 @@ def create_parser(subparsers):
 directory path, intermediate files will be stored
 there."""
     )
+    parser.add_argument(
+        '--gap_frac',
+        metavar='FLOAT',
+        type=float,
+        default=0.002,
+        help=
+"""Minimum fraction of gapped reads for calling indels
+(default: 0.002)."""
+    )
 
 def main(args):
     api.pyvcf.call(
         args.fasta, args.bams, regions=args.regions, path='-',
-        min_mq=args.min_mq, max_depth=args.max_depth, dir_path=args.dir_path
+        min_mq=args.min_mq, max_depth=args.max_depth, dir_path=args.dir_path,
+        gap_frac=args.gap_frac
     )

From 430a46f30123f3d8867538e94e50ce1a880edebc Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Wed, 30 Mar 2022 14:20:51 +0900
Subject: [PATCH 07/10] Fix bug in `pyvcf.call`:

* Fix minor bug in :meth:`pyvcf.call` method when ``pybed.BedFrame``
object is given as ``regions``.
---
 CHANGELOG.rst    | 5 +++--
 fuc/api/pyvcf.py | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 31e5c52..f612dc2 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -4,9 +4,10 @@ Changelog
 0.32.0 (in development)
 -----------------------
 
-* Add new optional argument ``--dir-path`` to :command:`vcf-call` command for storing intermediate files.
 * Add new optional argument ``filter_off`` for :class:`pykallisto.KallistoFrame` constructor, which is useful for generating a simple count or tpm matrix.
-* Add new optional argument ``gap_frac`` to :command:`vcf-call` command so that users can control indel calling sensitivity.
+* Add new optional argument ``--dir-path`` to :command:`vcf-call` command for storing intermediate files.
+* Add new optional argument ``--gap_frac`` to :command:`vcf-call` command so that users can control indel calling sensitivity.
+* Fix minor bug in :meth:`pyvcf.call` method when ``pybed.BedFrame`` object is given as ``regions``.
 
 0.31.0 (2022-03-01)
 -------------------
diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py
index ab5c2da..c2ba9e6 100644
--- a/fuc/api/pyvcf.py
+++ b/fuc/api/pyvcf.py
@@ -279,7 +279,7 @@ def call(
         elif isinstance(regions, list):
             pass
         elif isinstance(regions, pybed.BedFrame):
-            regions = bf.to_regions()
+            regions = regions.to_regions()
         else:
             raise TypeError("Incorrect type of argument 'regions'")
         if '.bed' in regions[0]:

From 17fba04ae7642ae47d474699be0763a3cf9e0850 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Thu, 31 Mar 2022 15:41:47 +0900
Subject: [PATCH 08/10] Update `vcf-call`:

* Add new optional argument ``--group-samples`` to :command:`vcf-call`
command so that users can group samples into populations and apply the
HWE assumption within but not across the populations.
---
 CHANGELOG.rst       |  1 +
 docs/cli.rst        | 12 ++++++++++++
 fuc/api/pyvcf.py    | 13 ++++++++++++-
 fuc/cli/vcf_call.py | 18 +++++++++++++++++-
 4 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index f612dc2..863c68d 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -7,6 +7,7 @@ Changelog
 * Add new optional argument ``filter_off`` for :class:`pykallisto.KallistoFrame` constructor, which is useful for generating a simple count or tpm matrix.
 * Add new optional argument ``--dir-path`` to :command:`vcf-call` command for storing intermediate files.
 * Add new optional argument ``--gap_frac`` to :command:`vcf-call` command so that users can control indel calling sensitivity.
+* Add new optional argument ``--group-samples`` to :command:`vcf-call` command so that users can group samples into populations and apply the HWE assumption within but not across the populations.
 * Fix minor bug in :meth:`pyvcf.call` method when ``pybed.BedFrame`` object is given as ``regions``.
 
 0.31.0 (2022-03-01)
diff --git a/docs/cli.rst b/docs/cli.rst
index f3403e7..734b9e3 100644
--- a/docs/cli.rst
+++ b/docs/cli.rst
@@ -1228,6 +1228,7 @@ vcf-call
    $ fuc vcf-call -h
    usage: fuc vcf-call [-h] [-r TEXT [TEXT ...]] [--min-mq INT] [--max-depth INT]
                        [--dir-path PATH] [--gap_frac FLOAT]
+                       [--group-samples PATH]
                        fasta bams [bams ...]
    
    Call SNVs and indels from BAM files.
@@ -1268,6 +1269,17 @@ vcf-call
                            there.
      --gap_frac FLOAT      Minimum fraction of gapped reads for calling indels
                            (default: 0.002).
+     --group-samples PATH  By default, all samples are assumed to come from a
+                           single population. This option allows to group
+                           samples into populations and apply the HWE assumption
+                           within but not across the populations. To use this
+                           option, provide a tab-delimited text file with sample
+                           names in the first column and group names in the
+                           second column. If '-' is given instead, no HWE
+                           assumption is made at all and single-sample calling
+                           is performed. Note that in low coverage data this
+                           inflates the rate of false positives. Therefore, make
+                           sure you know what you are doing.
    
    [Example] Specify regions manually:
      $ fuc vcf-call ref.fa 1.bam 2.bam \
diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py
index c2ba9e6..f455855 100644
--- a/fuc/api/pyvcf.py
+++ b/fuc/api/pyvcf.py
@@ -217,7 +217,7 @@
 
 def call(
     fasta, bams, regions=None, path=None, min_mq=1, max_depth=250,
-    dir_path=None, gap_frac=0.002
+    dir_path=None, gap_frac=0.002, group_samples=None
 ):
     """
     Call SNVs and indels from BAM files.
@@ -257,6 +257,15 @@ def call(
         directory path, intermediate files will be stored there.
     gap_frac : float, default: 0.002
         Minimum fraction of gapped reads for calling indels.
+    group_samples : str, optional
+        By default, all samples are assumed to come from a single population.
+        This option allows to group samples into populations and apply the
+        HWE assumption within but not across the populations. To use this
+        option, provide a tab-delimited text file with sample names in the
+        first column and group names in the second column. If '-' is given
+        instead, no HWE assumption is made at all and single-sample calling
+        is performed. Note that in low coverage data this inflates the rate
+        of false positives. Therefore, make sure you know what you are doing.
 
     Returns
     -------
@@ -309,6 +318,8 @@ def call(
 
     # Step 2: Call variants.
     args = [f'{temp_dir}/likelihoods.bcf', '-Oz', '-mv']
+    if group_samples is not None:
+        args += ['-G', group_samples]
     results = bcftools.call(*args)
     with open(f'{temp_dir}/calls.bcf', 'wb') as f:
         f.write(results)
diff --git a/fuc/cli/vcf_call.py b/fuc/cli/vcf_call.py
index 6cc58fa..0d05fa5 100644
--- a/fuc/cli/vcf_call.py
+++ b/fuc/cli/vcf_call.py
@@ -98,10 +98,26 @@ def create_parser(subparsers):
 """Minimum fraction of gapped reads for calling indels
 (default: 0.002)."""
     )
+    parser.add_argument(
+        '--group-samples',
+        metavar='PATH',
+        help=
+"""By default, all samples are assumed to come from a
+single population. This option allows to group
+samples into populations and apply the HWE assumption
+within but not across the populations. To use this
+option, provide a tab-delimited text file with sample
+names in the first column and group names in the
+second column. If '-' is given instead, no HWE
+assumption is made at all and single-sample calling
+is performed. Note that in low coverage data this
+inflates the rate of false positives. Therefore, make
+sure you know what you are doing."""
+    )
 
 def main(args):
     api.pyvcf.call(
         args.fasta, args.bams, regions=args.regions, path='-',
         min_mq=args.min_mq, max_depth=args.max_depth, dir_path=args.dir_path,
-        gap_frac=args.gap_frac
+        gap_frac=args.gap_frac, group_samples=args.group_samples
     )

From f094e807c6bb165d603628d219cf09c1435642ba Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Fri, 1 Apr 2022 10:39:39 +0900
Subject: [PATCH 09/10] Update `vcf-call` to create uncompressed BCF as temp
 files

---
 fuc/api/pyvcf.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py
index f455855..662f8fd 100644
--- a/fuc/api/pyvcf.py
+++ b/fuc/api/pyvcf.py
@@ -317,7 +317,7 @@ def call(
         f.write(results)
 
     # Step 2: Call variants.
-    args = [f'{temp_dir}/likelihoods.bcf', '-Oz', '-mv']
+    args = [f'{temp_dir}/likelihoods.bcf', '-Ou', '-mv']
     if group_samples is not None:
         args += ['-G', group_samples]
     results = bcftools.call(*args)
@@ -325,7 +325,7 @@ def call(
         f.write(results)
 
     # Step 3: Normalize indels.
-    args = [f'{temp_dir}/calls.bcf', '-Ob', '-f', fasta]
+    args = [f'{temp_dir}/calls.bcf', '-Ou', '-f', fasta]
     results = bcftools.norm(*args)
     with open(f'{temp_dir}/calls.normalized.bcf', 'wb') as f:
         f.write(results)

From 8312f58b405a7d66f3233d3019ae8d5322fba250 Mon Sep 17 00:00:00 2001
From: Seung-been Lee <sbstevenlee@gmail.com>
Date: Sat, 2 Apr 2022 13:35:31 +0900
Subject: [PATCH 10/10] Update docs

---
 CHANGELOG.rst       |  4 ++--
 docs/cli.rst        | 11 ++++++-----
 fuc/api/pyvcf.py    |  9 +++++----
 fuc/cli/vcf_call.py | 11 ++++++-----
 4 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 863c68d..fb7c24b 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,8 +1,8 @@
 Changelog
 *********
 
-0.32.0 (in development)
------------------------
+0.32.0 (2022-04-02)
+-------------------
 
 * Add new optional argument ``filter_off`` for :class:`pykallisto.KallistoFrame` constructor, which is useful for generating a simple count or tpm matrix.
 * Add new optional argument ``--dir-path`` to :command:`vcf-call` command for storing intermediate files.
diff --git a/docs/cli.rst b/docs/cli.rst
index 734b9e3..6f3ad11 100644
--- a/docs/cli.rst
+++ b/docs/cli.rst
@@ -1275,11 +1275,12 @@ vcf-call
                            within but not across the populations. To use this
                            option, provide a tab-delimited text file with sample
                            names in the first column and group names in the
-                           second column. If '-' is given instead, no HWE
-                           assumption is made at all and single-sample calling
-                           is performed. Note that in low coverage data this
-                           inflates the rate of false positives. Therefore, make
-                           sure you know what you are doing.
+                           second column. If '--group-samples -' is given
+                           instead, no HWE assumption is made at all and
+                           single-sample calling is performed. Note that in low
+                           coverage data this inflates the rate of false
+                           positives. Therefore, make sure you know what you are
+                           doing.
    
    [Example] Specify regions manually:
      $ fuc vcf-call ref.fa 1.bam 2.bam \
diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py
index 662f8fd..341ce5e 100644
--- a/fuc/api/pyvcf.py
+++ b/fuc/api/pyvcf.py
@@ -262,10 +262,11 @@ def call(
         This option allows to group samples into populations and apply the
         HWE assumption within but not across the populations. To use this
         option, provide a tab-delimited text file with sample names in the
-        first column and group names in the second column. If '-' is given
-        instead, no HWE assumption is made at all and single-sample calling
-        is performed. Note that in low coverage data this inflates the rate
-        of false positives. Therefore, make sure you know what you are doing.
+        first column and group names in the second column. If
+        ``group_samples='-'`` is given instead, no HWE assumption is made at
+        all and single-sample calling is performed. Note that in low coverage
+        data this inflates the rate of false positives. Therefore, make sure
+        you know what you are doing.
 
     Returns
     -------
diff --git a/fuc/cli/vcf_call.py b/fuc/cli/vcf_call.py
index 0d05fa5..b08a6e8 100644
--- a/fuc/cli/vcf_call.py
+++ b/fuc/cli/vcf_call.py
@@ -108,11 +108,12 @@ def create_parser(subparsers):
 within but not across the populations. To use this
 option, provide a tab-delimited text file with sample
 names in the first column and group names in the
-second column. If '-' is given instead, no HWE
-assumption is made at all and single-sample calling
-is performed. Note that in low coverage data this
-inflates the rate of false positives. Therefore, make
-sure you know what you are doing."""
+second column. If '--group-samples -' is given
+instead, no HWE assumption is made at all and
+single-sample calling is performed. Note that in low
+coverage data this inflates the rate of false
+positives. Therefore, make sure you know what you are
+doing."""
     )
 
 def main(args):