From 541c18660d5beb3b2ebbc16695fdcc6b2f28ae40 Mon Sep 17 00:00:00 2001 From: Seung-been Lee Date: Sat, 9 Sep 2023 11:16:00 +0900 Subject: [PATCH 1/4] Bump up version number --- CHANGELOG.rst | 3 +++ fuc/version.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 3c6b3e8..cde6191 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,9 @@ Changelog ********* +0.38.0 (in development) +----------------------- + 0.37.0 (2023-09-09) ------------------- diff --git a/fuc/version.py b/fuc/version.py index 8935b5b..457618b 100644 --- a/fuc/version.py +++ b/fuc/version.py @@ -1 +1 @@ -__version__ = '0.37.0' +__version__ = '0.38.0' From 9c2c06655986bb6ac8b5c3e043b683df5b02e41f Mon Sep 17 00:00:00 2001 From: Seung-been Lee Date: Thu, 6 Jun 2024 20:18:12 +0900 Subject: [PATCH 2/4] Update `pyvcf.has_chr_prefix`: * Update :meth:`pyvcf.has_chr_prefix` method to ignore the HLA contigs for GRCh38. --- CHANGELOG.rst | 2 ++ fuc/api/pyvcf.py | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index cde6191..360488c 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,6 +4,8 @@ Changelog 0.38.0 (in development) ----------------------- +* Update :meth:`pyvcf.has_chr_prefix` method to ignore the HLA contigs for GRCh38. + 0.37.0 (2023-09-09) ------------------- diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py index 8211b5b..bb714f7 100644 --- a/fuc/api/pyvcf.py +++ b/fuc/api/pyvcf.py @@ -763,6 +763,8 @@ def has_chr_prefix(file, size=1000): Return True if all of the sampled contigs from a VCF file have the (annoying) 'chr' string. + For GRCh38, the HLA contigs will be ignored. + Parameters ---------- file : str @@ -779,6 +781,8 @@ def has_chr_prefix(file, size=1000): vcf = VariantFile(file) for record in vcf.fetch(): n += 1 + if record.chrom.startswith('HLA'): + continue if 'chr' not in record.chrom: return False if n > size: From 0f4c601eb8c9612260a01bcbd02e62513b852bfd Mon Sep 17 00:00:00 2001 From: Seung-been Lee Date: Sun, 16 Jun 2024 07:43:19 +0900 Subject: [PATCH 3/4] Deprecate the `common.plot_cytobands` method (#71) --- CHANGELOG.rst | 1 + fuc/api/common.py | 133 ---------------------------------------------- 2 files changed, 1 insertion(+), 133 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 360488c..93c7471 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -5,6 +5,7 @@ Changelog ----------------------- * Update :meth:`pyvcf.has_chr_prefix` method to ignore the HLA contigs for GRCh38. +* :issue:`71`: Deprecate :meth:`common.plot_cytobands` method. 0.37.0 (2023-09-09) ------------------- diff --git a/fuc/api/common.py b/fuc/api/common.py index b5e013b..d9afa1c 100644 --- a/fuc/api/common.py +++ b/fuc/api/common.py @@ -22,7 +22,6 @@ import pandas as pd import numpy as np import matplotlib.pyplot as plt -from matplotlib.collections import BrokenBarHCollection import matplotlib.patches as mpatches import seaborn as sns @@ -841,138 +840,6 @@ def extract_sequence(fasta, region): sequence = '' return sequence -def plot_cytobands(cytoband, bed, ax=None, figsize=None): - """ - Create chromosome ideograms along with BED data. - - The method's source code is derived from a Python script (ideograms.py) - written by Ryan Dale. The original script can be found at: - https://gist.github.com/daler/c98fc410282d7570efc3#file-ideograms-py - - Parameters - ---------- - cytoband : str - Text file containing cytoband ideogram information. - bed : str - BED file to be displayed. - ax : matplotlib.axes.Axes, optional - Pre-existing axes for the plot. Otherwise, crete a new one. - figsize : tuple, optional - Width, height in inches. Format: (float, float). - - Examples - -------- - - .. plot:: - :context: close-figs - - >>> import matplotlib.pyplot as plt - >>> from fuc import common - >>> common.load_dataset('cytoband') - >>> cytoband_file = '~/fuc-data/cytoband/cytoBandIdeo.txt.gz' - >>> bed_file = '~/fuc-data/cytoband/ucsc_genes.bed.gz' - >>> common.plot_cytobands(cytoband_file, bed_file, figsize=(10, 8)) - """ - def chromosome_collections(df, y_positions, height, **kwargs): - del_width = False - if 'width' not in df.columns: - del_width = True - df['width'] = df['end'] - df['start'] - for chrom, group in df.groupby('chrom'): - yrange = (y_positions[chrom], height) - xranges = group[['start', 'width']].values - yield BrokenBarHCollection( - xranges, yrange, edgecolors=("black",), facecolors=group['colors'], **kwargs) - if del_width: - del df['width'] - - # Height of each ideogram - chrom_height = 1 - - # Spacing between consecutive ideograms - chrom_spacing = 1 - - # Height of the gene track. Should be smaller than `chrom_spacing` in order to - # fit correctly - gene_height = 0.4 - - # Padding between the top of a gene track and its corresponding ideogram - gene_padding = 0.1 - - # Decide which chromosomes to use - chromosome_list = [f'chr{i}' for i in list(range(1, 23)) + ['M', 'X', 'Y']] - - # Keep track of the y positions for ideograms and genes for each chromosome, - # and the center of each ideogram (which is where we'll put the ytick labels) - ybase = 0 - chrom_ybase = {} - gene_ybase = {} - chrom_centers = {} - - # Iterate in reverse so that items in the beginning of `chromosome_list` will - # appear at the top of the plot - for chrom in chromosome_list[::-1]: - chrom_ybase[chrom] = ybase - chrom_centers[chrom] = ybase + chrom_height / 2. - gene_ybase[chrom] = ybase - gene_height - gene_padding - ybase += chrom_height + chrom_spacing - - # Read in ideogram.txt, downloaded from UCSC Table Browser - ideo = pd.read_table( - cytoband, - names=['chrom', 'start', 'end', 'name', 'gieStain'] - ) - - # Filter out chromosomes not in our list - ideo = ideo[ideo.chrom.apply(lambda x: x in chromosome_list)] - - # Add a new column for width - ideo['width'] = ideo.end - ideo.start - - # Colors for different chromosome stains - color_lookup = { - 'gneg': (1., 1., 1.), - 'gpos25': (.6, .6, .6), - 'gpos50': (.4, .4, .4), - 'gpos75': (.2, .2, .2), - 'gpos100': (0., 0., 0.), - 'acen': (.8, .4, .4), - 'gvar': (.8, .8, .8), - 'stalk': (.9, .9, .9), - } - - # Add a new column for colors - ideo['colors'] = ideo['gieStain'].apply(lambda x: color_lookup[x]) - - # Same thing for genes - genes = pd.read_table( - bed, - names=['chrom', 'start', 'end', 'name'], - usecols=range(4)) - genes = genes[genes.chrom.apply(lambda x: x in chromosome_list)] - genes['width'] = genes.end - genes.start - genes['colors'] = '#2243a8' - - if ax is None: - fig, ax = plt.subplots(figsize=figsize) - - # Now all we have to do is call our function for the ideogram data... - for collection in chromosome_collections(ideo, chrom_ybase, chrom_height): - ax.add_collection(collection) - - # ...and the gene data - for collection in chromosome_collections( - genes, gene_ybase, gene_height, alpha=0.5, linewidths=0 - ): - ax.add_collection(collection) - - # Axes tweaking - ax.set_yticks([chrom_centers[i] for i in chromosome_list]) - ax.set_yticklabels(chromosome_list) - ax.axis('tight') - - return ax - def convert_file2list(fn): """ Convert a text file to a list of filenames. From 18845cd0e968d97413eb974785732eac3a88dd42 Mon Sep 17 00:00:00 2001 From: Seung-been Lee Date: Sun, 16 Jun 2024 07:51:26 +0900 Subject: [PATCH 4/4] Update CHANGELOG.rst --- CHANGELOG.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 93c7471..71085a9 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,8 +1,8 @@ Changelog ********* -0.38.0 (in development) ------------------------ +0.38.0 (2024-06-16) +------------------- * Update :meth:`pyvcf.has_chr_prefix` method to ignore the HLA contigs for GRCh38. * :issue:`71`: Deprecate :meth:`common.plot_cytobands` method.