From 41b4d11e66d663fc3327198751ee290a7b7c6c67 Mon Sep 17 00:00:00 2001 From: lala8 Date: Mon, 29 Jul 2024 17:07:34 +0000 Subject: [PATCH 1/2] allow custom blacklist file --- .github/workflows/pypi-publish.yml | 2 +- src/grelu/data/preprocess.py | 14 ++++++++------ tests/test_models.py | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index 4223cca..30ffafb 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -5,7 +5,7 @@ name: Publish to PyPI on: push: - tags: + tags: - "*" jobs: diff --git a/src/grelu/data/preprocess.py b/src/grelu/data/preprocess.py index b3d9a7d..ea25099 100644 --- a/src/grelu/data/preprocess.py +++ b/src/grelu/data/preprocess.py @@ -332,7 +332,7 @@ def filter_overlapping( def filter_blacklist( data: Union[pd.DataFrame, AnnData], - genome: str, + genome: Optional[str] = None, blacklist: Optional[str] = None, inplace: bool = False, window: int = 0, @@ -344,7 +344,7 @@ def filter_blacklist( data: Either a pandas dataframe of genomic intervals or an Anndata object with intervals in .var genome: name of the genome corresponding to intervals - blacklist (str): path to blacklist file. If not given, will be + blacklist: path to blacklist file. If not given, it will be extracted from the package resources. inplace: If True, the input is modified in place. If False, a new dataframe or anndata object is returned. @@ -357,11 +357,13 @@ def filter_blacklist( from grelu.resources import get_blacklist_file # Read blacklist - if genome is not None: - blacklist = get_blacklist_file(genome) - - if isinstance(blacklist, str): + if genome is None: + assert ( + blacklist is not None + ), "Either genome name or blacklist file must be provided" blacklist = read_bed(blacklist, str_index=False) + else: + blacklist = get_blacklist_file(genome) # Filter return filter_overlapping( diff --git a/tests/test_models.py b/tests/test_models.py index 2b4c546..93431a4 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,6 +1,6 @@ import torch - import wandb + from grelu.model.models import ( BorzoiModel, BorzoiPretrainedModel, From 55ce1afa52ef3447222ed6176cbfc7a320ee26eb Mon Sep 17 00:00:00 2001 From: lala8 Date: Mon, 29 Jul 2024 17:29:15 +0000 Subject: [PATCH 2/2] fixed logic --- src/grelu/data/preprocess.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/grelu/data/preprocess.py b/src/grelu/data/preprocess.py index ea25099..2187bc8 100644 --- a/src/grelu/data/preprocess.py +++ b/src/grelu/data/preprocess.py @@ -356,14 +356,16 @@ def filter_blacklist( from grelu.io.bed import read_bed from grelu.resources import get_blacklist_file - # Read blacklist - if genome is None: + # Get path to blacklist file + if genome is not None: + blacklist = get_blacklist_file(genome) + else: assert ( blacklist is not None ), "Either genome name or blacklist file must be provided" - blacklist = read_bed(blacklist, str_index=False) - else: - blacklist = get_blacklist_file(genome) + + # Read blacklist file + blacklist = read_bed(blacklist, str_index=False) # Filter return filter_overlapping(