From 417cd5adba9b5d7b815525ae75829d892dd46a6a Mon Sep 17 00:00:00 2001 From: Stephen Fleming Date: Fri, 5 Apr 2024 15:42:08 +0000 Subject: [PATCH 01/17] Update gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 80b6c0c..4d5b686 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,4 @@ dist/ *.csv *.npz *.tar.gz +data/ \ No newline at end of file From 228d434048e31ba954f92242e131dc7c66b7eb7f Mon Sep 17 00:00:00 2001 From: Stephen Fleming Date: Fri, 5 Apr 2024 15:53:13 +0000 Subject: [PATCH 02/17] Github actions run on master and dev PRs only --- .github/workflows/miniwdl_check.yml | 7 ++++++- .github/workflows/run_packaging_check.yml | 4 +++- .github/workflows/run_pytest.yml | 4 +++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/.github/workflows/miniwdl_check.yml b/.github/workflows/miniwdl_check.yml index 8a8de2c..34d1a4c 100644 --- a/.github/workflows/miniwdl_check.yml +++ b/.github/workflows/miniwdl_check.yml @@ -1,7 +1,12 @@ name: 'validate WDL' -on: [pull_request] + +on: + pull_request: + branches: [ master, dev ] + env: MINIWDL_VERSION: 1.8.0 + jobs: miniwdl-check: runs-on: ubuntu-latest diff --git a/.github/workflows/run_packaging_check.yml b/.github/workflows/run_packaging_check.yml index 8a9fc1c..6144cbe 100644 --- a/.github/workflows/run_packaging_check.yml +++ b/.github/workflows/run_packaging_check.yml @@ -2,7 +2,9 @@ name: 'packaging' -on: pull_request +on: + pull_request: + branches: [ master, dev ] jobs: build: diff --git a/.github/workflows/run_pytest.yml b/.github/workflows/run_pytest.yml index 7c8c662..b6a02ae 100644 --- a/.github/workflows/run_pytest.yml +++ b/.github/workflows/run_pytest.yml @@ -2,7 +2,9 @@ name: 'pytest' -on: pull_request +on: + pull_request: + branches: [ master, dev ] jobs: build: From d68c5150f6382baea4cdccf9001f74fcf0748f90 Mon Sep 17 00:00:00 2001 From: Stephen Fleming Date: Fri, 5 Apr 2024 17:10:50 +0000 Subject: [PATCH 03/17] issue #348 lxml_html_clean dependency --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 3585b6a..9b68a3c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,4 +12,5 @@ jupyter jupyter_contrib_nbextensions notebook<7.0.0 nbconvert<7.0.0 +lxml_html_clean psutil From b73fd5643d66eedfbba46cedcc24168e9387b4ed Mon Sep 17 00:00:00 2001 From: Stephen Fleming Date: Fri, 5 Apr 2024 20:14:44 +0000 Subject: [PATCH 04/17] Fix #306: noise count integer overflow --- cellbender/remove_background/estimation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellbender/remove_background/estimation.py b/cellbender/remove_background/estimation.py index b4866d2..9fbcab7 100644 --- a/cellbender/remove_background/estimation.py +++ b/cellbender/remove_background/estimation.py @@ -238,7 +238,7 @@ def _estimation_array_to_csr(index_converter, row, col = index_converter.get_ng_indices(m_inds=m) if noise_offsets is not None: data = data + np.array([noise_offsets.get(i, 0) for i in m]) - coo = sp.coo_matrix((data.astype(dtype), (row.astype(np.uint64), col.astype(np.uint8))), + coo = sp.coo_matrix((data.astype(dtype), (row.astype(np.uint64), col.astype(np.uint32))), shape=index_converter.matrix_shape, dtype=dtype) coo.sum_duplicates() return coo.tocsr() From 4bdc4f4604045353c6fadf676ca7bcf273c77c2d Mon Sep 17 00:00:00 2001 From: Stephen Fleming Date: Fri, 5 Apr 2024 20:50:50 +0000 Subject: [PATCH 05/17] Make dtypes more explicit --- cellbender/remove_background/estimation.py | 10 +++++++--- cellbender/remove_background/tests/test_estimation.py | 7 ++++++- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/cellbender/remove_background/estimation.py b/cellbender/remove_background/estimation.py index 9fbcab7..a1dee3e 100644 --- a/cellbender/remove_background/estimation.py +++ b/cellbender/remove_background/estimation.py @@ -21,6 +21,10 @@ logger = logging.getLogger('cellbender') +N_CELLS_DATATYPE = np.uint64 +N_GENES_DATATYPE = np.uint32 +COUNT_DATATYPE = np.uint64 + class EstimationMethod(ABC): """Base class for estimation of noise counts, given a posterior.""" @@ -52,7 +56,7 @@ def _estimation_array_to_csr(self, data: np.ndarray, m: np.ndarray, noise_offsets: Optional[Dict[int, int]], - dtype=np.int64) -> sp.csr_matrix: + dtype=COUNT_DATATYPE) -> sp.csr_matrix: """Say you have point estimates for each count matrix element (data) and you have the 'm'-indices for each value (m). This returns a CSR matrix that has the shape of the count matrix, where duplicate entries have @@ -218,7 +222,7 @@ def _estimation_array_to_csr(index_converter, data: np.ndarray, m: np.ndarray, noise_offsets: Optional[Dict[int, int]], - dtype=np.int) -> sp.csr_matrix: + dtype=COUNT_DATATYPE) -> sp.csr_matrix: """Say you have point estimates for each count matrix element (data) and you have the 'm'-indices for each value (m). This returns a CSR matrix that has the shape of the count matrix, where duplicate entries have @@ -238,7 +242,7 @@ def _estimation_array_to_csr(index_converter, row, col = index_converter.get_ng_indices(m_inds=m) if noise_offsets is not None: data = data + np.array([noise_offsets.get(i, 0) for i in m]) - coo = sp.coo_matrix((data.astype(dtype), (row.astype(np.uint64), col.astype(np.uint32))), + coo = sp.coo_matrix((data.astype(dtype), (row.astype(N_CELLS_DATATYPE), col.astype(N_GENES_DATATYPE))), shape=index_converter.matrix_shape, dtype=dtype) coo.sum_duplicates() return coo.tocsr() diff --git a/cellbender/remove_background/tests/test_estimation.py b/cellbender/remove_background/tests/test_estimation.py index c1cb931..a956294 100644 --- a/cellbender/remove_background/tests/test_estimation.py +++ b/cellbender/remove_background/tests/test_estimation.py @@ -6,7 +6,7 @@ import torch from cellbender.remove_background.estimation import Mean, MAP, \ - SingleSample, ThresholdCDF, MultipleChoiceKnapsack, pandas_grouped_apply + SingleSample, ThresholdCDF, MultipleChoiceKnapsack, pandas_grouped_apply, _estimation_array_to_csr from cellbender.remove_background.posterior import IndexConverter, \ dense_to_sparse_op_torch, log_prob_sparse_to_dense @@ -379,3 +379,8 @@ def test_parallel_pandas_grouped_apply(fun): np.testing.assert_array_equal(reg['m'], parallel['m']) np.testing.assert_array_equal(reg['result'], parallel['result']) + + +def test_estimation_array_to_csr(): + + From dc8daad978bc065d9ac42ea5033758f6ec083e04 Mon Sep 17 00:00:00 2001 From: Stephen Fleming Date: Fri, 5 Apr 2024 21:05:09 +0000 Subject: [PATCH 06/17] Unit test that fails before fix --- .../tests/test_estimation.py | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/cellbender/remove_background/tests/test_estimation.py b/cellbender/remove_background/tests/test_estimation.py index a956294..1f10b16 100644 --- a/cellbender/remove_background/tests/test_estimation.py +++ b/cellbender/remove_background/tests/test_estimation.py @@ -6,9 +6,10 @@ import torch from cellbender.remove_background.estimation import Mean, MAP, \ - SingleSample, ThresholdCDF, MultipleChoiceKnapsack, pandas_grouped_apply, _estimation_array_to_csr + SingleSample, ThresholdCDF, MultipleChoiceKnapsack, pandas_grouped_apply, _estimation_array_to_csr, COUNT_DATATYPE from cellbender.remove_background.posterior import IndexConverter, \ dense_to_sparse_op_torch, log_prob_sparse_to_dense +from cellbender.remove_background.tests.conftest import sparse_matrix_equal from typing import Dict, Union @@ -383,4 +384,24 @@ def test_parallel_pandas_grouped_apply(fun): def test_estimation_array_to_csr(): + larger_than_uint16 = 2**16 + 1 + + converter = IndexConverter(total_n_cells=larger_than_uint16, + total_n_genes=larger_than_uint16) + m = larger_than_uint16 + np.arange(-10, 10) + data = np.random.rand(len(m)) * -10 + noise_offsets = None + + output_csr = _estimation_array_to_csr(index_converter=converter, data=data, m=m, noise_offsets=noise_offsets, dtype=COUNT_DATATYPE) + # reimplementation here with totally permissive datatypes + cell_and_gene_dtype = np.float64 + row, col = converter.get_ng_indices(m_inds=m) + if noise_offsets is not None: + data = data + np.array([noise_offsets.get(i, 0) for i in m]) + coo = sp.coo_matrix((data.astype(COUNT_DATATYPE), (row.astype(cell_and_gene_dtype), col.astype(cell_and_gene_dtype))), + shape=converter.matrix_shape, dtype=COUNT_DATATYPE) + coo.sum_duplicates() + truth_csr = coo.tocsr() + + assert sparse_matrix_equal(output_csr, truth_csr) From 4ee23503932efaf69653524be4292f19fdfe8403 Mon Sep 17 00:00:00 2001 From: Stephen Fleming Date: Fri, 5 Apr 2024 21:13:24 +0000 Subject: [PATCH 07/17] integration test for negative outputs: also fails before fix --- cellbender/remove_background/tests/test_integration.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cellbender/remove_background/tests/test_integration.py b/cellbender/remove_background/tests/test_integration.py index 624c59d..078e9f2 100644 --- a/cellbender/remove_background/tests/test_integration.py +++ b/cellbender/remove_background/tests/test_integration.py @@ -50,3 +50,6 @@ def test_full_run(tmpdir_factory, h5_v3_file, cuda): adata_cell_barcodes = adata.obs_names[adata.obs['cell_probability'] > consts.CELL_PROB_CUTOFF] assert set(cell_barcodes) == set(adata_cell_barcodes), \ 'Cell barcodes in h5 are different from those in CSV file' + + # ensure there are no negative count matrix entries in the output + assert np.all(adata.X.data >= 0), 'Negative count matrix entries in output' From 95f013617777680ca6d99c477a1648cb325650e0 Mon Sep 17 00:00:00 2001 From: Stephen Fleming Date: Fri, 5 Apr 2024 21:31:02 +0000 Subject: [PATCH 08/17] Conversion to torch was a problem from uints --- cellbender/remove_background/estimation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cellbender/remove_background/estimation.py b/cellbender/remove_background/estimation.py index a1dee3e..777bf03 100644 --- a/cellbender/remove_background/estimation.py +++ b/cellbender/remove_background/estimation.py @@ -21,9 +21,9 @@ logger = logging.getLogger('cellbender') -N_CELLS_DATATYPE = np.uint64 -N_GENES_DATATYPE = np.uint32 -COUNT_DATATYPE = np.uint64 +N_CELLS_DATATYPE = np.int32 +N_GENES_DATATYPE = np.int32 +COUNT_DATATYPE = np.int32 class EstimationMethod(ABC): From 1cb84794b6e477737b1808ebed1aa9383fd924b8 Mon Sep 17 00:00:00 2001 From: Stephen Fleming Date: Fri, 5 Apr 2024 21:32:05 +0000 Subject: [PATCH 09/17] Correct a test that was unfairly failing when gene dtype was int32 --- cellbender/remove_background/tests/test_estimation.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cellbender/remove_background/tests/test_estimation.py b/cellbender/remove_background/tests/test_estimation.py index 1f10b16..0172366 100644 --- a/cellbender/remove_background/tests/test_estimation.py +++ b/cellbender/remove_background/tests/test_estimation.py @@ -93,11 +93,12 @@ def test_mean_massive_m(log_prob_coo): new_shape = (coo.shape[0] + greater_than_max_int32, coo.shape[1]) new_coo = sp.coo_matrix((coo.data, (new_row, coo.col)), shape=new_shape) + print(new_coo) offset_dict = {k + greater_than_max_int32: v for k, v in log_prob_coo['offsets'].items()} # this is just a shim - converter = IndexConverter(total_n_cells=2, - total_n_genes=new_coo.shape[0]) + converter = IndexConverter(total_n_cells=new_coo.shape[0], + total_n_genes=new_coo.shape[1]) # set up and estimate estimator = Mean(index_converter=converter) From 2b1172798c9175f0a47a9a2a45f9e1813e3567ec Mon Sep 17 00:00:00 2001 From: Stephen Fleming Date: Tue, 9 Apr 2024 19:17:08 +0000 Subject: [PATCH 10/17] More informative print statement in test --- cellbender/remove_background/tests/test_estimation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cellbender/remove_background/tests/test_estimation.py b/cellbender/remove_background/tests/test_estimation.py index 0172366..2cca641 100644 --- a/cellbender/remove_background/tests/test_estimation.py +++ b/cellbender/remove_background/tests/test_estimation.py @@ -93,7 +93,10 @@ def test_mean_massive_m(log_prob_coo): new_shape = (coo.shape[0] + greater_than_max_int32, coo.shape[1]) new_coo = sp.coo_matrix((coo.data, (new_row, coo.col)), shape=new_shape) - print(new_coo) + print(f'original COO shape: {coo.shape}') + print(f'new COO shape: {new_coo.shape}') + print(f'new row minimum value: {new_coo.row.min()}') + print(f'new row maximum value: {new_coo.row.max()}') offset_dict = {k + greater_than_max_int32: v for k, v in log_prob_coo['offsets'].items()} # this is just a shim From 411cc395f775067a1c001cd7d75f3d080a0aaa97 Mon Sep 17 00:00:00 2001 From: Stephen Fleming Date: Tue, 9 Apr 2024 19:23:04 +0000 Subject: [PATCH 11/17] Failsafe to ensure no negative entries in output --- cellbender/remove_background/run.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cellbender/remove_background/run.py b/cellbender/remove_background/run.py index ca2a592..6f646a0 100644 --- a/cellbender/remove_background/run.py +++ b/cellbender/remove_background/run.py @@ -307,6 +307,9 @@ def compute_output_denoised_counts_reports_metrics(posterior: Posterior, posterior.latents_map['p'], ) + # Failsafe to ensure no negative counts. + assert np.all(denoised_counts.data >= 0), 'Negative count matrix entries in output' + # TODO: correct cell probabilities so that any zero-count droplet becomes "empty" # Save denoised count matrix. From 1fd390f2d563f099e77b9bad46b8535114d6fbe3 Mon Sep 17 00:00:00 2001 From: Stephen Fleming Date: Wed, 10 Apr 2024 15:31:24 +0000 Subject: [PATCH 12/17] Introduce --force-use-checkpoint for redoing v0.3.1 runs --- cellbender/remove_background/argparser.py | 11 ++++++- cellbender/remove_background/checkpoint.py | 36 ++++++++++++++-------- cellbender/remove_background/posterior.py | 6 ++-- cellbender/remove_background/run.py | 3 +- 4 files changed, 39 insertions(+), 17 deletions(-) diff --git a/cellbender/remove_background/argparser.py b/cellbender/remove_background/argparser.py index d53aca3..aebe670 100644 --- a/cellbender/remove_background/argparser.py +++ b/cellbender/remove_background/argparser.py @@ -48,10 +48,19 @@ def add_subparser_args(subparsers: argparse) -> argparse: subparser.add_argument("--checkpoint", nargs=None, type=str, dest='input_checkpoint_tarball', required=False, default=consts.CHECKPOINT_FILE_NAME, - help="Checkpoint tarball produced by the same version " + help="Checkpoint tarball produced by v0.3.0+ " "of CellBender remove-background. If present, " "and the workflow hashes match, training will " "restart from this checkpoint.") + subparser.add_argument("--force-use-checkpoint", + dest='force_use_checkpoint', action="store_true", + help="Normally, checkpoints can only be used if the CellBender " + "code and certain input args match exactly. This flag allows you " + "to bypass this requirement. An example use would be to create a new output " + "using a checkpoint from a run of v0.3.1, a redacted version with " + "faulty output count matrices. If you use this flag, " + "ensure that the input file and the checkpoint match, because " + "CellBender will not check.") subparser.add_argument("--expected-cells", nargs=None, type=int, default=None, dest="expected_cell_count", diff --git a/cellbender/remove_background/checkpoint.py b/cellbender/remove_background/checkpoint.py index 206737b..44ed12e 100644 --- a/cellbender/remove_background/checkpoint.py +++ b/cellbender/remove_background/checkpoint.py @@ -154,7 +154,8 @@ def save_checkpoint(filebase: str, def load_checkpoint(filebase: Optional[str], tarball_name: str = consts.CHECKPOINT_FILE_NAME, - force_device: Optional[str] = None)\ + force_device: Optional[str] = None, + force_use_checkpoint: bool = False)\ -> Dict[str, Union['RemoveBackgroundPyroModel', pyro.optim.PyroOptim, DataLoader, bool]]: """Load checkpoint and prepare a RemoveBackgroundPyroModel and optimizer.""" @@ -163,6 +164,7 @@ def load_checkpoint(filebase: Optional[str], tarball_name=tarball_name, to_load=['model', 'optim', 'param_store', 'dataloader', 'args', 'random_state'], force_device=force_device, + force_use_checkpoint=force_use_checkpoint, ) out.update({'loaded': True}) logger.info(f'Loaded partially-trained checkpoint from {tarball_name}') @@ -172,7 +174,8 @@ def load_checkpoint(filebase: Optional[str], def load_from_checkpoint(filebase: Optional[str], tarball_name: str = consts.CHECKPOINT_FILE_NAME, to_load: List[str] = ['model'], - force_device: Optional[str] = None) -> Dict: + force_device: Optional[str] = None, + force_use_checkpoint: bool = False) -> Dict: """Load specific files from a checkpoint tarball.""" load_kwargs = {} @@ -192,19 +195,24 @@ def load_from_checkpoint(filebase: Optional[str], else: # no tarball loaded, so do not continue trying to load files raise FileNotFoundError - - # See if files have a hash matching input filebase. - if filebase is not None: + + # If posterior is present, do not require run hash to match: will pick up + # after training and run estimation from existing posterior. + # This smoothly allows re-runs (including for problematic v0.3.1) + logger.debug(f'force_use_checkpoint: {force_use_checkpoint}') + if force_use_checkpoint or (filebase is None): + filebase = (glob.glob(os.path.join(tmp_dir, '*_model.torch'))[0] + .replace('_model.torch', '')) + logger.debug(f'Accepting any file hash, so loading {filebase}*') + + else: + # See if files have a hash matching input filebase. basename = os.path.basename(filebase) filebase = os.path.join(tmp_dir, basename) logger.debug(f'Looking for files with base name matching {filebase}*') if not os.path.exists(filebase + '_model.torch'): logger.info('Workflow hash does not match that of checkpoint.') - raise ValueError('Workflow hash does not match that of checkpoint.') - else: - filebase = (glob.glob(os.path.join(tmp_dir, '*_model.torch'))[0] - .replace('_model.torch', '')) - logger.debug(f'Accepting any file hash, so loading {filebase}*') + raise ValueError('Workflow hash does not match that of checkpoint.') out = {} @@ -265,9 +273,10 @@ def load_from_checkpoint(filebase: Optional[str], return out -def attempt_load_checkpoint(filebase: str, +def attempt_load_checkpoint(filebase: Optional[str], tarball_name: str = consts.CHECKPOINT_FILE_NAME, - force_device: Optional[str] = None)\ + force_device: Optional[str] = None, + force_use_checkpoint: bool = False)\ -> Dict[str, Union['RemoveBackgroundPyroModel', pyro.optim.PyroOptim, DataLoader, bool]]: """Load checkpoint and prepare a RemoveBackgroundPyroModel and optimizer, or return the inputs if loading fails.""" @@ -276,7 +285,8 @@ def attempt_load_checkpoint(filebase: str, logger.debug('Attempting to load checkpoint from ' + tarball_name) return load_checkpoint(filebase=filebase, tarball_name=tarball_name, - force_device=force_device) + force_device=force_device, + force_use_checkpoint=force_use_checkpoint) except FileNotFoundError: logger.debug('No tarball found') diff --git a/cellbender/remove_background/posterior.py b/cellbender/remove_background/posterior.py index d99e533..5bdd890 100644 --- a/cellbender/remove_background/posterior.py +++ b/cellbender/remove_background/posterior.py @@ -107,13 +107,15 @@ def _do_posterior_regularization(posterior: Posterior): try: ckpt_posterior = load_from_checkpoint(tarball_name=args.input_checkpoint_tarball, filebase=args.checkpoint_filename, - to_load=['posterior']) + to_load=['posterior'], + force_use_checkpoint=args.force_use_checkpoint) except ValueError: # input checkpoint tarball was not a match for this workflow # but we still may have saved a new tarball ckpt_posterior = load_from_checkpoint(tarball_name=consts.CHECKPOINT_FILE_NAME, filebase=args.checkpoint_filename, - to_load=['posterior']) + to_load=['posterior'], + force_use_checkpoint=args.force_use_checkpoint) if os.path.exists(ckpt_posterior.get('posterior_file', 'does_not_exist')): # Load posterior if it was saved in the checkpoint. posterior.load(file=ckpt_posterior['posterior_file']) diff --git a/cellbender/remove_background/run.py b/cellbender/remove_background/run.py index 6f646a0..7ac466b 100644 --- a/cellbender/remove_background/run.py +++ b/cellbender/remove_background/run.py @@ -630,7 +630,8 @@ def run_inference(dataset_obj: SingleCellRNACountsDataset, # Attempt to load from a previously-saved checkpoint. ckpt = attempt_load_checkpoint(filebase=checkpoint_filename, tarball_name=args.input_checkpoint_tarball, - force_device='cuda:0' if args.use_cuda else 'cpu') + force_device='cuda:0' if args.use_cuda else 'cpu', + force_use_checkpoint=args.force_use_checkpoint) ckpt_loaded = ckpt['loaded'] # True if a checkpoint was loaded successfully if ckpt_loaded: From 2ba1b93eb842133c2d7576e24768c8a6e15abb60 Mon Sep 17 00:00:00 2001 From: Stephen Fleming Date: Wed, 10 Apr 2024 15:31:44 +0000 Subject: [PATCH 13/17] Add a warning to the README about v0.3.1 --- README.rst | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/README.rst b/README.rst index d5895c7..08097cf 100644 --- a/README.rst +++ b/README.rst @@ -36,6 +36,31 @@ The current release contains the following modules. More modules will be added i Please refer to `the documentation `_ for a quick start tutorial. +.. warning:: + The release tagged v0.3.1 included a bug which caused output count matrices to be incorrect. `The bug + `_, + introduced in `#303 `_, compromised output denoised count matrices + (due to an integer overflow) and would often show up as negative entries in the output count matrices. The bug also existed on + the master branch until `#347 `_. + + For now, we recommend using either v0.3.0 or the master branch (after #347) until v0.3.2 is released, and then using v0.3.2. + + Outputs generated with v0.3.1 (or the master branch between #303 and #347) can be salvaged by making use of the + checkpoint file, which is not compromised. The following command will re-run the (inexpensive, CPU-only) + estimation of the output count matrix using the saved posterior in the checkpoint file. Note the use of the new + input argument ``--force-use-checkpoint`` which will allow use of a checkpoint file produced by a different CellBender version: + + .. code-block:: console + + (cellbender) $ cellbender remove-background \ + --input my_raw_count_matrix_file.h5 \ + --output my_cellbender_output_file.h5 \ + --checkpoint path/to/ckpt.tar.gz \ + --force-use-checkpoint + + where ``path/to/ckpt.tar.gz`` is the path to the checkpoint file generated by the original run. Ensure that you pair up the right + ``--input`` with the right ``--checkpoint``. + Installation and Usage ---------------------- From e04f30b1528e43063e588ff1323c822b0d10cda8 Mon Sep 17 00:00:00 2001 From: Stephen Fleming Date: Wed, 10 Apr 2024 16:07:58 +0000 Subject: [PATCH 14/17] rst warning does not render in github readme --- README.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 08097cf..2f6b9b8 100644 --- a/README.rst +++ b/README.rst @@ -36,7 +36,8 @@ The current release contains the following modules. More modules will be added i Please refer to `the documentation `_ for a quick start tutorial. -.. warning:: +.. + The release tagged v0.3.1 included a bug which caused output count matrices to be incorrect. `The bug `_, introduced in `#303 `_, compromised output denoised count matrices From 00901cb34909207b9b97445e3e70266a4c820bf0 Mon Sep 17 00:00:00 2001 From: Stephen Fleming Date: Wed, 10 Apr 2024 16:09:16 +0000 Subject: [PATCH 15/17] Another attempt at rst rendering --- README.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.rst b/README.rst index 2f6b9b8..426c849 100644 --- a/README.rst +++ b/README.rst @@ -36,8 +36,6 @@ The current release contains the following modules. More modules will be added i Please refer to `the documentation `_ for a quick start tutorial. -.. - The release tagged v0.3.1 included a bug which caused output count matrices to be incorrect. `The bug `_, introduced in `#303 `_, compromised output denoised count matrices From 49a3e3eb327dd2f06865adf970c13c2cb7998344 Mon Sep 17 00:00:00 2001 From: Stephen Fleming Date: Wed, 10 Apr 2024 16:14:24 +0000 Subject: [PATCH 16/17] Add the word "warning" --- README.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.rst b/README.rst index 426c849..f32bdef 100644 --- a/README.rst +++ b/README.rst @@ -36,6 +36,8 @@ The current release contains the following modules. More modules will be added i Please refer to `the documentation `_ for a quick start tutorial. + WARNING: + The release tagged v0.3.1 included a bug which caused output count matrices to be incorrect. `The bug `_, introduced in `#303 `_, compromised output denoised count matrices From 254844681cbe3da72cc3a602dd145aa0aec87889 Mon Sep 17 00:00:00 2001 From: Stephen Fleming Date: Wed, 10 Apr 2024 16:22:28 +0000 Subject: [PATCH 17/17] Fix test failure introduced by --force-use-checkpoint --- cellbender/remove_background/tests/test_checkpoint.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cellbender/remove_background/tests/test_checkpoint.py b/cellbender/remove_background/tests/test_checkpoint.py index 59ccb9e..0dbaa22 100644 --- a/cellbender/remove_background/tests/test_checkpoint.py +++ b/cellbender/remove_background/tests/test_checkpoint.py @@ -414,6 +414,7 @@ def test_save_and_load_cellbender_checkpoint(tmpdir_factory, cuda, scheduler): args.constant_learning_rate = not scheduler args.debug = False args.input_checkpoint_tarball = 'none' + args.force_use_checkpoint = False create_random_state_blank_slate(0) pyro.clear_param_store()