From 9224808f7f74383a2f684ef5f6358a491dd6044f Mon Sep 17 00:00:00 2001 From: Eddie Ruiz <32622519+Ed2uiz@users.noreply.github.com> Date: Fri, 2 Feb 2024 13:11:03 -0500 Subject: [PATCH] Revert "Catch up to suite" --- .gitignore | 5 - R/auxiliary_giotto.R | 15 +- tests/testthat/.gitignore | 3 - tests/testthat/test-dbMatrix_filterGiotto.R | 43 ------ tests/testthat/test-dbMatrix_libNorm.R | 62 -------- tests/testthat/test-dbMatrix_logNorm.R | 62 -------- tests/testthat/test-dbMatrix_scale.R | 62 -------- vignettes/Giotto_dbverse_SAIL.qmd | 162 -------------------- 8 files changed, 5 insertions(+), 409 deletions(-) delete mode 100644 tests/testthat/.gitignore delete mode 100644 tests/testthat/test-dbMatrix_filterGiotto.R delete mode 100644 tests/testthat/test-dbMatrix_libNorm.R delete mode 100644 tests/testthat/test-dbMatrix_logNorm.R delete mode 100644 tests/testthat/test-dbMatrix_scale.R delete mode 100644 vignettes/Giotto_dbverse_SAIL.qmd diff --git a/.gitignore b/.gitignore index 4f3518ac4..e9f44b083 100644 --- a/.gitignore +++ b/.gitignore @@ -13,8 +13,3 @@ testdata merfish_preoptic/ *.DS_Store -:temp: -*.wal - -*.RDS -*.db \ No newline at end of file diff --git a/R/auxiliary_giotto.R b/R/auxiliary_giotto.R index 812c86711..6d2c3f623 100644 --- a/R/auxiliary_giotto.R +++ b/R/auxiliary_giotto.R @@ -18,7 +18,6 @@ })) } - #' @title Normalize expression matrix for library size #' @param mymatrix matrix object #' @param scalefactor scalefactor @@ -32,16 +31,15 @@ This will likely result in normalization problems. filter (filterGiotto) or impute (imputeGiotto) spatial units.')) } - - norm_expr = t_flex(t_flex(mymatrix)/ libsizes)*scalefactor + norm_expr = t_flex(t_flex(mymatrix)/ libsizes)*scalefactor return(norm_expr) } #' @title Log normalize expression matrix #' @keywords internal .log_norm_giotto = function(mymatrix, base, offset) { - + if(methods::is(mymatrix, 'DelayedArray')) { mymatrix = log(mymatrix + offset)/log(base) # } else if(methods::is(mymatrix, 'DelayedMatrix')) { @@ -50,9 +48,6 @@ mymatrix@x = log(mymatrix@x + offset)/log(base) # replace with sparseMatrixStats } else if(methods::is(mymatrix, 'Matrix')) { mymatrix@x = log(mymatrix@x + offset)/log(base) - } else if(methods::is(mymatrix, 'dbMatrix')) { - mymatrix[] = dplyr::mutate(mymatrix[], x = x + offset) # workaround for lack of @x slot - mymatrix = log(mymatrix)/log(base) } else { mymatrix = log(as.matrix(mymatrix) + offset)/log(base) } @@ -518,13 +513,13 @@ filterGiotto = function(gobject, # 2. then remove cells that do not have sufficient detected genes ## filter features - filter_index_feats = GiottoClass::rowSums_flex(expr_values >= expression_threshold) >= feat_det_in_min_cells + filter_index_feats = rowSums_flex(expr_values >= expression_threshold) >= feat_det_in_min_cells selected_feat_ids = names(filter_index_feats[filter_index_feats == TRUE]) ## filter cells - filter_index_cells = GiottoClass::colSums_flex(expr_values[filter_index_feats, ] >= expression_threshold) >= min_det_feats_per_cell + filter_index_cells = colSums_flex(expr_values[filter_index_feats, ] >= expression_threshold) >= min_det_feats_per_cell selected_cell_ids = names(filter_index_cells[filter_index_cells == TRUE]) @@ -652,7 +647,7 @@ filterGiotto = function(gobject, ## 1. library size normalize if(library_size_norm == TRUE) { norm_expr = .lib_norm_giotto(mymatrix = raw_expr[], - scalefactor = scalefactor) + scalefactor = scalefactor) } else { norm_expr = raw_expr[] } diff --git a/tests/testthat/.gitignore b/tests/testthat/.gitignore deleted file mode 100644 index 9c9f21de8..000000000 --- a/tests/testthat/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -*.wal -:temp: -*.db \ No newline at end of file diff --git a/tests/testthat/test-dbMatrix_filterGiotto.R b/tests/testthat/test-dbMatrix_filterGiotto.R deleted file mode 100644 index ad18666e9..000000000 --- a/tests/testthat/test-dbMatrix_filterGiotto.R +++ /dev/null @@ -1,43 +0,0 @@ -# silence deprecated internal functions -rlang::local_options(lifecycle_verbosity = "quiet") - -# ---------------------------------------------------------------------------- # -# Setup data -visium = GiottoData::loadGiottoMini(dataset = "visium") -dgc = getExpression(visium, output = "matrix") - -dbsm = dbMatrix::createDBMatrix(value = dgc, - db_path = ":temp:", - name = 'dgc', - class = "dbSparseMatrix", - overwrite = TRUE) - -# Create exprObj with dbsm -expObj_db = createExprObj(expression_data = dbsm, - expression_matrix_class = 'dbSparseMatrix', - name = 'raw') - -# Create giotto object -gobject_db = suppressWarnings(createGiottoObject(expression = expObj_db)) - -# ---------------------------------------------------------------------------- # -# Perform filtering -visium_filtered = filterGiotto(visium, spat_unit = "cell", - feat_type = "rna", - expression_values = "raw") - -gobject_db_filtered = filterGiotto(gobject_db, spat_unit = "cell", - feat_type = "rna", - expression_values = "raw") - -# Get filtered matrix -dgc_visium = getExpression(visium_filtered, output = "matrix") -mat_db = getExpression(gobject_db_filtered, output = "matrix") -dgc_db = dbMatrix:::as_matrix(mat_db) - -# ---------------------------------------------------------------------------- # -# Test filterGiotto() equivalence between dbMatrix and dgCMatrix - -test_that("dbMatrix equivalent to dgCMatrix after filterGiotto()", { - expect_equal(dgc_visium, dgc_db) -}) \ No newline at end of file diff --git a/tests/testthat/test-dbMatrix_libNorm.R b/tests/testthat/test-dbMatrix_libNorm.R deleted file mode 100644 index f37d27037..000000000 --- a/tests/testthat/test-dbMatrix_libNorm.R +++ /dev/null @@ -1,62 +0,0 @@ -# silence deprecated internal functions -rlang::local_options(lifecycle_verbosity = "quiet") - -# ---------------------------------------------------------------------------- # -# Setup data -visium = GiottoData::loadGiottoMini(dataset = "visium") -dgc = getExpression(visium, output = "matrix") - -dbsm = dbMatrix::createDBMatrix(value = dgc, - db_path = ":temp:", - name = 'dgc', - class = "dbSparseMatrix", - overwrite = TRUE) - -# Create exprObj with dbsm -expObj_db = createExprObj(expression_data = dbsm, - expression_matrix_class = 'dbSparseMatrix', - name = 'raw') - -# Create giotto object -gobject_db = suppressWarnings(createGiottoObject(expression = expObj_db)) - -# ---------------------------------------------------------------------------- # -# Perform filtering -visium_filtered = filterGiotto(visium, spat_unit = "cell", - feat_type = "rna", - expression_values = "raw") - -gobject_db_filtered = filterGiotto(gobject_db, spat_unit = "cell", - feat_type = "rna", - expression_values = "raw") - -# ---------------------------------------------------------------------------- # -# Perform library normalization and scaling -visium_filtered = normalizeGiotto(gobject = visium_filtered, - spat_unit = 'cell', - feat_type = 'rna', - expression_values = 'raw', - library_size_norm = TRUE, - log_norm = FALSE, - scale_feats = FALSE, - scale_cells = FALSE) - - -gobject_db_filtered = normalizeGiotto(gobject = gobject_db_filtered, - spat_unit = 'cell', - feat_type = 'rna', - expression_values = 'raw', - library_size_norm = TRUE, - log_norm = FALSE, - scale_feats = FALSE, - scale_cells = FALSE) -# Get normalized matrix -dgc_visium = getExpression(visium_filtered, output = "matrix", values = "normalized") -mat_db = getExpression(gobject_db_filtered, output = "matrix", values = "normalized") -dgc_db = dbMatrix:::as_matrix(mat_db) - -# ---------------------------------------------------------------------------- # -# Test normalizeGiotto() equivalence between dbMatrix and dgCMatrix -test_that("dbMatrix equivalent to dgCMatrix after normalizeGiotto(library_size_norm = TRUE)", { - expect_equal(dgc_visium, dgc_db) -}) \ No newline at end of file diff --git a/tests/testthat/test-dbMatrix_logNorm.R b/tests/testthat/test-dbMatrix_logNorm.R deleted file mode 100644 index cd813289a..000000000 --- a/tests/testthat/test-dbMatrix_logNorm.R +++ /dev/null @@ -1,62 +0,0 @@ -# silence deprecated internal functions -rlang::local_options(lifecycle_verbosity = "quiet") - -# ---------------------------------------------------------------------------- # -# Setup data -visium = GiottoData::loadGiottoMini(dataset = "visium") -dgc = getExpression(visium, output = "matrix") - -dbsm = dbMatrix::createDBMatrix(value = dgc, - db_path = ":temp:", - name = 'dgc', - class = "dbSparseMatrix", - overwrite = TRUE) - -# Create exprObj with dbsm -expObj_db = createExprObj(expression_data = dbsm, - expression_matrix_class = 'dbSparseMatrix', - name = 'raw') - -# Create giotto object -gobject_db = suppressWarnings(createGiottoObject(expression = expObj_db)) - -# ---------------------------------------------------------------------------- # -# Perform filtering -visium_filtered = filterGiotto(visium, spat_unit = "cell", - feat_type = "rna", - expression_values = "raw") - -gobject_db_filtered = filterGiotto(gobject_db, spat_unit = "cell", - feat_type = "rna", - expression_values = "raw") - -# ---------------------------------------------------------------------------- # -# Perform library normalization and scaling -visium_filtered = normalizeGiotto(gobject = visium_filtered, - spat_unit = 'cell', - feat_type = 'rna', - expression_values = 'raw', - library_size_norm = FALSE, - log_norm = TRUE, - scale_feats = FALSE, - scale_cells = FALSE) - - -gobject_db_filtered = normalizeGiotto(gobject = gobject_db_filtered, - spat_unit = 'cell', - feat_type = 'rna', - expression_values = 'raw', - library_size_norm = FALSE, - log_norm = TRUE, - scale_feats = FALSE, - scale_cells = FALSE) -# Get normalized matrix -dgc_visium = getExpression(visium_filtered, output = "matrix", values = "normalized") -mat_db = getExpression(gobject_db_filtered, output = "matrix", values = "normalized") -dgc_db = dbMatrix:::as_matrix(mat_db) - -# ---------------------------------------------------------------------------- # -# Test normalizeGiotto() equivalence between dbMatrix and dgCMatrix -test_that("dbMatrix equivalent to dgCMatrix after normalizeGiotto(log_norm=TRUE)", { - expect_equal(dgc_visium, dgc_db) -}) \ No newline at end of file diff --git a/tests/testthat/test-dbMatrix_scale.R b/tests/testthat/test-dbMatrix_scale.R deleted file mode 100644 index 63227dd80..000000000 --- a/tests/testthat/test-dbMatrix_scale.R +++ /dev/null @@ -1,62 +0,0 @@ -# silence deprecated internal functions -rlang::local_options(lifecycle_verbosity = "quiet") - -# ---------------------------------------------------------------------------- # -# Setup data -visium = GiottoData::loadGiottoMini(dataset = "visium") -dgc = getExpression(visium, output = "matrix") - -dbsm = dbMatrix::createDBMatrix(value = dgc, - db_path = ":temp:", - name = 'dgc', - class = "dbSparseMatrix", - overwrite = TRUE) - -# Create exprObj with dbsm -expObj_db = createExprObj(expression_data = dbsm, - expression_matrix_class = 'dbSparseMatrix', - name = 'raw') - -# Create giotto object -gobject_db = suppressWarnings(createGiottoObject(expression = expObj_db)) - -# ---------------------------------------------------------------------------- # -# Perform filtering -visium_filtered = filterGiotto(visium, spat_unit = "cell", - feat_type = "rna", - expression_values = "raw") - -gobject_db_filtered = filterGiotto(gobject_db, spat_unit = "cell", - feat_type = "rna", - expression_values = "raw") - -# ---------------------------------------------------------------------------- # -# Perform library normalization and scaling -visium_filtered = normalizeGiotto(gobject = visium_filtered, - spat_unit = 'cell', - feat_type = 'rna', - expression_values = 'raw', - library_size_norm = FALSE, - log_norm = FALSE, - scale_feats = TRUE, - scale_cells = TRUE) - - -gobject_db_filtered = normalizeGiotto(gobject = gobject_db_filtered, - spat_unit = 'cell', - feat_type = 'rna', - expression_values = 'raw', - library_size_norm = FALSE, - log_norm = FALSE, - scale_feats = TRUE, - scale_cells = TRUE) -# Get normalized matrix -dgc_visium = getExpression(visium_filtered, output = "matrix", values = "scaled") |> as.matrix() -mat_db = getExpression(gobject_db_filtered, output = "matrix", values = "scaled") -dgc_db = dbMatrix:::as_matrix(mat_db) - -# ---------------------------------------------------------------------------- # -# Test normalizeGiotto() equivalence between dbMatrix and dgCMatrix -test_that("dbMatrix equivalent to dgCMatrix after normalizeGiotto(scale_feats=T,scale=cells=T)", { - expect_equal(dgc_visium, dgc_db) -}) \ No newline at end of file diff --git a/vignettes/Giotto_dbverse_SAIL.qmd b/vignettes/Giotto_dbverse_SAIL.qmd deleted file mode 100644 index 48c305ab6..000000000 --- a/vignettes/Giotto_dbverse_SAIL.qmd +++ /dev/null @@ -1,162 +0,0 @@ ---- -title: "Giotto dbverse implementation SAIL" -format: html ---- - -## Overview -This is a minimal example of how we plan to implement the dbMatrix package in our lab's -spatial analysis toolkit, Giotto Suite. - -For a refresher on `dbMatrix` please see [this](https://drieslab.github.io/dbMatrix/articles/overview.html) page. - -Matrix operations are common in processing spatial omic data. However, recent -spatial technologies are producing massive datasets that are difficult to handle -in memory. We are therefore exploring the use of database-backed matrices to -enable scalable analysis of spatial omic data. - -The below steps involve several matrix operations that are implemented in Giotto Suite. -The idea is to replace the in-memory matrix operations with `dbMatrix` operations -that are backed by a fast DuckDB database. - -## Questions for SAIL -- How can we improve our implementation of `dbMatrix`? -- Important: do not focus on Giotto necessarily, but the implementation of `dbMatrix` in Giotto - -### Load deps -We first load the `db` branch of the Giotto Suite packages. This branch contains -the `dbMatrix` implementation. - -You can install these github repos below -- https://github.com/drieslab/Giotto -- https://github.com/drieslab/GiottoClass -- https://github.com/drieslab/GiottoData -- https://github.com/drieslab/dbMatrix - -Note: Checkout the appropriate branch on each repo shown below - -```{r, message=FALSE} -#| vscode: {languageId: r} -# Note: Please update the file paths to your installation -devtools::load_all(path = '/Users/user/Documents/dries/GiottoSuite/Giotto') #db branch -devtools::load_all(path = '/Users/user/Documents/dries/GiottoSuite/GiottoClass') #db branch -devtools::load_all(path = '/Users/user/Documents/dries/GiottoSuite/GiottoData') #master branch - -# load dbMatrix -devtools::load_all(path = '/Users/user/Documents/dries/dbverse/drieslab/dbMatrix_drieslab') #dev branch -``` - -## 1. Create exprObj with dbMatrix -We use the `GiottoData` package to load a test dataset which contains an in-memory -sparse matrix.The rows are genes and the columns represent cells. Like most spatial data, -the data is sparse. - -```{r, message=FALSE} -#| vscode: {languageId: r} -# Get example data from GiottoData package -visium = GiottoData::loadGiottoMini(dataset = "visium") -dgc = getExpression(visium, output = "matrix") -``` - -We then create a `dbSparseMatrix` using the sparse matrix loaded from the test dataset. -Below shows the constructor for `dbMatrix`. -```{r} -#| vscode: {languageId: r} -# Create dummy dbSparseMatrix -dbsm = createDBMatrix(value = dgc, - db_path = ":temp:", - name = 'dgc', - class = "dbSparseMatrix", - overwrite = TRUE) -dbsm -``` - -We then create an `exprObj` which is an object that is used in our lab's -Giotto Suite implementation to store sparse matrices specifically for -gene expression data. From this we create a `Giotto` object which stores -the `exprObj` and other information about the spatial data. -```{r} -#| vscode: {languageId: r} -# Create exprObj with dbsm -expObj_db = createExprObj(expression_data = dbsm, - expression_matrix_class = 'dbSparseMatrix', - name = 'raw') - -# Create Giotto object using the exprObj -gobject_db = createGiottoObject(expression = expObj_db) -gobject_db - -# Preview the dbMatrix in the gobject -Giotto::getExpression(gobject_db, values = "raw") -``` - -## 2. filterGiotto -We next move on to the first step of the Giotto workflow which involves -filtering genes and cells in the Giotto object. - -```{r} -# debug(filterGiotto) - -gobject_db_filtered = filterGiotto(gobject_db, - spat_unit = "cell", - feat_type = "rna", - expression_values = "raw") -``` - -## 3. Update .rna_standard_normalization -The following demonstrates the normalization steps in the Giotto workflow. - -### library normalization - -```{r} -# debug(normalizeGiotto) - -gobject_db = normalizeGiotto(gobject = gobject_db, - spat_unit = 'cell', - feat_type = 'rna', - expression_values = 'raw', - scalefactor = 5000, - log_norm = FALSE) - -# Preview the normalized dbMatrix in the gobject -Giotto::getExpression(gobject_db, values = "normalized") -``` - -### log normalization -```{r, eval = FALSE} -# debug(normalizeGiotto) - -gobject_db = normalizeGiotto(gobject = gobject_db, - spat_unit = 'cell', - feat_type = 'rna', - expression_values = 'raw', - scalefactor = 5000, - library_size_norm = FALSE) - -# Preview the normalized dbMatrix in the gobject -Giotto::getExpression(gobject_db, values = "normalized") -``` - -#### scaling -```{r} -# debug(normalizeGiotto) - -gobject_db = normalizeGiotto(gobject = gobject_db, - spat_unit = 'cell', - feat_type = 'rna', - expression_values = 'raw', - scalefactor = 5000, - library_size_norm = FALSE, - log_norm = FALSE) - -# Preview the scaled dbMatrix in the gobject -Giotto::getExpression(gobject_db, values = "scaled") -``` - - -# Session Info - -```{r} -#| vscode: {languageId: r} -sessionInfo() -``` -