Accept any scipy.sparse matrix format (not only CSR) in stats.py and det.py,
and exercise dense, CSR and CSC inputs in the data-type unit tests.

Differences from the patch as originally circulated (added lines only):
  * the y= argument of mann_whitney_u_test now checks x1, not x0, for sparsity;
  * the builtin float replaces the np.float alias, which NumPy deprecated in
    1.20 and removed in 1.24.
Removed and context lines are unchanged so the hunks still match their target
files; the index hashes are those of the original patch. Indentation and blank
context lines were reconstructed from a whitespace-flattened copy using the
hunk line counts -- run `git apply --check` before applying.

diff --git a/diffxpy/stats/stats.py b/diffxpy/stats/stats.py
index 236a209..4cb8222 100644
--- a/diffxpy/stats/stats.py
+++ b/diffxpy/stats/stats.py
@@ -66,8 +66,8 @@ def mann_whitney_u_test(
 
     pvals = np.asarray([
         scipy.stats.mannwhitneyu(
-            x=np.asarray(x0[:, i].todense()).flatten() if isinstance(x0, scipy.sparse.csr_matrix) else x0[:, i],
-            y=np.asarray(x1[:, i].todense()).flatten() if isinstance(x0, scipy.sparse.csr_matrix) else x1[:, i],
+            x=x0[:, i].toarray().flatten() if isinstance(x0, scipy.sparse.spmatrix) else x0[:, i],
+            y=x1[:, i].toarray().flatten() if isinstance(x1, scipy.sparse.spmatrix) else x1[:, i],
             use_continuity=True,
             alternative="two-sided"
         ).pvalue for i in range(x0.shape[1])
diff --git a/diffxpy/testing/det.py b/diffxpy/testing/det.py
index 29d25a8..da7a599 100644
--- a/diffxpy/testing/det.py
+++ b/diffxpy/testing/det.py
@@ -1564,8 +1564,8 @@ def __init__(
         x0, x1 = split_x(data, grouping)
 
         # Only compute p-values for genes with non-zero observations and non-zero group-wise variance.
-        mean_x0 = np.asarray(np.mean(x0, axis=0)).flatten().astype(dtype=np.float)
-        mean_x1 = np.asarray(np.mean(x1, axis=0)).flatten().astype(dtype=np.float)
+        mean_x0 = np.asarray(np.mean(x0, axis=0, dtype=float)).flatten()
+        mean_x1 = np.asarray(np.mean(x1, axis=0, dtype=float)).flatten()
         # Avoid unnecessary mean computation:
         self._mean = np.asarray(np.average(
             a=np.vstack([mean_x0, mean_x1]),
@@ -1575,13 +1575,13 @@ def __init__(
             returned=False
         )).flatten()
         self._ave_nonzero = self._mean != 0  # omit all-zero features
-        if isinstance(x0, scipy.sparse.csr_matrix):
+        if isinstance(x0, scipy.sparse.spmatrix):
             # Efficient analytic expression of variance without densification.
-            var_x0 = np.asarray(np.mean(x0.power(2), axis=0)).flatten().astype(dtype=np.float) - np.square(mean_x0)
-            var_x1 = np.asarray(np.mean(x1.power(2), axis=0)).flatten().astype(dtype=np.float) - np.square(mean_x1)
+            var_x0 = np.asarray(np.mean(x0.power(2, dtype=float), axis=0)).flatten() - np.square(mean_x0)
+            var_x1 = np.asarray(np.mean(x1.power(2, dtype=float), axis=0)).flatten() - np.square(mean_x1)
         else:
-            var_x0 = np.asarray(np.var(x0, axis=0)).flatten().astype(dtype=np.float)
-            var_x1 = np.asarray(np.var(x1, axis=0)).flatten().astype(dtype=np.float)
+            var_x0 = np.asarray(np.var(x0, axis=0, dtype=float)).flatten()
+            var_x1 = np.asarray(np.var(x1, axis=0, dtype=float)).flatten()
         self._var_geq_zero = np.logical_or(
             var_x0 > 0,
             var_x1 > 0
@@ -1690,8 +1690,8 @@ def __init__(
 
         x0, x1 = split_x(data, grouping)
 
-        mean_x0 = np.asarray(np.mean(x0, axis=0)).flatten().astype(dtype=np.float)
-        mean_x1 = np.asarray(np.mean(x1, axis=0)).flatten().astype(dtype=np.float)
+        mean_x0 = np.asarray(np.mean(x0, axis=0, dtype=float)).flatten()
+        mean_x1 = np.asarray(np.mean(x1, axis=0, dtype=float)).flatten()
         # Avoid unnecessary mean computation:
         self._mean = np.asarray(np.average(
             a=np.vstack([mean_x0, mean_x1]),
@@ -1700,7 +1700,7 @@ def __init__(
             axis=0,
             returned=False
         )).flatten()
-        if isinstance(x0, scipy.sparse.csr_matrix):
+        if isinstance(x0, scipy.sparse.spmatrix):
             # Efficient analytic expression of variance without densification.
             var_x0 = np.asarray(np.mean(x0.power(2), axis=0)).flatten().astype(dtype=np.float) - np.square(mean_x0)
             var_x1 = np.asarray(np.mean(x1.power(2), axis=0)).flatten().astype(dtype=np.float) - np.square(mean_x1)
diff --git a/diffxpy/unit_test/test_data_types.py b/diffxpy/unit_test/test_data_types.py
index 3f82152..f613fff 100644
--- a/diffxpy/unit_test/test_data_types.py
+++ b/diffxpy/unit_test/test_data_types.py
@@ -63,22 +63,20 @@ def simulate(self, n_cells: int = 200, n_genes: int = 2):
         })
         return sim.x, random_sample_description
 
-    def _test_numpy(self, sparse):
+    def _test_numpy(self, fmt=np.asarray):
         data, sample_description = self.simulate()
         gene_names = ["gene" + str(i) for i in range(data.shape[1])]
-        if sparse:
-            data = scipy.sparse.csr_matrix(data)
+        data = fmt(data)
 
         self._test_wald(data=data, sample_description=sample_description, gene_names=gene_names)
         self._test_lrt(data=data, sample_description=sample_description, gene_names=gene_names)
         self._test_t_test(data=data, sample_description=sample_description, gene_names=gene_names)
         self._test_rank(data=data, sample_description=sample_description, gene_names=gene_names)
 
-    def _test_anndata(self, sparse):
+    def _test_anndata(self, fmt=np.asarray):
         data, sample_description = self.simulate()
         gene_names = ["gene" + str(i) for i in range(data.shape[1])]
-        if sparse:
-            data = scipy.sparse.csr_matrix(data)
+        data = fmt(data)
 
         data = anndata.AnnData(data)
         data.var_names = gene_names
@@ -87,11 +85,10 @@ def _test_anndata(self, sparse):
         self._test_t_test(data=data, sample_description=sample_description)
         self._test_rank(data=data, sample_description=sample_description)
 
-    def _test_anndata_raw(self, sparse):
+    def _test_anndata_raw(self, fmt=np.asarray):
         data, sample_description = self.simulate()
         gene_names = ["gene" + str(i) for i in range(data.shape[1])]
-        if sparse:
-            data = scipy.sparse.csr_matrix(data)
+        data = fmt(data)
 
         data = anndata.AnnData(data)
         data.var_names = gene_names
@@ -106,8 +103,9 @@ def test_numpy(self):
         logging.getLogger("batchglm").setLevel(logging.WARNING)
         logging.getLogger("diffxpy").setLevel(logging.WARNING)
 
-        self._test_numpy(sparse=False)
-        self._test_numpy(sparse=True)
+        self._test_numpy(fmt=np.asarray)
+        self._test_numpy(fmt=scipy.sparse.csr_matrix)
+        self._test_numpy(fmt=scipy.sparse.csc_matrix)
 
         return True
 
@@ -116,10 +114,12 @@ def test_anndata(self):
         logging.getLogger("batchglm").setLevel(logging.WARNING)
         logging.getLogger("diffxpy").setLevel(logging.WARNING)
 
-        self._test_anndata(sparse=False)
-        self._test_anndata(sparse=True)
-        self._test_anndata_raw(sparse=False)
-        self._test_anndata_raw(sparse=True)
+        self._test_anndata(fmt=np.asarray)
+        self._test_anndata(fmt=scipy.sparse.csr_matrix)
+        self._test_anndata(fmt=scipy.sparse.csc_matrix)
+        self._test_anndata_raw(fmt=np.asarray)
+        self._test_anndata_raw(fmt=scipy.sparse.csr_matrix)
+        self._test_anndata_raw(fmt=scipy.sparse.csc_matrix)
 
         return True
 