Skip to content

Commit

Permalink
add diffexp module
Browse files Browse the repository at this point in the history
related to #39
  • Loading branch information
abearab committed Jun 12, 2024
1 parent 4fd9dd0 commit 4fa9aa8
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 35 deletions.
Empty file added CanDI/pipelines/__init__.py
Empty file.
52 changes: 52 additions & 0 deletions CanDI/pipelines/diffexp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import numpy as np
import pandas as pd
import anndata as ad

from pydeseq2.dds import DeseqDataSet
from pydeseq2.default_inference import DefaultInference
from pydeseq2.ds import DeseqStats
from adpbulk import ADPBulk


def pseudobulk_by_group(adt, groups, method="mean"):
    """Aggregate ``adt`` into pseudobulk samples and wrap them as AnnData.

    Parameters
    ----------
    adt
        Input object handed straight to ``ADPBulk`` (presumably an
        ``AnnData`` -- confirm against callers).
    groups
        Forwarded to ``ADPBulk`` as ``groupby``.
    method
        Forwarded to ``ADPBulk`` as ``method``; defaults to ``"mean"``.

    Returns
    -------
    anndata.AnnData
        ``X`` holds the matrix returned by ``ADPBulk.fit_transform()`` and
        ``obs`` holds the ``ADPBulk.get_meta()`` table re-indexed by its
        ``SampleName`` column.
    """
    bulker = ADPBulk(adt, groupby=groups, method=method)

    # Same call order as before: aggregate first, then read the metadata.
    aggregated = bulker.fit_transform()
    meta_by_sample = bulker.get_meta().set_index('SampleName')

    return ad.AnnData(X=aggregated, obs=meta_by_sample)


def run_deseq(adata, design, tested_level, ref_level, n_cpus=8):
    """Run a PyDESeq2 differential-expression test and return its results.

    Parameters
    ----------
    adata
        Object providing ``to_df()`` (the count matrix) and ``.obs`` (the
        sample metadata); presumably an ``AnnData`` with one row per
        sample -- confirm against callers.
    design
        Column name, or list of column names, in ``adata.obs`` passed to
        ``DeseqDataSet`` as ``design_factors``. When several factors are
        given, the last one is used as the variable of interest for the
        contrast.
    tested_level
        Level of the contrast variable to test.
    ref_level
        Level of the contrast variable used as the reference.
    n_cpus
        Number of CPUs handed to ``DefaultInference``; defaults to 8.

    Returns
    -------
    pandas.DataFrame
        ``DeseqStats.results_df`` as populated by ``summary()``.
    """
    inference = DefaultInference(n_cpus=n_cpus)

    # The contrast needs ONE variable name, while ``design`` may be a list
    # of factors. Using the list itself as contrast[0] would be malformed,
    # so fall back to the last factor. A plain string is used unchanged.
    if isinstance(design, str):
        contrast_factor = design
    else:
        contrast_factor = list(design)[-1]

    dds = DeseqDataSet(
        # NOTE: astype(int) truncates any fractional values in the matrix.
        counts=adata.to_df().astype(int),
        metadata=adata.obs,
        design_factors=design,
        refit_cooks=True,
        inference=inference,
    )

    dds.deseq2()

    stat_res = DeseqStats(
        dds,
        contrast=[contrast_factor, tested_level, ref_level],
        inference=inference,
    )
    # summary() runs the statistical tests and fills in ``results_df``.
    stat_res.summary()

    return stat_res.results_df
35 changes: 0 additions & 35 deletions scripts/run_deseq.r

This file was deleted.

0 comments on commit 4fa9aa8

Please sign in to comment.