-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Generalized install_github, formatting fixes
- Loading branch information
Aarthi Venkat
committed
Feb 18, 2021
1 parent
1a4f5e2
commit fed32fb
Showing
4 changed files
with
379 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
from .r_function import RFunction, install_bioconductor | ||
from .splatter import SplatSimulate | ||
from .slingshot import Slingshot | ||
from .dyngen import DyngenSimulate |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,266 @@ | ||
import pandas as pd | ||
from . import r_function | ||
|
||
# R helper: attaches dyngen and evaluates to the names of the entries
# returned by its list_backbones() function.
_get_backbones = r_function.RFunction(
    body="""
        names(list_backbones())
    """,
    setup="""
        library(dyngen)
    """,
)
|
||
# R implementation behind DyngenSimulate.
#
# The R function validates the backbone name, optionally seeds R's RNG,
# initialises a dyngen model and generates a dataset. It evaluates to a named
# list of data frames: always `cell_info` and `expression`; additionally
# `bulk_grn` and `cellwise_grn` when compute_cellwise_grn is TRUE, and
# `rna_velocity` when compute_rna_velocity is TRUE.
#
# NOTE: this is a regular (non-raw) Python string, so the `\n` / `\t` escapes
# below are expanded by Python before the text reaches R.
_DyngenSimulate = r_function.RFunction(
    args="""
        backbone_name=character(), num_cells=500, num_tfs=100, num_targets=50,
        num_hks=25,simulation_census_interval=10, compute_cellwise_grn=FALSE,
        compute_rna_velocity=FALSE, n_jobs=7, random_state=NA, verbose=TRUE
    """,
    setup="""
        library(dyngen)
    """,
    body="""
        backbone_generators <- list_backbones()
        # The length guard routes an empty or vector-valued name to the
        # intended error below rather than a failing `if` condition.
        # The second line of the message is kept at column 0 so that no
        # source indentation leaks into the error text.
        if (length(backbone_name) != 1 ||
                !(backbone_name %in% names(backbone_generators))) {
            stop("Input not in list of dyngen backbones.
Choose name from get_backbones().")
        }
        if (!is.na(random_state)) {
            set.seed(random_state)
        }
        # Build only the requested backbone, from the same list the name was
        # validated against, rather than constructing every backbone up front.
        # NOTE(review): fewer RNG draws now happen after set.seed, so output
        # for a given random_state may differ from the previous
        # implementation - confirm this is acceptable.
        backbone <- backbone_generators[[backbone_name]]()
        # silent default behavior of dyngen
        if (num_tfs < nrow(backbone$module_info)) {
            if (verbose) {
                cat("If input num_tfs is less than backbone default,",
                    "Dyngen uses backbone default.\n")
            }
            num_tfs <- nrow(backbone$module_info)
        }
        if (verbose) {
            cat('Run Parameters:')
            cat('\n\tBackbone:', backbone_name)
            cat('\n\tNumber of Cells:', num_cells)
            cat('\n\tNumber of TFs:', num_tfs)
            cat('\n\tNumber of Targets:', num_targets)
            cat('\n\tNumber of HKs:', num_hks, '\n')
        }
        init <- initialise_model(
            backbone=backbone,
            num_cells=num_cells,
            num_tfs=num_tfs,
            num_targets=num_targets,
            num_hks=num_hks,
            simulation_params=simulation_default(
                census_interval=as.double(simulation_census_interval),
                kinetics_noise_function = kinetics_noise_simple(mean=1, sd=0.005),
                ssa_algorithm = ssa_etl(tau=300/3600),
                compute_cellwise_grn=compute_cellwise_grn,
                compute_rna_velocity=compute_rna_velocity),
            num_cores = n_jobs,
            download_cache_dir=NULL,
            verbose=verbose
        )
        out <- generate_dataset(init)
        data <- list(cell_info = as.data.frame(out$dataset$cell_info),
                     expression = as.data.frame(as.matrix(out$dataset$expression)))
        if (compute_cellwise_grn) {
            data[['bulk_grn']] <- as.data.frame(out$dataset$regulatory_network)
            data[['cellwise_grn']] <- as.data.frame(out$dataset$regulatory_network_sc)
        }
        if (compute_rna_velocity) {
            data[['rna_velocity']] <- as.data.frame(as.matrix(out$dataset$rna_velocity))
        }
        data
    """,
)
|
||
|
||
def install(
    lib=None,
    dependencies=None,
    update=False,
    repos="http://cran.us.r-project.org",
    build_vignettes=False,
    force=False,
    verbose=True,
):
    """Install the dyngen R package from its Github repository.

    Delegates to ``r_function.install_github`` with
    ``repo="dynverse/dyngen"``.

    Parameters
    ----------
    lib : string, optional (default: None)
        Directory to install the package.
        If missing, defaults to the first element of .libPaths().
    dependencies : boolean, optional (default: None/NA)
        When True, installs all packages specified under "Depends", "Imports",
        "LinkingTo" and "Suggests".
        When False, installs no dependencies.
        When None/NA, installs all packages specified under "Depends",
        "Imports" and "LinkingTo".
    update : string or boolean, optional (default: False)
        One of "default", "ask", "always", or "never".
        "default" respects the R_REMOTES_UPGRADE environment variable if set
        and falls back to "ask" if unset.
        "ask" prompts the user for which out of date packages to upgrade.
        For non-interactive sessions "ask" is equivalent to "always".
        True and False are also accepted and correspond to "always" and
        "never" respectively.
    repos : string, optional (default: "http://cran.us.r-project.org")
        R package repository.
    build_vignettes : boolean, optional (default: False)
        Builds Github vignettes.
    force : boolean, optional (default: False)
        Forces installation even if remote state has not changed since
        previous install.
    verbose : boolean, optional (default: True)
        Install script verbosity.
    """
    # build_vignettes and force used to be accepted but silently dropped.
    # They are forwarded only when enabled so the default call is unchanged.
    # NOTE(review): assumes r_function.install_github accepts these two
    # keywords (its signature is not visible here) - confirm.
    optional_flags = {}
    if build_vignettes:
        optional_flags["build_vignettes"] = build_vignettes
    if force:
        optional_flags["force"] = force

    r_function.install_github(
        repo="dynverse/dyngen",
        update=update,
        lib=lib,
        dependencies=dependencies,
        repos=repos,
        verbose=verbose,
        **optional_flags,
    )
|
||
def get_backbones():
    """List the names of the cell trajectory backbones offered by dyngen.

    Returns
    -------
    backbones : array of backbone names
    """
    backbone_names = _get_backbones()
    return backbone_names
|
||
|
||
def DyngenSimulate(
    backbone,
    num_cells=500,
    num_tfs=100,
    num_targets=50,
    num_hks=25,
    simulation_census_interval=10,
    compute_cellwise_grn=False,
    compute_rna_velocity=False,
    n_jobs=7,
    random_state=None,
    verbose=True,
):
    """Simulate dataset with cellular backbone.

    The backbone determines the overall dynamic process during a simulation.
    It consists of a set of gene modules, which regulate each other such that
    expression of certain genes change over time in a specific manner.

    DyngenSimulate is a Python wrapper for the R package Dyngen.
    Default values obtained from Github vignettes.
    For more details, read about Dyngen on Github_.

    .. _Github: https://github.com/dynverse/dyngen

    Parameters
    ----------
    backbone : string
        Backbone name from dyngen list of backbones.
        Get list with get_backbones().
    num_cells : int, optional (default: 500)
        Number of cells.
    num_tfs : int, optional (default: 100)
        Number of transcription factors.
        The TFs are the main drivers of the molecular changes in the
        simulation. A TF can only be regulated by other TFs or itself.
        NOTE: If num_tfs input is less than nrow(backbone$module_info),
        Dyngen will default to nrow(backbone$module_info).
        This quantity varies between backbones and with each run
        (without seed). It is generally less than 75.
        It is recommended to input num_tfs >= 100 to stabilize the output.
    num_targets : int, optional (default: 50)
        Number of target genes.
        Target genes are regulated by a TF or another target gene,
        but are always downstream of at least one TF.
    num_hks : int, optional (default: 25)
        Number of housekeeping genes.
        Housekeeping genes are completely separate from any TFs or
        target genes.
    simulation_census_interval : int, optional (default: 10)
        Stores the abundance levels only after a specific interval has passed.
        The lower the interval, the higher detail of simulation trajectory
        retained, though many timepoints will contain similar information.
    compute_cellwise_grn : boolean, optional (default: False)
        If True, computes the ground truth cellwise gene regulatory networks.
        Also outputs ground truth bulk (entire dataset) regulatory network.
        NOTE: Increases compute time significantly.
    compute_rna_velocity : boolean, optional (default: False)
        If True, computes the ground truth propensity ratios after simulation.
        NOTE: Increases compute time significantly.
    n_jobs : int, optional (default: 7)
        Number of cores to use.
    random_state : int, optional (default: None)
        Fixes seed for simulation generator.
    verbose : boolean, optional (default: True)
        Data generation verbosity.

    Returns
    -------
    Dictionary data of pd.DataFrames:
    data['cell_info'] : pd.DataFrame, shape (n_cells, 7)
        Columns: cell_id, step_ix, simulation_i, sim_time, num_molecules,
        mult, lib_size.
        sim_time is the simulated timepoint for a given cell.
    data['expression'] : pd.DataFrame, shape (n_cells, n_genes)
        Log-transformed counts with dropout.

    If compute_cellwise_grn is True,
    data['bulk_grn'] : pd.DataFrame, shape (n_tf_target_interactions, 4)
        Columns: regulator, target, strength, effect.
        Strength is positive and unbounded.
        Effect is either +1 (for activation) or -1 (for inhibition).
    data['cellwise_grn'] : pd.DataFrame, shape (n_tf_target_interactions_per_cell, 4)
        Columns: cell_id, regulator, target, strength.
        The output does not include all edges per cell.
        The strength lies between [-1, 1], where -1 is complete inhibition
        of target by TF, +1 is maximal activation of target by TF,
        and 0 is inactivity of the regulatory interaction between
        regulator and target.

    If compute_rna_velocity is True,
    data['rna_velocity'] : pd.DataFrame, shape (n_cells, n_genes)
        Propensity ratios for each cell.

    Example
    -------
    >>> import scprep
    >>> scprep.run.dyngen.install()
    >>> backbones = scprep.run.dyngen.get_backbones()
    >>> data = scprep.run.DyngenSimulate(backbone=backbones[0])
    """
    # Forward random_state only when set: Python None is not R's NA, so
    # omitting the argument lets the R function fall back to its own
    # random_state=NA default (which skips set.seed).
    optional_args = {}
    if random_state is not None:
        optional_args["random_state"] = random_state

    rdata = _DyngenSimulate(
        backbone_name=backbone,
        num_cells=num_cells,
        num_tfs=num_tfs,
        num_targets=num_targets,
        num_hks=num_hks,
        simulation_census_interval=simulation_census_interval,
        compute_cellwise_grn=compute_cellwise_grn,
        compute_rna_velocity=compute_rna_velocity,
        n_jobs=n_jobs,
        verbose=verbose,
        rpy_verbose=verbose,
        **optional_args,
    )

    # Collect the result tables in the same key order as before; the optional
    # tables are only requested from rdata when the matching flag is enabled.
    table_names = ["cell_info", "expression"]
    if compute_cellwise_grn:
        table_names += ["cellwise_grn", "bulk_grn"]
    if compute_rna_velocity:
        table_names.append("rna_velocity")

    return {name: pd.DataFrame(rdata[name]) for name in table_names}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.