Skip to content

Commit

Permalink
Generalized install_github, formatting fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
Aarthi Venkat committed Feb 18, 2021
1 parent 1a4f5e2 commit fed32fb
Show file tree
Hide file tree
Showing 4 changed files with 379 additions and 1 deletion.
1 change: 1 addition & 0 deletions scprep/run/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .r_function import RFunction, install_bioconductor
from .splatter import SplatSimulate
from .slingshot import Slingshot
from .dyngen import DyngenSimulate
266 changes: 266 additions & 0 deletions scprep/run/dyngen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,266 @@
import pandas as pd
from . import r_function

# R helper: loads the dyngen package and returns the names of the entries
# of dyngen's ``list_backbones()``. Wrapped for Python by ``get_backbones``.
_get_backbones = r_function.RFunction(
setup="""
library(dyngen)
""",
body="""
names(list_backbones())
""",
)

# R helper that runs a full dyngen simulation; wrapped for Python by
# ``DyngenSimulate``.
#
# The R body, in order:
#   1. stops with an error if ``backbone_name`` is not one of
#      ``names(list_backbones())``;
#   2. calls ``set.seed(random_state)`` unless ``random_state`` is NA;
#   3. builds a named list of backbones by calling each listed
#      ``backbone_*()`` constructor, then selects ``backbone_name`` from it;
#   4. raises ``num_tfs`` to ``nrow(backbone$module_info)`` when the requested
#      value is smaller (printing a notice when ``verbose``);
#   5. prints the run parameters when ``verbose``;
#   6. calls ``initialise_model`` followed by ``generate_dataset``;
#   7. returns a list holding ``cell_info`` and ``expression`` as data frames,
#      plus ``bulk_grn``/``cellwise_grn`` when ``compute_cellwise_grn`` and
#      ``rna_velocity`` when ``compute_rna_velocity``.
#
# NOTE(review): the name is validated against ``list_backbones()`` but looked
# up in the hard-coded list below; a backbone known to the installed dyngen
# but missing from that list would presumably yield NULL here -- confirm.
# NOTE(review): every backbone constructor runs after ``set.seed``; if any of
# them draw random numbers this affects the seeded stream -- confirm.
_DyngenSimulate = r_function.RFunction(
# R-side argument list with defaults; ``random_state=NA`` means "do not seed".
args="""
backbone_name=character(), num_cells=500, num_tfs=100, num_targets=50,
num_hks=25,simulation_census_interval=10, compute_cellwise_grn=FALSE,
compute_rna_velocity=FALSE, n_jobs=7, random_state=NA, verbose=TRUE
""",
setup="""
library(dyngen)
""",
# ``n_jobs`` is forwarded to ``initialise_model`` as ``num_cores``.
body="""
if (!(backbone_name %in% names(list_backbones()))) {
stop("Input not in list of dyngen backbones.
Choose name from get_backbones().")
}
if (!is.na(random_state)) {
set.seed(random_state)
}
backbones <- list('bifurcating'=backbone_bifurcating(),
'bifurcating_converging'=backbone_bifurcating_converging(),
'bifurcating_cycle'=backbone_bifurcating_cycle(),
'bifurcating_loop'=backbone_bifurcating_loop(),
'binary_tree'=backbone_binary_tree(),
'branching'=backbone_branching(),
'consecutive_bifurcating'=backbone_consecutive_bifurcating(),
'converging'=backbone_converging(),
'cycle'=backbone_cycle(),
'cycle_simple'=backbone_cycle_simple(),
'disconnected'=backbone_disconnected(),
'linear'=backbone_linear(),
'linear_simple'=backbone_linear_simple(),
'trifurcating'=backbone_trifurcating()
)
backbone <- backbones[[backbone_name]]
# silent default behavior of dyngen
if (num_tfs < nrow(backbone$module_info)) {
if (verbose) {
cat("If input num_tfs is less than backbone default,",
"Dyngen uses backbone default.\n")
}
num_tfs <- nrow(backbone$module_info)
}
if (verbose) {
cat('Run Parameters:')
cat('\n\tBackbone:', backbone_name)
cat('\n\tNumber of Cells:', num_cells)
cat('\n\tNumber of TFs:', num_tfs)
cat('\n\tNumber of Targets:', num_targets)
cat('\n\tNumber of HKs:', num_hks, '\n')
}
init <- initialise_model(
backbone=backbone,
num_cells=num_cells,
num_tfs=num_tfs,
num_targets=num_targets,
num_hks=num_hks,
simulation_params=simulation_default(
census_interval=as.double(simulation_census_interval),
kinetics_noise_function = kinetics_noise_simple(mean=1, sd=0.005),
ssa_algorithm = ssa_etl(tau=300/3600),
compute_cellwise_grn=compute_cellwise_grn,
compute_rna_velocity=compute_rna_velocity),
num_cores = n_jobs,
download_cache_dir=NULL,
verbose=verbose
)
out <- generate_dataset(init)
data <- list(cell_info = as.data.frame(out$dataset$cell_info),
expression = as.data.frame(as.matrix(out$dataset$expression)))
if (compute_cellwise_grn) {
data[['bulk_grn']] <- as.data.frame(out$dataset$regulatory_network)
data[['cellwise_grn']] <- as.data.frame(out$dataset$regulatory_network_sc)
}
if (compute_rna_velocity) {
data[['rna_velocity']] <- as.data.frame(as.matrix(out$dataset$rna_velocity))
}
data
""",
)


def install(
    lib=None,
    dependencies=None,
    update=False,
    repos="http://cran.us.r-project.org",
    build_vignettes=False,
    force=False,
    verbose=True,
):
    """Install the dyngen R package from its Github repository.

    Thin wrapper around ``r_function.install_github`` with the repository
    fixed to ``dynverse/dyngen``. Every argument is forwarded unchanged.

    Parameters
    ----------
    lib: string
        Directory to install the package.
        If missing, defaults to the first element of .libPaths().
    dependencies: boolean, optional (default: None/NA)
        When True, installs all packages specified under "Depends", "Imports",
        "LinkingTo" and "Suggests".
        When False, installs no dependencies.
        When None/NA, installs all packages specified under "Depends", "Imports"
        and "LinkingTo".
    update: string or boolean, optional (default: False)
        One of "default", "ask", "always", or "never". "default"
        Respects R_REMOTES_UPGRADE environment variable if set,
        falls back to "ask" if unset.
        "ask" prompts the user for which out of date packages to upgrade.
        For non-interactive sessions "ask" is equivalent to "always".
        TRUE and FALSE are also accepted and correspond to "always" and
        "never" respectively.
    repos: string, optional (default: "http://cran.us.r-project.org"):
        R package repository.
    build_vignettes: boolean, optional (default: False)
        Builds Github vignettes.
    force: boolean, optional (default: False)
        Forces installation even if remote state has not changed since
        previous install.
    verbose: boolean, optional (default: True)
        Install script verbosity.
    """
    r_function.install_github(
        repo="dynverse/dyngen",
        lib=lib,
        dependencies=dependencies,
        update=update,
        repos=repos,
        # These two were accepted by the signature but previously dropped,
        # so callers' choices were silently ignored.
        build_vignettes=build_vignettes,
        force=force,
        verbose=verbose,
    )

def get_backbones():
    """List the cell trajectory backbones known to dyngen.

    Returns
    -------
    backbones: array of backbone names
    """
    backbone_names = _get_backbones()
    return backbone_names


def DyngenSimulate(backbone, num_cells=500, num_tfs=100, num_targets=50, num_hks=25,
                   simulation_census_interval=10, compute_cellwise_grn=False,
                   compute_rna_velocity=False, n_jobs=7, random_state=None,
                   verbose=True):
    """Simulate a dataset along a cellular backbone with dyngen.

    The backbone determines the overall dynamic process during a simulation.
    It consists of a set of gene modules, which regulate each other such that
    expression of certain genes change over time in a specific manner.

    DyngenSimulate is a Python wrapper for the R package Dyngen.
    Default values obtained from Github vignettes.
    For more details, read about Dyngen on Github_.

    .. _Github: https://github.com/dynverse/dyngen

    Parameters
    ----------
    backbone: string
        Backbone name from dyngen list of backbones.
        Get the list with get_backbones().
    num_cells: int, optional (default: 500)
        Number of cells.
    num_tfs: int, optional (default: 100)
        Number of transcription factors.
        The TFs are the main drivers of the molecular changes in the simulation.
        A TF can only be regulated by other TFs or itself.
        NOTE: If num_tfs input is less than nrow(backbone$module_info),
        Dyngen will default to nrow(backbone$module_info).
        This quantity varies between backbones and with each run (without seed).
        It is generally less than 75.
        It is recommended to input num_tfs >= 100 to stabilize the output.
    num_targets: int, optional (default: 50)
        Number of target genes.
        Target genes are regulated by a TF or another target gene,
        but are always downstream of at least one TF.
    num_hks: int, optional (default: 25)
        Number of housekeeping genes.
        Housekeeping genes are completely separate from any TFs or target genes.
    simulation_census_interval: int, optional (default: 10)
        Stores the abundance levels only after a specific interval has passed.
        The lower the interval, the higher detail of simulation trajectory
        retained, though many timepoints will contain similar information.
    compute_cellwise_grn: boolean, optional (default: False)
        If True, computes the ground truth cellwise gene regulatory networks.
        Also outputs ground truth bulk (entire dataset) regulatory network.
        NOTE: Increases compute time significantly.
    compute_rna_velocity: boolean, optional (default: False)
        If True, computes the ground truth propensity ratios after simulation.
        NOTE: Increases compute time significantly.
    n_jobs: int, optional (default: 7)
        Number of cores to use.
    random_state: int, optional (default: None)
        Fixes seed for simulation generator.
    verbose: boolean, optional (default: True)
        Data generation verbosity.

    Returns
    -------
    Dictionary data of pd.DataFrames:
    data['cell_info']: pd.DataFrame, shape (n_cells, 7)
        Columns: cell_id, step_ix, simulation_i, sim_time, num_molecules, mult,
        lib_size
        sim_time is the simulated timepoint for a given cell.
    data['expression']: pd.DataFrame, shape (n_cells, n_genes)
        Log-transformed counts with dropout.
    If compute_cellwise_grn is True,
    data['bulk_grn']: pd.DataFrame, shape (n_tf_target_interactions, 4)
        Columns: regulator, target, strength, effect.
        Strength is positive and unbounded.
        Effect is either +1 (for activation) or -1 (for inhibition).
    data['cellwise_grn']: pd.DataFrame,
        shape (n_tf_target_interactions_per_cell, 4)
        Columns: cell_id, regulator, target, strength.
        The output does not include all edges per cell.
        The regulatory effect lies between [−1, 1], where -1 is complete
        inhibition of target by TF, +1 is maximal activation of target by TF,
        and 0 is inactivity of the regulatory interaction between the
        regulator and the target.
    If compute_rna_velocity is True,
    data['rna_velocity']: pd.DataFrame, shape (n_cells, n_genes)
        Propensity ratios for each cell.

    Example
    --------
    >>> import scprep
    >>> scprep.run.dyngen.install()
    >>> backbones = scprep.run.dyngen.get_backbones()
    >>> data = scprep.run.DyngenSimulate(backbone=backbones[0])
    """
    r_kwargs = {
        "backbone_name": backbone,
        "num_cells": num_cells,
        "num_tfs": num_tfs,
        "num_targets": num_targets,
        "num_hks": num_hks,
        "simulation_census_interval": simulation_census_interval,
        "compute_cellwise_grn": compute_cellwise_grn,
        "compute_rna_velocity": compute_rna_velocity,
        "n_jobs": n_jobs,
        "verbose": verbose,
        "rpy_verbose": verbose,
    }
    # Forward the seed only when one was given, so the R-side default (NA,
    # meaning "do not seed") applies otherwise.
    if random_state is not None:
        r_kwargs["random_state"] = random_state

    rdata = _DyngenSimulate(**r_kwargs)

    # Collect the result tables that this run actually produced.
    table_names = ['cell_info', 'expression']
    if compute_cellwise_grn:
        table_names += ['cellwise_grn', 'bulk_grn']
    if compute_rna_velocity:
        table_names.append('rna_velocity')

    return {name: pd.DataFrame(rdata[name]) for name in table_names}
71 changes: 70 additions & 1 deletion scprep/run/r_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,6 @@ def __call__(self, *args, rpy_cleanup=None, rpy_verbose=None, **kwargs):
""",
)


def install_bioconductor(
package=None, site_repository=None, update=False, version=None, verbose=True
):
Expand Down Expand Up @@ -178,3 +177,73 @@ def install_bioconductor(
if version is not None:
kwargs["version"] = version
_install_bioconductor(**kwargs)


# R helper that installs a Github-hosted R package via
# ``remotes::install_github``; wrapped for Python by ``install_github``.
#
# The R body installs the ``remotes`` package first when it cannot be loaded,
# runs the install with ``quiet`` set to the inverse of ``verbose``, prepends
# ``lib`` to ``.libPaths()`` when it is not already the first entry, and
# prints the library paths when ``verbose``.
#
# The bootstrap ``install.packages`` call is given ``repos`` explicitly:
# without it, R has no CRAN mirror selected in a non-interactive session and
# the bootstrap install of ``remotes`` fails.
_install_github = RFunction(
    args="""repo=character(), lib=.libPaths()[1], dependencies=NA,
update=FALSE, repos='http://cran.us.r-project.org',
build_vignettes=FALSE, force=FALSE, verbose=TRUE""",
    body="""
quiet <- !verbose
if (!require('remotes')) install.packages('remotes', repos=repos)
remotes::install_github(repo=repo,
lib=lib, dependencies=dependencies,
upgrade=update, repos=repos,
build_vignettes=build_vignettes,
force=force, quiet=quiet)
# prepend path to libPaths if new library
if (lib != .libPaths()[1]) .libPaths(c(lib, .libPaths()))
if (verbose) cat('.libPaths():', .libPaths())
""",
)

def install_github(
    repo,
    lib=None,
    dependencies=None,
    update=False,
    repos="http://cran.us.r-project.org",
    build_vignettes=False,
    force=False,
    verbose=True,
):
    """Install an R package from a Github repository.

    Parameters
    ----------
    repo: string
        Github repository name to install.
    lib: string
        Directory to install the package.
        If missing, defaults to the first element of .libPaths().
    dependencies: boolean, optional (default: None/NA)
        When True, installs all packages specified under "Depends", "Imports",
        "LinkingTo" and "Suggests".
        When False, installs no dependencies.
        When None/NA, installs all packages specified under "Depends", "Imports"
        and "LinkingTo".
    update: string or boolean, optional (default: False)
        One of "default", "ask", "always", or "never". "default"
        Respects R_REMOTES_UPGRADE environment variable if set,
        falls back to "ask" if unset.
        "ask" prompts the user for which out of date packages to upgrade.
        For non-interactive sessions "ask" is equivalent to "always".
        TRUE and FALSE are also accepted and correspond to "always" and
        "never" respectively.
    repos: string, optional (default: "http://cran.us.r-project.org"):
        R package repository.
    build_vignettes: boolean, optional (default: False)
        Builds Github vignettes.
    force: boolean, optional (default: False)
        Forces installation even if remote state has not changed since
        previous install.
    verbose: boolean, optional (default: True)
        Install script verbosity.
    """
    r_args = {
        "repo": repo,
        "update": update,
        "repos": repos,
        "build_vignettes": build_vignettes,
        "force": force,
        "verbose": verbose,
    }
    # Leave out unset values so the defaults declared on the R side
    # (.libPaths()[1] and NA) are used instead of None.
    optional_args = {"lib": lib, "dependencies": dependencies}
    for arg_name, arg_value in optional_args.items():
        if arg_value is not None:
            r_args[arg_name] = arg_value

    _install_github(**r_args)
Loading

0 comments on commit fed32fb

Please sign in to comment.