-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrun.R
executable file
·139 lines (111 loc) · 3.71 KB
/
run.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#!/usr/local/bin/Rscript

# Build the task description from the command-line invocation.
task <- dyncli::main()

# The same call with explicit arguments (presumably kept for interactive
# debugging -- confirm the paths before using):
# task <- dyncli::main(
#   c("--dataset", "/code/example.h5", "--output", "/mnt/output"),
#   "/code/definition.yml"
# )

library(dplyr, warn.conflicts = FALSE)
library(purrr, warn.conflicts = FALSE)
library(RaceID, warn.conflicts = FALSE)

# ____________________________________________________________________________
# Load data ####

# Dense copy of the count data carried by the task.
counts <- as.matrix(task[["counts"]])

# Method parameters supplied with the task.
parameters <- task[["parameters"]]
# ____________________________________________________________________________
# Infer trajectory ####

# TIMING: done with preproc
checkpoints <- list(method_afterpreproc = as.numeric(Sys.time()))

# Initialize the SCseq object with transcript expression. `counts` is
# transposed first; check.names = FALSE keeps the column names unmodified.
sc <- SCseq(data.frame(t(counts), check.names = FALSE))

# Filtering of expression data. The expression thresholds are fixed here;
# only `knn` and `ccor` are taken from the task parameters.
sc <- sc %>% filterdata(
  mintotal = 1,
  minexpr = 0,
  minnumber = 0,
  knn = parameters$knn,
  ccor = parameters$ccor
)

# Compute pairwise distances.
sc <- sc %>% compdist(
  metric = parameters$metric,
  FSelect = FALSE
)

# Perform clustering. The requested cluster number is capped at one fifth
# (rounded up) of the number of columns of `sc@expdata`
# (presumably one column per cell -- TODO confirm).
parameters$clustnr <- min(parameters$clustnr, ceiling(ncol(sc@expdata) / 5))
sc <- sc %>% clustexp(
  sat = parameters$sat,
  samp = parameters$samp,
  cln = parameters$cln,
  clustnr = parameters$clustnr,
  bootnr = parameters$bootnr,
  FUNcluster = parameters$FUNcluster
)

# Detect outliers and redefine clusters.
sc <- sc %>% findoutliers(
  probthr = parameters$probthr,
  outminc = parameters$outminc,
  outlg = parameters$outlg,
  outdistquant = parameters$outdistquant
)

# Compute t-SNE map.
sc <- sc %>% comptsne(
  initial_cmd = parameters$initial_cmd,
  perplexity = parameters$perplexity
)

# Initialize the lineage-tree object from the clustered SCseq object.
ltr <- Ltree(sc)

# Computation of the entropy.
ltr <- ltr %>% compentropy()

# Computation of the projections for all cells.
ltr <- ltr %>% projcells(
  cthr = parameters$cthr,
  nmode = parameters$nmode,
  knn = parameters$projcells_knn,
  fr = parameters$fr
)

# Computation of the projections for all cells after randomization.
ltr <- ltr %>% projback(
  pdishuf = parameters$pdishuf,
  fast = parameters$fast
)

# Assembly of the lineage tree.
ltr <- ltr %>% lineagegraph()

# Compute p-values for link significance, once, with the configured
# threshold. Calling comppvalue() again without arguments would recompute
# them with the package default `pthr` and overwrite this result.
ltr <- ltr %>% comppvalue(
  pthr = parameters$pthr
)

# TIMING: done with method
checkpoints$method_aftermethod <- as.numeric(Sys.time())
# Collect the dimensionality-reduction coordinates.
# Milestone coordinates come from `ltr@ldata$cnl`; rows are labelled
# "M<id>" using `ltr@ldata$m`.
dimred_milestones <- as.matrix(ltr@ldata$cnl)
rownames(dimred_milestones) <- paste0("M", ltr@ldata$m)
milestone_ids <- rownames(dimred_milestones)

# Cell coordinates; rows containing NA are dropped.
dimred <- na.omit(ltr@ltcoord)

# NOTE(review): `grouping` is not read anywhere else in this script;
# presumably it was meant for the `grouping` argument of
# dynwrap::add_dimred_projection() -- confirm before wiring it in or removing.
grouping <- paste0("M", ltr@ldata$lp[rownames(dimred)])

# Pairwise distances between milestones, used below as edge lengths.
milestone_dist <- as.matrix(dist(dimred_milestones))

# Long-format table of link scores: one row per (from, to) pair, NA rows
# removed.
link_table <- reshape2::melt(
  as.matrix(ltr@cdata$linkscore),
  varnames = c("from", "to"),
  value.name = "linkscore"
)
link_table <- na.omit(link_table)

# Rename the "cl.<id>" labels to the "M<id>" milestone naming used above.
link_table <- dplyr::mutate(
  link_table,
  from = gsub("cl.", "M", from, fixed = TRUE),
  to = gsub("cl.", "M", to, fixed = TRUE)
)

# Keep only the links whose score reaches the configured threshold.
strong_links <- dplyr::filter(link_table, linkscore >= parameters$scthr)

# Milestone network: undirected edges with the inter-milestone distance as
# length (looked up by row/column name in the distance matrix).
milestone_network <- dplyr::select(
  dplyr::mutate(
    strong_links,
    length = milestone_dist[cbind(from, to)],
    directed = FALSE
  ),
  from, to, length, directed
)
# ____________________________________________________________________________
# Save output ####

# Start from a wrapper holding the ids of the cells that have coordinates.
trajectory <- dynwrap::wrap_data(cell_ids = rownames(dimred))

# Attach the trajectory by projecting cells onto the milestone network.
trajectory <- dynwrap::add_dimred_projection(
  trajectory,
  milestone_ids = milestone_ids,
  milestone_network = milestone_network,
  dimred = dimred,
  dimred_milestones = dimred_milestones
)

# Record the timing checkpoints collected during the run.
trajectory <- dynwrap::add_timings(trajectory, checkpoints)

# Write the result to the output location given by the task.
dyncli::write_output(trajectory, task$output)