Skip to content

Commit

Permalink
updated metadata filtering
Browse files Browse the repository at this point in the history
  • Loading branch information
Qile0317 committed Dec 10, 2023
1 parent 968f749 commit 74e957a
Show file tree
Hide file tree
Showing 7 changed files with 134 additions and 34 deletions.
16 changes: 10 additions & 6 deletions R/ApotcData.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ initializeApotcData <- function(
)

methods::new(
Class = 'ApotcData',
Class = "ApotcData",

reduction_base = reduction_base,
clonecall = clonecall,
Expand Down Expand Up @@ -117,24 +117,28 @@ initializeSubsetApotcData <- function(
clone_scale_factor, rad_scale_factor
) {
default_obj_id <- "TODO" # TODO design decisions!!
default_obj_wasnt_computed <- !is.null(get_apotc_obj(seurat_obj, default_obj_id))

if (default_obj_wasnt_computed) {
if (!is.null(get_apotc_obj(seurat_obj, default_obj_id))) {
default_apotc_obj <- ApotcData(
seurat_obj, clonecall, reduction_base, clone_scale_factor, rad_scale_factor
)
seurat_obj@misc[["APackOfTheClones"]][[default_obj_id]] <- default_apotc_obj
}

# get the subsetted metadata
seurat_obj@meta.data %>% dplyr::filter(eval(parse(
text = metadata_filter_condition
)))

# create the apotc data for the subset
subset_apotc_obj <- seurat_obj@misc[["APackOfTheClones"]][[default_obj_id]]
subset_apotc_obj@metadata_filter_string <- metadata_filter_condition



# TODO handle which clusters are kept / recomputed, which also will recompute centroids!

if (default_obj_wasnt_computed) { # this shouldnt be needed
seurat_obj@misc[["APackOfTheClones"]][[default_obj_id]] <- NULL
}


subset_apotc_obj
}
Expand Down
108 changes: 94 additions & 14 deletions R/ApotcDataIndex.R
Original file line number Diff line number Diff line change
@@ -1,28 +1,108 @@
# script to manage the interface for accessing the apotc data
# could also put it in ApotcData.R

.defaultApotcDataObjId <- "__all__"
utils::globalVariables(c(".defaultApotcDataSample"))

# from the input of RunAPOTC, convert the condition to a call to be put in
# dplyr::filter for the metadata
parse_to_metadata_filter <- function(filter_samples, filter_ID, metadata_filter) {
if (all(is.null(c(filter_samples, filter_ID, metadata_filter)))) {
return(NULL)
# @meta.data %>% dplyr::filter(eval(parse(text = "output of this func")))
# assume that metadata_filter is a valid ADDITIONAL filter condition.
#
# assume that varargs_list is a valid named list where each name is a column
# and element is a string vector of which factors to INCLUDE
# Build the filter-condition string that is later evaluated inside
# dplyr::filter on the seurat metadata.
#
# @param metadata_filter NULL, or a string holding an additional filter
#   condition that is assumed to already be a valid expression.
# @param varargs_list named list; each name is a metadata column and each
#   element is the vector of values of that column to INCLUDE. list(NULL)
#   (the sentinel used by callers) or an empty list means "no column filters".
# @return a single string: "" when there is nothing to filter on, the
#   metadata filter alone when there are no column filters, otherwise the
#   per-column conditions joined with " & " (and additionally and-ed with
#   `metadata_filter` when it is supplied).
parse_to_metadata_filter_str <- function(metadata_filter, varargs_list) {

    # NULL entries carry no condition; dropping them also normalises the
    # list(NULL) sentinel to an empty list
    varargs_list <- Filter(Negate(is.null), varargs_list)

    if (length(varargs_list) == 0L) {
        if (is.null(metadata_filter)) {
            return("")
        }
        # only the extra condition was given; wrapping it with an empty
        # column filter would yield the invalid expression "(() & (...))"
        return(metadata_filter)
    }

    colnames <- names(varargs_list)

    # one parenthesised sub-condition per column
    sub_filters <- vapply(
        seq_along(varargs_list),
        function(i) {
            paste0(
                "(",
                col_cond_vec_to_filter_str(
                    condition_vector = varargs_list[[i]],
                    colname = colnames[i]
                ),
                ")"
            )
        },
        character(1)
    )

    # collapsing avoids having to trim a trailing " & " afterwards (the
    # previous trimming used length() instead of nchar() and emptied the
    # whole string)
    filter_string <- paste(sub_filters, collapse = " & ")

    if (!is.null(metadata_filter)) {
        filter_string <- paste0(
            "((", filter_string, ") & (", metadata_filter, "))"
        )
    }

    filter_string
}

# S3 generic: convert the vector of values to keep for one metadata column
# into an or-ed filter-condition string. Dispatches on the class of
# `condition_vector`.
#
# @param condition_vector vector of values of the column to include
# @param colname name of the metadata column the values belong to
col_cond_vec_to_filter_str <- function(condition_vector, colname) {
    # the name given to UseMethod must match this generic's own name,
    # otherwise the col_cond_vec_to_filter_str.* methods are never found
    UseMethod("col_cond_vec_to_filter_str")
}

# Method for character condition vectors: every value is wrapped in single
# quotes so that it appears as a string literal in the generated condition.
# NOTE(review): values that themselves contain a single quote are not
# escaped here — confirm whether such values can occur.
col_cond_vec_to_filter_str.character <- function(
    condition_vector, colname
) {
    quote_char <- "'"
    col_condition_vec_to_filter_string_with_insert(
        condition_vector, colname, quote_char
    )
}

# Fallback method for every non-character condition vector: values are
# pasted into the generated condition as-is, with nothing inserted around
# them.
col_cond_vec_to_filter_str.default <- function(
    condition_vector, colname
) {
    no_wrapper <- ""
    col_condition_vec_to_filter_string_with_insert(
        condition_vector, colname, no_wrapper
    )
}

# Build the or-ed equality conditions for a single metadata column.
#
# @param condition_vector vector of values of the column to include
# @param colname name of the column, pasted verbatim into the condition
# @param insert_char string placed directly before and after every value
#   (e.g. "'" to quote strings, "" for values pasted as-is)
# @return a single string of the form
#   "(col == v1) | (col == v2) | ...", or "" when `condition_vector` is empty
col_condition_vec_to_filter_string_with_insert <- function(
    condition_vector, colname, insert_char
) {
    # paste0 would recycle a zero-length vector into one "(col == )" term
    if (length(condition_vector) == 0L) {
        return("")
    }

    # vectorised over the values; collapsing with " | " means there is no
    # trailing separator to trim (the previous trimming used length()
    # instead of nchar() and always returned "")
    paste0(
        "(", colname, " == ", insert_char, condition_vector, insert_char, ")",
        collapse = " | "
    )
}

# default object id constant; presumably the key of the ApotcData object
# computed without any filtering — TODO confirm against its users
.defaultApotcDataObjId <- "__all__"
# NOTE(review): this declares `.defaultApotcDataSample`, not the
# `.defaultApotcDataObjId` defined on the line above — confirm which name is
# intended
utils::globalVariables(c(".defaultApotcDataSample"))

# from the input of RunAPOTC, convert the condition to the apotc data sample id where
# its stored under @misc[["APackOfTheClones"]][[id]]
parse_to_object_id <- function(reduction_base, clonecall, ...) {
if (is.null(filter_string)) {
return(.defaultApotcDataSample)
}
# TODO
# Derive the id under which an ApotcData object is stored in
# @misc[["APackOfTheClones"]] from the arguments of a run.
#
# The id is built as "|<reduction_base>|<clonecall>|<varargs segment>|"
# followed by the metadata filter part; "-" stands in for a missing varargs
# segment or a missing metadata filter.
#
# @param reduction_base,clonecall values pasted verbatim into the id
# @param varargs_list list of column filters; list(NULL) means none
# @param metadata_filter NULL or the additional filter condition string
parse_to_object_id <- function(
    reduction_base, clonecall, varargs_list, metadata_filter
) {
    id_prefix <- paste0("|", reduction_base, "|", clonecall, "|")

    varargs_segment <- if (identical(varargs_list, list(NULL))) {
        "-"
    } else {
        varargs_list_to_id_segment(varargs_list)
    }
    object_id <- paste0(id_prefix, varargs_segment, "|")

    if (!is.null(metadata_filter)) {
        # NOTE(review): unlike every other piece this is joined with paste's
        # default " " separator and gets no closing "|" — confirm intended
        return(paste(object_id, metadata_filter))
    }

    paste0(object_id, "-|")
}

# Intended to turn the varargs filter list into the segment of the object id
# that parse_to_object_id pastes between its "|" separators.
# Not implemented yet: the body is empty, so this currently returns NULL for
# any input.
varargs_list_to_id_segment <- function(varargs_list) {
# TODO
}

# make user getters
# make user getters for apotcData

# new format, there will be a list of apotc objects in the seurat@misc slot. the list will be named apotc.
# each is dependent on reduction/samples and within the list there will be named elements for each reduction/sample combo
Expand Down
17 changes: 14 additions & 3 deletions R/Main_algo.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
# vectorized circle_layout - outputs list of clusterlists
# now with new progress bar, only noting progress of total clones
pack_into_clusterlists <- function(
sizes, centroids, num_clusters, rad_decrease = 0,
ORDER = TRUE, scramble = FALSE, try_place = FALSE, verbose = TRUE
){
output_list <- vector("list", num_clusters)

# initialize progress bar stats
if (verbose) {
packed_clone_count <- 0
total_clone_count <- sum(sapply(sizes, length))
start_progress_bar()
}

for(i in 1:num_clusters){
input_rad_vec <- sizes[[i]]

Expand All @@ -13,16 +21,19 @@ pack_into_clusterlists <- function(
next
}

if(verbose){message(paste("\npacking cluster", as.character(i-1)))}

output_list[[i]] <- cpp_circle_layout(
input_rad_vec = process_rad_vec(input_rad_vec, ORDER, scramble),
centroid = centroids[[i]],
rad_decrease = rad_decrease,
try_place = try_place,
verbose = verbose
verbose = FALSE
)

packed_clone_count <- packed_clone_count + length(input_rad_vec)
if (verbose) progress_bar(packed_clone_count, total_clone_count)
}

if (verbose) message("")
output_list
}

Expand Down
19 changes: 11 additions & 8 deletions R/RunAPOTC.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,6 @@ RunAPOTC <- function(
reduction_base = "umap",
clonecall = "strict",

filter_samples = NULL,
filter_ID = NULL,
metadata_filter = NULL,

clone_scale_factor = "auto",
rad_scale_factor = 0.95,
ORDER = TRUE,
Expand All @@ -80,7 +76,10 @@ RunAPOTC <- function(
repulsion_threshold = 1,
repulsion_strength = 1,
max_repulsion_iter = 20L,
verbose = TRUE
verbose = TRUE,

metadata_filter = NULL,
... = NULL
) {
call_time <- Sys.time()

Expand All @@ -99,7 +98,7 @@ RunAPOTC <- function(
# then, check if this exact run has been performed already
# and check the command slot arguments to see if its been computed

if (verbose) message("Initializing APOTC run")
if (verbose) message("Initializing APOTC run...\n")

# compute inputs
if (should_estimate(clone_scale_factor)) {
Expand All @@ -108,8 +107,8 @@ RunAPOTC <- function(

clonecall <- scRepertoire:::.theCall(clonecall)

metadata_filter_string <- parse_to_metadata_filter(
filter_samples, filter_ID, metadata_filter
metadata_filter_string <- parse_to_metadata_filter_str(
metadata_filter = metadata_filter, varargs_list = list(...)
)

# run the packing algos
Expand All @@ -118,10 +117,14 @@ RunAPOTC <- function(
clone_scale_factor, rad_scale_factor
)

if (verbose) message("Packing clones into clusters\n")

apotc_obj <- circlepackClones(
apotc_obj, ORDER, scramble, try_place, verbose
)

if (verbose) message("Repulsing clusters\n")

if (repulse) {
apotc_obj <- repulseClusters(
apotc_obj, repulsion_threshold, repulsion_strength,
Expand Down
2 changes: 1 addition & 1 deletion R/get_clone_sizes.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ get_processed_clone_sizes <- function(apotc_obj) {
raw_tabled_clone_sizes <- apotc_obj@clone_sizes
processed_sizes <- init_list(apotc_obj@num_clusters, list())
for (i in 1:apotc_obj@num_clusters) {
if (!is_empty_table(raw_tabled_clone_sizes[[i]][[1]])) {
if (!is_empty_table(raw_tabled_clone_sizes[[i]])) {
processed_sizes[[i]] <- apotc_obj@clone_scale_factor *
sqrt(as.numeric(raw_tabled_clone_sizes[[i]][[1]]))
}
Expand Down
4 changes: 4 additions & 0 deletions tests/testthat/test-ApotcData.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,12 @@ test_that("The default case ApotcData constructor works", {

# TODO test the constructor for a subset, assuming different reduction base.



# TODO test the circle packing with diff args, the @clusters slot should be identical to other tests



# TODO test the repulsion API

# TODO test getters and setters
2 changes: 0 additions & 2 deletions tests/testthat/testdata/cluster_lists.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,5 +53,3 @@ test_radii <- list(
"c2" = c2[[3]],
"c3" = c3[[3]]
)

#plot_clusters(test_cluster_lists)

0 comments on commit 74e957a

Please sign in to comment.