Skip to content

Commit

Permalink
Merge pull request #29 from Nowosad/cleancpp
Browse files Browse the repository at this point in the history
Cleaning
  • Loading branch information
Nowosad authored Jan 13, 2024
2 parents 21ba300 + 6bc13dd commit 0e32e34
Show file tree
Hide file tree
Showing 13 changed files with 687 additions and 680 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: supercells
Title: Superpixels of Spatial Data
Version: 0.9.5
Version: 0.9.9
Authors@R: c(
person(given = "Jakub", family = "Nowosad",
role = c("aut", "cre"),
Expand Down
4 changes: 2 additions & 2 deletions R/cpp11.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Generated by cpp11: do not edit by hand

run_slic <- function(mat, vals, step, nc, con, centers, type, type_fun, avg_fun_fun, avg_fun_name, iter, lims, input_centers, verbose) {
.Call(`_supercells_run_slic`, mat, vals, step, nc, con, centers, type, type_fun, avg_fun_fun, avg_fun_name, iter, lims, input_centers, verbose)
run_slic <- function(mat, vals, step, compactness, clean, centers, dist_name, dist_fun, avg_fun_fun, avg_fun_name, iter, minarea, input_centers, verbose) {
.Call(`_supercells_run_slic`, mat, vals, step, compactness, clean, centers, dist_name, dist_fun, avg_fun_fun, avg_fun_name, iter, minarea, input_centers, verbose)
}
79 changes: 51 additions & 28 deletions R/supercells.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,43 +62,48 @@ supercells = function(x, k, compactness, dist_fun = "euclidean", avg_fun = "mean
stop("The SpatRaster class is expected as an input", call. = FALSE)
}
}
mat = dim(x)[1:2]
mode(mat) = "integer"
new_centers = matrix(c(0L, 0L), ncol = 2)
# prepare initial supercells' centers
input_centers = matrix(c(0L, 0L), ncol = 2)
if (!missing(k) && inherits(k, "sf")){
if (chunks > 0){
stop(call. = FALSE, "Chunks cannot be used for custom cluster centers!")
}
new_centers = centers_to_dims(x, k)
input_centers = centers_to_dims(x, k)
} else if (!missing(step) && !missing(k)){
stop("You can specify either k or step, not both", call. = FALSE)
} else if (missing(step) && missing(k)){
stop("You need to specify either k or step", call. = FALSE)
} else if (missing(step)){
mat = dim(x)[1:2]; mode(mat) = "integer"
superpixelsize = round((mat[1] * mat[2]) / k + 0.5)
step = round(sqrt(superpixelsize) + 0.5)
}
# prepare averaging function (mean is the default)
if (is.character(avg_fun)){
avg_fun_name = avg_fun; avg_fun_fun = function() ""
} else {
avg_fun_name = ""; avg_fun_fun = avg_fun
}
# prepare distance function (euclidean is the default)
if (is.character(dist_fun)){
if (!(dist_fun %in% c("euclidean", "jsd", "dtw", "dtw2d", philentropy::getDistMethods()))){
stop("The provided distance function ('dist_fun') does not exist!", call. = FALSE)
}
dist_type = dist_fun; dist_fun = function() ""
dist_name = dist_fun; dist_fun = function() ""
} else {
dist_type = ""
dist_name = ""
}
# prepare minarea
if (missing(minarea)){
minarea = 0
}
# disables cleaning if iter = 0
if (iter == 0){
clean = FALSE
}
# split
# get extents of chunks
chunk_ext = prep_chunks_ext(dim(x), limit = chunks)
# run the algorithm on chunks
if (future){
if (in_memory(x)){
names_x = names(x)
Expand All @@ -111,51 +116,54 @@ supercells = function(x, k, compactness, dist_fun = "euclidean", avg_fun = "mean
oopts = options(future.globals.maxSize = +Inf)
on.exit(options(oopts))
slic_sf = future.apply::future_apply(chunk_ext, MARGIN = 1, run_slic_chunks, x = x,
step = step, compactness = compactness, dist_type = dist_type,
step = step, compactness = compactness, dist_name = dist_name,
dist_fun = dist_fun, avg_fun_fun = avg_fun_fun, avg_fun_name = avg_fun_name,
clean = clean, iter = iter, minarea = minarea, transform = transform,
new_centers = new_centers, verbose = verbose, future.seed = TRUE)
} else{
input_centers = input_centers, verbose = verbose, future.seed = TRUE)
} else {
slic_sf = apply(chunk_ext, MARGIN = 1, run_slic_chunks, x = x,
step = step, compactness = compactness, dist_type = dist_type,
step = step, compactness = compactness, dist_name = dist_name,
dist_fun = dist_fun, avg_fun_fun = avg_fun_fun, avg_fun_name = avg_fun_name,
clean = clean, iter = iter, minarea = minarea, transform = transform,
new_centers = new_centers, verbose = verbose)
input_centers = input_centers, verbose = verbose)
}

# combine
# combines the chunks results by updating supercells ids
slic_sf = update_supercells_ids(slic_sf)
# removes metadata columns if metadata = FALSE
if (isFALSE(metadata)){
slic_sf = slic_sf[, -which(names(slic_sf) %in% c("supercells", "x", "y"))]
}
# returns the result
return(slic_sf)
}

# ext = c(1, 10, 1, 10)
run_slic_chunks = function(ext, x, step, compactness, dist_type,
# run the algorithm on the area defined by 'ext'
run_slic_chunks = function(ext, x, step, compactness, dist_name,
dist_fun, avg_fun_fun, avg_fun_name, clean,
iter, minarea, transform, new_centers, verbose){
iter, minarea, transform, input_centers, verbose){
centers = TRUE
if (is.character(x)){
x = terra::rast(x)
}
# crops the input to the chunk extent
x = x[ext[1]:ext[2], ext[3]:ext[4], drop = FALSE]
ext_x = terra::ext(x)
mat = dim(x)[1:2]
mode(mat) = "integer"
# gets the number of rows and columns, and the values of the input
mat = dim(x)[1:2]; mode(mat) = "integer"
vals = as.matrix(terra::as.data.frame(x, cells = FALSE, na.rm = FALSE))
mode(vals) = "double"

# transforms the input to LAB color space if transform = "to_LAB"
if (!is.null(transform)){
if (transform == "to_LAB"){
vals = vals / 255
vals = grDevices::convertColor(vals, from = "sRGB", to = "Lab")
}
}
slic = run_slic(mat, vals = vals, step = step, nc = compactness, con = clean,
centers = centers, type = dist_type, type_fun = dist_fun,
# runs the algorithm
slic = run_slic(mat, vals = vals, step = step, compactness = compactness, clean = clean,
centers = centers, dist_name = dist_name, dist_fun = dist_fun,
avg_fun_fun = avg_fun_fun, avg_fun_name = avg_fun_name,
iter = iter, lims = minarea, input_centers = new_centers, verbose = verbose)
iter = iter, minarea = minarea, input_centers = input_centers, verbose = verbose)
# returns the initial centers if iter = 0
if (iter == 0){
slic_sf = data.frame(stats::na.omit(slic[[2]]))
slic_sf[["X1"]] = as.vector(terra::ext(x))[[1]] + (slic_sf[["X1"]] * terra::res(x)[[1]]) + (terra::res(x)[[1]]/2)
Expand All @@ -164,24 +172,26 @@ run_slic_chunks = function(ext, x, step, compactness, dist_type,
sf::st_crs(slic_sf) = terra::crs(x)
return(slic_sf)
}
# transforms the output back to RGB color space if transform = "to_LAB"
if (!is.null(transform)){
if (transform == "to_LAB"){
slic[[3]] = grDevices::convertColor(slic[[3]], from = "Lab", to = "sRGB") * 255
}
}
# prepares the output: an sf object with supercells' ids, coordinates, and average values
if (nrow(slic[[2]]) == 0 || all(slic[[2]] == 0)) stop("I cannot return supercells. This may be due to a large number of missing values in the 'x' object. Try to either trim your data to the non-NA area (e.g., with 'terra::trim()') or increase the number of expected supercells.", call. = FALSE)
slic_sf = terra::rast(slic[[1]])
terra::NAflag(slic_sf) = -1
terra::crs(slic_sf) = terra::crs(x)
terra::ext(slic_sf) = ext_x
terra::ext(slic_sf) = terra::ext(x)
slic_sf = sf::st_as_sf(terra::as.polygons(slic_sf, dissolve = TRUE))
if (nrow(slic_sf) > 0){
empty_centers = slic[[2]][,1] != 0 | slic[[2]][,2] != 0
slic_sf = cbind(slic_sf, stats::na.omit(slic[[2]][empty_centers, ]))
names(slic_sf) = c("supercells", "x", "y", "geometry")
slic_sf[["supercells"]] = slic_sf[["supercells"]] + 1
slic_sf[["x"]] = as.vector(ext_x)[[1]] + (slic_sf[["x"]] * terra::res(x)[[1]]) + (terra::res(x)[[1]]/2)
slic_sf[["y"]] = as.vector(ext_x)[[4]] - (slic_sf[["y"]] * terra::res(x)[[2]]) - (terra::res(x)[[1]]/2)
slic_sf[["x"]] = as.vector(terra::ext(x))[[1]] + (slic_sf[["x"]] * terra::res(x)[[1]]) + (terra::res(x)[[1]]/2)
slic_sf[["y"]] = as.vector(terra::ext(x))[[4]] - (slic_sf[["y"]] * terra::res(x)[[2]]) - (terra::res(x)[[1]]/2)
colnames(slic[[3]]) = names(x)
slic_sf = cbind(slic_sf, stats::na.omit(slic[[3]][empty_centers, , drop = FALSE]))
slic_sf = suppressWarnings(sf::st_collection_extract(slic_sf, "POLYGON"))
Expand All @@ -190,6 +200,7 @@ run_slic_chunks = function(ext, x, step, compactness, dist_type,
}
}

# updates supercells ids for chunks
update_supercells_ids = function(x){
x = x[lapply(x, length) > 0]
no_updates = length(x) - 1
Expand All @@ -201,12 +212,14 @@ update_supercells_ids = function(x){
return(x)
}

# predicts (rough estimation) memory usage of the algorithm, in gigabytes
# 'dim_x': a vector whose first three elements are multiplied together
#   (presumably rows, columns, and layers of the input -- TODO confirm against callers)
# each value is counted as 8 bytes; the result is bytes / 1024^3
pred_mem_usage = function(dim_x){
  # coerce to double first: a product of integer dimensions overflows to NA
  # (with a warning) as soon as it exceeds .Machine$integer.max
  dim_x = as.numeric(dim_x)
  mem_bytes = dim_x[1] * dim_x[2] * dim_x[3] * 8 # in bytes
  mem_gb = mem_bytes / (1024 * 1024 * 1024)
  mem_gb
}

# looks for the optimal chunk size
optimize_chunk_size = function(dim_x, limit, by = 500){
min_diff_memory = function(a, dim_x, limit){
abs((dim_x[3] * a^2 * 8 / (1024 * 1024 * 1024)) - limit)
Expand All @@ -217,6 +230,13 @@ optimize_chunk_size = function(dim_x, limit, by = 500){
return(opti$minimum)
}

# prepares the extents of chunks:
# if limit = FALSE, the extent of the whole input is returned
# if limit = TRUE, the extent of the input is split into chunks,
# where the size of each raster chunk is optimized to be as close to
# the (hardcoded) limit of 1GB as possible
# if limit is numeric, the extent of the input is split into chunks,
# where the width/height of each chunk is equal to the limit
prep_chunks_ext = function(dim_x, limit){
if (is.numeric(limit)){
wsize = limit
Expand Down Expand Up @@ -251,7 +271,7 @@ prep_chunks_ext = function(dim_x, limit){
row_cols_chunks[l, 4] = dims2[j + 1]
}
}
} else{
} else {
row_cols_chunks = cbind(min_row = 1,
max_row = dim_x[1],
min_col = 1,
Expand All @@ -260,10 +280,12 @@ prep_chunks_ext = function(dim_x, limit){
return(row_cols_chunks)
}

# checks if the raster is in memory:
# compares the value(s) returned by terra::sources() with an empty string
# NOTE(review): the comparison is element-wise, so the result has one element
# per value returned by terra::sources(); confirm that callers using it inside
# `if (...)` only ever get a single value here
in_memory = function(x){
  x_sources = terra::sources(x)
  x_sources == ""
}

# converts sf object ('y') to a matrix of coordinates based on a raster ('x') dimensions
centers_to_dims = function(x, y){
y_coords = sf::st_coordinates(sf::st_geometry(y))
y_col = terra::colFromX(x, y_coords[, 1])
Expand All @@ -273,6 +295,7 @@ centers_to_dims = function(x, y){
unique(center_dims)
}

# creates a sequence of integers from 'from' to 'to' with a step 'by' (including the 'to' value)
seq_last = function(from, to, by){
vec = do.call(what = seq.int, args = list(from, to, by))
if (utils::tail(vec, 1) != to) {
Expand Down
8 changes: 4 additions & 4 deletions src/cpp11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
#include "cpp11/declarations.hpp"
#include <R_ext/Visibility.h>

// slic.h
list run_slic(cpp11::integers mat, cpp11::doubles_matrix<> vals, int step, double nc, bool con, bool centers, std::string type, cpp11::function type_fun, cpp11::function avg_fun_fun, std::string avg_fun_name, int iter, int lims, cpp11::integers_matrix<> input_centers, int verbose);
extern "C" SEXP _supercells_run_slic(SEXP mat, SEXP vals, SEXP step, SEXP nc, SEXP con, SEXP centers, SEXP type, SEXP type_fun, SEXP avg_fun_fun, SEXP avg_fun_name, SEXP iter, SEXP lims, SEXP input_centers, SEXP verbose) {
// run_slic.cpp
list run_slic(cpp11::integers mat, cpp11::doubles_matrix<> vals, int step, double compactness, bool clean, bool centers, std::string dist_name, cpp11::function dist_fun, cpp11::function avg_fun_fun, std::string avg_fun_name, int iter, int minarea, cpp11::integers_matrix<> input_centers, int verbose);
extern "C" SEXP _supercells_run_slic(SEXP mat, SEXP vals, SEXP step, SEXP compactness, SEXP clean, SEXP centers, SEXP dist_name, SEXP dist_fun, SEXP avg_fun_fun, SEXP avg_fun_name, SEXP iter, SEXP minarea, SEXP input_centers, SEXP verbose) {
BEGIN_CPP11
return cpp11::as_sexp(run_slic(cpp11::as_cpp<cpp11::decay_t<cpp11::integers>>(mat), cpp11::as_cpp<cpp11::decay_t<cpp11::doubles_matrix<>>>(vals), cpp11::as_cpp<cpp11::decay_t<int>>(step), cpp11::as_cpp<cpp11::decay_t<double>>(nc), cpp11::as_cpp<cpp11::decay_t<bool>>(con), cpp11::as_cpp<cpp11::decay_t<bool>>(centers), cpp11::as_cpp<cpp11::decay_t<std::string>>(type), cpp11::as_cpp<cpp11::decay_t<cpp11::function>>(type_fun), cpp11::as_cpp<cpp11::decay_t<cpp11::function>>(avg_fun_fun), cpp11::as_cpp<cpp11::decay_t<std::string>>(avg_fun_name), cpp11::as_cpp<cpp11::decay_t<int>>(iter), cpp11::as_cpp<cpp11::decay_t<int>>(lims), cpp11::as_cpp<cpp11::decay_t<cpp11::integers_matrix<>>>(input_centers), cpp11::as_cpp<cpp11::decay_t<int>>(verbose)));
return cpp11::as_sexp(run_slic(cpp11::as_cpp<cpp11::decay_t<cpp11::integers>>(mat), cpp11::as_cpp<cpp11::decay_t<cpp11::doubles_matrix<>>>(vals), cpp11::as_cpp<cpp11::decay_t<int>>(step), cpp11::as_cpp<cpp11::decay_t<double>>(compactness), cpp11::as_cpp<cpp11::decay_t<bool>>(clean), cpp11::as_cpp<cpp11::decay_t<bool>>(centers), cpp11::as_cpp<cpp11::decay_t<std::string>>(dist_name), cpp11::as_cpp<cpp11::decay_t<cpp11::function>>(dist_fun), cpp11::as_cpp<cpp11::decay_t<cpp11::function>>(avg_fun_fun), cpp11::as_cpp<cpp11::decay_t<std::string>>(avg_fun_name), cpp11::as_cpp<cpp11::decay_t<int>>(iter), cpp11::as_cpp<cpp11::decay_t<int>>(minarea), cpp11::as_cpp<cpp11::decay_t<cpp11::integers_matrix<>>>(input_centers), cpp11::as_cpp<cpp11::decay_t<int>>(verbose)));
END_CPP11
}

Expand Down
Loading

0 comments on commit 0e32e34

Please sign in to comment.