From a07eafac2bcb911301e2c0d5b69ac1b84c6925a7 Mon Sep 17 00:00:00 2001 From: Mark Keller <7525285+keller-mark@users.noreply.github.com> Date: Sat, 12 Aug 2023 23:16:52 -0400 Subject: [PATCH] WIP: setitem --- R/array.R | 126 +++++++++++++++++++++++++++++++-- R/utils.R | 18 +++++ tests/testthat/test-indexing.R | 0 3 files changed, 139 insertions(+), 5 deletions(-) create mode 100644 tests/testthat/test-indexing.R diff --git a/R/array.R b/R/array.R index 4d7bbe4..e0331cf 100644 --- a/R/array.R +++ b/R/array.R @@ -250,16 +250,62 @@ Array <- R6::R6Class("Array", # TODO }, set_basic_selection_nd = function(selection, value, fields = NA) { - # TODO + indexer <- BasicIndexer$new(selection, self) + return(private$set_selection(indexer, value = value, fields = fields)) }, set_selection = function(indexer, value, fields = NA) { # Reference: https://github.com/zarr-developers/zarr-python/blob/5dd4a0/zarr/core.py#L1682 - # TODO + # Reference: https://github.com/gzuidhof/zarr.js/blob/15e3a3f00eb19f0133018fb65f002311ea53bb7c/src/core/index.ts#L566 + + # // We iterate over all chunks which overlap the selection and thus contain data + # // that needs to be replaced. Each chunk is processed in turn, extracting the + # // necessary data from the value array and storing into the chunk array. + + # // N.B., it is an important optimisation that we only visit chunks which overlap + # // the selection. This minimises the number of iterations in the main for loop. + + selection_shape <- indexer$shape + + # Check value shape + if (length(selection_shape) == 0) { + # Setting a single value + } else if (is.scalar(value)) { + # Setting a scalar value + } else if ("NestedArray" %in% class(value)) { + # TODO: non stringify equality check + if (value$shape != selection_shape) { + stop("Shape mismatch in source NestedArray and set selection: ${value.shape} and ${selectionShape}") + } + } else { + # // TODO(zarr.js) support TypedArrays, buffers, etc + stop("Unknown data type for setting :(") + } + + # TODO: use queue to handle async iterator + for (proj in indexer$iter()) { + chunk_value <- private$get_chunk_value(proj, indexer, value, selection_shape) + private$chunk_setitem(proj$chunk_coords, proj$chunk_sel, chunk_value) + } }, process_chunk = function(out, cdata, chunk_selection, drop_axes, out_is_ndarray, fields, out_selection, partial_read_decode = FALSE) { # Reference: https://github.com/zarr-developers/zarr-python/blob/5dd4a0/zarr/core.py#L1755 # TODO }, + get_chunk_value = function(proj, indexer, value, selection_shape) { + # Reference: https://github.com/gzuidhof/zarr.js/blob/15e3a3f00eb19f0133018fb65f002311ea53bb7c/src/core/index.ts#L550 + + if (length(selection_shape) == 0) { + chunk_value <- value + } else if (is.scalar(value)) { + chunk_value <- value + } else { + chunk_value <- value$get(proj$out_sel) + if (!is.null(indexer$drop_axes)) { + stop("Handling drop axes not supported yet") + } + } + return(chunk_value) + }, chunk_getitem = function(chunk_coords, chunk_selection, out, out_selection, drop_axes = NA, fields = NA) { # TODO }, @@ -267,7 +313,73 @@ Array <- R6::R6Class("Array", # TODO }, chunk_setitem = function(chunk_coords, chunk_selection, value, fields = NA) { - # TODO + # Reference: https://github.com/gzuidhof/zarr.js/blob/15e3a3f00eb19f0133018fb65f002311ea53bb7c/src/core/index.ts#L625 + + if (private$order == "F" && self$get_ndim() > 1) { + stop("Setting content for arrays in F-order is not supported.") + } + + # Obtain key for chunk storage + chunk_key <- private$chunk_key(chunk_coords) + + chunk <- NULL + + dtype_constr = get_typed_array_ctr(private$dtype) + chunk_size <- private$chunk_size # TODO: what is chunk_size? + + if (is_total_slice(chunk_selection, private$chunks)) { + # Totally replace chunk + + # Optimization: we are completely replacing the chunk, so no need + # to access the existing chunk data + + if (is.scalar(value)) { + # TODO get the right type here + chunk <- dtype_constr(chunk_size) + chunk_fill(chunk, value) + } else { + chunk <- value$flatten() + } + } else { + # partially replace the contents of this chunk + + # Existing chunk data + #let chunkData: TypedArray; + + chunk_data <- tryCatch({ + + # Chunk is initialized if this does not error + chunk_store_data <- private$chunk_store$get_item(chunk_key) + dbytes <- private$decode_chunk(chunk_store_data) + return(private$to_typed_array(dbytes)) + }, error = function(cond) { + if (is_key_error(cond)) { + # Chunk is not initialized + chunk_data <- dtype_constr(chunk_size) + if (!is.null(private$fill_value)) { # TODO: should this be is.na + fill_chunk(chunk_data, private$fill_value) + } + return(chunk_data) + } else { + # // Different type of error - rethrow + stop("throw error;") + } + }) + + chunk_nested_array <- NestedArray$new( + chunk_data, + private$chunks, + private$dtype + ) + chunk_nested_array$set(chunk_selection, value) + chunk <- chunk_nested_array$flatten() + } + chunk_data <- private$encode_chunk(chunk) + private$chunk_store$set_item(chunk_key, chunk_data) + }, + to_typed_array = function(buffer) { + ctr <- get_typed_array_ctr(private$dtype) + return(ctr(buffer)) }, chunk_setitem_nosync = function(chunk_coords, chunk_selection, value, fields = NA) { # TODO @@ -508,10 +620,14 @@ Array <- R6::R6Class("Array", # TODO }, set_item = function(selection, value) { - # TODO + self$set_basic_selection(selection, value) }, set_basic_selection = function(selection, value, fields = NA) { - # TODO + # Handle zero-dimensional arrays + if(is.null(private$shape)) { + return(private$set_basic_selection_zd(selection, value = value, fields = fields)) + } + return(private$set_basic_selection_nd(selection, value = value, fields = fields)) }, set_orthogonal_selection = function(selection, value, fields = NA) { # TODO diff --git a/R/utils.R b/R/utils.R index 2866621..3d3ed19 100644 --- a/R/utils.R +++ b/R/utils.R @@ -301,4 +301,22 @@ is_na <- function(val) { } else { return(is.na(val)) } +} + +is_total_slice <- function(chunk_selection, chunks) { + # TODO +} + +chunk_fill <- function(chunk, value) { + # Chunk is an R array() + # Value is a scalar (after is.scalar() check) + + # TODO + # Need to do equivalent of chunk.fill(value) in JS + +} + +is_key_error <- function(e) { + # TODO + return(TRUE) } \ No newline at end of file diff --git a/tests/testthat/test-indexing.R b/tests/testthat/test-indexing.R new file mode 100644 index 0000000..e69de29