From 789bb9f391f5009bea467b90e2e3fe8cd9929e78 Mon Sep 17 00:00:00 2001 From: Mark Keller <7525285+keller-mark@users.noreply.github.com> Date: Fri, 18 Aug 2023 10:42:33 -0400 Subject: [PATCH] Remove extra to_nested_array and to_typed_array functions. Fix bugs in NestedArray constructor --- R/array-nested.R | 34 ++++++++++++++---- R/zarr-array.R | 75 +++++++++++++++++++-------------------- tests/testthat/test-get.R | 2 +- 3 files changed, 66 insertions(+), 45 deletions(-) diff --git a/R/array-nested.R b/R/array-nested.R index e21b25c..2e2abe5 100644 --- a/R/array-nested.R +++ b/R/array-nested.R @@ -41,6 +41,9 @@ zero_based_to_one_based <- function(selection, shape) { #' @rdname NestedArray #' @export NestedArray <- R6::R6Class("NestedArray", + private = list( + is_zero_dim = NULL + ), public = list( #' @field shape The shape of the array. shape = NULL, @@ -57,7 +60,7 @@ NestedArray <- R6::R6Class("NestedArray", #' @param dtype The Zarr dtype of the array, as a string like ">f8". #' @return A `NestedArray` instance. initialize = function(data, shape = NA, dtype = NA) { - if(is.null(shape) || is_na(shape)) { + if(is.null(shape) || (!is.list(shape) && is_na(shape))) { if(is.raw(data)) { stop("Cannot infer shape from raw data, please provide shape explicitly") } @@ -65,22 +68,25 @@ NestedArray <- R6::R6Class("NestedArray", } else { shape <- normalize_shape(shape) } - if(is_na(dtype)) { + if(is_na(dtype) && (is.numeric(data) || is.logical(data))) { dtype <- get_dtype_from_array(data) } else { dtype <- normalize_dtype(dtype) } self$shape <- shape self$dtype <- dtype + + private$is_zero_dim <- (is.null(shape) || length(shape) == 0) + if(is.null(data)) { # Create empty array. self$data <- array(data=get_dtype_rtype(dtype), dim=shape) - } else if(is.null(self$shape)) { + } else if(!is.raw(data) && is.null(self$shape)) { # Create zero-dimensional array. self$data <- data # TODO? - } else if(is.array(data) || is.numeric(data) || is.logical(data)) { + } else if(!is.raw(data) && (is.array(data) || is.numeric(data) || is.logical(data))) { # Create array from R atomic vector or array(). num_shape_elements <- compute_size(shape) @@ -95,10 +101,12 @@ NestedArray <- R6::R6Class("NestedArray", } else if(is.raw(data)) { # Create array from a raw vector. + num_shape_elements <- compute_size(shape) + # Reference: https://github.com/gzuidhof/zarr.js/blob/292804/src/nestedArray/index.ts#L134 buf <- data # Create from ArrayBuffer or Buffer - num_shape_elements <- compute_size(shape) + dtype_size <- get_dtype_numbytes(dtype) num_data_elements <- length(buf) / dtype_size if (num_shape_elements != num_data_elements) { @@ -125,9 +133,23 @@ NestedArray <- R6::R6Class("NestedArray", signed = dtype_signed, endian = endian ) - array_from_vec <- array(data = vec_from_raw, dim = shape) + if(private$is_zero_dim) { + array_from_vec <- array(data = vec_from_raw, dim = c(1)) + } else { + array_from_vec <- array(data = vec_from_raw, dim = shape) + } + self$data <- array_from_vec + } else if(is_scalar(data)) { + # Create array from a scalar value. + astype_func <- get_dtype_asrtype(dtype) + if(private$is_zero_dim) { + self$data <- array(data=get_dtype_rtype(dtype), dim=c(1)) + } else { + self$data <- array(data=get_dtype_rtype(dtype), dim=shape) + } + self$data[] <- astype_func(data) } else { #buf_len <- compute_size(shape) * get_dtype_numbytes(dtype) #buf <- raw(length = buf_len) diff --git a/R/zarr-array.R b/R/zarr-array.R index fe61f1c..bd09cb0 100644 --- a/R/zarr-array.R +++ b/R/zarr-array.R @@ -221,18 +221,18 @@ ZarrArray <- R6::R6Class("ZarrArray", # Obtain encoded data for chunk c_key <- private$chunk_key(c(0)) - chunk <- tryCatch({ + chunk_nested_array <- tryCatch({ c_data <- self$get_chunk_store()$get_item(c_key) chunk_inner <- private$decode_chunk(c_data) - return(chunk_inner) + return(NestedArray$new(chunk_inner, shape = private$chunks, dtype = private$dtype)) }, error = function(cond) { if(is_key_error(cond)) { # chunk not initialized as_dtype_func <- get_dtype_asrtype(private$dtype) chunk_inner <- as_dtype_func(private$fill_value) - return(chunk_inner) + return(NestedArray$new(chunk_inner, shape = private$chunks, dtype = private$dtype)) } else { - message(cond$message) + print(cond$message) stop("rethrow") } }) @@ -242,6 +242,8 @@ ZarrArray <- R6::R6Class("ZarrArray", # chunk <- chunk[fields] # } + chunk <- chunk_nested_array$data + # Handle selection of the scalar value via empty tuple if(is_na(out)) { out <- as_scalar(chunk) @@ -317,7 +319,7 @@ ZarrArray <- R6::R6Class("ZarrArray", # chunk_inner <- as_dtype_func(private$fill_value) # return(chunk_inner) # } else { - # message(cond$message) + # print(cond$message) # stop("rethrow") # } # }) @@ -341,7 +343,11 @@ ZarrArray <- R6::R6Class("ZarrArray", # else: # encode and store - c_data <- private$encode_chunk(as_scalar(value)) + + chunk_nested_array <- NestedArray$new(as_scalar(value), shape = NULL, dtype = private$dtype) + chunk_raw <- chunk_nested_array$flatten_to_raw() + + c_data <- private$encode_chunk(chunk_raw) self$get_chunk_store()$set_item(c_key, c_data) }, set_basic_selection_nd = function(selection, value, fields = NA) { @@ -410,11 +416,6 @@ ZarrArray <- R6::R6Class("ZarrArray", } return(chunk_value) }, - to_nested_array = function(decoded_chunk) { - - nested_array <- NestedArray$new(decoded_chunk, shape=private$chunks, dtype=private$dtype) - return(nested_array) - }, chunk_buffer_to_raw_array = function(decoded_chunk) { # TODO }, @@ -433,12 +434,12 @@ ZarrArray <- R6::R6Class("ZarrArray", if("NestedArray" %in% class(out)) { if(is_contiguous_selection(out_selection) && is_total_slice(chunk_selection, private$chunks) && is.null(private$filters)) { - out$set(out_selection, private$to_nested_array(decoded_chunk)) + out$set(out_selection, NestedArray$new(decoded_chunk, shape=private$chunks, dtype=private$dtype)) return(TRUE) } # Decode chunk - chunk <- private$to_nested_array(decoded_chunk) + chunk <- NestedArray$new(decoded_chunk, shape=private$chunks, dtype=private$dtype) tmp <- chunk$get(chunk_selection) if(!is_na(drop_axes)) { @@ -458,7 +459,7 @@ ZarrArray <- R6::R6Class("ZarrArray", out$set(out_selection, as_scalar(private$fill_value)) } } else { - message(cond$message) + print(cond$message) stop("Different type of error - rethrow") } }) @@ -488,25 +489,30 @@ ZarrArray <- R6::R6Class("ZarrArray", # to access the existing chunk data if (is_scalar(value)) { - # TODO get the right type here - chunk <- dtype_constr(chunk_size) - chunk_fill(chunk, value) - } else { - # value is a NestedArray - chunk <- value$flatten() + chunk <- NestedArray$new( + value, + shape = private$chunks, + dtype = private$dtype + ) } + # value was already a NestedArray + chunk_raw <- value$flatten_to_raw() } else { # partially replace the contents of this chunk # Existing chunk data #let chunkData: TypedArray; - chunk_data <- tryCatch({ + chunk_nested_array <- tryCatch({ # Chunk is initialized if this does not error chunk_store_data <- self$get_chunk_store()$get_item(chunk_key) dbytes <- private$decode_chunk(chunk_store_data) - return(private$to_typed_array(dbytes)) + return(NestedArray$new( + dbytes, + shape = private$chunks, + dtype = private$dtype + )) }, error = function(cond) { if (is_key_error(cond)) { # Chunk is not initialized @@ -514,32 +520,25 @@ ZarrArray <- R6::R6Class("ZarrArray", if (!is.null(private$fill_value)) { # TODO: should this be is.na chunk_fill(chunk_data, private$fill_value) } - return(chunk_data) + return(NestedArray$new( + chunk_data, + shape = private$chunks, + dtype = private$dtype + )) } else { - message(cond$message) + print(cond$message) # // Different type of error - rethrow stop("throw error;") } }) - chunk_nested_array <- NestedArray$new( - chunk_data, - shape = private$chunks, - dtype = private$dtype - ) chunk_nested_array$set(chunk_selection, value) - chunk <- chunk_nested_array$flatten() + chunk_raw <- chunk_nested_array$flatten_to_raw() } - chunk_data <- private$encode_chunk(chunk) - - + chunk_data <- private$encode_chunk(chunk_raw) self$get_chunk_store()$set_item(chunk_key, chunk_data) }, - to_typed_array = function(buffer) { - ctr <- get_typed_array_ctr(private$dtype) - return(ctr(buffer)) - }, chunk_setitem_nosync = function(chunk_coords, chunk_selection, value, fields = NA) { # TODO }, @@ -557,7 +556,7 @@ ZarrArray <- R6::R6Class("ZarrArray", }, decode_chunk = function(cdata, start = NA, nitems = NA, expected_shape = NA) { # TODO - return(cdata) + return(as.raw(cdata)) }, encode_chunk = function(chunk) { # TODO diff --git a/tests/testthat/test-get.R b/tests/testthat/test-get.R index dbdd7bb..d0fc60f 100644 --- a/tests/testthat/test-get.R +++ b/tests/testthat/test-get.R @@ -12,7 +12,7 @@ test_that("get_basic_selection_zd", { sel <- z$get_item("...") - expect_equal(as.numeric(a), as.numeric(sel)) + expect_equal(as.numeric(a), as.numeric(sel$data)) }) test_that("get_basic_selection_1d", {