Skip to content

Commit

Permalink
Merge
Browse files Browse the repository at this point in the history
  • Loading branch information
keller-mark committed Oct 18, 2023
2 parents 3c9bc95 + ab8a0b8 commit 3ec430f
Show file tree
Hide file tree
Showing 53 changed files with 884 additions and 252 deletions.
21 changes: 14 additions & 7 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,33 @@ S3method("[<-",ZarrArray)
S3method(as.array,NestedArray)
S3method(as.array,ZarrArray)
export(Attributes)
export(BasicIndexer)
export(BloscCodec)
export(Bz2Codec)
export(Codec)
export(DirectoryStore)
export(Dtype)
export(GzipCodec)
export(HttpStore)
export(IntDimIndexer)
export(Lz4Codec)
export(LzmaCodec)
export(MemoryStore)
export(NestedArray)
export(OIndex)
export(SliceDimIndexer)
export(Store)
export(VIndex)
export(VLenUtf8Codec)
export(ZarrArray)
export(ZarrGroup)
export(ZlibCodec)
export(ZstdCodec)
export(as_zarr)
export(as_scalar)
export(is_key_error)
export(is_scalar)
export(is_slice)
export(obj_list)
export(zarr)
export(slice)
export(zarr_create)
export(zarr_create_array)
export(zarr_create_empty)
export(zarr_create_group)
export(zarr_create_zeros)
export(zarr_open_group)
export(zb_slice)
66 changes: 38 additions & 28 deletions R/array-nested.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ NestedArray <- R6::R6Class("NestedArray",
shape = NULL,
#' @field dtype The Zarr dtype of the array, as a string like ">f8".
dtype = NULL,
#' @field dtype_obj The Zarr dtype of the array, as a Dtype instance.
dtype_obj = NULL,
#' @field data The array contents as a base R array.
data = NULL,
#' @description
Expand All @@ -75,41 +77,43 @@ NestedArray <- R6::R6Class("NestedArray",
shape <- normalize_shape(shape)
}
if(is_na(dtype) && (is.numeric(data) || is.logical(data))) {
dtype <- get_dtype_from_array(data)
self$dtype_obj <- Dtype$new(get_dtype_from_array(data))
} else if("Dtype" %in% class(dtype)) {
self$dtype_obj <- dtype
} else if(is.character(dtype)) {
self$dtype_obj <- Dtype$new(dtype)
if(self$dtype_obj$is_object) {
stop("Object dtype was initialized from string in NestedArray, so object_codec is missing.")
}
} else {
dtype <- normalize_dtype(dtype)
stop("dtype must be NA, string/character vector, or Dtype instance")
}
self$shape <- shape
self$dtype <- dtype

dtype_parts <- get_dtype_parts(dtype)
private$dtype_basic_type <- dtype_parts$basic_type
private$dtype_byte_order <- dtype_parts$byte_order
private$dtype_num_bytes <- dtype_parts$num_bytes
private$dtype_num_items <- dtype_parts$num_items

private$is_zero_dim <- (is.null(shape) || length(shape) == 0)

if(is.null(data)) {
# Create empty array.

self$data <- array(data=get_dtype_rtype(dtype), dim=shape)
dtype_rtype <- self$dtype_obj$get_rtype()

self$data <- array(data=dtype_rtype, dim=shape)
} else if(!is.raw(data) && is.null(self$shape)) {
# Create zero-dimensional array.

self$data <- data # TODO?
} else if(!is.raw(data) && (is.array(data) || is.vector(data)) && is.atomic(data)) {
# Create array from R atomic vector or array().

num_shape_elements <- compute_size(shape)
# Check that data array has same shape as expected
if(!is.null(dim(data)) && all(ensure_vec(dim(data)) == ensure_vec(shape))) {
self$data <- data
} else {
# Data array did not have the expected shape, so we need to reshape it.
astype_func <- get_dtype_asrtype(dtype)
astype_func <- self$dtype_obj$get_asrtype()
self$data <- array(data=as.array(astype_func(data)), dim=shape)
}
# TODO: account for order == "C"?
} else if(is.raw(data)) {
# Create array from a raw vector.

Expand All @@ -119,30 +123,30 @@ NestedArray <- R6::R6Class("NestedArray",
buf <- data
# Create from ArrayBuffer or Buffer

dtype_size <- private$dtype_num_bytes
dtype_size <- self$dtype_obj$num_bytes
num_data_elements <- length(buf) / dtype_size
if (num_shape_elements != num_data_elements) {
stop('Buffer has ${numDataElements} of dtype ${dtype}, shape is too large or small')
}

dtype_rtype <- get_dtype_rtype(dtype)
dtype_signed <- get_dtype_signed(dtype)
dtype_rtype <- self$dtype_obj$get_rtype()
dtype_signed <- self$dtype_obj$is_signed
if(!dtype_signed && !(dtype_size == 1 || dtype_size == 2)) {
# readBin will warn "signed = FALSE is only valid for integers of sizes 1 and 2"
dtype_signed <- TRUE
}

endian <- get_dtype_endianness(self$dtype)
endian <- self$dtype_obj$byte_order
# Normalize to only "little" or "big" since this is what writeBin accepts.
if(endian == "nr") {
endian <- "little"
}

if(private$dtype_basic_type %in% c("S", "U")) {
if(self$dtype_obj$basic_type %in% c("S", "U")) {
vec_from_raw <- raw_to_char_vec(
buf,
private$dtype_basic_type,
private$dtype_num_items,
self$dtype_obj$basic_type,
self$dtype_obj$num_items,
endian
)
} else {
Expand Down Expand Up @@ -175,11 +179,12 @@ NestedArray <- R6::R6Class("NestedArray",
self$data <- array_from_vec
} else if(is_scalar(data)) {
# Create array from a scalar value.
astype_func <- get_dtype_asrtype(dtype)
astype_func <- self$dtype_obj$get_asrtype()
dtype_rtype <- self$dtype_obj$get_rtype()
if(private$is_zero_dim) {
self$data <- array(data=get_dtype_rtype(dtype), dim=c(1))
self$data <- array(data=dtype_rtype, dim=c(1))
} else {
self$data <- array(data=get_dtype_rtype(dtype), dim=shape)
self$data <- array(data=dtype_rtype, dim=shape)
}
self$data[] <- astype_func(data)
} else {
Expand All @@ -199,7 +204,7 @@ NestedArray <- R6::R6Class("NestedArray",
# Using do.call here seems to work the same as `abind::asub(self$data, selection_list)`
# so we can use do.call to avoid the extra dependency.
subset_arr <- do.call("[", append(list(self$data), selection_list))
subset_nested_array <- NestedArray$new(subset_arr, shape = dim(subset_arr), dtype = self$dtype)
subset_nested_array <- NestedArray$new(subset_arr, shape = dim(subset_arr), dtype = self$dtype_obj)
return(subset_nested_array)
},
#' @description
Expand Down Expand Up @@ -265,26 +270,31 @@ NestedArray <- R6::R6Class("NestedArray",
flatten_to_raw = function(order = NA) {
data_as_vec <- self$flatten(order = order)

endian <- get_dtype_endianness(self$dtype)
if(self$dtype_obj$is_object) {
# The object_codec in filters will handle the conversion to raw.
return(data_as_vec)
}

endian <- self$dtype_obj$byte_order
# Normalize to only "little" or "big" since this is what writeBin accepts.
if(endian == "nr") {
endian <- "little"
}

# "If writeBin is called with con a raw vector, it is just an indication that a raw vector should be returned."
# Reference: https://stat.ethz.ch/R-manual/R-devel/library/base/html/readBin.html
if(private$dtype_basic_type %in% c("S", "U")) {
if(self$dtype_obj$basic_type %in% c("S", "U")) {
buf <- char_vec_to_raw(
data_as_vec,
private$dtype_basic_type,
private$dtype_num_items,
self$dtype_obj$basic_type,
self$dtype_obj$num_items,
endian
)
} else {
buf <- writeBin(
data_as_vec,
con = raw(),
size = private$dtype_num_bytes,
size = self$dtype_obj$num_bytes,
endian = endian
)
}
Expand Down
2 changes: 2 additions & 0 deletions R/atomic.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@
#' Mark a value as a scalar so that it opts out of R's default vector casting.
#'
#' The value is passed through `jsonlite::unbox()` and the result is returned.
#' @param obj The value to convert.
#' @return The result of `jsonlite::unbox(obj)`, i.e. the value wrapped as a scalar.
#' @export
as_scalar <- function(obj) {
  jsonlite::unbox(obj)
}

#' Check if a value is a scalar.
#' @param s The value to check.
#' @return TRUE if the value is a scalar, FALSE otherwise.
#' @export
is_scalar <- function(s) {
if(class(s)[[1]] == "scalar") {
return(TRUE)
Expand Down
42 changes: 25 additions & 17 deletions R/creation.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ init_array_metadata <- function(
}

# normalize metadata
dtype <- normalize_dtype(dtype)
dtype <- normalize_dtype(dtype, object_codec = object_codec)

# object_codec <- normalize_object_codec(dtype, object_codec) # TODO

Expand All @@ -91,7 +91,7 @@ init_array_metadata <- function(

shape <- normalize_shape(shape)

dtype_itemsize <- get_dtype_numbytes(dtype)
dtype_itemsize <- dtype$num_bytes
chunks <- normalize_chunks(chunks, shape, dtype_itemsize)
order <- normalize_order(order)
fill_value <- normalize_fill_value(fill_value, dtype)
Expand Down Expand Up @@ -130,21 +130,23 @@ init_array_metadata <- function(
}
}

# TODO: deal with object encoding
# if dtype.hasobject:
# if object_codec is None:
# if not filters:
# # there are no filters so we can be sure there is no object codec
# raise ValueError('missing object_codec for object array')
# else:
# # one of the filters may be an object codec, issue a warning rather
# # than raise an error to maintain backwards-compatibility
# warnings.warn('missing object_codec for object array; this will raise a '
# 'ValueError in version 3.0', FutureWarning)
# else:
# filters_config.insert(0, object_codec.get_config())
# elif object_codec is not None:
# warnings.warn('an object_codec is only needed for object arrays')
# Check object codec
if(dtype$is_object) {
if(is_na(object_codec)) {
if(length(filters_config) == 0) {
# there are no filters so we can be sure there is no object codec
stop("missing object_codec for object array")
} else {
# one of the filters may be an object codec; zarr-python only warns in this
# case for backwards-compatibility, but here an error is raised either way
stop("missing object_codec for object array")
}
} else {
filters_config <- append(filters_config, object_codec$get_config())
}
} else if(!is_na(object_codec)) {
warning("an object_codec is only needed for object arrays")
}

# use null to indicate no filters
if (length(filters_config) == 0) {
Expand Down Expand Up @@ -390,6 +392,7 @@ init_group <- function(
#' non-fill-value data are stored, at the expense of overhead associated
#' with checking the data of each chunk.
#' @returns ZarrArray
#' @export
zarr_create <- function(
shape,
chunks=TRUE,
Expand Down Expand Up @@ -449,6 +452,7 @@ zarr_create <- function(
#' @param shape : int or tuple of ints
#' @param ... The params of zarr_create()
#' @returns ZarrArray
#' @export
zarr_create_empty <- function(shape, ...) {
  # Delegate to zarr_create() with an NA fill value; every other
  # argument is forwarded unchanged through `...`.
  zarr_create(shape = shape, fill_value = NA, ...)
}
Expand All @@ -457,6 +461,7 @@ zarr_create_empty <- function(shape, ...) {
#' @param data A base R array() or pizzarr NestedArray instance.
#' @param ... The params of zarr_create()
#' @returns ZarrArray
#' @export
zarr_create_array <- function(data, ...) {
z <- zarr_create(...)
z$set_item("...", data)
Expand All @@ -468,6 +473,7 @@ zarr_create_array <- function(data, ...) {
#' @param shape : int or tuple of ints
#' @param ... The params of zarr_create()
#' @returns ZarrArray
#' @export
zarr_create_zeros <- function(shape, ...) {
  # Delegate to zarr_create() with a fill value of 0; every other
  # argument is forwarded unchanged through `...`.
  zarr_create(shape = shape, fill_value = 0, ...)
}
Expand All @@ -490,6 +496,7 @@ zarr_create_zeros <- function(shape, ...) {
#' @param path : string, optional
#' Group path within store.
#' @returns ZarrGroup
#' @export
zarr_create_group <- function(
store = NA,
overwrite = FALSE,
Expand Down Expand Up @@ -543,6 +550,7 @@ zarr_create_group <- function(
#' If using an fsspec URL to create the store, these will be passed to
#' the backend implementation. Ignored otherwise.
#' @returns ZarrGroup
#' @export
zarr_open_group <- function(
store = NA,
mode = NA,
Expand Down
Loading

0 comments on commit 3ec430f

Please sign in to comment.