Skip to content

Commit

Permalink
listdir in HttpStore and use consolidated metadata (#82)
Browse files Browse the repository at this point in the history
  • Loading branch information
dblodgett-usgs committed May 27, 2024
1 parent 572ff32 commit 6920e83
Show file tree
Hide file tree
Showing 38 changed files with 4,692 additions and 36 deletions.
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
* text=auto
tests/fixtures/**/* -diff
2 changes: 1 addition & 1 deletion .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
uses: actions/cache@v2
with:
path: ${{ env.R_LIBS_USER }}/*
key: ${{ steps.get-version.outputs.os-version }}-${{ steps.get-version.outputs.r-version }}-${{ env.cache-version }}-deps
key: ${{ hashFiles('DESCRIPTION') }}-${{ steps.get-version.outputs.os-version }}-${{ steps.get-version.outputs.r-version }}-${{ env.cache-version }}-deps
- name: Install dependencies
if: steps.cache-deps.outputs.cache-hit != 'true'
run: |
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,5 @@ vignettes/data/
*.Rproj
tests/testthat/test_data
.ipynb_checkpoints/
*.ipynb
*.ipynb
.Renviron
6 changes: 4 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ Imports:
methods,
R6,
qs,
stringr
stringr,
memoise
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
Expand All @@ -38,4 +39,5 @@ Suggests:
pkgdown,
rmarkdown,
crul,
Rarr
Rarr,
vcr (>= 0.6.0)
8 changes: 8 additions & 0 deletions R/attributes.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Attributes <- R6::R6Class("Attributes",

get_nosync = function() {
attrs_list <- tryCatch({
# TODO: use consolidated metadata?
return(self$store$metadata_class$decode_metadata(self$store$get_item(self$key), auto_unbox = TRUE))
}, error = function(cond) {
if(is_key_error(cond)) {
Expand Down Expand Up @@ -69,6 +70,13 @@ Attributes <- R6::R6Class("Attributes",
self$read_only <- read_only
self$cache <- cache
private$cached_aslist <- NA

check_cached <- try_from_zmeta(key, store)

if(cache & !is.null(check_cached)) {
private$cached_aslist <- check_cached
}

self$synchronizer <- synchronizer
},
#' @description
Expand Down
8 changes: 5 additions & 3 deletions R/creation.R
Original file line number Diff line number Diff line change
Expand Up @@ -808,13 +808,15 @@ zarr_save_array <- function(store, arr, ...) {
zarr_open <- function(store = NA, mode = NA, path = NA, ...) {
kwargs <- list(...)

store <- normalize_store_arg(store)
path <- normalize_storage_path(path)

if(is_na(mode)) {
mode <- "a"
if(inherits(store, "HttpStore"))
mode <- "r"
}

store <- normalize_store_arg(store)
path <- normalize_storage_path(path)

if(mode %in% c("w", "w-", "x")) {
if("shape" %in% names(kwargs)) {
return(zarr_open_array(store=store, mode=mode, path=path, ...))
Expand Down
3 changes: 3 additions & 0 deletions R/normalize.R
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,9 @@ normalize_store_arg <- function(store, storage_options=NA, mode=NA) {
}

if(is.character(store)) {
if(grepl("^https?://", store)) {
return(HttpStore$new(store))
}
if(grepl("://", store, fixed=TRUE) || grepl("::", store, fixed=TRUE)) {
# TODO: return FSStore
}
Expand Down
94 changes: 81 additions & 13 deletions R/stores.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Store <- R6::R6Class("Store",
erasable = NULL,
listable = NULL,
store_version = NULL,
zmetadata = NULL,
#' @keywords internal
listdir_from_keys = function(path) {
# TODO
Expand Down Expand Up @@ -115,7 +116,12 @@ Store <- R6::R6Class("Store",
#' @return A boolean value.
contains_item = function(key) {

}
},
#' @description
#' Get consolidated metadata if it exists.
get_consolidated_metadata = function() {
return(private$zmetadata)
}
)
)

Expand Down Expand Up @@ -355,16 +361,31 @@ HttpStore <- R6::R6Class("HttpStore",
options = NULL,
headers = NULL,
client = NULL,
zmetadata = NULL,
mem_get = NULL,
cache_time_seconds = 3600,
make_request = function(item) {
# Remove leading slash if necessary.
if(substr(item, 1, 1) == "/") {
key <- substr(item, 2, length(item))
} else {
key <- item
}
key <- item_to_key(item)

# mem_get caches in memory on a per-session basis.
res <- private$mem_get(private$client,
paste(private$base_path, key, sep="/"))

res <- private$client$get(paste(private$base_path, key, sep="/"))
return(res)
},
get_zmetadata = function() {
res <- private$make_request(".zmetadata")

if(res$status_code == 200) {
out <- tryCatch({
jsonlite::fromJSON(res$parse("UTF-8"))
}, error = \(e) {
warning("\n\nError parsing .zmetadata:\n\n", e)
NULL
})
} else out <- NULL

return(out)
}
),
public = list(
Expand All @@ -376,8 +397,8 @@ HttpStore <- R6::R6Class("HttpStore",
initialize = function(url, options = NA, headers = NA) {
super$initialize()
# Remove trailing slash if necessary.
if(substr(url, length(url), length(url)) == "/") {
private$url <- substr(url, 1, length(url)-1)
if(substr(url, nchar(url), nchar(url)) == "/") {
private$url <- substr(url, 1, nchar(url)-1)
} else {
private$url <- url
}
Expand All @@ -395,6 +416,11 @@ HttpStore <- R6::R6Class("HttpStore",
opts = private$options,
headers = private$headers
)

private$mem_get <- memoise::memoise(function(client, path) client$get(path),
~memoise::timeout(private$cache_time_seconds))

private$zmetadata <- private$get_zmetadata()
},
#' @description
#' Get an item from the store.
Expand All @@ -409,8 +435,50 @@ HttpStore <- R6::R6Class("HttpStore",
#' @param item The item key.
#' @return A boolean value.
contains_item = function(item) {
res <- private$make_request(item)
return(res$status_code == 200)

# use consolidated metadata if it exists
if(!is.null(try_from_zmeta(item_to_key(item), self))) {
return(TRUE)
} else if(!is.null(self$get_consolidated_metadata())) {
return(FALSE)
} else {
res <- private$make_request(item)

return(res$status_code == 200)
}

},
#' @description
#' Fetches .zmetadata from the store and evaluates its names.
#' @return character vector of unique keys that do not start with a `.`.
listdir = function() {

if(!is.null(private$zmetadata)) {
tryCatch({
out <- names(private$zmetadata$metadata) |>
stringr::str_subset("^\\.", negate = TRUE) |>
stringr::str_split("/") |>
vapply(\(x) head(x, 1), "") |>
unique()
}, error = \(e) warning("\n\nError parsing .zmetadata:\n\n", e))
} else {
out <- NULL
message(".zmetadata not found for this http store. Can't listdir")
}

return(out)

},
#' @description
#' Get cache time of http requests.
get_cache_time_seconds = function() {
return(private$cache_time_seconds)
},
#' @description
#' Set cache time of http requests.
#' @param seconds number of seconds until cache is invalid -- 0 for no cache
set_cache_time_seconds = function(seconds) {
private$cache_time_seconds <- seconds
}
)
)
)
15 changes: 15 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -283,3 +283,18 @@ get_list_product <- function(dim_indexer_iterables) {
}
return(partial_results)
}

#' Convert a store item path into a store key.
#'
#' Strips a single leading "/" (if present) so that items like "/foo/.zarray"
#' and "foo/.zarray" address the same key in the store.
#'
#' @param item character(1) item path, possibly beginning with "/".
#' @return character(1) key with any single leading "/" removed.
#' @keywords internal
item_to_key <- function(item) {
  # BUG FIX: the original used substr(item, 2, length(item)); length() on a
  # length-1 character vector is 1 (not the string length), so every
  # "/"-prefixed item collapsed to "". Use a regex strip instead, which is
  # also vectorized over `item`.
  sub("^/", "", item)
}

#' Look up a key in a store's consolidated metadata, if available.
#'
#' @param key character(1) metadata key (e.g. "foo/.zarray").
#' @param store a store object exposing `get_consolidated_metadata()`.
#' @return The metadata entry for `key`, or NULL when the store has no
#'   consolidated metadata or the key is absent (NULL[[key]] yields NULL).
#' @keywords internal
try_from_zmeta <- function(key, store) {
  zmeta <- store$get_consolidated_metadata()
  zmeta$metadata[[key]]
}
12 changes: 10 additions & 2 deletions R/zarr-array.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,18 @@ ZarrArray <- R6::R6Class("ZarrArray",
#' @description
#' (Re)load metadata from store.
load_metadata_nosync = function() {

mkey <- paste0(private$key_prefix, ARRAY_META_KEY)
meta_bytes <- private$store$get_item(mkey)
meta <- private$store$metadata_class$decode_array_metadata(meta_bytes)

meta <- try_from_zmeta(mkey, private$store)

if(is.null(meta)) {
meta_bytes <- private$store$get_item(mkey)
meta <- private$store$metadata_class$decode_array_metadata(meta_bytes)
}

private$meta <- meta

if(is.list(meta$shape)) {
private$shape <- as.integer(meta$shape)
} else {
Expand Down
41 changes: 28 additions & 13 deletions R/zarr-group.R
Original file line number Diff line number Diff line change
Expand Up @@ -136,19 +136,34 @@ ZarrGroup <- R6::R6Class("ZarrGroup",
stop("ContainsArrayError(path)")
}

# initialize metadata
meta_bytes <- tryCatch({
m_key <- paste0(private$key_prefix, GROUP_META_KEY)
meta_bytes <- store$get_item(m_key)
}, error = function(cond) {
if(is_key_error(cond)) {
stop("GroupNotFoundError(path) in Group$new")
} else {
stop(cond$message)
}
})
private$meta <- private$store$metadata_class$decode_group_metadata(meta_bytes)

m_key <- paste0(private$key_prefix, GROUP_META_KEY)

# use consolidated metadata if exists
meta <- try_from_zmeta(m_key, store)

if(!is.null(meta)) {

private$meta <- meta

} else {

# initialize metadata
meta_bytes <- tryCatch({

meta_bytes <- store$get_item(m_key)

}, error = function(cond) {
if(is_key_error(cond)) {
stop("GroupNotFoundError(path) in Group$new")
} else {
stop(cond$message)
}
})

private$meta <- private$store$metadata_class$decode_group_metadata(meta_bytes)

}

# setup attributes
a_key <- paste0(private$key_prefix, ATTRS_KEY)
private$attrs <- Attributes$new(store, key = a_key, read_only = read_only, cache = cache_attrs, synchronizer = synchronizer)
Expand Down
32 changes: 32 additions & 0 deletions inst/extdata/bcsd.zarr/.zattrs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{
"CDI": "Climate Data Interface version 1.5.6 (http://code.zmaw.de/projects/cdi)",
"CDO": "Climate Data Operators version 1.5.6.1 (http://code.zmaw.de/projects/cdo)",
"Conventions": "CF-1.0",
"History": "Translated to CF-1.0 Conventions by Netcdf-Java CDM (CFGridWriter2)\nOriginal Dataset = bcsd_obs; Translation Date = 2019-01-03T20:03:59.756Z",
"Metadata_Conventions": "Unidata Dataset Discovery v1.0",
"NCO": "netCDF Operators version 4.7.6 (Homepage = http://nco.sf.net, Code = http://github.com/nco/nco)",
"acknowledgment": "Maurer, E.P., A.W. Wood, J.C. Adam, D.P. Lettenmaier, and B. Nijssen, 2002, A Long-Term Hydrologically-Based Data Set of Land Surface Fluxes and States for the Conterminous United States, J. Climate 15(22), 3237-3251",
"cdm_data_type": "Grid",
"date_created": "2014",
"date_issued": "2015-11-01",
"geospatial_lat_max": 37.0625,
"geospatial_lat_min": 33.0625,
"geospatial_lon_max": -74.9375,
"geospatial_lon_min": -84.9375,
"history": "Mon Jan 7 18:59:08 2019: ncks -4 -L3 bcsd_obs_1999_two_var.nc bcsd_obs_1999_two_var.nc.comp\nThu May 08 12:07:18 2014: cdo monsum gridded_obs/daily/gridded_obs.daily.Prcp.1950.nc gridded_obs/monthly/gridded_obs.monthly.pr.1950.nc",
"id": "cida.usgs.gov/bcsd_obs",
"institution": "Varies, see http://gdo-dcp.ucllnl.org/downscaled_cmip_projections/",
"keywords": "Atmospheric Temperature, Air Temperature Atmosphere, Precipitation, Rain, Maximum Daily Temperature, Minimum Daily Temperature",
"keywords_vocabulary": "GCMD Science Keywords",
"license": "Freely available",
"naming_authority": "cida.usgs.gov",
"processing_level": "Gridded meteorological observations",
"publisher_email": "dblodgett@usgs.gov",
"publisher_name": "Center for Integrated Data Analytics",
"publisher_url": "https://www.cida.usgs.gov/",
"summary": "These are the monthly observational data used for BCSD downscaling. See: http://gdo-dcp.ucllnl.org/downscaled_cmip_projections/dcpInterface.html#About for more information.",
"time_coverage_end": "1999-12-15T00:00",
"time_coverage_resolution": "P1M",
"time_coverage_start": "1950-01-15T00:00",
"title": "Monthly Gridded Meteorological Observations"
}
3 changes: 3 additions & 0 deletions inst/extdata/bcsd.zarr/.zgroup
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"zarr_format": 2
}
Loading

0 comments on commit 6920e83

Please sign in to comment.