Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check at nc_open if file appears to be in NCZarr/Zarr format. #2658

Merged
merged 8 commits into from
Apr 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/run_tests_win_mingw.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ jobs:
- name: Check for plugins
run: |
dir ./plugins
dir ./plugins/.libs
if test -e ./plugins/.libs ; then dir ./plugins/.libs ; fi

- name: (Autotools) Build and Run Tests
run: make check -j 8 LDFLAGS="-Wl,--export-all-symbols"
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1069,7 +1069,7 @@ IF(ENABLE_DAP)
ELSE()
MESSAGE(STATUS "Disabling DAP4")
SET(ENABLE_DAP4 OFF CACHE BOOL "")
ENDIF(NOT ENABLE_HDF5)
ENDIF(ENABLE_HDF5)

ELSE()
SET(ENABLE_DAP2 OFF CACHE BOOL "")
Expand Down
3 changes: 3 additions & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ This file contains a high-level description of this package's evolution. Release

## 4.9.3 - TBD

* [Bug Fix] Add a crude test to see if an NCZarr path looks like a valid NCZarr/Zarr file. See [Github #2658](https://github.com/Unidata/netcdf-c/pull/2658).

## 4.9.2 - March 14, 2023

This is the maintenance release which adds support for HDF5 version 1.14.0, in addition to a handful of other changes and bugfixes.
Expand All @@ -16,6 +18,7 @@ This is the maintenance release which adds support for HDF5 version 1.14.0, in a
* Fix 'make distcheck' error in run_interop.sh. See [Github #2631](https://github.com/Unidata/netcdf-c/pull/2631).
* Update `nc-config` to remove inclusion from automatically-detected `nf-config` and `ncxx-config` files, as the wrong files could be included in the output. This is in support of [GitHub #2274](https://github.com/Unidata/netcdf-c/issues/2274).
* Update H5FDhttp.[ch] to work with HDF5 version 1.13.2 and later. See [Github #2635](https://github.com/Unidata/netcdf-c/pull/2635).
* Fix 'make distcheck' error in run_interop.sh. See [Github #2631](https://github.com/Unidata/netcdf-c/pull/2631).
* [Bug Fix] Update DAP code to enable CURLOPT_ACCEPT_ENCODING by default. See [Github #2630](https://github.com/Unidata/netcdf-c/pull/2630).
* [Bug Fix] Fix byterange failures for certain URLs. See [Github #2649](https://github.com/Unidata/netcdf-c/pull/2649).
* [Bug Fix] Fix 'make distcheck' error in run_interop.sh. See [Github #2631](https://github.com/Unidata/netcdf-c/pull/2631).
Expand Down
1 change: 1 addition & 0 deletions libnczarr/zarr.c
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ ncz_open_dataset(NC_FILE_INFO_T* file, const char** controls)
if((stat = nczmap_open(zinfo->controls.mapimpl,nc->path,mode,zinfo->controls.flags,NULL,&zinfo->map)))
goto done;

/* Ok, try to read superblock */
if((stat = ncz_read_superblock(file,&nczarr_version,&zarr_format))) goto done;

if(nczarr_version == NULL) /* default */
Expand Down
81 changes: 75 additions & 6 deletions libnczarr/zsync.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ static int computedimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int purezarr
static int json_convention_read(NCjson* jdict, NCjson** jtextp);
static int jtypes2atypes(NCjson* jtypes, NClist* atypes);

static int ncz_validate(NC_FILE_INFO_T* file);

/**************************************************/
/**************************************************/
/* Synchronize functions to make map and memory
Expand Down Expand Up @@ -1829,7 +1831,7 @@ ncz_read_superblock(NC_FILE_INFO_T* file, char** nczarrvp, char** zarrfp)
break;
default: goto done;
}
/* Also gett Zarr Root Group */
/* Get Zarr Root Group, if any */
switch(stat = NCZ_downloadjson(zinfo->map, ZMETAROOT, &jzgroup)) {
case NC_NOERR:
break;
Expand All @@ -1842,8 +1844,9 @@ ncz_read_superblock(NC_FILE_INFO_T* file, char** nczarrvp, char** zarrfp)
if(jzgroup != NULL) {
/* See if this NCZarr V2 */
if((stat = NCJdictget(jzgroup,NCZ_V2_SUPERBLOCK,&jsuper))) goto done;
if(!stat && jsuper == NULL)
{if((stat = NCJdictget(jzgroup,NCZ_V2_SUPERBLOCK_UC,&jsuper))) goto done;}
if(!stat && jsuper == NULL) { /* try uppercase name */
if((stat = NCJdictget(jzgroup,NCZ_V2_SUPERBLOCK_UC,&jsuper))) goto done;
}
if(jsuper != NULL) {
/* Extract the equivalent attribute */
if(jsuper->sort != NCJ_DICT)
Expand All @@ -1855,15 +1858,21 @@ ncz_read_superblock(NC_FILE_INFO_T* file, char** nczarrvp, char** zarrfp)
if((stat = NCJdictget(jzgroup,"zarr_format",&jtmp))) goto done;
zarr_format = nulldup(NCJstring(jtmp));
}
/* Set the controls */
/* Set the format flags */
if(jnczgroup == NULL && jsuper == NULL) {
zinfo->controls.flags |= FLAG_PUREZARR;
/* See if this looks like a NCZarr/Zarr dataset at all
by looking for anything here of the form ".z*" */
if((stat = ncz_validate(file))) goto done;
/* ok, assume pure zarr with no groups */
zinfo->controls.flags |= FLAG_PUREZARR;
zinfo->controls.flags &= ~(FLAG_NCZARR_V1);
if(zarr_format == NULL) zarr_format = strdup("2");
} else if(jnczgroup != NULL) {
zinfo->controls.flags |= FLAG_NCZARR_V1;
/* Also means file is read only */
file->no_write = 1;
} else if(jsuper != NULL) {
/* ! FLAG_NCZARR_V1 && ! FLAG_PUREZARR */
/* ! FLAG_NCZARR_V1 && ! FLAG_PUREZARR */
}
if(nczarrvp) {*nczarrvp = nczarr_version; nczarr_version = NULL;}
if(zarrfp) {*zarrfp = zarr_format; zarr_format = NULL;}
Expand Down Expand Up @@ -2411,3 +2420,63 @@ jtypes2atypes(NCjson* jtypes, NClist* atypes)
done:
return stat;
}

/**
 * See if there is reason to believe the specified file is a legitimate
 * (NC)Zarr dataset.
 *
 * Does a breadth-first walk of the key tree, starting at the root key "/",
 * and stops at the first segment whose name begins with ".z" or ".ncz".
 *
 * @param file the file to validate; its format_file_info supplies the map
 * @return NC_NOERR if such a segment was found
 * @return NC_ENOTNC if no such segment was found, or if searching the map
 *         failed (the search error code is replaced by NC_ENOTNC)
 */
static int
ncz_validate(NC_FILE_INFO_T* file)
{
    int stat = NC_NOERR;
    NCZ_FILE_INFO_T* zinfo = (NCZ_FILE_INFO_T*)file->format_file_info;
    int validate = 0;
    NCbytes* prefix = ncbytesnew();
    NClist* queue = nclistnew();
    NClist* nextlevel = nclistnew();
    NCZMAP* map = zinfo->map;
    char* path = NULL;
    char* segment = NULL;
    size_t seglen;

    ZTRACE(3,"file=%s",file->controller->path);

    /* Seed the work queue with the root key; the queue owns the string */
    path = strdup("/");
    nclistpush(queue,path);
    path = NULL;
    do {
        /* This should be full path key */
        nullfree(path); path = NULL;
        path = nclistremove(queue,0); /* remove from front of queue */
        /* get list of next level segments (partial keys) */
        nclistclear(nextlevel);
        if((stat=nczmap_search(map,path,nextlevel))) {validate = 0; goto done;}
        /* For each segment in next level: test, convert to full path, and
           push onto queue. Each nclistremove() shortens the list, so loop on
           the remaining length instead of advancing an index; an index-based
           loop would stop early and leave entries unexamined. */
        while(nclistlength(nextlevel) > 0) {
            nullfree(segment); segment = NULL;
            segment = nclistremove(nextlevel,0);
            seglen = nulllen(segment);
            if((seglen >= 2 && memcmp(segment,".z",2)==0) || (seglen >= 4 && memcmp(segment,".ncz",4)==0)) {
                validate = 1;
                goto done;
            }
            /* Convert to full path; the root "/" needs no extra separator */
            ncbytesclear(prefix);
            ncbytescat(prefix,path);
            if(strlen(path) > 1) ncbytescat(prefix,"/");
            ncbytescat(prefix,segment);
            /* push onto queue */
            nclistpush(queue,ncbytesextract(prefix));
        }
    } while(nclistlength(queue) > 0);
done:
    /* Anything other than a positive match is reported as "not netCDF" */
    if(!validate) stat = NC_ENOTNC;
    nullfree(path);
    nullfree(segment);
    nclistfreeall(queue);
    nclistfreeall(nextlevel);
    ncbytesfree(prefix);
    return ZUNTRACE(THROW(stat));
}
3 changes: 3 additions & 0 deletions nczarr_test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,9 @@ IF(ENABLE_TESTS)
BUILD_BIN_TEST(test_quantize ${TSTCOMMONSRC})
add_sh_test(nczarr_test run_quantize)

BUILD_BIN_TEST(tst_notzarr ${TSTCOMMONSRC})
add_sh_test(nczarr_test run_notzarr)

if(ENABLE_S3)
add_sh_test(nczarr_test run_s3_cleanup)
ENDIF()
Expand Down
8 changes: 5 additions & 3 deletions nczarr_test/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ ut_projections_SOURCES = ut_projections.c ${commonsrc}
ut_chunking_SOURCES = ut_chunking.c ${commonsrc}
tst_fillonlyz_SOURCES = tst_fillonlyz.c ${tstcommonsrc}

check_PROGRAMS += tst_zchunks tst_zchunks2 tst_zchunks3 tst_fillonlyz test_quantize
check_PROGRAMS += tst_zchunks tst_zchunks2 tst_zchunks3 tst_fillonlyz test_quantize tst_notzarr

TESTS += run_ut_chunk.sh

Expand Down Expand Up @@ -64,7 +64,9 @@ TESTS += run_jsonconvention.sh
TESTS += run_strings.sh
TESTS += run_scalar.sh
TESTS += run_nulls.sh
endif
TESTS += run_notzarr.sh

endif #BUILD_UTILITIES

if BUILD_UTILITIES

Expand Down Expand Up @@ -139,7 +141,7 @@ run_purezarr.sh run_interop.sh run_misc.sh \
run_filter.sh \
run_newformat.sh run_nczarr_fill.sh run_quantize.sh \
run_jsonconvention.sh run_nczfilter.sh run_unknown.sh \
run_scalar.sh run_strings.sh run_nulls.sh
run_scalar.sh run_strings.sh run_nulls.sh run_notzarr.sh

EXTRA_DIST += \
ref_ut_map_create.cdl ref_ut_map_writedata.cdl ref_ut_map_writemeta2.cdl ref_ut_map_writemeta.cdl \
Expand Down
2 changes: 1 addition & 1 deletion nczarr_test/ref_jsonconvention.zmap
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[0] /.zattrs : (354) |{"globalfloat": 1, "globalfloatvec": [1,2], "globalchar": "abc", "globalillegal": "[ [ 1.0, 0.0, 0.0 ], [ 0.0, 1.0, 0.0 ], [ 0.0, 0.0, 1.0 ", "_NCProperties": "version=2,netcdf=4.9.3-development,nczarr=2.0.0", "_nczarr_attr": {"types": {"globalfloat": "<f8", "globalfloatvec": "<f8", "globalchar": ">S1", "globalillegal": ">S1", "_NCProperties": ">S1"}}}|
[0] /.zattrs : (354) |{"globalfloat": 1, "globalfloatvec": [1,2], "globalchar": "abc", "globalillegal": "[ [ 1.0, 0.0, 0.0 ], [ 0.0, 1.0, 0.0 ], [ 0.0, 0.0, 1.0 ", "_nczarr_attr": {"types": {"globalfloat": "<f8", "globalfloatvec": "<f8", "globalchar": ">S1", "globalillegal": ">S1", "_NCProperties": ">S1"}}}|
[1] /.zgroup : (129) |{"zarr_format": 2, "_nczarr_superblock": {"version": "2.0.0"}, "_nczarr_group": {"dims": {"d1": 1}, "vars": ["v"], "groups": []}}|
[3] /v/.zarray : (202) |{"zarr_format": 2, "shape": [1], "dtype": "<i4", "chunks": [1], "fill_value": -2147483647, "order": "C", "compressor": null, "filters": null, "_nczarr_array": {"dimrefs": ["/d1"], "storage": "chunked"}}|
[4] /v/.zattrs : (296) |{"varjson1": {"key1": [1,2,3], "key2": {"key3": "abc"}}, "varjson2": [[1.0,0.0,0.0],[0.0,1.0,0.0],[0.0,0.0,1.0]], "varvec1": "1.0, 0.0, 0.0", "varvec2": [0.,0.,1.], "_ARRAY_DIMENSIONS": ["d1"], "_nczarr_attr": {"types": {"varjson1": ">S1", "varjson2": ">S1", "varvec1": ">S1", "varvec2": ">S1"}}}|
Expand Down
15 changes: 9 additions & 6 deletions nczarr_test/run_jsonconvention.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,17 @@ echo "*** Test: write then read using json convention"
fileargs tmp_jsonconvention "mode=nczarr,$zext"
deletemap $zext $file
${NCGEN} -4 -b -o "$fileurl" $srcdir/ref_jsonconvention.cdl
# Clean up extraneous changes wrt _NCProperties
${ZMD} -h $fileurl | sed -e 's/,key1=value1|key2=value2//' -e '/"_NCProperties"/ s/(378)/(354)/' > tmp_jsonconvention_${zext}.txt
${NCDUMP} $fileurl > tmp_jsonconvention_${zext}.cdl
${ZMD} -h $fileurl > tmp_jsonconvention_${zext}.txt
# | sed -e 's/,key1=value1|key2=value2//' -e '/"_NCProperties"/ s/(378)/(354)/'
# Clean up extraneous changes so comparisons work
# remove '\n' from ref file before comparing
rm -f tmp_jsonconvention.cdl
sed -e 's|\\n||g' < ${srcdir}/ref_jsonconvention.cdl > tmp_jsonconvention.cdl
diff -b tmp_jsonconvention.cdl tmp_jsonconvention_${zext}.cdl
diff -b ${srcdir}/ref_jsonconvention.zmap tmp_jsonconvention_${zext}.txt
sed -e 's|\\n||g' < ${srcdir}/ref_jsonconvention.cdl > tmp_jsonconvention_clean.cdl
sed -e 's|\\n||g' < ${srcdir}/ref_jsonconvention.zmap > tmp_jsonconvention_clean.zmap
cat < tmp_jsonconvention_${zext}.cdl > tmp_jsonconvention_clean_${zext}.cdl
sed -e 's|"_NCProperties": "version=2,netcdf=[^,]*,nczarr=2.0.0",||' < tmp_jsonconvention_${zext}.txt > tmp_jsonconvention_clean_${zext}.txt
diff -b tmp_jsonconvention_clean.cdl tmp_jsonconvention_clean_${zext}.cdl
diff -b tmp_jsonconvention_clean.zmap tmp_jsonconvention_clean_${zext}.txt
}

testcase file
Expand Down
82 changes: 82 additions & 0 deletions nczarr_test/run_notzarr.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#!/bin/sh

# Default srcdir to the current directory when the caller did not set it
if test "x$srcdir" = x ; then srcdir=`pwd`; fi
# Pull in the shared test helpers
. ../test_common.sh

. "$srcdir/test_nczarr.sh"

# Test ability to detect NCZarr/Zarr files

# S3 location used by the S3 variants of the test
URL="${NCZARR_S3_TEST_HOST}/${NCZARR_S3_TEST_BUCKET}"
KEY="/netcdf-c"

# Remember where we started so we can return here before cleanup
THISDIR=`pwd`
# Scratch directory holding all generated test inputs
RESULTSDIR=tmp_notzarr
# Set to 1 by testfailed() when a check does not produce the expected code
sometestfailed=

# Record a failure unless the reported nc_open status ($1) is -51,
# the code the test expects for a path that is not a Zarr dataset.
testfailed() {
    case "x$1" in
    x-51)
        ;;
    *)
        echo "*** Failed"
        sometestfailed=1
        ;;
    esac
}

# Start from a fresh scratch directory and work inside it
rm -fr ${RESULTSDIR}
mkdir -p ${RESULTSDIR}
cd ${RESULTSDIR}

# Make test sets
mkdir empty.file # empty
mkdir notzarr.file # non-empty, non-zarr
echo "random data" >notzarr.file/notzarr.txt
# Zip variants: archive an empty directory and a copy of the non-zarr
# directory, then remove the staging directories
if test "x$FEATURE_NCZARR_ZIP" = xyes ; then
mkdir empty
zip -r empty.zip empty
cp -r notzarr.file ./notzarr
zip -r notzarr.zip notzarr
rm -fr empty notzarr
fi
# S3 variant: upload the single non-zarr text file under notzarr.s3
if test "x$FEATURE_S3TESTS" = xyes ; then
# empty.txt is created here but not referenced again in this script
cat /dev/null > empty.txt
# not possible: ${execdir}/s3util -f notzarr.txt -u "https://${URL}" -k "/netcdf-c/empty.s3" upload
${execdir}/s3util -f notzarr.file/notzarr.txt -u "https://${URL}" -k "/netcdf-c/notzarr.s3/notzarr.txt" upload
fi

# Each case captures what tst_notzarr writes to stdout and hands it to
# testfailed, which records a failure unless the value is -51.
echo "Test empty file"
RET=`${execdir}/tst_notzarr "file://empty.file#mode=zarr,file"`
testfailed "$RET"
echo "Test non-zarr file"
RET=`${execdir}/tst_notzarr "file://notzarr.file#mode=zarr,file"`
testfailed "$RET"

# Same two cases against the zip archives, when zip support is enabled
if test "x$FEATURE_NCZARR_ZIP" = xyes ; then
echo "Test empty zip file"
RET=`${execdir}/tst_notzarr "file://empty.zip#mode=zarr,zip"`
testfailed "$RET"
echo "Test non-zarr zip file"
RET=`${execdir}/tst_notzarr "file://notzarr.zip#mode=zarr,zip"`
testfailed "$RET"
fi

if test "x$FEATURE_S3TESTS" = xyes ; then
# The condition below is always false, so the empty-S3 case never runs
if test 1 = 0 ; then
# This test is NA for S3
echo "Test empty S3 file"
KEY="/netcdf-c/empty.s3"
RET=`${execdir}/tst_notzarr "https://$URL${KEY}#mode=zarr,s3"`
testfailed "$RET"
fi
echo "Test non-zarr S3 file"
RET=`${execdir}/tst_notzarr "https://$URL/netcdf-c/notzarr.s3#mode=zarr,s3"`
testfailed "$RET"
fi

# Return to the starting directory before removing the scratch area
cd ${THISDIR}

# Cleanup
rm -fr ${RESULTSDIR}
if test "x$FEATURE_S3TESTS" = xyes ; then
    awsdelete "/netcdf-c"
fi

# Propagate any failure recorded by testfailed(); without this check the
# script would report success no matter what the individual cases returned.
if test "x$sometestfailed" != x ; then
    exit 1
fi

exit 0
31 changes: 31 additions & 0 deletions nczarr_test/tst_notzarr.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/* This is part of the netCDF package.
Copyright 2018 University Corporation for Atmospheric Research/Unidata
See COPYRIGHT file for conditions of use.

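Open the path/URL given on the command line with nc_open and print the
return code; used by run_notzarr.sh to test detection of non-Zarr paths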
Author: Dennis Heimbigner
*/

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include "netcdf.h"

#define ERR(r) {fprintf(stderr,"fail: line %d: (%d) %s\n",__LINE__,(r),nc_strerror((r)));}

/* Open the dataset named by argv[1] and print the nc_open() return code
 * to stdout (no trailing newline). A successfully opened dataset is closed
 * again. Exits with status 1 only when the argument is missing.
 */
int
main(int argc, char **argv)
{
    int ncid;
    int status;

    if(argc >= 2) {
        status = nc_open(argv[1], NC_NETCDF4, &ncid);
        printf("%d", status);
        if(status == NC_NOERR)
            nc_close(ncid);
        return 0;
    }

    fprintf(stderr, "Usage: tst_notzarr <url>\n");
    return 1;
}
2 changes: 1 addition & 1 deletion nczarr_test/ut_test.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
x * Copyright 2018, University Corporation for Atmospheric Research
* Copyright 2018, University Corporation for Atmospheric Research
* See netcdf/COPYRIGHT file for copying and redistribution conditions.
*/

Expand Down
Loading