From 2d83cbbf0be1dd8e50821befc17b29b8869d9491 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 11:41:25 +0200 Subject: [PATCH 01/32] Move over httpfs tests from main repo to duckdb-httpfs + add CI run --- .github/workflows/Linux.yml | 65 +++ duckdb | 2 +- extension-ci-tools | 2 +- test/README.md | 69 +++ test/extension/autoloading_base.test | 103 +++++ .../autoloading_current_setting.test | 47 ++ test/extension/autoloading_filesystems.test | 47 ++ test/extension/autoloading_load_only.test | 45 ++ test/extension/autoloading_reset_setting.test | 49 +++ test/extension/duckdb_extension_settings.test | 30 ++ test/sql/attach/attach_httpfs.test | 45 ++ test/sql/attach/attach_remote.test | 17 + test/sql/attach/attach_s3.test | 56 +++ test/sql/attach/attach_s3_tpch.test_slow | 72 +++ test/sql/copy/csv/glob/copy_csv_glob_s3.test | 74 ++++ test/sql/copy/csv/glob/read_csv_glob_s3.test | 174 ++++++++ .../csv/parallel/csv_parallel_httpfs.test | 36 ++ .../copy/csv/parallel/test_parallel_csv.test | 157 +++++++ .../copy/csv/recursive_query_csv.test_slow | 215 +++++++++ test/sql/copy/csv/test_12314.test_slow | 18 + test/sql/copy/csv/test_csv_httpfs.test_slow | 33 ++ test/sql/copy/csv/test_csv_httpfs_main.test | 355 +++++++++++++++ .../copy/csv/test_csv_httpfs_prepared.test | 50 +++ test/sql/copy/csv/test_csv_remote.test | 34 ++ test/sql/copy/csv/test_csv_remote.test_slow | 15 + test/sql/copy/csv/test_mixed_lines.test_slow | 32 ++ test/sql/copy/csv/test_sniff_httpfs.test | 14 + test/sql/copy/csv/test_url_with_plus.test | 11 + .../copy/csv/unquoted_escape/human_eval.test | 86 ++++ .../encryption/different_aes_engines.test | 71 +++ .../delta_byte_array_length_mismatch.test | 10 + .../delta_byte_array_multiple_pages.test | 23 + test/sql/copy/parquet/parquet_2102.test_slow | 69 +++ test/sql/copy/parquet/parquet_5968.test | 24 + .../parquet/parquet_boolean_page.test_slow | 21 + .../parquet_encrypted_tpch_httpfs.test_slow | 94 ++++ .../parquet/parquet_encryption_httpfs.test | 66 +++ .../parquet_encryption_mbedtls_openssl.test | 52 +++ test/sql/copy/parquet/parquet_glob_s3.test | 186 ++++++++ .../copy/parquet/parquet_http_prefetch.test | 41 ++ test/sql/copy/parquet/snowflake_lineitem.test | 10 + .../parquet/test_parquet_force_download.test | 139 ++++++ .../sql/copy/parquet/test_parquet_remote.test | 83 ++++ .../test_parquet_remote_foreign_files.test | 88 ++++ .../copy/parquet/test_yellow_cab.test_slow | 34 ++ test/sql/copy/s3/download_config.test | 128 ++++++ test/sql/copy/s3/fully_qualified_s3_url.test | 204 +++++++++ test/sql/copy/s3/glob_s3_paging.test_slow | 94 ++++ .../copy/s3/hive_partitioned_write_s3.test | 183 ++++++++ .../s3/hive_partitioned_write_s3.test_slow | 72 +++ test/sql/copy/s3/http_log.test | 43 ++ test/sql/copy/s3/http_proxy.test | 155 +++++++ test/sql/copy/s3/http_secret.test | 44 ++ test/sql/copy/s3/metadata_cache.test | 90 ++++ test/sql/copy/s3/parquet_s3_tpcds.test_slow | 96 ++++ test/sql/copy/s3/parquet_s3_tpch.test_slow | 92 ++++ test/sql/copy/s3/s3_hive_partition.test | 104 +++++ test/sql/copy/s3/s3_presigned_read.test | 40 ++ test/sql/copy/s3/s3_presigned_read.test_slow | 48 ++ test/sql/copy/s3/starstar.test | 362 +++++++++++++++ .../copy/s3/upload_file_parallel.test_slow | 122 ++++++ test/sql/copy/s3/upload_large_file.test_slow | 79 ++++ .../copy/s3/upload_large_json_file.test_slow | 87 ++++ test/sql/copy/s3/upload_small_file.test | 77 ++++ test/sql/copy/s3/url_encode.test | 145 ++++++ test/sql/copy/test_remote_head_forbidden.test | 10 + 
test/sql/delete/test_issue_1834.test_slow | 26 ++ .../extensions/version_is_valid_httpfs.test | 21 + test/sql/httpfs/hffs.test | 42 ++ test/sql/httpfs/hffs.test_slow | 180 ++++++++ test/sql/httpfs/internal_issue_2490.test | 10 + test/sql/json/table/read_json.test | 414 ++++++++++++++++++ test/sql/json/table/read_json_auto.test_slow | 380 ++++++++++++++++ test/sql/json/table/read_json_objects.test | 252 +++++++++++ test/sql/logging/file_system_logging.test | 56 +++ test/sql/logging/http_logging.test | 45 ++ test/sql/secrets/create_secret.test_slow | 76 ++++ test/sql/secrets/create_secret_binding.test | 92 ++++ .../secrets/create_secret_cascading.test_slow | 58 +++ test/sql/secrets/create_secret_defaults.test | 60 +++ test/sql/secrets/create_secret_gcs.test_slow | 34 ++ test/sql/secrets/create_secret_hffs.test | 31 ++ .../secrets/create_secret_invalid_map.test | 24 + test/sql/secrets/create_secret_minio.test | 78 ++++ .../secrets/create_secret_name_conflicts.test | 89 ++++ ...te_secret_non_writable_persistent_dir.test | 46 ++ .../secrets/create_secret_overwriting.test | 73 +++ .../secrets/create_secret_persistence.test | 195 +++++++++ ...ate_secret_persistence_error_handling.test | 46 ++ test/sql/secrets/create_secret_r2.test | 65 +++ .../create_secret_r2_serialization.test | 70 +++ .../create_secret_s3_serialization.test | 99 +++++ .../secrets/create_secret_scope_matching.test | 61 +++ test/sql/secrets/create_secret_settings.test | 71 +++ .../create_secret_storage_backends.test | 111 +++++ .../secrets/create_secret_transactional.test | 146 ++++++ .../secrets/persistent_key_value_secret.test | 28 ++ .../secrets/secret_compatibility_httpfs.test | 21 + test/sql/secrets/secret_types_function.test | 20 + .../test_disabled_file_system_httpfs.test | 28 ++ .../encrypted_out_of_core.test_slow | 68 +++ .../external_file_cache_httpfs.test | 18 + .../external_file_cache_read_blob.test_slow | 25 ++ .../invalid_unicode_scrambled.test_slow | 14 + 104 files changed, 8410 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/Linux.yml create mode 100644 test/README.md create mode 100644 test/extension/autoloading_base.test create mode 100644 test/extension/autoloading_current_setting.test create mode 100644 test/extension/autoloading_filesystems.test create mode 100644 test/extension/autoloading_load_only.test create mode 100644 test/extension/autoloading_reset_setting.test create mode 100644 test/extension/duckdb_extension_settings.test create mode 100644 test/sql/attach/attach_httpfs.test create mode 100644 test/sql/attach/attach_remote.test create mode 100644 test/sql/attach/attach_s3.test create mode 100644 test/sql/attach/attach_s3_tpch.test_slow create mode 100644 test/sql/copy/csv/glob/copy_csv_glob_s3.test create mode 100644 test/sql/copy/csv/glob/read_csv_glob_s3.test create mode 100644 test/sql/copy/csv/parallel/csv_parallel_httpfs.test create mode 100644 test/sql/copy/csv/parallel/test_parallel_csv.test create mode 100644 test/sql/copy/csv/recursive_query_csv.test_slow create mode 100644 test/sql/copy/csv/test_12314.test_slow create mode 100644 test/sql/copy/csv/test_csv_httpfs.test_slow create mode 100644 test/sql/copy/csv/test_csv_httpfs_main.test create mode 100644 test/sql/copy/csv/test_csv_httpfs_prepared.test create mode 100644 test/sql/copy/csv/test_csv_remote.test create mode 100644 test/sql/copy/csv/test_csv_remote.test_slow create mode 100644 test/sql/copy/csv/test_mixed_lines.test_slow create mode 100644 test/sql/copy/csv/test_sniff_httpfs.test create mode 100644 
test/sql/copy/csv/test_url_with_plus.test create mode 100644 test/sql/copy/csv/unquoted_escape/human_eval.test create mode 100644 test/sql/copy/encryption/different_aes_engines.test create mode 100644 test/sql/copy/parquet/delta_byte_array_length_mismatch.test create mode 100644 test/sql/copy/parquet/delta_byte_array_multiple_pages.test create mode 100644 test/sql/copy/parquet/parquet_2102.test_slow create mode 100644 test/sql/copy/parquet/parquet_5968.test create mode 100644 test/sql/copy/parquet/parquet_boolean_page.test_slow create mode 100644 test/sql/copy/parquet/parquet_encrypted_tpch_httpfs.test_slow create mode 100644 test/sql/copy/parquet/parquet_encryption_httpfs.test create mode 100644 test/sql/copy/parquet/parquet_encryption_mbedtls_openssl.test create mode 100644 test/sql/copy/parquet/parquet_glob_s3.test create mode 100644 test/sql/copy/parquet/parquet_http_prefetch.test create mode 100644 test/sql/copy/parquet/snowflake_lineitem.test create mode 100644 test/sql/copy/parquet/test_parquet_force_download.test create mode 100644 test/sql/copy/parquet/test_parquet_remote.test create mode 100644 test/sql/copy/parquet/test_parquet_remote_foreign_files.test create mode 100644 test/sql/copy/parquet/test_yellow_cab.test_slow create mode 100644 test/sql/copy/s3/download_config.test create mode 100644 test/sql/copy/s3/fully_qualified_s3_url.test create mode 100644 test/sql/copy/s3/glob_s3_paging.test_slow create mode 100644 test/sql/copy/s3/hive_partitioned_write_s3.test create mode 100644 test/sql/copy/s3/hive_partitioned_write_s3.test_slow create mode 100644 test/sql/copy/s3/http_log.test create mode 100644 test/sql/copy/s3/http_proxy.test create mode 100644 test/sql/copy/s3/http_secret.test create mode 100644 test/sql/copy/s3/metadata_cache.test create mode 100644 test/sql/copy/s3/parquet_s3_tpcds.test_slow create mode 100644 test/sql/copy/s3/parquet_s3_tpch.test_slow create mode 100644 test/sql/copy/s3/s3_hive_partition.test create mode 100644 test/sql/copy/s3/s3_presigned_read.test create mode 100644 test/sql/copy/s3/s3_presigned_read.test_slow create mode 100644 test/sql/copy/s3/starstar.test create mode 100644 test/sql/copy/s3/upload_file_parallel.test_slow create mode 100644 test/sql/copy/s3/upload_large_file.test_slow create mode 100644 test/sql/copy/s3/upload_large_json_file.test_slow create mode 100644 test/sql/copy/s3/upload_small_file.test create mode 100644 test/sql/copy/s3/url_encode.test create mode 100644 test/sql/copy/test_remote_head_forbidden.test create mode 100644 test/sql/delete/test_issue_1834.test_slow create mode 100644 test/sql/extensions/version_is_valid_httpfs.test create mode 100644 test/sql/httpfs/hffs.test create mode 100644 test/sql/httpfs/hffs.test_slow create mode 100644 test/sql/httpfs/internal_issue_2490.test create mode 100644 test/sql/json/table/read_json.test create mode 100644 test/sql/json/table/read_json_auto.test_slow create mode 100644 test/sql/json/table/read_json_objects.test create mode 100644 test/sql/logging/file_system_logging.test create mode 100644 test/sql/logging/http_logging.test create mode 100644 test/sql/secrets/create_secret.test_slow create mode 100644 test/sql/secrets/create_secret_binding.test create mode 100644 test/sql/secrets/create_secret_cascading.test_slow create mode 100644 test/sql/secrets/create_secret_defaults.test create mode 100644 test/sql/secrets/create_secret_gcs.test_slow create mode 100644 test/sql/secrets/create_secret_hffs.test create mode 100644 test/sql/secrets/create_secret_invalid_map.test create mode 
100644 test/sql/secrets/create_secret_minio.test create mode 100644 test/sql/secrets/create_secret_name_conflicts.test create mode 100644 test/sql/secrets/create_secret_non_writable_persistent_dir.test create mode 100644 test/sql/secrets/create_secret_overwriting.test create mode 100644 test/sql/secrets/create_secret_persistence.test create mode 100644 test/sql/secrets/create_secret_persistence_error_handling.test create mode 100644 test/sql/secrets/create_secret_r2.test create mode 100644 test/sql/secrets/create_secret_r2_serialization.test create mode 100644 test/sql/secrets/create_secret_s3_serialization.test create mode 100644 test/sql/secrets/create_secret_scope_matching.test create mode 100644 test/sql/secrets/create_secret_settings.test create mode 100644 test/sql/secrets/create_secret_storage_backends.test create mode 100644 test/sql/secrets/create_secret_transactional.test create mode 100644 test/sql/secrets/persistent_key_value_secret.test create mode 100644 test/sql/secrets/secret_compatibility_httpfs.test create mode 100644 test/sql/secrets/secret_types_function.test create mode 100644 test/sql/settings/test_disabled_file_system_httpfs.test create mode 100644 test/sql/storage/encryption/temp_files/encrypted_out_of_core.test_slow create mode 100644 test/sql/storage/external_file_cache/external_file_cache_httpfs.test create mode 100644 test/sql/storage/external_file_cache/external_file_cache_read_blob.test_slow create mode 100644 test/sql/storage/invalid_unicode_scrambled.test_slow diff --git a/.github/workflows/Linux.yml b/.github/workflows/Linux.yml new file mode 100644 index 0000000..1c6e87c --- /dev/null +++ b/.github/workflows/Linux.yml @@ -0,0 +1,65 @@ +name: Integration Tests +on: [push, pull_request,repository_dispatch] +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }} + cancel-in-progress: true +defaults: + run: + shell: bash + +jobs: + linux-tests-postgres: + name: Run tests on Linux + runs-on: ubuntu-latest + strategy: + matrix: + # Add commits/tags to build against other DuckDB versions + duckdb_version: [ '' ] + arch: ['linux_amd64'] + vcpkg_version: [ '2023.04.15' ] + include: + - arch: 'linux_amd64' + vcpkg_triplet: 'x64-linux' + + env: + VCPKG_TARGET_TRIPLET: ${{ matrix.vcpkg_triplet }} + VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake + S3_TEST_SERVER_AVAILABLE: 1 + AWS_DEFAULT_REGION: eu-west-1 + AWS_ACCESS_KEY_ID: minio_duckdb_user + AWS_SECRET_ACCESS_KEY: minio_duckdb_user_password + DUCKDB_S3_ENDPOINT: duckdb-minio.com:9000 + DUCKDB_S3_USE_SSL: false + HTTP_PROXY_PUBLIC: localhost:3128 + TEST_PERSISTENT_SECRETS_AVAILABLE: true + GEN: ninja + + steps: + - name: Install required ubuntu packages + run: sudo apt-get update -y -qq && sudo apt-get install -y -qq ninja-build libcurl4-openssl-dev + + - name: Fix permissions of test secrets + shell: bash + run: chmod -R 700 data/secrets + + # TODO: fix the authenticated proxy here + - name: Install and run http proxy squid + shell: bash + run: | + sudo apt-get install squid + ./scripts/run_squid.sh --port 3128 --log_dir squid_logs & + + - name: Build + shell: bash + run: make reldebug + + - name: Start test server & run tests + shell: bash + run: | + sudo ./scripts/install_s3_test_server.sh + ./scripts/generate_presigned_url.sh + source ./scripts/run_s3_test_server.sh + source ./scripts/set_s3_test_server_variables.sh + sleep 60 + + 
./build/reldebug/test/unittest '*'
diff --git a/duckdb b/duckdb
index b8a06e4..7d45b33 160000
--- a/duckdb
+++ b/duckdb
@@ -1 +1 @@
-Subproject commit b8a06e4a22672e254cd0baa68a3dbed2eb51c56e
+Subproject commit 7d45b33a308f787d7e09346ee76ea403bd140da5
diff --git a/extension-ci-tools b/extension-ci-tools
index 86306f4..90757de 160000
--- a/extension-ci-tools
+++ b/extension-ci-tools
@@ -1 +1 @@
-Subproject commit 86306f45100210a75d1633521d83ebd1f640e960
+Subproject commit 90757de3f06c6802cd49732849b9e46eef75761f
diff --git a/test/README.md b/test/README.md
new file mode 100644
index 0000000..24cfaa7
--- /dev/null
+++ b/test/README.md
@@ -0,0 +1,69 @@
+
+In order to test these locally, `minio` is used. This requires Docker to be installed.
+
+### Installing Docker on macOS
+
+Install `docker` using `homebrew`.
+
+
+```bash
+brew install docker --cask
+```
+
+Then open `/Applications/Docker`. Note that the first time you open the application, you need to go to the `Applications` folder, right-click `Docker` and select `open`.
+
+### Setting Up Docker
+
+To finish setting up Docker, open the Docker application and log in to your Docker account. Create a Docker account if you do not have one and complete the setup.
+
+### Running Minio
+
+Run the `install_s3_test_server` script. This requires root. It makes a few changes to your system, specifically to `/etc/hosts`, to set up a few redirect interfaces to localhost. This only needs to be run once.
+
+```bash
+sudo ./scripts/install_s3_test_server.sh
+```
+
+Then, if this has not been done yet, we need to generate some data:
+
+```
+./scripts/generate_presigned_url.sh
+```
+
+Then run the test server in the background using Docker. Note that Docker must be running for this to work. On macOS you can open the Docker GUI (`/Applications/Docker`) and leave it open to accomplish this.
+
+
+```bash
+source ./scripts/run_s3_test_server.sh
+```
+
+Now set up the following environment variables to enable running the tests.
+
+This can be done either manually:
+```bash
+export S3_TEST_SERVER_AVAILABLE=1
+export AWS_DEFAULT_REGION=eu-west-1
+export AWS_ACCESS_KEY_ID=minio_duckdb_user
+export AWS_SECRET_ACCESS_KEY=minio_duckdb_user_password
+export DUCKDB_S3_ENDPOINT=duckdb-minio.com:9000
+export DUCKDB_S3_USE_SSL=false
+```
+
+Or by using the `set_s3_test_server_variables.sh` script:
+
+```bash
+# use source so it sets the environment variables in your current environment
+source scripts/set_s3_test_server_variables.sh
+```
+
+Now you should be able to run the S3 tests using minio, e.g.:
+
+```bash
+build/debug/test/unittest test/sql/copy/s3/s3_hive_partition.test
+```
+
+> minio uses port 9000. ClickHouse also uses port 9000. If the tests are not working and you have a running ClickHouse service, try killing it first, e.g. using `killall -9 clickhouse`.
+
+#### Test Data
+
+The configuration for minio is stored in `scripts/minio_s3.yml`. Data is stored in `/tmp/minio_test_data`.
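+If the tests are still not working, a quick sanity check is to confirm the test server itself is reachable. This is only a sketch; it assumes the default endpoint configured above (`duckdb-minio.com:9000`, plain HTTP since `DUCKDB_S3_USE_SSL=false`) and that `install_s3_test_server.sh` has already added the `/etc/hosts` entries:
+
+```bash
+# The minio test server container should appear in the list of running containers.
+docker ps
+
+# Any HTTP response here (even an access-denied error) means the server is reachable.
+curl -I http://duckdb-minio.com:9000
+```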
\ No newline at end of file
diff --git a/test/extension/autoloading_base.test b/test/extension/autoloading_base.test
new file mode 100644
index 0000000..1aca133
--- /dev/null
+++ b/test/extension/autoloading_base.test
@@ -0,0 +1,103 @@
+# name: test/extension/autoloading_base.test
+# description: Base tests for the autoloading mechanism for extensions
+# group: [extension]
+
+require httpfs
+
+# This test assumes httpfs and json to be available in the LOCAL_EXTENSION_REPO and NOT linked into duckdb statically
+# -> this should be the case for our autoloading tests where we have the local_extension_repo variable set
+require-env LOCAL_EXTENSION_REPO
+
+# Ensure we have a clean extension directory without any preinstalled extensions
+statement ok
+set extension_directory='__TEST_DIR__/autoloading_base'
+
+query I
+SELECT (count(*) > 0) FROM duckdb_extensions() WHERE install_path ILIKE '%duckdb_extension'
+----
+false
+
+# All extensions reported by duckdb are either statically linked or not installed
+query I
+SELECT count(*) FROM duckdb_extensions() WHERE install_mode != 'NOT_INSTALLED' AND install_mode != 'STATICALLY_LINKED'
+----
+0
+
+### No autoloading nor installing: throw error with installation hint
+statement ok
+set autoload_known_extensions=false
+
+statement ok
+set autoinstall_known_extensions=false
+
+statement error
+SET s3_region='eu-west-1';
+----
+:.*Catalog Error.*Setting with name "s3_region" is not in the catalog.*
+
+statement error
+select * from read_json_auto('data/json/example_n.ndjson');
+----
+:.*Catalog Error.*Table Function with name "read_json_auto" is not in the catalog.*
+
+statement error
+select * from thistablefunctionwillnotexistfosho();
+----
+:.*Catalog Error.*Table Function with name thistablefunctionwillnotexistfosho does not exist.*
+
+### Autoloading and installing, but the autoloading repository is set to non-existent location
+statement ok
+set autoload_known_extensions=true
+
+statement ok
+set autoinstall_known_extensions=true
+
+# Override the default repo with a non-existent local repo
+statement ok
+set autoinstall_extension_repository='/tmp/non-existent-repo';
+
+# Error should inform the user on what's happening
+statement error
+SET s3_region='eu-west-1';
+----
+:Extension Autoloading Error.*An error occurred while trying to automatically install the required extension 'httpfs'.*
+
+statement error
+select * from read_json_auto('data/json/example_n.ndjson');
+----
+:Extension Autoloading Error.*An error occurred while trying to automatically install the required extension 'json'.*
+
+# Now override with non-existent remote repo
+statement ok
+set autoinstall_extension_repository='http://duckdb.org/what/are/the/odds/we/actually/make/this/path/and/break/this/tests';
+
+# Error should inform the user on what's happening
+statement error
+SET s3_region='eu-west-1';
+----
+:Extension Autoloading Error.*An error occurred while trying to automatically install the required extension 'httpfs'.*
+
+statement error
+select * from read_json_auto('data/json/example_n.ndjson');
+----
+:Extension Autoloading Error.*An error occurred while trying to automatically install the required extension 'json'.*
+
+statement error
+select * from thistablefunctionwillnotexistfosho();
+----
+:Catalog Error.*Table Function with name thistablefunctionwillnotexistfosho does not exist.*
+
+### Autoloading with correct tmp repo
+statement ok
+set autoinstall_extension_repository='${LOCAL_EXTENSION_REPO}';
+
+statement ok
+SET s3_region='eu-west-1';
+
+statement ok
+select * from read_json_auto('data/json/example_n.ndjson');
+
+query I
+SELECT (count(*) > 0) FROM duckdb_extensions() WHERE install_path ILIKE '%duckdb_extension';
+----
+true
diff --git a/test/extension/autoloading_current_setting.test b/test/extension/autoloading_current_setting.test
new file mode 100644
index 0000000..119e63f
--- /dev/null
+++ b/test/extension/autoloading_current_setting.test
@@ -0,0 +1,47 @@
+# name: test/extension/autoloading_current_setting.test
+# description: More tests for extension autoloading.
+# group: [extension]
+
+# This test assumes httpfs and json to be available in the LOCAL_EXTENSION_REPO and NOT linked into duckdb statically
+# -> this should be the case for our autoloading tests where we have the local_extension_repo variable set
+require-env LOCAL_EXTENSION_REPO
+
+require httpfs
+
+statement ok
+set extension_directory='__TEST_DIR__/autoloading_current_setting'
+
+### No autoloading: throw error with installation hint
+statement ok
+set autoload_known_extensions=false
+
+statement ok
+set autoinstall_known_extensions=false
+
+statement error
+select current_setting('s3_region');
+----
+:.*Catalog Error.*Setting with name "s3_region" is not in the catalog.*
+
+### Autoloading, but not autoinstall
+statement ok
+set autoload_known_extensions=true
+
+statement ok
+set autoinstall_extension_repository='/tmp/non-existent-repo';
+
+# Error should inform the user on what's happening
+statement error
+select current_setting('s3_region');
+----
+:.*Extension Autoloading Error.*An error occurred while trying to automatically install the required extension 'httpfs'.*
+
+### Autoloading with autoinstall and correct extension repo
+statement ok
+set autoinstall_extension_repository='${LOCAL_EXTENSION_REPO}';
+
+statement ok
+set autoinstall_known_extensions=true
+
+statement ok
+select current_setting('s3_region');
diff --git a/test/extension/autoloading_filesystems.test b/test/extension/autoloading_filesystems.test
new file mode 100644
index 0000000..8ad6b90
--- /dev/null
+++ b/test/extension/autoloading_filesystems.test
@@ -0,0 +1,47 @@
+# name: test/extension/autoloading_filesystems.test
+# description: Tests for autoloading with filesystems
+# group: [extension]
+
+require httpfs
+
+# This test assumes httpfs and json to be available in the LOCAL_EXTENSION_REPO and NOT linked into duckdb statically
+# -> this should be the case for our autoloading tests where we have the local_extension_repo variable set
+require-env LOCAL_EXTENSION_REPO
+
+statement ok
+set allow_persistent_secrets=false;
+
+# Ensure we have a clean extension directory without any preinstalled extensions
+statement ok
+set extension_directory='__TEST_DIR__/autoloading_filesystems'
+
+### No autoloading nor installing: throw error with installation hint
+statement ok
+set autoload_known_extensions=false
+
+statement ok
+set autoinstall_known_extensions=false
+
+statement error
+SELECT * FROM 's3://some-bucket/a-file.csv'
+----
+Missing Extension Error: File s3://some-bucket/a-file.csv requires the extension httpfs to be loaded
+
+### With autoloading, install and correct repo
+statement ok
+set autoload_known_extensions=true
+
+statement ok
+set autoinstall_known_extensions=true
+
+statement ok
+set autoinstall_extension_repository='${LOCAL_EXTENSION_REPO}';
+
+# Set an invalid endpoint to ensure we fail in the httpfs extension when trying to connect
+statement ok
+SET s3_endpoint='false_endpoint';
+
+statement error
+SELECT * FROM 's3://some-bucket/a-file.csv'
+----
+Could not establish connection
error for HTTP HEAD to 'https://some-bucket.false_endpoint/a-file.csv' diff --git a/test/extension/autoloading_load_only.test b/test/extension/autoloading_load_only.test new file mode 100644 index 0000000..5bbda34 --- /dev/null +++ b/test/extension/autoloading_load_only.test @@ -0,0 +1,45 @@ +# name: test/extension/autoloading_load_only.test +# description: Tests for autoloading with no autoinstall +# group: [extension] + +require httpfs + +# This test assumes icu and json to be available in the LOCAL_EXTENSION_REPO and NOT linked into duckdb statically +# -> this should be the case for our autoloading tests where we have the local_extension_repo variable set +require-env LOCAL_EXTENSION_REPO + +# Ensure we have a clean extension directory without any preinstalled extensions +statement ok +set extension_directory='__TEST_DIR__/autoloading_load_only' + +### No autoloading nor installing: throw error with installation hint +statement ok +set autoload_known_extensions=false + +statement ok +set autoinstall_known_extensions=false + +statement error +SET s3_region='eu-west-1'; +---- +:.*Catalog Error.*Setting with name "s3_region" is not in the catalog.* + +### Autoloading but not autoinstall, while the extension is not installed: still not working +statement ok +set autoload_known_extensions=true + +statement ok +set autoinstall_extension_repository='/tmp/non-existent-repo'; + +statement error +SET s3_region='eu-west-1'; +---- +:.*Extension Autoloading Error.*An error occurred while trying to automatically install the required extension 'httpfs'.* + +### Manually install the extension from the local repo +statement ok +INSTALL httpfs FROM '${LOCAL_EXTENSION_REPO}' + +# now autoloading works! +statement ok +SET s3_region='eu-west-1'; diff --git a/test/extension/autoloading_reset_setting.test b/test/extension/autoloading_reset_setting.test new file mode 100644 index 0000000..999102a --- /dev/null +++ b/test/extension/autoloading_reset_setting.test @@ -0,0 +1,49 @@ +# name: test/extension/autoloading_reset_setting.test +# description: Testing reset setting that lives in an extension that can be autoloaded +# group: [extension] + +require httpfs + +# This test assumes httpfs and json to be available in the LOCAL_EXTENSION_REPO and NOT linked into duckdb statically +# -> this should be the case for our autoloading tests where we have the local_extension_repo variable set +require-env LOCAL_EXTENSION_REPO + +statement ok +set extension_directory='__TEST_DIR__/autoloading_reset_setting' + +### No autoloading: throw error with installation hint +statement ok +set autoload_known_extensions=false + +statement ok +set autoinstall_known_extensions=false + +# Testing reset setting +statement error +RESET s3_region; +---- +Catalog Error: Setting with name "s3_region" is not in the catalog, but it exists in the httpfs extension. 
+ +### Autoloading, but no auto install +statement ok +set autoload_known_extensions=true + +statement ok +set autoinstall_extension_repository='/tmp/non-existent-repo'; + +# Error should inform the user on whats happening +statement error +RESET s3_region; +---- +Extension Autoloading Error: An error occurred while trying to automatically install the required extension 'httpfs': +Extension + +### Autoloading with correct tmp repo and autoinstall +statement ok +set autoinstall_extension_repository='${LOCAL_EXTENSION_REPO}'; + +statement ok +set autoinstall_known_extensions=true + +statement ok +RESET s3_region; diff --git a/test/extension/duckdb_extension_settings.test b/test/extension/duckdb_extension_settings.test new file mode 100644 index 0000000..9fa62c6 --- /dev/null +++ b/test/extension/duckdb_extension_settings.test @@ -0,0 +1,30 @@ +# name: test/extension/duckdb_extension_settings.test +# description: settings for extensions +# group: [extension] + +require httpfs + +statement ok +SET autoinstall_known_extensions = true; + +statement ok +SET autoload_known_extensions = true; + +statement ok +SET extension_directory = '__TEST_DIR__/custom_extension_directory'; + +statement ok +SET custom_extension_repository = '__TEST_DIR__/not_existing_folder' + +statement error +FROM read_csv('https://some.org/file.csv'); +---- +not_existing_folder + +statement ok +SET autoinstall_extension_repository = '__TEST_DIR__/other_folder'; + +statement error +FROM read_csv('https://some.org/file.csv'); +---- +other_folder diff --git a/test/sql/attach/attach_httpfs.test b/test/sql/attach/attach_httpfs.test new file mode 100644 index 0000000..8742ecb --- /dev/null +++ b/test/sql/attach/attach_httpfs.test @@ -0,0 +1,45 @@ +# name: test/sql/attach/attach_httpfs.test +# description: Test attach using httpfs +# group: [attach] + +require httpfs + +require-env S3_TEST_SERVER_AVAILABLE 1 + +require-env AWS_DEFAULT_REGION + +require-env AWS_ACCESS_KEY_ID + +require-env AWS_SECRET_ACCESS_KEY + +require-env DUCKDB_S3_ENDPOINT + +require-env DUCKDB_S3_USE_SSL + +require-env S3_ATTACH_DB_PRESIGNED_URL + +# ATTACH a DuckDB database over HTTPFS +statement ok +ATTACH '${S3_ATTACH_DB_PRESIGNED_URL}' AS db (READONLY 1); + +query IIIII +SELECT * FROM db.integral_values +---- +1 2 3 4 5 +NULL NULL NULL NULL NULL + +statement error +CREATE TABLE db.integers(i INTEGER); +---- +read-only + +statement ok +SELECT * FROM db.all_types + +statement error +SELECT * FROM db.all_typez +---- +all_types + +statement ok +DETACH db diff --git a/test/sql/attach/attach_remote.test b/test/sql/attach/attach_remote.test new file mode 100644 index 0000000..285a941 --- /dev/null +++ b/test/sql/attach/attach_remote.test @@ -0,0 +1,17 @@ +# name: test/sql/attach/attach_remote.test +# description: Test attaching of remote database +# group: [attach] + +require httpfs + +statement error +ATTACH 'https://duckdb.org/non_existing.db' AS db2 (READ_ONLY) +---- + +statement error +ATTACH 'https://duckdb.org/non_existing.db' AS db2 +---- + +statement error +ATTACH 'https://duckdb.org/non_existing.db' AS db2 (READ_WRITE) +---- diff --git a/test/sql/attach/attach_s3.test b/test/sql/attach/attach_s3.test new file mode 100644 index 0000000..555d074 --- /dev/null +++ b/test/sql/attach/attach_s3.test @@ -0,0 +1,56 @@ +# name: test/sql/attach/attach_s3.test +# description: Test attach using httpfs +# group: [attach] + +require httpfs + +require-env S3_TEST_SERVER_AVAILABLE 1 + +require-env AWS_DEFAULT_REGION + +require-env AWS_ACCESS_KEY_ID + +require-env 
AWS_SECRET_ACCESS_KEY + +require-env DUCKDB_S3_ENDPOINT + +require-env DUCKDB_S3_USE_SSL + +require-env S3_ATTACH_DB + +statement ok +CREATE SECRET ( + TYPE S3, + PROVIDER config, + KEY_ID '${AWS_ACCESS_KEY_ID}', + SECRET '${AWS_SECRET_ACCESS_KEY}', + REGION '${AWS_DEFAULT_REGION}', + ENDPOINT '${DUCKDB_S3_ENDPOINT}', + USE_SSL '${DUCKDB_S3_USE_SSL}' +) + +# ATTACH a DuckDB database over HTTPFS +statement ok +ATTACH '${S3_ATTACH_DB}' AS db (READONLY 1); + +query IIIII +SELECT * FROM db.integral_values +---- +1 2 3 4 5 +NULL NULL NULL NULL NULL + +statement error +CREATE TABLE db.integers(i INTEGER); +---- +read-only + +statement ok +SELECT * FROM db.all_types + +statement error +SELECT * FROM db.all_typez +---- +all_types + +statement ok +DETACH db diff --git a/test/sql/attach/attach_s3_tpch.test_slow b/test/sql/attach/attach_s3_tpch.test_slow new file mode 100644 index 0000000..721be60 --- /dev/null +++ b/test/sql/attach/attach_s3_tpch.test_slow @@ -0,0 +1,72 @@ +# name: test/sql/attach/attach_s3_tpch.test_slow +# description: Test running TPC-H over a database attached over S3 +# group: [attach] + +require httpfs + +require tpch + +require-env S3_TEST_SERVER_AVAILABLE 1 + +require-env AWS_DEFAULT_REGION + +require-env AWS_ACCESS_KEY_ID + +require-env AWS_SECRET_ACCESS_KEY + +require-env DUCKDB_S3_ENDPOINT + +require-env DUCKDB_S3_USE_SSL + +statement ok +CREATE SECRET ( + TYPE S3, + PROVIDER config, + KEY_ID '${AWS_ACCESS_KEY_ID}', + SECRET '${AWS_SECRET_ACCESS_KEY}', + REGION '${AWS_DEFAULT_REGION}', + ENDPOINT '${DUCKDB_S3_ENDPOINT}', + USE_SSL '${DUCKDB_S3_USE_SSL}' +) + +# ATTACH a DuckDB database over HTTPFS +statement ok +ATTACH 's3://test-bucket/presigned/lineitem_sf1.db' AS db (READONLY 1); + +statement ok +USE db + +loop i 1 9 + +query I +PRAGMA tpch(${i}) +---- +:extension/tpch/dbgen/answers/sf1/q0${i}.csv + +endloop + +loop i 10 23 + +query I +PRAGMA tpch(${i}) +---- +:extension/tpch/dbgen/answers/sf1/q${i}.csv + +endloop + +statement ok +USE memory + +statement ok +DETACH db + +statement ok +ATTACH 's3://test-bucket/presigned/lineitem_sf1.db' AS db (READONLY 1); + +statement ok +USE db + +query IIIIIIIIIIIIIIII +select count(distinct columns(*)) from lineitem; +---- +1500000 200000 10000 7 50 933900 11 9 3 2 2526 2466 2554 4 7 3610733 diff --git a/test/sql/copy/csv/glob/copy_csv_glob_s3.test b/test/sql/copy/csv/glob/copy_csv_glob_s3.test new file mode 100644 index 0000000..7d0aae8 --- /dev/null +++ b/test/sql/copy/csv/glob/copy_csv_glob_s3.test @@ -0,0 +1,74 @@ +# name: test/sql/copy/csv/glob/copy_csv_glob_s3.test +# description: Test globbing CSVs on s3 +# group: [glob] + +statement ok +PRAGMA enable_verification + +require httpfs + +require-env S3_TEST_SERVER_AVAILABLE 1 + +# Require that these environment variables are also set + +require-env AWS_DEFAULT_REGION + +require-env AWS_ACCESS_KEY_ID + +require-env AWS_SECRET_ACCESS_KEY + +require-env DUCKDB_S3_ENDPOINT + +require-env DUCKDB_S3_USE_SSL + +# override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues +set ignore_error_messages + +# copy files to S3 before beginning tests +statement ok +COPY (select * from 'data/csv/glob/a1/a1.csv') to 's3://test-bucket/copy_csv_glob_s3/copy/a1/a1.csv'; + +statement ok +COPY (select * from 'data/csv/glob/a2/a2.csv') to 's3://test-bucket/copy_csv_glob_s3/copy/a2/a2.csv'; + +statement ok +COPY (select * from 'data/csv/glob/a3/b1.csv') to 's3://test-bucket/copy_csv_glob_s3/copy/a3/b1.csv'; + +statement ok +COPY (select null) to 
's3://test-bucket/glob/copy/empty/empty.csv'; + +statement ok +COPY (select * from 'data/csv/glob/i1/integer.csv') to 's3://test-bucket/copy_csv_glob_s3/copy/empty/integer.csv'; + +statement ok +CREATE TABLE dates(d DATE); + +statement ok +COPY dates FROM 's3://test-bucket/copy_csv_glob_s3/copy/a[123]/*.csv' (AUTO_DETECT 1); + +# simple globbing for both url styles +foreach urlstyle path vhost + +statement ok +SET s3_url_style='${urlstyle}' + +query I +SELECT * FROM dates ORDER BY 1 +---- +2019-06-05 +2019-06-15 +2019-06-25 +2019-07-05 +2019-07-15 +2019-07-25 +2019-08-05 +2019-08-15 +2019-08-25 + +# nothing matches the glob +statement error +INSERT INTO dates FROM read_csv('s3://test-bucket/copy_csv_glob_s3/copy/*/a*a.csv', auto_detect=1) +---- +No files found that match the pattern + +endloop diff --git a/test/sql/copy/csv/glob/read_csv_glob_s3.test b/test/sql/copy/csv/glob/read_csv_glob_s3.test new file mode 100644 index 0000000..f84d13a --- /dev/null +++ b/test/sql/copy/csv/glob/read_csv_glob_s3.test @@ -0,0 +1,174 @@ +# name: test/sql/copy/csv/glob/read_csv_glob_s3.test +# description: Test globbing CSVs over s3 +# group: [glob] + +statement ok +PRAGMA enable_verification + +require httpfs + +require-env S3_TEST_SERVER_AVAILABLE 1 + +# Require that these environment variables are also set + +require-env AWS_DEFAULT_REGION + +require-env AWS_ACCESS_KEY_ID + +require-env AWS_SECRET_ACCESS_KEY + +require-env DUCKDB_S3_ENDPOINT + +require-env DUCKDB_S3_USE_SSL + +# override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues +set ignore_error_messages + +# Copy files to S3 before beginning tests +statement ok +COPY (select * from read_csv_auto('data/csv/glob/a1/a1.csv')) to 's3://test-bucket/read_csv_glob_s3/glob/a1/a1.csv' ( HEADER ); + +statement ok +COPY (select * from read_csv_auto('data/csv/glob/a2/a2.csv')) to 's3://test-bucket/read_csv_glob_s3/glob/a2/a2.csv' ( HEADER ); + +statement ok +COPY (select * from read_csv_auto('data/csv/glob/a3/b1.csv')) to 's3://test-bucket/read_csv_glob_s3/glob/a3/b1.csv' ( HEADER ); + +statement ok +COPY (select null) to 's3://test-bucket/read_csv_glob_s3/glob/empty/empty.csv' (HEADER 0); + +statement ok +COPY (select * from read_csv_auto('data/csv/glob/i1/integer.csv')) to 's3://test-bucket/read_csv_glob_s3/glob/i1/integer.csv' ( HEADER ); + +statement ok +COPY (select * from read_csv_auto('data/csv/glob/a1/a1.csv')) to 's3://test-bucket/read_csv_glob_s3/glob/f1/f*.csv' ( HEADER ); + +statement ok +COPY (select * from read_csv_auto('data/csv/glob/a2/a2.csv')) to 's3://test-bucket/read_csv_glob_s3/glob/f2/f[a].csv' ( HEADER ); + +foreach urlstyle path vhost + +statement ok +SET s3_url_style='${urlstyle}' + +# simple globbing +query I +SELECT * FROM read_csv('s3://test-bucket/read_csv_glob_s3/glob/a[123]/*.csv', auto_detect=1) ORDER BY 1 +---- +2019-06-05 +2019-06-15 +2019-06-25 +2019-07-05 +2019-07-15 +2019-07-25 +2019-08-05 +2019-08-15 +2019-08-25 + +query I +SELECT * FROM read_csv('s3://test-bucket/read_csv_glob_s3/*/a[123]/a*.csv', auto_detect=1) ORDER BY 1 +---- +2019-06-05 +2019-06-15 +2019-06-25 +2019-07-05 +2019-07-15 +2019-07-25 + +query II +SELECT a, b LIKE '%a1.csv%' FROM read_csv('s3://test-bucket/read_csv_glob_s3/gl*/a[123]/a*.csv', auto_detect=1, filename=1) t1(a,b) ORDER BY 1 +---- +2019-06-05 1 +2019-06-15 1 +2019-06-25 1 +2019-07-05 0 +2019-07-15 0 +2019-07-25 0 + +# read-csv auto fails here because of a type mismatch: most files contain dates, but one file contains integers 
+statement error +SELECT * FROM read_csv('s3://test-bucket/read_csv_glob_s3/glob/*/*.csv', auto_detect=1) ORDER BY 1 +---- + +# forcing string parsing works +query I +SELECT * FROM read_csv('s3://test-bucket/read_csv_glob_s3/glob/[aei]*/*.csv', columns=STRUCT_PACK(d := 'STRING')) ORDER BY 1 +---- +1 +2 +2019-06-05 +2019-06-15 +2019-06-25 +2019-07-05 +2019-07-15 +2019-07-25 +2019-08-05 +2019-08-15 +2019-08-25 +3 + +query II +SELECT a, b LIKE '%a_.csv' FROM read_csv('s3://test-bucket/read_csv_glob_s3/glob/[aei]*/*.csv', columns=STRUCT_PACK(d := 'STRING'), filename=1) t(a,b) ORDER BY 1 +---- +1 0 +2 0 +2019-06-05 1 +2019-06-15 1 +2019-06-25 1 +2019-07-05 1 +2019-07-15 1 +2019-07-25 1 +2019-08-05 0 +2019-08-15 0 +2019-08-25 0 +3 0 + +# test glob parsing +query I +SELECT COUNT(*) FROM glob('s3://test-bucket/read_csv_glob_s3/glob/[aei]*/*.csv') +---- +5 + +# nothing matches the glob +statement error +SELECT * FROM read_csv('s3://test-bucket/read_csv_glob_s3/glob/[aei]*/a*a.csv', auto_detect=1) ORDER BY 1 +---- + +query I +SELECT COUNT(*) FROM glob('s3://test-bucket/read_csv_glob_s3/glob/[aei]*/a*a.csv') +---- +0 + +query I +select count(*) from glob('s3://test-bucket/read_csv_glob_s3/glob/rewoiarwiouw3rajkawrasdf790273489*.csv') limit 10; +---- +0 + +# Escaping +query I +SELECT * FROM read_csv('s3://test-bucket/read_csv_glob_s3/glob/f*/f\*.csv', auto_detect=1) ORDER BY 1 +---- +2019-06-05 +2019-06-15 +2019-06-25 + +# TODO: for supporting this we need to combine s3 url encoding with duckdb pattern matching +#query I +#SELECT * FROM read_csv('s3://test-bucket/read_csv_glob_s3/glob/f2/f[a].csv', auto_detect=1) ORDER BY 1 +#---- +#2019-07-05 +#2019-07-15 +#2019-07-25 + +#query I +#SELECT * FROM read_csv('s3://test-bucket/read_csv_glob_s3/glob/f2/f\[a\].csv', auto_detect=1) ORDER BY 1 +#---- +#2019-07-05 +#2019-07-15 +#2019-07-25 + +statement error +SELECT * FROM read_csv('s3://test-bucket/read_csv_glob_s3/glob/e2/e[a].csv', auto_detect=1) ORDER BY 1 +---- + +endloop diff --git a/test/sql/copy/csv/parallel/csv_parallel_httpfs.test b/test/sql/copy/csv/parallel/csv_parallel_httpfs.test new file mode 100644 index 0000000..66847a4 --- /dev/null +++ b/test/sql/copy/csv/parallel/csv_parallel_httpfs.test @@ -0,0 +1,36 @@ +# name: test/sql/copy/csv/parallel/csv_parallel_httpfs.test +# description: This test issue #7336 and #7337 +# group: [parallel] + +statement ok +PRAGMA enable_verification + +require httpfs + +query IIII +select column00, column01, column02, column03 from read_csv_auto('https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv') +---- +1 AAAAAAAABAAAAAAA 980124 7135 +2 AAAAAAAACAAAAAAA 819667 1461 +3 AAAAAAAADAAAAAAA 1473522 6247 +4 AAAAAAAAEAAAAAAA 1703214 3986 +5 AAAAAAAAFAAAAAAA 953372 4470 +6 AAAAAAAAGAAAAAAA 213219 6374 +7 AAAAAAAAHAAAAAAA 68377 3219 +8 AAAAAAAAIAAAAAAA 1215897 2471 +9 AAAAAAAAJAAAAAAA 1168667 1404 +10 AAAAAAAAKAAAAAAA 1207553 5143 + +query IIIIIIIIIIIIIIIIII +from read_csv_auto('https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv'); +---- +1 AAAAAAAABAAAAAAA 980124 7135 32946 2452238 2452208 Mr. Javier Lewis Y 9 12 1936 CHILE NULL Javier.Lewis@VFAxlnZEvOx.org 2452508 +2 AAAAAAAACAAAAAAA 819667 1461 31655 2452318 2452288 Dr. Amy Moses Y 9 4 1966 TOGO NULL Amy.Moses@Ovk9KjHH.com 2452318 +3 AAAAAAAADAAAAAAA 1473522 6247 48572 2449130 2449100 Miss Latisha Hamilton Y 18 9 1979 NIUE NULL Latisha.Hamilton@V.com 2452313 +4 AAAAAAAAEAAAAAAA 1703214 3986 39558 2450030 2450000 Dr. 
Michael White Y 7 6 1983 MEXICO NULL Michael.White@i.org 2452361 +5 AAAAAAAAFAAAAAAA 953372 4470 36368 2449438 2449408 Sir Robert Moran N 8 5 1956 FIJI NULL Robert.Moran@Hh.edu 2452469 +6 AAAAAAAAGAAAAAAA 213219 6374 27082 2451883 2451853 Ms. Brunilda Sharp Y 4 12 1925 SURINAME NULL Brunilda.Sharp@T3pylZEUQjm.org 2452430 +7 AAAAAAAAHAAAAAAA 68377 3219 44814 2451438 2451408 Ms. Fonda Wiles N 24 4 1985 GAMBIA NULL Fonda.Wiles@S9KnyEtz9hv.org 2452360 +8 AAAAAAAAIAAAAAAA 1215897 2471 16598 2449406 2449376 Sir Ollie Shipman N 26 12 1938 KOREA, REPUBLIC OF NULL Ollie.Shipman@be.org 2452334 +9 AAAAAAAAJAAAAAAA 1168667 1404 49388 2452275 2452245 Sir Karl Gilbert N 26 10 1966 MONTSERRAT NULL Karl.Gilbert@Crg5KyP2IxX9C4d6.edu 2452454 +10 AAAAAAAAKAAAAAAA 1207553 5143 19580 2451353 2451323 Ms. Albert Brunson N 15 10 1973 JORDAN NULL Albert.Brunson@62.com 2452641 diff --git a/test/sql/copy/csv/parallel/test_parallel_csv.test b/test/sql/copy/csv/parallel/test_parallel_csv.test new file mode 100644 index 0000000..b70d1df --- /dev/null +++ b/test/sql/copy/csv/parallel/test_parallel_csv.test @@ -0,0 +1,157 @@ +# name: test/sql/copy/csv/parallel/test_parallel_csv.test +# description: Test parallel read CSV function on ghub bugs +# group: [parallel] + +statement ok +PRAGMA enable_verification + +query IIIIIIIIIIIIIIIIIIIIIIIIII +FROM read_csv('data/csv/14512_og.csv', buffer_size = 473, strict_mode = false, delim = ',', quote = '"', escape = '"') +---- +00000579000098 13.99 EA PINE RIDGE CHENIN VOIGNIER 750.0 ML 1 13 NULL 1 NULL NULL NULL NULL NULL NULL DEFAULT BRAND NULL NULL NULL NULL BEER & WINE NULL NULL 7.25 {"sales_tax":{ "tax_type": "rate_percent", "value" :0.0725}} +00000609082001 3.99 EA MADELAINE MINI MILK CHOCOLATE TURKEY 1.0 OZ 1 13 NULL NULL NULL NULL NULL NULL NULL NULL MADELEINE NULL NULL NULL NULL CANDY NULL NULL 7.25 {"sales_tax":{ "tax_type": "rate_percent", "value" :0.0725}} +00817566020096 9.99 EA COTSWOLD EW 5.3 OZ 1 13 NULL NULL NULL NULL NULL NULL NULL NULL LONG CLAWSON NULL NULL NULL NULL DELI INGREDIENTS: DOUBLE GLOUCESTER CHEESE (PASTEURIZED MILK SALT ENZYMES DAIRY CULTURES ANNATTO EXTRACT AS A COLOR) RECONSTITUTED MINCED ONIONS (2%) DRIED CHIVES. CONTAINS: MILK THIS PRODUCT WAS PRODUCED IN AN ENVIRONMENT THAT ALSO USES PEANUTS TREE NUTS EGGS MILK WHEAT SOY FISH SHELLFISH AND SESAME. NULL 2.0 {"sales_tax":{ "tax_type": "rate_percent", "value" :0.02}} + + +query III +select * from read_csv_auto('data/csv/dirty_line.csv', skip = 1) +---- +1.5 a 3 +2.5 b 4 + +query II +select * from read_csv_auto('data/csv/null_string.csv', nullstr="null") +---- +1 NULL +NULL 2 + +# We need to add header = false here. 
Because with vector_size=2 the sniffer will think we have a header, since the +# row 1 null has types INTEGER;VARCHAR at that point +query II +select * from read_csv_auto('data/csv/null_string.csv', header = false) +---- +a b +1 null +null 2 + +query IIIIIIIIII +select * from read_csv_auto('data/csv/aws_locations.csv') +---- +IAD Washington District of Columbia United States US 20 38.94449997 -77.45580292 North America United States, Mexico, & Canada +ORD Chicago Illinois United States US 20 41.978611 -87.904722 North America United States, Mexico, & Canada +JFK New York New York United States US 8 40.639801 -73.7789 North America United States, Mexico, & Canada +ATL Atlanta Georgia United States US 17 33.6367 -84.428101 North America United States, Mexico, & Canada +LAX Los Angeles California United States US 15 33.942501 -118.407997 North America United States, Mexico, & Canada +MIA Miami Florida United States US 11 25.79319953918457 -80.29060363769531 North America United States, Mexico, & Canada +DFW Dallas-Fort Worth Texas United States US 18 32.896801 -97.038002 North America United States, Mexico, & Canada +IAH Houston Texas United States US 6 29.984399795532227 -95.34140014648438 North America United States, Mexico, & Canada +SFO San Francisco California United States US 8 37.61899948120117 -122.375 North America United States, Mexico, & Canada +BOS Boston Massachusetts United States US 5 42.36429977 -71.00520325 North America United States, Mexico, & Canada +DEN Denver Colorado United States US 6 39.861698150635 -104.672996521 North America United States, Mexico, & Canada +PDX Portland Oregon United States US 2 45.58869934 -122.5979996 North America United States, Mexico, & Canada +SEA Seattle Washington United States US 6 47.448889 -122.309444 North America United States, Mexico, & Canada +MSP Minneapolis Minnesota United States US 4 44.882 -93.221802 North America United States, Mexico, & Canada +PHX Phoenix Arizona United States US 3 33.43429946899414 -112.01200103759766 North America United States, Mexico, & Canada +PHL Philadelphia Pennsylvania United States US 2 39.87189865112305 -75.24109649658203 North America United States, Mexico, & Canada +SLC Salt Lake City Utah United States US 1 40.78839874267578 -111.97799682617188 North America United States, Mexico, & Canada +BNA Nashville Tennessee United States US 2 36.1245002746582 -86.6781997680664 North America United States, Mexico, & Canada +DTW Detroit Michigan United States US 2 42.212398529052734 -83.35340118408203 North America United States, Mexico, & Canada +TPA Tampa Florida United States US 2 27.975500106811523 -82.533203125 North America United States, Mexico, & Canada +EWR Newark New Jersey United States US 10 40.692501068115234 -74.168701171875 North America United States, Mexico, & Canada +CMH Columbus Ohio United States US 2 39.998001 -82.891899 North America United States, Mexico, & Canada +MCI Kansas City Missouri United States US 2 39.2976 -94.713898 North America United States, Mexico, & Canada +QRO Queretaro NULL North America MX 1 20.6173 -100.185997 undefined null +FRA Frankfurt am Main NULL Germany DE 17 50.033333 8.570556 Europe Europe & Israel +DUS Düsseldorf NULL Germany DE 3 51.289501 6.76678 Europe Europe & Israel +HAM Hamburg NULL Germany DE 6 53.630401611328 9.9882297515869 Europe Europe & Israel +MUC Munich NULL Germany DE 4 48.353802 11.7861 Europe Europe & Israel +TXL Berlin NULL Germany DE 5 52.559722 13.287778 Europe Europe & Israel +CDG Paris NULL France FR 11 49.012798 2.55 Europe Europe & 
Israel +MRS Marseille NULL France FR 6 43.439271922 5.22142410278 Europe Europe & Israel +MXP Milan NULL Italy IT 9 45.6306 8.72811 Europe Europe & Israel +FCO Rome NULL Italy IT 6 41.8002778 12.2388889 Europe Europe & Israel +PMO Palermo NULL Italy IT 1 38.175999 13.091 Europe Europe & Israel +AMS Amsterdam NULL Netherlands NL 5 52.308601 4.76389 Europe Europe & Israel +MAN Manchester NULL UK GB 5 53.35369873046875 -2.2749500274658203 Europe Europe & Israel +LHR London NULL UK GB 25 51.4775 -0.461389 Europe Europe & Israel +DUB Dublin NULL Ireland IE 2 53.421299 -6.27007 Europe Europe & Israel +VIE Vienna NULL Austria AT 3 48.110298156738 16.569700241089 Europe Europe & Israel +ARN Stockholm NULL Sweden SE 4 59.651901245117 17.918600082397 Europe Europe & Israel +CPH Copenhagen NULL Denmark DK 3 55.617900848389 12.656000137329 Europe Europe & Israel +HEL Helsinki NULL Finland FI 4 60.317199707031 24.963300704956 Europe Europe & Israel +ATH Athens NULL Greece GR 1 37.9364013672 23.9444999695 Europe Europe & Israel +BRU Brussels NULL Belgium BE 1 50.901401519800004 4.48443984985 Europe Europe & Israel +BUD Budapest NULL Hungary HU 1 47.42976 19.261093 Europe Europe & Israel +LIS Lisbon NULL Portugal PT 1 38.7813 -9.13592 Europe Europe & Israel +OSL Oslo NULL Norway NO 2 60.193901062012 11.100399971008 Europe Europe & Israel +OTP Bucharest NULL Romania RO 1 44.5711111 26.085 Europe Europe & Israel +PRG Prague NULL Czech Republic CZ 1 50.1008 14.26 Europe Europe & Israel +SOF Sofia NULL Bulgaria BG 1 42.696693420410156 23.411436080932617 Europe Europe & Israel +WAW Warsaw NULL Poland PL 3 52.165833 20.967222 Europe Europe & Israel +ZAG Zagreb NULL Croatia HR 1 45.7429008484 16.0687999725 Europe Europe & Israel +ZRH Zurich NULL Switzerland CH 2 47.464699 8.54917 Europe Europe & Israel +BCN Barcelona NULL Spain ES 2 41.2971 2.07846 Europe Europe & Israel +MAD Madrid NULL Spain ES 10 40.471926 -3.56264 Europe Europe & Israel +DEL New Delhi NULL India IN 14 28.5665 77.103104 Asia India +MAA Chennai NULL India IN 8 12.990005493164062 80.16929626464844 Asia India +BOM Mumbai NULL India IN 8 19.0886993408 72.8678970337 Asia India +PNQ Pune NULL India IN 4 18.58209991455078 73.9197006225586 Asia India +BLR Bangalore NULL India IN 5 13.1979 77.706299 Asia India +HYD Hyderabad NULL India IN 5 17.231318 78.429855 Asia India +SIN Singapore NULL Singapore SG 7 1.35019 103.994003 Asia Hong Kong, Indonesia, Philippines, Singapore, South Korea, Taiwan, & Thailand +KIX Osaka NULL Japan JP 5 34.42729949951172 135.24400329589844 Asia Japan +NRT Tokyo NULL Japan JP 22 35.764702 140.386002 Asia Japan +TPE Taoyuan NULL Taiwan TW 3 25.0777 121.233002 Asia Hong Kong, Indonesia, Philippines, Singapore, South Korea, Taiwan, & Thailand +ICN Seoul NULL Korea KR 8 37.46910095214844 126.45099639892578 Asia Hong Kong, Indonesia, Philippines, Singapore, South Korea, Taiwan, & Thailand +BKK Bangkok NULL Thailand TH 2 13.689999 100.750114 Asia Hong Kong, Indonesia, Philippines, Singapore, South Korea, Taiwan, & Thailand +CCU Kolkata NULL India IN 2 22.654699325561523 88.44670104980469 Asia India +CGK Jakarta NULL Indonesia ID 5 -6.1255698204 106.65599823 Asia Hong Kong, Indonesia, Philippines, Singapore, South Korea, Taiwan, & Thailand +KUL Kuala Lumpur NULL Malaysia MY 2 2.745579957962 101.70999908447 Asia Hong Kong, Indonesia, Philippines, Singapore, South Korea, Taiwan, & Thailand +MNL Manila NULL Philippines PH 1 14.5086 121.019997 Asia Hong Kong, Indonesia, Philippines, Singapore, South Korea, Taiwan, & Thailand +HAN 
Hanoi NULL Vietnam VN 1 21.221200942993164 105.80699920654297 Asia Hong Kong, Indonesia, Philippines, Singapore, South Korea, Taiwan, & Thailand +SGN Ho Chi Minh City NULL Vietnam VN 1 10.8187999725 106.652000427 Asia Hong Kong, Indonesia, Philippines, Singapore, South Korea, Taiwan, & Thailand +SYD Sydney NULL Australia AU 4 -33.94609832763672 151.177001953125 Australia & New Zealand Australia & New Zealand +AKL Auckland NULL New Zealand NZ 2 -37.008098602299995 174.792007446 Australia & New Zealand Australia & New Zealand +MEL Melbourne NULL Australia AU 3 -37.673302 144.843002 Australia & New Zealand Australia & New Zealand +PER Perth NULL Australia AU 1 -31.94029998779297 115.96700286865234 Australia & New Zealand Australia & New Zealand +GRU Sao Paulo NULL Brazil BR 8 -23.435556 -46.473056 South America South America +GIG Rio De Janeiro NULL Brazil BR 5 -22.8099994659 -43.2505569458 South America South America +FOR Fortaleza NULL Brazil BR 4 -3.776279926300049 -38.53260040283203 South America South America +BOG Bogota NULL Colombia CO 3 4.70159 -74.1469 South America South America +EZE Buenos Aires NULL Argentina AR 2 -34.8222 -58.5358 South America South America +SCL Santiago NULL Chile CL 3 -33.393001556396484 -70.78579711914062 South America South America +LIM Lima NULL Peru PE 2 -12.0219 -77.114305 South America South America +TLV Tel Aviv NULL Israel IL 2 32.01139831542969 34.88669967651367 Middle East Europe & Israel +BAH Manama NULL Bahrain BH 2 26.27079963684082 50.63359832763672 Middle East South Africa, Kenya, & Middle East +DXB Dubai NULL UAE AE 1 25.2527999878 55.3643989563 Middle East South Africa, Kenya, & Middle East +FJR Fujairah NULL UAE AE 3 25.112222 56.324167 Middle East South Africa, Kenya, & Middle East +MCT Muscat NULL Oman OM 1 23.593299865722656 58.284400939941406 Middle East South Africa, Kenya, & Middle East +CPT Cape Town NULL South Africa ZA 1 -33.9648017883 18.6016998291 Africa South Africa, Kenya, & Middle East +JNB Johannesburg NULL South Africa ZA 1 -26.1392 28.246 Africa South Africa, Kenya, & Middle East +NBO Nairobi NULL Kenya KE 1 -1.31923997402 36.9277992249 Africa South Africa, Kenya, & Middle East +PVG Shanghai NULL China CN 1 31.143400192260742 121.80500030517578 China China +SZX Shenzhen NULL China CN 1 22.639299392700195 113.81099700927734 China China +ZHY Zhongwei NULL China CN 1 37.572778 105.154444 China China +PEK Beijing NULL China CN 1 40.080101013183594 116.58499908447266 China China +HKG Hong Kong NULL China HK 4 22.308901 113.915001 China China +CMH Columbus Ohio United States US 1 39.998056 -82.891944 North America United States, Mexico, & Canada +HIO Hillsboro Oregon United States US 1 45.540394 -122.949825 North America United States, Mexico, & Canada +TPA Tampa Florida United States US 1 27.979722 -82.534722 North America United States, Mexico, & Canada +PNQ Pune Maharashtra India IN 1 18.582222 73.919722 Asia India +MCT Muscat Muscat Oman OM 1 23.6015386 58.2899376 Middle East South Africa, Kenya, & Middle East + + +require httpfs + +query II +select * from read_csv_auto("https://duckdb-public-gzip-test.s3.us-east-2.amazonaws.com/test.csv", header = 0); +---- +foo bar +foo bar + + +query II +from read_csv_auto("https://duckdb-public-gzip-test.s3.us-east-2.amazonaws.com/test.csv.gz", header = 0); +---- +foo bar +foo bar diff --git a/test/sql/copy/csv/recursive_query_csv.test_slow b/test/sql/copy/csv/recursive_query_csv.test_slow new file mode 100644 index 0000000..e5e94ef --- /dev/null +++ 
b/test/sql/copy/csv/recursive_query_csv.test_slow @@ -0,0 +1,215 @@ +# name: test/sql/copy/csv/recursive_query_csv.test_slow +# description: Test read CSV function in a recursive CTE +# group: [csv] + +require httpfs + +statement ok +PRAGMA enable_verification + +# FIXME: bug in recursive CTE +mode skip + +query IIII +with recursive + base as + ( select * + from 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/Real_Estate_Sales_2001-2021_GL.csv' + where '2003-01-01' < "date recorded" and "date recorded" < '2010-01-01' and "sale amount" > 1000000 + ) + , chains as + ( + select + town + , "date recorded" as begTS + , "date recorded" as endTS + , [struct_pack(date:= "date recorded", amt:="sale amount", type:="property type")] as chain + from base + where "property type" = 'Condo' + union all + select + chains.town + , chains.begTS + , base."date recorded" as endTS + , list_append(chains.chain, struct_pack(date:= "date recorded", amt:="sale amount", type:="property type")) as chain + from base, chains + where + base.town = chains.town + and + ( + (len(chains.chain) = 1 and list_contains(['Residential', 'Single Family'], base."property type")) + or (len(chains.chain) = 2 and base."property type" = 'Condo') + or (len(chains.chain) = 3 and list_contains(['Residential', 'Single Family'], base."property type")) + ) + and chains.endTS < base."date recorded" + and base."date recorded" < (chains.endTS + interval 6 days) + ) + select * from chains + order by all; +---- +Clinton 2007-08-22 2007-08-22 [{'date': 2007-08-22, 'amt': 1175000.0, 'type': Condo}] +Danbury 2007-05-02 2007-05-02 [{'date': 2007-05-02, 'amt': 3105000.0, 'type': Condo}] +Danbury 2007-05-09 2007-05-09 [{'date': 2007-05-09, 'amt': 1014205.0, 'type': Condo}] +Darien 2007-09-12 2007-09-12 [{'date': 2007-09-12, 'amt': 1150000.0, 'type': Condo}] +Fairfield 2007-06-15 2007-06-15 [{'date': 2007-06-15, 'amt': 1100000.0, 'type': Condo}] +Greenwich 2006-11-20 2006-11-20 [{'date': 2006-11-20, 'amt': 2050000.0, 'type': Condo}] +Greenwich 2006-11-20 2006-11-21 [{'date': 2006-11-20, 'amt': 2050000.0, 'type': Condo}, {'date': 2006-11-21, 'amt': 6500000.0, 'type': Single Family}] +Greenwich 2006-12-14 2006-12-14 [{'date': 2006-12-14, 'amt': 1800000.0, 'type': Condo}] +Greenwich 2006-12-14 2006-12-15 [{'date': 2006-12-14, 'amt': 1800000.0, 'type': Condo}, {'date': 2006-12-15, 'amt': 2195000.0, 'type': Single Family}] +Greenwich 2006-12-14 2006-12-15 [{'date': 2006-12-14, 'amt': 1800000.0, 'type': Condo}, {'date': 2006-12-15, 'amt': 5500000.0, 'type': Single Family}] +Greenwich 2006-12-14 2006-12-18 [{'date': 2006-12-14, 'amt': 1800000.0, 'type': Condo}, {'date': 2006-12-18, 'amt': 5010000.0, 'type': Single Family}] +Greenwich 2007-01-19 2007-01-19 [{'date': 2007-01-19, 'amt': 2227500.0, 'type': Condo}] +Greenwich 2007-01-19 2007-01-24 [{'date': 2007-01-19, 'amt': 2227500.0, 'type': Condo}, {'date': 2007-01-24, 'amt': 1750000.0, 'type': Single Family}] +Greenwich 2007-01-31 2007-01-31 [{'date': 2007-01-31, 'amt': 4600000.0, 'type': Condo}] +Greenwich 2007-02-27 2007-02-27 [{'date': 2007-02-27, 'amt': 1120000.0, 'type': Condo}] +Greenwich 2007-02-27 2007-02-28 [{'date': 2007-02-27, 'amt': 1120000.0, 'type': Condo}, {'date': 2007-02-28, 'amt': 2260000.0, 'type': Single Family}] +Greenwich 2007-02-27 2007-03-01 [{'date': 2007-02-27, 'amt': 1120000.0, 'type': Condo}, {'date': 2007-03-01, 'amt': 1900000.0, 'type': Single Family}] +Greenwich 2007-02-27 2007-03-02 [{'date': 2007-02-27, 'amt': 1120000.0, 'type': Condo}, {'date': 2007-03-02, 
'amt': 6500000.0, 'type': Single Family}] +Greenwich 2007-03-12 2007-03-12 [{'date': 2007-03-12, 'amt': 1084687.0, 'type': Condo}] +Greenwich 2007-03-12 2007-03-13 [{'date': 2007-03-12, 'amt': 1084687.0, 'type': Condo}, {'date': 2007-03-13, 'amt': 1600000.0, 'type': Single Family}] +Greenwich 2007-03-12 2007-03-16 [{'date': 2007-03-12, 'amt': 1084687.0, 'type': Condo}, {'date': 2007-03-16, 'amt': 6537500.0, 'type': Single Family}] +Greenwich 2007-03-12 2007-03-16 [{'date': 2007-03-12, 'amt': 1084687.0, 'type': Condo}, {'date': 2007-03-16, 'amt': 12500000.0, 'type': Single Family}] +Greenwich 2007-03-12 2007-03-20 [{'date': 2007-03-12, 'amt': 1084687.0, 'type': Condo}, {'date': 2007-03-16, 'amt': 6537500.0, 'type': Single Family}, {'date': 2007-03-20, 'amt': 4100000.0, 'type': Condo}] +Greenwich 2007-03-12 2007-03-20 [{'date': 2007-03-12, 'amt': 1084687.0, 'type': Condo}, {'date': 2007-03-16, 'amt': 12500000.0, 'type': Single Family}, {'date': 2007-03-20, 'amt': 4100000.0, 'type': Condo}] +Greenwich 2007-03-12 2007-03-22 [{'date': 2007-03-12, 'amt': 1084687.0, 'type': Condo}, {'date': 2007-03-16, 'amt': 6537500.0, 'type': Single Family}, {'date': 2007-03-20, 'amt': 4100000.0, 'type': Condo}, {'date': 2007-03-22, 'amt': 1580000.0, 'type': Single Family}] +Greenwich 2007-03-12 2007-03-22 [{'date': 2007-03-12, 'amt': 1084687.0, 'type': Condo}, {'date': 2007-03-16, 'amt': 12500000.0, 'type': Single Family}, {'date': 2007-03-20, 'amt': 4100000.0, 'type': Condo}, {'date': 2007-03-22, 'amt': 1580000.0, 'type': Single Family}] +Greenwich 2007-03-12 2007-03-23 [{'date': 2007-03-12, 'amt': 1084687.0, 'type': Condo}, {'date': 2007-03-16, 'amt': 6537500.0, 'type': Single Family}, {'date': 2007-03-20, 'amt': 4100000.0, 'type': Condo}, {'date': 2007-03-23, 'amt': 2850000.0, 'type': Single Family}] +Greenwich 2007-03-12 2007-03-23 [{'date': 2007-03-12, 'amt': 1084687.0, 'type': Condo}, {'date': 2007-03-16, 'amt': 12500000.0, 'type': Single Family}, {'date': 2007-03-20, 'amt': 4100000.0, 'type': Condo}, {'date': 2007-03-23, 'amt': 2850000.0, 'type': Single Family}] +Greenwich 2007-03-20 2007-03-20 [{'date': 2007-03-20, 'amt': 4100000.0, 'type': Condo}] +Greenwich 2007-03-20 2007-03-22 [{'date': 2007-03-20, 'amt': 4100000.0, 'type': Condo}, {'date': 2007-03-22, 'amt': 1580000.0, 'type': Single Family}] +Greenwich 2007-03-20 2007-03-23 [{'date': 2007-03-20, 'amt': 4100000.0, 'type': Condo}, {'date': 2007-03-23, 'amt': 2850000.0, 'type': Single Family}] +Greenwich 2007-03-30 2007-03-30 [{'date': 2007-03-30, 'amt': 1200000.0, 'type': Condo}] +Greenwich 2007-05-15 2007-05-15 [{'date': 2007-05-15, 'amt': 1215000.0, 'type': Condo}] +Greenwich 2007-05-15 2007-05-17 [{'date': 2007-05-15, 'amt': 1215000.0, 'type': Condo}, {'date': 2007-05-17, 'amt': 2250000.0, 'type': Single Family}] +Greenwich 2007-06-15 2007-06-15 [{'date': 2007-06-15, 'amt': 2264564.0, 'type': Condo}] +Greenwich 2007-06-15 2007-06-19 [{'date': 2007-06-15, 'amt': 2264564.0, 'type': Condo}, {'date': 2007-06-19, 'amt': 1470000.0, 'type': Single Family}] +Greenwich 2007-06-15 2007-06-19 [{'date': 2007-06-15, 'amt': 2264564.0, 'type': Condo}, {'date': 2007-06-19, 'amt': 1965000.0, 'type': Single Family}] +Greenwich 2007-07-10 2007-07-10 [{'date': 2007-07-10, 'amt': 1240000.0, 'type': Condo}] +Greenwich 2007-07-10 2007-07-11 [{'date': 2007-07-10, 'amt': 1240000.0, 'type': Condo}, {'date': 2007-07-11, 'amt': 3150000.0, 'type': Single Family}] +Greenwich 2007-07-10 2007-07-11 [{'date': 2007-07-10, 'amt': 1240000.0, 'type': Condo}, {'date': 2007-07-11, 
'amt': 3250000.0, 'type': Single Family}] +Greenwich 2007-07-10 2007-07-11 [{'date': 2007-07-10, 'amt': 1240000.0, 'type': Condo}, {'date': 2007-07-11, 'amt': 7050000.0, 'type': Single Family}] +Greenwich 2007-07-10 2007-07-12 [{'date': 2007-07-10, 'amt': 1240000.0, 'type': Condo}, {'date': 2007-07-12, 'amt': 1269000.0, 'type': Single Family}] +Greenwich 2007-07-10 2007-07-12 [{'date': 2007-07-10, 'amt': 1240000.0, 'type': Condo}, {'date': 2007-07-12, 'amt': 3565000.0, 'type': Single Family}] +Greenwich 2007-07-10 2007-07-17 [{'date': 2007-07-10, 'amt': 1240000.0, 'type': Condo}, {'date': 2007-07-12, 'amt': 1269000.0, 'type': Single Family}, {'date': 2007-07-17, 'amt': 3000000.0, 'type': Condo}] +Greenwich 2007-07-10 2007-07-17 [{'date': 2007-07-10, 'amt': 1240000.0, 'type': Condo}, {'date': 2007-07-12, 'amt': 3565000.0, 'type': Single Family}, {'date': 2007-07-17, 'amt': 3000000.0, 'type': Condo}] +Greenwich 2007-07-10 2007-07-19 [{'date': 2007-07-10, 'amt': 1240000.0, 'type': Condo}, {'date': 2007-07-12, 'amt': 1269000.0, 'type': Single Family}, {'date': 2007-07-17, 'amt': 3000000.0, 'type': Condo}, {'date': 2007-07-19, 'amt': 3600000.0, 'type': Single Family}] +Greenwich 2007-07-10 2007-07-19 [{'date': 2007-07-10, 'amt': 1240000.0, 'type': Condo}, {'date': 2007-07-12, 'amt': 3565000.0, 'type': Single Family}, {'date': 2007-07-17, 'amt': 3000000.0, 'type': Condo}, {'date': 2007-07-19, 'amt': 3600000.0, 'type': Single Family}] +Greenwich 2007-07-10 2007-07-20 [{'date': 2007-07-10, 'amt': 1240000.0, 'type': Condo}, {'date': 2007-07-12, 'amt': 1269000.0, 'type': Single Family}, {'date': 2007-07-17, 'amt': 3000000.0, 'type': Condo}, {'date': 2007-07-20, 'amt': 7225000.0, 'type': Single Family}] +Greenwich 2007-07-10 2007-07-20 [{'date': 2007-07-10, 'amt': 1240000.0, 'type': Condo}, {'date': 2007-07-12, 'amt': 1269000.0, 'type': Single Family}, {'date': 2007-07-17, 'amt': 3000000.0, 'type': Condo}, {'date': 2007-07-20, 'amt': 18000000.0, 'type': Single Family}] +Greenwich 2007-07-10 2007-07-20 [{'date': 2007-07-10, 'amt': 1240000.0, 'type': Condo}, {'date': 2007-07-12, 'amt': 3565000.0, 'type': Single Family}, {'date': 2007-07-17, 'amt': 3000000.0, 'type': Condo}, {'date': 2007-07-20, 'amt': 7225000.0, 'type': Single Family}] +Greenwich 2007-07-10 2007-07-20 [{'date': 2007-07-10, 'amt': 1240000.0, 'type': Condo}, {'date': 2007-07-12, 'amt': 3565000.0, 'type': Single Family}, {'date': 2007-07-17, 'amt': 3000000.0, 'type': Condo}, {'date': 2007-07-20, 'amt': 18000000.0, 'type': Single Family}] +Greenwich 2007-07-17 2007-07-17 [{'date': 2007-07-17, 'amt': 3000000.0, 'type': Condo}] +Greenwich 2007-07-17 2007-07-19 [{'date': 2007-07-17, 'amt': 3000000.0, 'type': Condo}, {'date': 2007-07-19, 'amt': 3600000.0, 'type': Single Family}] +Greenwich 2007-07-17 2007-07-20 [{'date': 2007-07-17, 'amt': 3000000.0, 'type': Condo}, {'date': 2007-07-20, 'amt': 7225000.0, 'type': Single Family}] +Greenwich 2007-07-17 2007-07-20 [{'date': 2007-07-17, 'amt': 3000000.0, 'type': Condo}, {'date': 2007-07-20, 'amt': 18000000.0, 'type': Single Family}] +Greenwich 2007-08-16 2007-08-16 [{'date': 2007-08-16, 'amt': 2430000.0, 'type': Condo}] +Greenwich 2007-08-16 2007-08-17 [{'date': 2007-08-16, 'amt': 2430000.0, 'type': Condo}, {'date': 2007-08-17, 'amt': 1925000.0, 'type': Single Family}] +Greenwich 2007-08-16 2007-08-17 [{'date': 2007-08-16, 'amt': 2430000.0, 'type': Condo}, {'date': 2007-08-17, 'amt': 3400000.0, 'type': Single Family}] +Greenwich 2007-08-16 2007-08-20 [{'date': 2007-08-16, 'amt': 2430000.0, 'type': 
Condo}, {'date': 2007-08-20, 'amt': 2590000.0, 'type': Single Family}] +Greenwich 2007-08-16 2007-08-21 [{'date': 2007-08-16, 'amt': 2430000.0, 'type': Condo}, {'date': 2007-08-21, 'amt': 4100000.0, 'type': Single Family}] +New Canaan 2007-01-22 2007-01-22 [{'date': 2007-01-22, 'amt': 1735000.0, 'type': Condo}] +New Canaan 2007-02-15 2007-02-15 [{'date': 2007-02-15, 'amt': 2230000.0, 'type': Condo}] + +# JoinDependentFilter triggers on this test, make sure that the result is the same with and without +statement ok +set disabled_optimizers to 'expression_rewriter' + +query IIII +with recursive + base as + ( select * + from 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/Real_Estate_Sales_2001-2021_GL.csv' + where '2003-01-01' < "date recorded" and "date recorded" < '2010-01-01' and "sale amount" > 1000000 + ) + , chains as + ( + select + town + , "date recorded" as begTS + , "date recorded" as endTS + , [struct_pack(date:= "date recorded", amt:="sale amount", type:="property type")] as chain + from base + where "property type" = 'Condo' + union all + select + chains.town + , chains.begTS + , base."date recorded" as endTS + , list_append(chains.chain, struct_pack(date:= "date recorded", amt:="sale amount", type:="property type")) as chain + from base, chains + where + base.town = chains.town + and + ( + (len(chains.chain) = 1 and list_contains(['Residential', 'Single Family'], base."property type")) + or (len(chains.chain) = 2 and base."property type" = 'Condo') + or (len(chains.chain) = 3 and list_contains(['Residential', 'Single Family'], base."property type")) + ) + and chains.endTS < base."date recorded" + and base."date recorded" < (chains.endTS + interval 6 days) + ) + select * from chains + order by all; +---- +Clinton 2007-08-22 2007-08-22 [{'date': 2007-08-22, 'amt': 1175000.0, 'type': Condo}] +Danbury 2007-05-02 2007-05-02 [{'date': 2007-05-02, 'amt': 3105000.0, 'type': Condo}] +Danbury 2007-05-09 2007-05-09 [{'date': 2007-05-09, 'amt': 1014205.0, 'type': Condo}] +Darien 2007-09-12 2007-09-12 [{'date': 2007-09-12, 'amt': 1150000.0, 'type': Condo}] +Fairfield 2007-06-15 2007-06-15 [{'date': 2007-06-15, 'amt': 1100000.0, 'type': Condo}] +Greenwich 2006-11-20 2006-11-20 [{'date': 2006-11-20, 'amt': 2050000.0, 'type': Condo}] +Greenwich 2006-11-20 2006-11-21 [{'date': 2006-11-20, 'amt': 2050000.0, 'type': Condo}, {'date': 2006-11-21, 'amt': 6500000.0, 'type': Single Family}] +Greenwich 2006-12-14 2006-12-14 [{'date': 2006-12-14, 'amt': 1800000.0, 'type': Condo}] +Greenwich 2006-12-14 2006-12-15 [{'date': 2006-12-14, 'amt': 1800000.0, 'type': Condo}, {'date': 2006-12-15, 'amt': 2195000.0, 'type': Single Family}] +Greenwich 2006-12-14 2006-12-15 [{'date': 2006-12-14, 'amt': 1800000.0, 'type': Condo}, {'date': 2006-12-15, 'amt': 5500000.0, 'type': Single Family}] +Greenwich 2006-12-14 2006-12-18 [{'date': 2006-12-14, 'amt': 1800000.0, 'type': Condo}, {'date': 2006-12-18, 'amt': 5010000.0, 'type': Single Family}] +Greenwich 2007-01-19 2007-01-19 [{'date': 2007-01-19, 'amt': 2227500.0, 'type': Condo}] +Greenwich 2007-01-19 2007-01-24 [{'date': 2007-01-19, 'amt': 2227500.0, 'type': Condo}, {'date': 2007-01-24, 'amt': 1750000.0, 'type': Single Family}] +Greenwich 2007-01-31 2007-01-31 [{'date': 2007-01-31, 'amt': 4600000.0, 'type': Condo}] +Greenwich 2007-02-27 2007-02-27 [{'date': 2007-02-27, 'amt': 1120000.0, 'type': Condo}] +Greenwich 2007-02-27 2007-02-28 [{'date': 2007-02-27, 'amt': 1120000.0, 'type': Condo}, {'date': 2007-02-28, 'amt': 2260000.0, 'type': Single Family}] 
+Greenwich 2007-02-27 2007-03-01 [{'date': 2007-02-27, 'amt': 1120000.0, 'type': Condo}, {'date': 2007-03-01, 'amt': 1900000.0, 'type': Single Family}] +Greenwich 2007-02-27 2007-03-02 [{'date': 2007-02-27, 'amt': 1120000.0, 'type': Condo}, {'date': 2007-03-02, 'amt': 6500000.0, 'type': Single Family}] +Greenwich 2007-03-12 2007-03-12 [{'date': 2007-03-12, 'amt': 1084687.0, 'type': Condo}] +Greenwich 2007-03-12 2007-03-13 [{'date': 2007-03-12, 'amt': 1084687.0, 'type': Condo}, {'date': 2007-03-13, 'amt': 1600000.0, 'type': Single Family}] +Greenwich 2007-03-12 2007-03-16 [{'date': 2007-03-12, 'amt': 1084687.0, 'type': Condo}, {'date': 2007-03-16, 'amt': 6537500.0, 'type': Single Family}] +Greenwich 2007-03-12 2007-03-16 [{'date': 2007-03-12, 'amt': 1084687.0, 'type': Condo}, {'date': 2007-03-16, 'amt': 12500000.0, 'type': Single Family}] +Greenwich 2007-03-12 2007-03-20 [{'date': 2007-03-12, 'amt': 1084687.0, 'type': Condo}, {'date': 2007-03-16, 'amt': 6537500.0, 'type': Single Family}, {'date': 2007-03-20, 'amt': 4100000.0, 'type': Condo}] +Greenwich 2007-03-12 2007-03-20 [{'date': 2007-03-12, 'amt': 1084687.0, 'type': Condo}, {'date': 2007-03-16, 'amt': 12500000.0, 'type': Single Family}, {'date': 2007-03-20, 'amt': 4100000.0, 'type': Condo}] +Greenwich 2007-03-12 2007-03-22 [{'date': 2007-03-12, 'amt': 1084687.0, 'type': Condo}, {'date': 2007-03-16, 'amt': 6537500.0, 'type': Single Family}, {'date': 2007-03-20, 'amt': 4100000.0, 'type': Condo}, {'date': 2007-03-22, 'amt': 1580000.0, 'type': Single Family}] +Greenwich 2007-03-12 2007-03-22 [{'date': 2007-03-12, 'amt': 1084687.0, 'type': Condo}, {'date': 2007-03-16, 'amt': 12500000.0, 'type': Single Family}, {'date': 2007-03-20, 'amt': 4100000.0, 'type': Condo}, {'date': 2007-03-22, 'amt': 1580000.0, 'type': Single Family}] +Greenwich 2007-03-12 2007-03-23 [{'date': 2007-03-12, 'amt': 1084687.0, 'type': Condo}, {'date': 2007-03-16, 'amt': 6537500.0, 'type': Single Family}, {'date': 2007-03-20, 'amt': 4100000.0, 'type': Condo}, {'date': 2007-03-23, 'amt': 2850000.0, 'type': Single Family}] +Greenwich 2007-03-12 2007-03-23 [{'date': 2007-03-12, 'amt': 1084687.0, 'type': Condo}, {'date': 2007-03-16, 'amt': 12500000.0, 'type': Single Family}, {'date': 2007-03-20, 'amt': 4100000.0, 'type': Condo}, {'date': 2007-03-23, 'amt': 2850000.0, 'type': Single Family}] +Greenwich 2007-03-20 2007-03-20 [{'date': 2007-03-20, 'amt': 4100000.0, 'type': Condo}] +Greenwich 2007-03-20 2007-03-22 [{'date': 2007-03-20, 'amt': 4100000.0, 'type': Condo}, {'date': 2007-03-22, 'amt': 1580000.0, 'type': Single Family}] +Greenwich 2007-03-20 2007-03-23 [{'date': 2007-03-20, 'amt': 4100000.0, 'type': Condo}, {'date': 2007-03-23, 'amt': 2850000.0, 'type': Single Family}] +Greenwich 2007-03-30 2007-03-30 [{'date': 2007-03-30, 'amt': 1200000.0, 'type': Condo}] +Greenwich 2007-05-15 2007-05-15 [{'date': 2007-05-15, 'amt': 1215000.0, 'type': Condo}] +Greenwich 2007-05-15 2007-05-17 [{'date': 2007-05-15, 'amt': 1215000.0, 'type': Condo}, {'date': 2007-05-17, 'amt': 2250000.0, 'type': Single Family}] +Greenwich 2007-06-15 2007-06-15 [{'date': 2007-06-15, 'amt': 2264564.0, 'type': Condo}] +Greenwich 2007-06-15 2007-06-19 [{'date': 2007-06-15, 'amt': 2264564.0, 'type': Condo}, {'date': 2007-06-19, 'amt': 1470000.0, 'type': Single Family}] +Greenwich 2007-06-15 2007-06-19 [{'date': 2007-06-15, 'amt': 2264564.0, 'type': Condo}, {'date': 2007-06-19, 'amt': 1965000.0, 'type': Single Family}] +Greenwich 2007-07-10 2007-07-10 [{'date': 2007-07-10, 'amt': 1240000.0, 'type': Condo}] 
+Greenwich 2007-07-10 2007-07-11 [{'date': 2007-07-10, 'amt': 1240000.0, 'type': Condo}, {'date': 2007-07-11, 'amt': 3150000.0, 'type': Single Family}] +Greenwich 2007-07-10 2007-07-11 [{'date': 2007-07-10, 'amt': 1240000.0, 'type': Condo}, {'date': 2007-07-11, 'amt': 3250000.0, 'type': Single Family}] +Greenwich 2007-07-10 2007-07-11 [{'date': 2007-07-10, 'amt': 1240000.0, 'type': Condo}, {'date': 2007-07-11, 'amt': 7050000.0, 'type': Single Family}] +Greenwich 2007-07-10 2007-07-12 [{'date': 2007-07-10, 'amt': 1240000.0, 'type': Condo}, {'date': 2007-07-12, 'amt': 1269000.0, 'type': Single Family}] +Greenwich 2007-07-10 2007-07-12 [{'date': 2007-07-10, 'amt': 1240000.0, 'type': Condo}, {'date': 2007-07-12, 'amt': 3565000.0, 'type': Single Family}] +Greenwich 2007-07-10 2007-07-17 [{'date': 2007-07-10, 'amt': 1240000.0, 'type': Condo}, {'date': 2007-07-12, 'amt': 1269000.0, 'type': Single Family}, {'date': 2007-07-17, 'amt': 3000000.0, 'type': Condo}] +Greenwich 2007-07-10 2007-07-17 [{'date': 2007-07-10, 'amt': 1240000.0, 'type': Condo}, {'date': 2007-07-12, 'amt': 3565000.0, 'type': Single Family}, {'date': 2007-07-17, 'amt': 3000000.0, 'type': Condo}] +Greenwich 2007-07-10 2007-07-19 [{'date': 2007-07-10, 'amt': 1240000.0, 'type': Condo}, {'date': 2007-07-12, 'amt': 1269000.0, 'type': Single Family}, {'date': 2007-07-17, 'amt': 3000000.0, 'type': Condo}, {'date': 2007-07-19, 'amt': 3600000.0, 'type': Single Family}] +Greenwich 2007-07-10 2007-07-19 [{'date': 2007-07-10, 'amt': 1240000.0, 'type': Condo}, {'date': 2007-07-12, 'amt': 3565000.0, 'type': Single Family}, {'date': 2007-07-17, 'amt': 3000000.0, 'type': Condo}, {'date': 2007-07-19, 'amt': 3600000.0, 'type': Single Family}] +Greenwich 2007-07-10 2007-07-20 [{'date': 2007-07-10, 'amt': 1240000.0, 'type': Condo}, {'date': 2007-07-12, 'amt': 1269000.0, 'type': Single Family}, {'date': 2007-07-17, 'amt': 3000000.0, 'type': Condo}, {'date': 2007-07-20, 'amt': 7225000.0, 'type': Single Family}] +Greenwich 2007-07-10 2007-07-20 [{'date': 2007-07-10, 'amt': 1240000.0, 'type': Condo}, {'date': 2007-07-12, 'amt': 1269000.0, 'type': Single Family}, {'date': 2007-07-17, 'amt': 3000000.0, 'type': Condo}, {'date': 2007-07-20, 'amt': 18000000.0, 'type': Single Family}] +Greenwich 2007-07-10 2007-07-20 [{'date': 2007-07-10, 'amt': 1240000.0, 'type': Condo}, {'date': 2007-07-12, 'amt': 3565000.0, 'type': Single Family}, {'date': 2007-07-17, 'amt': 3000000.0, 'type': Condo}, {'date': 2007-07-20, 'amt': 7225000.0, 'type': Single Family}] +Greenwich 2007-07-10 2007-07-20 [{'date': 2007-07-10, 'amt': 1240000.0, 'type': Condo}, {'date': 2007-07-12, 'amt': 3565000.0, 'type': Single Family}, {'date': 2007-07-17, 'amt': 3000000.0, 'type': Condo}, {'date': 2007-07-20, 'amt': 18000000.0, 'type': Single Family}] +Greenwich 2007-07-17 2007-07-17 [{'date': 2007-07-17, 'amt': 3000000.0, 'type': Condo}] +Greenwich 2007-07-17 2007-07-19 [{'date': 2007-07-17, 'amt': 3000000.0, 'type': Condo}, {'date': 2007-07-19, 'amt': 3600000.0, 'type': Single Family}] +Greenwich 2007-07-17 2007-07-20 [{'date': 2007-07-17, 'amt': 3000000.0, 'type': Condo}, {'date': 2007-07-20, 'amt': 7225000.0, 'type': Single Family}] +Greenwich 2007-07-17 2007-07-20 [{'date': 2007-07-17, 'amt': 3000000.0, 'type': Condo}, {'date': 2007-07-20, 'amt': 18000000.0, 'type': Single Family}] +Greenwich 2007-08-16 2007-08-16 [{'date': 2007-08-16, 'amt': 2430000.0, 'type': Condo}] +Greenwich 2007-08-16 2007-08-17 [{'date': 2007-08-16, 'amt': 2430000.0, 'type': Condo}, {'date': 2007-08-17, 'amt': 
1925000.0, 'type': Single Family}] +Greenwich 2007-08-16 2007-08-17 [{'date': 2007-08-16, 'amt': 2430000.0, 'type': Condo}, {'date': 2007-08-17, 'amt': 3400000.0, 'type': Single Family}] +Greenwich 2007-08-16 2007-08-20 [{'date': 2007-08-16, 'amt': 2430000.0, 'type': Condo}, {'date': 2007-08-20, 'amt': 2590000.0, 'type': Single Family}] +Greenwich 2007-08-16 2007-08-21 [{'date': 2007-08-16, 'amt': 2430000.0, 'type': Condo}, {'date': 2007-08-21, 'amt': 4100000.0, 'type': Single Family}] +New Canaan 2007-01-22 2007-01-22 [{'date': 2007-01-22, 'amt': 1735000.0, 'type': Condo}] +New Canaan 2007-02-15 2007-02-15 [{'date': 2007-02-15, 'amt': 2230000.0, 'type': Condo}] diff --git a/test/sql/copy/csv/test_12314.test_slow b/test/sql/copy/csv/test_12314.test_slow new file mode 100644 index 0000000..0d765dc --- /dev/null +++ b/test/sql/copy/csv/test_12314.test_slow @@ -0,0 +1,18 @@ +# name: test/sql/copy/csv/test_12314.test_slow +# description: Test CSV reading for issue 12314 +# group: [csv] + +require httpfs + +statement ok +PRAGMA enable_verification + +statement error +from read_csv('https://github.com/duckdb/duckdb-data/releases/download/v1.0/sample_data_12314.csv.gz',HEADER = 1, PARALLEL=false); +---- +Change the maximum length size, e.g., max_line_size=2097408 + +query I +select count(*) from read_csv('https://github.com/duckdb/duckdb-data/releases/download/v1.0/sample_data_12314.csv.gz',HEADER = 1, PARALLEL=false , max_line_size=2097408); +---- +26238 diff --git a/test/sql/copy/csv/test_csv_httpfs.test_slow b/test/sql/copy/csv/test_csv_httpfs.test_slow new file mode 100644 index 0000000..4406665 --- /dev/null +++ b/test/sql/copy/csv/test_csv_httpfs.test_slow @@ -0,0 +1,33 @@ +# name: test/sql/copy/csv/test_csv_httpfs.test_slow +# description: This test triggers the http prefetch mechanism. +# group: [csv] + +statement ok +pragma enable_verification; + +require httpfs + +require parquet + +#FIXME: remote changed? +mode skip + +# Add test for 3731 +query I +SELECT count(*) FROM read_csv_auto('https://datasets.imdbws.com/name.basics.tsv.gz', delim='\t', quote='') +---- +12783090 + +query I + copy ( + SELECT * + REPLACE ( + str_split(primaryProfession,',') as primaryProfession, + str_split(knownForTitles,',') as knownForTitles, + case WHEN regexp_matches(deathYear,'[0-9]+') THEN CAST(deathYear as integer) END as deathYear, + case WHEN regexp_matches(birthYear,'[0-9]+') THEN CAST(birthYear as integer) END as birthYear + ) + FROM read_csv_auto('https://datasets.imdbws.com/name.basics.tsv.gz', delim='\t', quote='') + ) to '__TEST_DIR__/name_basics.parquet' (FORMAT 'parquet', CODEC 'ZSTD') +---- +12783090 diff --git a/test/sql/copy/csv/test_csv_httpfs_main.test b/test/sql/copy/csv/test_csv_httpfs_main.test new file mode 100644 index 0000000..8ed9a6a --- /dev/null +++ b/test/sql/copy/csv/test_csv_httpfs_main.test @@ -0,0 +1,355 @@ +# name: test/sql/copy/csv/test_csv_httpfs_main.test +# description: This test triggers the http prefetch mechanism. 
+# group: [csv] + +require httpfs + +statement ok +PRAGMA enable_verification + + +#FIXME this test fails: file is nonexistent +mode skip + +query IIIIII rowsort +SELECT * from read_csv_auto('https://www.data.gouv.fr/fr/datasets/r/6d186965-f41b-41f3-9b23-88241cc6890c'); +---- +2020 Allemagne Germany 26.1 53196.069 200601.2 +2020 Autriche Austria 18.0 4723.5 26215.8 +2020 Belgique Belgium 28.999999999999996 9436.1 32553.0 +2020 Bulgarie Bulgaria 11.600000000000001 1124.1 9698.7 +2020 Chypre Cyprus 0.0 0.0 1627.6 +2020 Croatie Croatia 16.3 1094.8 6726.3 +2020 Danemark Denmark 11.600000000000001 1579.0 13601.4 +2020 Espagne Spain 17.4 14211.7 81512.9 +2020 Estonie Estonia 8.5 241.1 2827.3 +2020 Finlande Finland 2.8000000000000003 692.3 24674.4 +2020 France France 20.3 28278.9 139375.8 +2020 Grèce Greece 5.800000000000001 896.5 15401.9 +2020 Hongrie Hungary 30.5 5486.7 17872.4 +2020 Irlande Ireland 17.4 1968.477 11296.601 +2020 Italie Italy 29.2 33042.585 113119.475 +2020 Lettonie Latvia 8.200000000000001 323.605 3926.131 +2020 Lituanie Lithuania 10.7 584.104 5457.728 +2020 Luxembourg Luxembourg 16.5 623.165 3786.785 +2020 Malte Malta 0.0 0.0 547.5 +2020 Pays-Bas Netherlands 37.1 16588.314 44682.656 +2020 Pologne Poland 13.5 9323.205 69135.018 +2020 Portugal Portugal 11.1 1814.878 16354.725 +2020 Roumanie Romania 23.7 5626.161 23712.653 +2020 Royaume-Uni United Kingdom 32.4 39311.416 121414.483 +2020 République tchèque Czech Republic 21.4 5187.282 24263.896 +2020 Slovaquie Slovakia 25.0 2564.876 10248.401 +2020 Slovénie Slovenia 12.1 590.243 4861.315 +2020 Suède Sweden 1.5 475.195 31311.413 +2020 UE 28 Europe 28 22.5 238152.4 1056907.5 +2021 Allemagne Germany 26.760345686044435 51812.567 193616.957 +2021 Autriche Austria 18.720006775926056 4645.795 24817.272 +2021 Belgique Belgium 29.279402721103864 9088.083 31039.168 +2021 Bulgarie Bulgaria 12.368015142641884 1176.537 9512.739 +2021 Chypre Cyprus 0.0 0.0 1528.558 +2021 Croatie Croatia 17.10389029082304 1100.12 6431.987 +2021 Danemark Denmark 11.485631727184947 1508.152 13130.771 +2021 Espagne Spain 19.10173955663722 13815.0 72323.256 +2021 Estonie Estonia 8.988278645659518 245.094 2726.818 +2021 Finlande Finland 2.9937725178230212 694.288 23191.074 +2021 France France 20.649030024470434 26465.646 128168.955 +2021 Grèce Greece 7.580480506088059 1097.87 14482.855 +2021 Hongrie Hungary 32.344729318831554 5693.164 17601.52 +2021 Irlande Ireland 18.020604987495144 1953.468 10840.191 +2021 Italie Italy 30.86368769746751 31807.236 103057.147 +2021 Lettonie Latvia 8.502139837843602 322.927 3798.185 +2021 Lituanie Lithuania 11.029023816606903 582.797 5284.212 +2021 Luxembourg Luxembourg 17.282784281000467 564.365 3265.475 +2021 Malte Malta 0.0 0.0 499.875 +2021 Pays-Bas Netherlands 37.61392206122467 15896.316 42261.788 +2021 Pologne Poland 13.146720200313602 9235.656 70250.647 +2021 Portugal Portugal 11.437926753365227 1740.3 15215.17 +2021 Roumanie Romania 24.909638477223016 5846.885 23472.38 +2021 République tchèque Czech Republic 21.716683280446812 5158.445 23753.374 +2021 Slovaquie Slovakia 25.253930010417324 2427.134 9610.916 +2021 Slovénie Slovenia 13.141683407321874 582.024 4428.839 +2021 Suède Sweden 1.497679952802663 471.085 31454.317 +2021 UE 27 UE 27 21.894190365821018 193930.95399999994 885764.4460000001 + +query IIIIII rowsort res +SELECT * from 
read_csv('https://www.data.gouv.fr/fr/datasets/r/6d186965-f41b-41f3-9b23-88241cc6890c',DELIM=';',Columns={'annee_de_reference':'VARCHAR','pays':'VARCHAR','label_en':'VARCHAR','part_du_gaz_naturel_dans_la_consommation_finale_d_energie0':'VARCHAR','consommation_finale_de_gaz_naturel_mtep':'VARCHAR','consommation_finale_d_energie_totale_mtep':'VARCHAR'}); + + +query IIIIII rowsort res +SELECT * from read_csv('https://www.data.gouv.fr/fr/datasets/r/6d186965-f41b-41f3-9b23-88241cc6890c',DELIM=';',Columns={'annee_de_reference':'VARCHAR','pays':'VARCHAR','label_en':'VARCHAR','part_du_gaz_naturel_dans_la_consommation_finale_d_energie0':'VARCHAR','consommation_finale_de_gaz_naturel_mtep':'VARCHAR','consommation_finale_d_energie_totale_mtep':'VARCHAR'}); + + +# Give it a try to a request that returns length 0 +query I +SELECT count(*) from read_csv_auto('https://query1.finance.yahoo.com/v7/finance/download/^GSPC?period1=1512086400&period2=1670630400&interval=1d&events=history') +---- +1265 + +# Give it a try to a request that returns length 0 +query I +SELECT count(*) from read_csv_auto('https://query1.finance.yahoo.com/v7/finance/download/^GSPC?period1=1512086400&period2=1670630400&interval=1d&events=history') +---- +1265 + +# Give it a try to a request that returns length 0 +query I +SELECT count(*) from read_csv_auto('https://query1.finance.yahoo.com/v7/finance/download/^GSPC?period1=1512086400&period2=1670630400&interval=1d&events=history') +---- +1265 + +#Add test for 5924 +query IIIIII +select * from read_csv_auto('https://csvbase.com/meripaterson/stock-exchanges'); +---- +1 Africa Lesotho HYBSE NULL 2019-03-25 +2 Asia Kazakhstan Astana International Financial Centre AIXK 2018-11-18 +3 Africa South Africa ZAR X ZARX 2018-11-18 +4 South America Argentina Bolsas y Mercados Argentinos NULL 2018-04-02 +5 North America United States of America Delaware Board of Trade NULL 2018-04-02 +6 Australia & Oceania Australia Chi-X Asia Pacific NULL 2018-04-02 +7 Australia & Oceania Australia Chi-X Australia CHIA 2018-04-02 +8 South America Mexico BIVA BIVA 2018-01-06 +9 Africa South Africa Equity Express Securities Exchange NULL 2017-12-11 +10 Africa South Africa Cape Town Stock Exchange NULL 2021-10-22 +11 North America Curacao Dutch Caribbean Securities Exchange DCSX 2017-09-12 +12 North America Canada NEO NEOE 2017-09-06 +13 North America Canada Canadian Securities Exchange CNSX 2017-09-06 +14 Western Europe Germany XETRA XETR 2017-08-21 +15 Western Europe France Euronext Paris XPAR 2017-08-19 +16 Western Europe United Kingdom Euronext London XLDN 2017-08-19 +17 Eastern Europe Albania Tirana Stock Exchange XTIR 2017-08-16 +18 Africa Algeria Bourse d'Alger XALG 2017-08-16 +19 Africa Angola BODIVA XBDV 2017-08-16 +20 South America Argentina Buenos Aires Stock Exchange XBUE 2017-08-16 +21 South America Argentina Mercado Abierto Electrónico XMAB 2017-08-16 +22 Eastern Europe Armenia Armenia Securities Exchange XARM 2020-07-29 +23 Australia & Oceania Australia Australian Securities Exchange XASX 2017-08-16 +24 Australia & Oceania Australia Block Event BLEV 2017-08-16 +25 Australia & Oceania Australia IR Plus Securities Exchange SIMV 2017-08-16 +26 Australia & Oceania Australia National Stock Exchange of Australia XNEC 2017-08-16 +27 Australia & Oceania Australia Sydney Stock Exchange APXL 2017-08-16 +28 Western Europe Austria Wiener Börse XWBO 2017-08-16 +29 Asia Azerbaijan Baku Stock Exchange BSEX 2017-08-16 +30 North America Bahamas Bahamas International Securities Exchange XBAA 2017-08-16 +31 Middle East 
Bahrain Bahrain Bourse XBAH 2017-08-16 +32 Asia Bangladesh Chittagong Stock Exchange XCHG 2017-08-16 +33 Asia Bangladesh Dhaka Stock Exchange XDHA 2017-08-16 +34 North America Barbados Barbados Stock Exchange XBAB 2017-08-16 +35 Eastern Europe Belarus Belarusian Currency and Stock Exchange BCSE 2017-08-16 +36 Western Europe Belgium Euronext Brussels XBRU 2017-08-16 +37 North America Bermuda Bermuda Stock Exchange XBDA 2017-08-16 +38 Asia Bhutan Royal Securities Exchange of Bhutan NULL 2017-08-16 +39 South America Bolivia Bolsa de Valores de Bolivia XBOL 2017-08-16 +40 Eastern Europe Bosnia and Herzegovina Banja Luka Stock Exchange XBLB 2017-08-16 +41 Eastern Europe Bosnia and Herzegovina Sarajevo Stock Exchange XSSE 2017-08-16 +42 Africa Botswana Botswana Stock Exchange XBOT 2017-08-16 +43 South America Brazil B3 - Brasil Bolsa Balcão BVMF 2017-08-16 +44 South America Brazil Bolsa de Valores Minab - Espírito Santo BOVM 2017-08-16 +45 Eastern Europe Bulgaria Bulgarian Stock Exchange XBUL 2017-08-16 +46 Asia Cambodia Cambodia Securities Exchange XCSX 2017-08-16 +47 North America Canada Montreal Exchange XMOD 2017-08-16 +48 North America Canada Nasdaq Canada XCSD 2017-08-16 +49 North America Canada TMX TMXS 2017-08-16 +50 North America Canada Toronto Stock Exchange XTSE 2017-08-16 +51 Africa Cape Verde Bolsa de Valores de Cabo Verde XBVC 2017-08-16 +52 North America Cayman Islands Cayman Islands Stock Exchange XCAY 2017-08-16 +53 Western Europe Channel Islands Channel Islands Stock Exchange NULL 2017-08-16 +54 South America Chile Santiago Electronic Stock Exchange XBCL 2017-08-16 +55 South America Chile Santiago Stock Exchange XSGO 2017-08-16 +56 South America Chile Valparaiso Stock Exchange BOVA 2017-08-16 +57 Asia China Shanghai Stock Exchange XSHG 2017-08-16 +58 Asia China Shenzhen Stock Exchange XSHE 2017-08-16 +59 South America Colombia Bolsa de Valores de Colombia XBOG 2017-08-16 +60 North America Costa Rica Bolsa Nacional de Valores de Costa Rica XBNV 2017-08-16 +61 Eastern Europe Croatia Zagreb Stock Exchange XZAG 2017-08-16 +62 Eastern Europe Cyprus Cyprus Stock Exchange XCYS 2017-08-16 +63 Eastern Europe Czech Republic Prague Stock Exchange XPRAG 2017-08-16 +64 Eastern Europe Czech Republic RM-System Czech Stock Exchange XRMZ 2017-08-16 +65 Western Europe Denmark Nasdaq Copenhagen XCSE 2017-08-16 +66 North America Dominican Republic Bolsa de Valores de la República Dominicana XBVR 2017-08-16 +67 South America Ecuador Bolsa de Valores de Guayaquil XGUA 2017-08-16 +68 South America Ecuador Bolsa de Valores de Quito XQUI 2017-08-16 +69 Africa Egypt Egyptian Exchange XCAI 2017-08-16 +70 Africa Egypt Nilex NILX 2017-08-16 +71 North America El Salvador Bolsa de Valores de El Salvador XSVA 2017-08-16 +72 Eastern Europe Estonia Tallinn Stock Exchange XTAL 2017-08-16 +73 Australia & Oceania Fiji South Pacific Stock Exchange XSPS 2017-08-16 +74 Western Europe Finland Nasdaq Helsinki XHEL 2017-08-16 +75 Africa Gabon Bourse Régionale des Valeurs Mobilières d'Afrique Centrale NULL 2017-08-16 +76 Asia Georgia Georgian Stock Exchange XGSE 2017-08-16 +77 Western Europe Germany Börse Berlin XBER 2017-08-16 +78 Western Europe Germany Börse Düsseldorf XDUS 2017-08-16 +79 Western Europe Germany Börse Hamburg & Hannover HAMB 2017-08-16 +80 Western Europe Germany Börse München XMUN 2017-08-16 +81 Western Europe Germany Börse Stuttgart XSTU 2017-08-16 +82 Western Europe Germany Deutsche Börse Group XFRA 2017-08-16 +83 Western Europe Germany Eurex XEUR 2017-08-16 +84 Western Europe Germany Tradegate 
Exchange TGAT 2017-08-16 +85 Africa Ghana Ghana Stock Exchange XGHA 2017-08-16 +86 Western Europe Gibraltar Gibraltar Stock Exchange GSXL 2017-08-16 +87 Western Europe Greece Athens Stock Exchange ASEX 2017-08-16 +88 North America Guatemala Bolsa Nacional de Valores XGTG 2017-08-16 +89 Western Europe Guernsey International Stock Exchange XCIE 2017-08-16 +90 South America Guyana Guyana Stock Exchange GSCI 2017-08-16 +91 North America Haiti Haitian Stock Exchange NULL 2017-08-16 +92 North America Honduras Bolsa Centroamericana de Valores XBCV 2017-08-16 +93 Asia Hong Kong Hong Kong Growth Enterprise Market XGEM 2017-08-16 +94 Asia Hong Kong Hong Kong Stock Exchange XHKG 2017-08-16 +95 Eastern Europe Hungary Budapest Stock Exchange XBUD 2017-08-16 +96 Western Europe Iceland Nasdaq Iceland XICE 2017-08-16 +97 Asia India Ahmedabad Stock Exchange NULL 2017-08-16 +98 Asia India Bangalore Stock Exchange XBAN 2017-08-16 +99 Asia India Bombay Stock Exchange XBOM 2017-08-16 +100 Asia India BSE SME BSME 2017-08-16 +101 Asia India Calcutta Stock Exchange XCAL 2017-08-16 +102 Asia India Cochin Stock Exchange NULL 2017-08-16 +103 Asia India Coimbatore Stock Exchange NULL 2017-08-16 +104 Asia India Delhi Stock Exchange XDES 2017-08-16 +105 Asia India Inter-Connected Stock Exchange of India ISEX 2017-08-16 +106 Asia India Ludhiana Stock and Capital NULL 2017-08-16 +107 Asia India Metropolitan Stock Exchange NULL 2017-08-16 +108 Asia India National Stock Exchange of India XNSE 2017-08-16 +109 Asia India OTC Exchange of India OTCX 2017-08-16 +110 Asia India Pune Stock Exchange NULL 2017-08-16 +111 Asia India Saurashtra Kutch Stock Exchange NULL 2017-08-16 +112 Asia India United Stock Exchange of India XUSE 2017-08-16 +113 Asia India Vadodara Stock Exchange NULL 2017-08-16 +114 Asia Indonesia Indonesia Stock Exchange XIDX 2017-08-16 +115 Asia Iran Iran Fara Bourse NULL 2017-08-16 +116 Middle East Iran Tehran Stock Exchange XTEH 2017-08-16 +117 Middle East Iraq Iraq Stock Exchange XIQS 2017-08-16 +118 Western Europe Ireland Irish Stock Exchange XDUB 2017-08-16 +119 Middle East Israel Tel Aviv Stock Exchange XTAE 2017-08-16 +120 Western Europe Italy Borsa Italiana XMIL 2017-08-16 +121 Africa Ivory Coast Bourse Regionale des Valeurs Mobilieres XBRV 2017-08-16 +122 North America Jamaica Jamaica Stock Exchange XJAM 2017-08-16 +123 Asia Japan Chi-X Japan CHIJ 2017-08-16 +124 Asia Japan Daiwa Securities DRCT 2017-08-16 +125 Asia Japan Fukuoka Stock Exchange XFKA 2017-08-16 +126 Asia Japan Japan Exchange Group XJPX 2017-08-16 +127 Asia Japan Nagoya Stock Exchange XNGO 2017-08-16 +128 Asia Japan Sapporo Securities Exchange XSAP 2017-08-16 +129 Asia Japan SBI Japannext SBIJ 2017-08-16 +130 Middle East Jordan Amman Stock Exchange XAMM 2017-08-16 +131 Asia Kazakhstan Kazakhstan Stock Exchange XKAZ 2017-08-16 +132 Africa Kenya Nairobi Stock Exchange XNAI 2017-08-16 +133 Middle East Kuwait Kuwait Stock Exchange XKUW 2017-08-16 +134 Asia Kyrgyzstan Kyrgyz Stock Exchange XKSE 2017-08-16 +135 Asia Laos Lao Securities Exchange XLAO 2017-08-16 +136 Eastern Europe Latvia Riga Stock Exchange XRIS 2017-08-16 +137 Middle East Lebanon Beirut Stock Exchange XBEY 2017-08-16 +138 Africa Lesotho Maseru Securities Exchange NULL 2017-08-16 +139 Eastern Europe Lithuania Vilnius Stock Exchange XLIT 2017-08-16 +140 Western Europe Luxembourg Luxembourg Stock Exchange XLUX 2017-08-16 +141 Eastern Europe Macedonia Macedonian Stock Exchange XMAE 2017-08-16 +142 Africa Malawi Malawi Stock Exchange XMSW 2017-08-16 +143 Asia Malaysia Bursa Malaysia 
XKLS 2017-08-16 +144 Asia Maldives Maldives Stock Exchange MALX 2017-08-16 +145 Western Europe Malta Malta Stock Exchange XMAL 2017-08-16 +146 Western Europe Malta Malta Stock Exchange Prospects PROS 2017-08-16 +147 Africa Mauritius Stock Exchange of Mauritius XMAU 2017-08-16 +148 North America Mexico Bolsa Mexicana de Valores XMEX 2017-08-16 +149 Western Europe Moldova Moldova Stock Exchange XMOL 2017-08-16 +150 Asia Mongolia Mongolian Stock Exchange XULA 2017-08-16 +151 Eastern Europe Montenegro Montenegro Stock Exchange XMNX 2017-08-16 +152 Africa Morocco Casablanca Stock Exchange XCAS 2017-08-16 +153 Africa Mozambique Bolsa de Valores de Mozambique XBVM 2017-08-16 +154 Asia Myanmar Myanmar Securities Exchange Centre NULL 2017-08-16 +155 Asia Myanmar Yangon Stock Exchange NULL 2017-08-16 +156 Africa Namibia Namibian Stock Exchange XNAM 2017-08-16 +157 Asia Nepal Nepal Stock Exchange XNEP 2017-08-16 +158 Western Europe Netherlands Euronext Amsterdam XAMS 2017-08-16 +159 Western Europe Netherlands Nxchange XNXC 2017-08-16 +160 Australia & Oceania New Zealand New Zealand Exchange XNZE 2017-08-16 +161 North America Nicaragua Bolsa de Valores de Nicaragua XMAN 2017-08-16 +162 Africa Nigeria Nigerian Stock Exchange XNSA 2017-08-16 +163 Western Europe Norway Oslo Stock Exchange XOSL 2017-08-16 +164 Middle East Oman Muscat Securities Market XMUS 2017-08-16 +165 Asia Pakistan Lahore Stock Exchange NULL 2017-08-16 +166 Asia Pakistan Pakistan Stock Exchange XKAR 2017-08-16 +167 Middle East Palestine Palestine Securities Exchange XPAE 2017-08-16 +168 North America Panama Bolsa de Valores de Panama XPTY 2017-08-16 +169 Australia & Oceania Papua New Guinea Port Moresby Stock Exchange XPOM 2017-08-16 +170 South America Paraguay Bolsa de Valores & Productos de Asuncíon XVPA 2017-08-16 +171 South America Peru Bolsa de Valores de Lima XLIM 2017-08-16 +172 Asia Philippines Philippine Stock Exchange XPHS 2017-08-16 +173 Eastern Europe Poland NewConnect XNCO 2017-08-16 +174 Eastern Europe Poland Warsaw Stock Exchange XWAR 2017-08-16 +175 Western Europe Portugal Euronext Lisbon XLIS 2017-08-16 +176 Western Europe Portugal OPEX OPEX 2017-08-16 +177 Middle East Qatar Qatar Stock Exchange DSMD 2017-08-16 +178 Eastern Europe Romania Bucharest Stock Exchange XRAS 2017-08-16 +179 Eastern Europe Russia Moscow Exchange MISX 2017-08-16 +180 Eastern Europe Russia Saint Petersburg Stock Exchange XPET 2017-08-16 +181 Eastern Europe Russia Siberian Exchange XSIB 2017-08-16 +182 Africa Rwanda Rwanda Stock Exchange RSEX 2017-08-16 +183 North America Saint Kitts and Nevis Eastern Caribbean Securities Exchange XECS 2017-08-16 +184 Middle East Saudi Arabia Saudi Stock Exchange XSAU 2017-08-16 +185 Eastern Europe Serbia Belgrade Stock Exchange XBEL 2017-08-16 +186 Africa Seychelles Seychelles Securities Exchange (Trop-X) TRPX 2017-08-16 +187 Asia Singapore Singapore Exchange XSES 2017-08-16 +188 Eastern Europe Slovakia Bratislava Stock Exchange XBRA 2017-08-16 +189 Eastern Europe Slovenia Ljubljana Stock Exchange XLJU 2017-08-16 +190 Africa Somalia Somali Stock Exchange NULL 2017-08-16 +191 Africa South Africa A2X Markets A2XX 2017-08-16 +192 Africa South Africa Johannesburg Stock Exchange XJSE 2017-08-16 +193 Asia South Korea Korea New Exchange XKON 2017-08-16 +194 Asia South Korea Korea Stock Exchange XKRX 2017-08-16 +195 Asia South Korea KOSDAQ Securities Exchange XKOS 2017-08-16 +196 Western Europe Spain Bolsa de Bilbao XBIL 2017-08-16 +197 Western Europe Spain Bolsa de Madrid XMAD 2017-08-16 +198 Western Europe Spain 
Bolsa de Valencia XVAL 2017-08-16 +199 Western Europe Spain Borsa de Barcelona XBAR 2017-08-16 +200 Western Europe Spain Latibex XLAT 2017-08-16 +201 Asia Sri Lanka Colombo Stock Exchange XCOL 2017-08-16 +202 Africa Sudan Khartoum Stock Exchange XKHA 2017-08-16 +203 Africa Swaziland Swaziland Stock Exchange XSWA 2017-08-16 +204 Western Europe Sweden Aktietorget XSAT 2017-08-16 +205 Western Europe Sweden Nasdaq Stockholm XSTO 2017-08-16 +206 Western Europe Sweden Nordic Growth Market XNGM 2017-08-16 +207 Western Europe Switzerland Berne eXchange XBRN 2017-08-16 +208 Western Europe Switzerland SIX Swiss Exchange XSWX 2017-08-16 +209 Middle East Syria Damascus Securities Exchange XDSE 2017-08-16 +210 Asia Taiwan Taipei Exchange ROCO 2017-08-16 +211 Asia Taiwan Taiwan Stock Exchange XTAI 2017-08-16 +212 Africa Tanzania Dar-es-Salaam Stock Exchange XDAR 2017-08-16 +213 Asia Thailand Stock Exchange of Thailand XBKK 2017-08-16 +214 North America Trinidad and Tobago Trinidad and Tobago Stock Exchange XTRN 2017-08-16 +215 Africa Tunisia Bourse de Tunis XTUN 2017-08-16 +216 Eastern Europe Turkey Borsa İstanbul XIST 2017-08-16 +217 Africa Uganda Uganda Securities Exchange XUGA 2017-08-16 +218 Eastern Europe Ukraine East European Stock Exchange EESE 2017-08-16 +219 Eastern Europe Ukraine PFTS Ukraine Stock Exchange PFTS 2017-08-16 +220 Eastern Europe Ukraine Stock Exchange Perspectiva SEPE 2017-08-16 +221 Eastern Europe Ukraine Ukrainian Exchange UKEX 2017-08-16 +222 Middle East United Arab Emirates Abu Dhabi Securities Market XADS 2017-08-16 +223 Middle East United Arab Emirates Dubai Financial Market XDFM 2017-08-16 +224 Middle East United Arab Emirates Nasdaq Dubai DIFX 2017-08-16 +225 Western Europe United Kingdom Aquis Exchange AQXE 2017-08-16 +226 Western Europe United Kingdom Asset Match AMPX 2017-08-16 +227 Western Europe United Kingdom London Stock Exchange XLON 2017-08-16 +228 Western Europe United Kingdom NEX NEXS 2017-08-16 +229 Western Europe United Kingdom Turquoise TRQX 2017-08-16 +230 North America United States of America Bats BYX Exchange BYXD 2017-08-16 +231 North America United States of America Bats EDGA Exchange EDGA 2017-08-16 +232 North America United States of America Bats US BATS 2017-08-16 +233 North America United States of America BatsEDGX Exchange EDGX 2017-08-16 +234 North America United States of America Chicago Stock Exchange XCHI 2017-08-16 +235 North America United States of America Investors Exchange IEXG 2017-08-16 +236 North America United States of America NASDAQ XNAS 2017-08-16 +237 North America United States of America New York Stock Exchange XNYS 2017-08-16 +238 North America United States of America North American Derivatives Exchange NADEX HEGX 2017-08-16 +239 South America Uruguay Bolsa de Valores de Montevideo XMNT 2017-08-16 +240 South America Uruguay Bolsa Electronica de Valores de Uruguay BVUR 2017-08-16 +241 Asia Uzbekistan Tashkent Stock Exchange XSTE 2017-08-16 +242 Asia Vietnam Hanoi Stock Exchange HSTC 2017-08-16 +243 Asia Vietnam Ho Chi Minh Stock Exchange XSTC 2017-08-16 +244 Africa Zambia Lusaka Stock Exchange XLUS 2017-08-16 +245 Africa Zimbabwe Zimbabwe Stock Exchange XZIM 2017-08-16 +246 Eastern Europe Albania Albanian Securities Exchange XALS 2019-11-17 +247 North America United States of America Long-Term Stock Exchange LTSE 2020-09-14 +248 North America United States of America Miami International Securities Exchange MIHI 2020-09-24 +249 North America United States of America Members' Exchange NULL 2020-09-24 +250 Africa Zimbabwe Victoria 
Falls Stock Exchange NULL 2020-11-01 +251 Asia China Beijing Stock Exchange NULL 2021-12-27 diff --git a/test/sql/copy/csv/test_csv_httpfs_prepared.test b/test/sql/copy/csv/test_csv_httpfs_prepared.test new file mode 100644 index 0000000..43362c0 --- /dev/null +++ b/test/sql/copy/csv/test_csv_httpfs_prepared.test @@ -0,0 +1,50 @@ +# name: test/sql/copy/csv/test_csv_httpfs_prepared.test +# description: CSV Reading From HTTPFS in Prepared Statements +# group: [csv] + +require httpfs + +statement ok +PRAGMA enable_verification + +statement ok +PREPARE boaz_bug AS from read_csv_auto('https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv') order by 1 + +query ITIIIIITTTTIIITTTI +EXECUTE boaz_bug +---- +1 AAAAAAAABAAAAAAA 980124 7135 32946 2452238 2452208 Mr. Javier Lewis Y 9 12 1936 CHILE NULL Javier.Lewis@VFAxlnZEvOx.org 2452508 +2 AAAAAAAACAAAAAAA 819667 1461 31655 2452318 2452288 Dr. Amy Moses Y 9 4 1966 TOGO NULL Amy.Moses@Ovk9KjHH.com 2452318 +3 AAAAAAAADAAAAAAA 1473522 6247 48572 2449130 2449100 Miss Latisha Hamilton Y 18 9 1979 NIUE NULL Latisha.Hamilton@V.com 2452313 +4 AAAAAAAAEAAAAAAA 1703214 3986 39558 2450030 2450000 Dr. Michael White Y 7 6 1983 MEXICO NULL Michael.White@i.org 2452361 +5 AAAAAAAAFAAAAAAA 953372 4470 36368 2449438 2449408 Sir Robert Moran N 8 5 1956 FIJI NULL Robert.Moran@Hh.edu 2452469 +6 AAAAAAAAGAAAAAAA 213219 6374 27082 2451883 2451853 Ms. Brunilda Sharp Y 4 12 1925 SURINAME NULL Brunilda.Sharp@T3pylZEUQjm.org 2452430 +7 AAAAAAAAHAAAAAAA 68377 3219 44814 2451438 2451408 Ms. Fonda Wiles N 24 4 1985 GAMBIA NULL Fonda.Wiles@S9KnyEtz9hv.org 2452360 +8 AAAAAAAAIAAAAAAA 1215897 2471 16598 2449406 2449376 Sir Ollie Shipman N 26 12 1938 KOREA, REPUBLIC OF NULL Ollie.Shipman@be.org 2452334 +9 AAAAAAAAJAAAAAAA 1168667 1404 49388 2452275 2452245 Sir Karl Gilbert N 26 10 1966 MONTSERRAT NULL Karl.Gilbert@Crg5KyP2IxX9C4d6.edu 2452454 +10 AAAAAAAAKAAAAAAA 1207553 5143 19580 2451353 2451323 Ms. Albert Brunson N 15 10 1973 JORDAN NULL Albert.Brunson@62.com 2452641 + +statement ok +DEALLOCATE boaz_bug + +statement error +EXECUTE boaz_bug +---- +Prepared statement "boaz_bug" does not exist + +# Recreate prepared statement with different file + +#FIXME: FILE changed? 
+mode skip + +statement ok +PREPARE boaz_bug AS SELECT * from read_csv_auto('https://www.data.gouv.fr/fr/datasets/r/6d186965-f41b-41f3-9b23-88241cc6890c') order by all limit 5; + +query ITTRRR +EXECUTE boaz_bug +---- +2020 Allemagne Germany 26.1 53196.069 200601.2 +2020 Autriche Austria 18.0 4723.5 26215.8 +2020 Belgique Belgium 28.999999999999996 9436.1 32553.0 +2020 Bulgarie Bulgaria 11.600000000000001 1124.1 9698.7 +2020 Chypre Cyprus 0.0 0.0 1627.6 diff --git a/test/sql/copy/csv/test_csv_remote.test b/test/sql/copy/csv/test_csv_remote.test new file mode 100644 index 0000000..0555b6e --- /dev/null +++ b/test/sql/copy/csv/test_csv_remote.test @@ -0,0 +1,34 @@ +# name: test/sql/copy/csv/test_csv_remote.test +# description: Test reading csv files over http +# group: [csv] + +require httpfs + +statement ok +PRAGMA enable_verification + + +# regular csv file +query ITTTIITITTIIII nosort webpagecsv +SELECT * FROM read_csv_auto('data/csv/real/web_page.csv') ORDER BY 1; +---- + +# file with gzip +query IIIIIIIIIIIIIII nosort lineitemcsv +SELECT * FROM read_csv_auto('data/csv/lineitem1k.tbl.gz') ORDER BY ALL; +---- + +query ITTTIITITTIIII nosort webpagecsv +SELECT * FROM read_csv_auto('https://raw.githubusercontent.com/duckdb/duckdb/main/data/csv/real/web_page.csv') ORDER BY 1; +---- + +query IIIIIIIIIIIIIII nosort lineitemcsv +select * from read_csv_auto('https://raw.githubusercontent.com/duckdb/duckdb/main/data/csv/lineitem1k.tbl.gz') ORDER BY ALL; +---- + + +# Test load from url with query string +query IIIIIIIIIIII +FROM sniff_csv('https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv?v=1') +---- +, " (empty) \n (empty) 0 0 [{'name': column00, 'type': BIGINT}, {'name': column01, 'type': VARCHAR}, {'name': column02, 'type': BIGINT}, {'name': column03, 'type': BIGINT}, {'name': column04, 'type': BIGINT}, {'name': column05, 'type': BIGINT}, {'name': column06, 'type': BIGINT}, {'name': column07, 'type': VARCHAR}, {'name': column08, 'type': VARCHAR}, {'name': column09, 'type': VARCHAR}, {'name': column10, 'type': VARCHAR}, {'name': column11, 'type': BIGINT}, {'name': column12, 'type': BIGINT}, {'name': column13, 'type': BIGINT}, {'name': column14, 'type': VARCHAR}, {'name': column15, 'type': VARCHAR}, {'name': column16, 'type': VARCHAR}, {'name': column17, 'type': BIGINT}] NULL NULL NULL FROM read_csv('https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv?v=1', auto_detect=false, delim=',', quote='"', escape='', new_line='\n', skip=0, comment='', header=false, columns={'column00': 'BIGINT', 'column01': 'VARCHAR', 'column02': 'BIGINT', 'column03': 'BIGINT', 'column04': 'BIGINT', 'column05': 'BIGINT', 'column06': 'BIGINT', 'column07': 'VARCHAR', 'column08': 'VARCHAR', 'column09': 'VARCHAR', 'column10': 'VARCHAR', 'column11': 'BIGINT', 'column12': 'BIGINT', 'column13': 'BIGINT', 'column14': 'VARCHAR', 'column15': 'VARCHAR', 'column16': 'VARCHAR', 'column17': 'BIGINT'}); diff --git a/test/sql/copy/csv/test_csv_remote.test_slow b/test/sql/copy/csv/test_csv_remote.test_slow new file mode 100644 index 0000000..4df0fc6 --- /dev/null +++ b/test/sql/copy/csv/test_csv_remote.test_slow @@ -0,0 +1,15 @@ +# name: test/sql/copy/csv/test_csv_remote.test_slow +# description: Test reading csv files over http, slow queries +# group: [csv] + +statement ok +pragma enable_verification; + +require httpfs + +# Read a compressed file (~44MB compressed, ~700MB uncompressed) over HTTP +query IIIIII +select count(*), min(strain), max(strain), min(strlen(sequence)), max(strlen(sequence)), 
avg(strlen(sequence)) +from read_csv_auto('https://raw.githubusercontent.com/duckdb/duckdb/main/data/csv/sequences.csv.gz', delim=','); +---- +100000 ARG/Cordoba-1006-155/2020 tiger/NY/040420/2020 17340 30643 29821.264410 diff --git a/test/sql/copy/csv/test_mixed_lines.test_slow b/test/sql/copy/csv/test_mixed_lines.test_slow new file mode 100644 index 0000000..9da5cea --- /dev/null +++ b/test/sql/copy/csv/test_mixed_lines.test_slow @@ -0,0 +1,32 @@ +# name: test/sql/copy/csv/test_mixed_lines.test_slow +# description: Test mixed lines +# group: [csv] + +require httpfs + +statement ok +pragma enable_verification; + +statement error +from 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/202204-clean-sane-header.csv' +---- +* Disable the parser's strict mode (strict_mode=false) to allow reading rows that do not comply with the CSV standard. + +query I +select count(*) from read_csv('https://github.com/duckdb/duckdb-data/releases/download/v1.0/202204-clean-sane-header.csv', strict_mode=false) +---- +50000 + +query IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +from read_csv('https://github.com/duckdb/duckdb-data/releases/download/v1.0/202204-clean-sane-header.csv', strict_mode=false) order by all limit 10 +---- +11111111 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 1111-11-11 1111-11-11 11111111 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaa111111aaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaa 1111a1aa-1111-1111-aa11-11aa11a11111 aaaaaaaaaaaaaaaaaaaaaaaaaa 1111-11-11 aaaaaa-aaaaa-aaaaaaaaaaaaaaaaa a1a-11111 a1111111-1aaa-11a1-a1a1-aa1a111111a1 aaaaaaa aaaaaaa aaaaa NULL aaaaaaaaaaaaaaaaaaaa 1a1111 1a111111111111111 1a111111111111111 1a11111 aaa aaaaaaaaaaaa NULL aaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaa1111a1aa-1111-1111-aa11-11aa11a11111aaaaaaaaaaaaaaaaaa-aa-aaa-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaa NULL NULL NULL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa NULL aaaaaaa aaaaaaa 11a aa-aa-aaa-aaaaaa NULL NULL NULL NULL aa-aaa-1111a aaaa NULL NULL NULL aaaaa aaaaaaaaaa aaaaa 1a1 aaaaaaaa NULL NULL NULL aaaaaaaaaa +11111111 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 1111-11-11 1111-11-11 11111111 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaa111111aaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaa 1111a1aa-1111-1111-aa11-11aa11a11111 aaaaaaaaaaaaaaaaaaaaaaaaaa 1111-11-11 aaaaaa-aaaaa-aaaaaaaaaaaaaaaaa a1a-11111 a1111111-1aaa-11a1-a1a1-aa1a111111a1 aaaaaaa aaaaaaa aaaaa NULL aaaaaaaaaaaaaaaaaaaa 1a1111 1a111111111111111 1a111111111111111 1a11111 aaa aaaaaaaaaaaa NULL aaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaa1111a1aa-1111-1111-aa11-11aa11a11111aaaaaaaaaaaaaaaaaa-aaaa-aaaaaaaa-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaaaaaaaa NULL NULL NULL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa NULL aaaaaaa aaaaaaa 11a aa-aaaa-aaaaaaaa-aaa NULL NULL NULL NULL aa-aaa-1111a aaaa NULL NULL NULL aaaaa aaaaaaaaaa aaaaa 1a1 aaaaaaaa NULL NULL NULL aaaaaaaaaa +11111111 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 1111-11-11 1111-11-11 11111111 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaa111111aaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaa 1111a1aa-1111-1111-aa11-11aa11a11111 aaaaaaaaaaaaaaaaaaaaaaaaaa 1111-11-11 aaaaaa-aaaaa-aaaaaaaaaaaaaaaaa a1a-11111 a11a11a1-111a-11a1-1a1a-1aa11a111111 aaaaaaa aaaaaaa aaaaa NULL aaaaaaaaaaaaaaaaaaaa 1a1111 1a111111111111111 1a11111111111111 1a11111 aaa aaaaaaaaaaaa NULL aaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaa1111a1aa-1111-1111-aa11-11aa11a11111aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa11 
aaaaaaaaaaaaaaaaa11 NULL NULL NULL NULL NULL aaaaaaa aaaaaaa 11a aaaaaaaaaa NULL NULL NULL NULL aa-aaa-1111a aaaa NULL NULL NULL aaaaa aaaaaaaaaa aaaaa 1a1 aaaaaaaa NULL NULL NULL aaaaaaaaaa +11111111 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 1111-11-11 1111-11-11 11111111 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaa111111aaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaa 1111a1aa-1111-1111-aa11-11aa11a11111 aaaaaaaaaaaaaaaaaaaaaaaaaa 1111-11-11 aaaaaa-aaaaaaaaaaaaaaaa a1a-11111 111aaaa1-111a-1111-a1aa-111aa11aa111 aaaaaaa aaaaaaa aaaaa NULL aaaaaaaaaaaaaaa 1a1111 1a111111111111111 1a111111111111111 1a11111 aaa aaaaaaaaaaaa NULL aaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaa1111a1aa-1111-1111-aa11-11aa11a11111aaaaaaaaaaaaaaaaaa-aaaa-aaaaaaaa-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaaaaaaaa NULL NULL NULL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa NULL aaaaaaa aaaaaaa 11a aa-aaaa-aaaaaaaa-aaa NULL NULL NULL NULL aa-aaa-1111a aaaa NULL NULL NULL aaaaa aaaaaaaaaa aaaaa 1a1 aaaaaaaa NULL NULL NULL aaaaaaaaaa +11111111 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 1111-11-11 1111-11-11 11111111 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaa111111aaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaa 1111a1aa-1111-1111-aa11-11aa11a11111 aaaaaaaaaaaaaaaaaaaaaaaaaa 1111-11-11 aaaaaa-aaaaaaaaaaaaaaaa a1a-11111 111aaaa1-111a-1111-a1aa-111aa11aa111 aaaaaaa aaaaaaa aaaaa NULL aaaaaaaaaaaaaaa 1a1111 1a111111111111111 1a111111111111111 1a11111 aaa aaaaaaaaaaaa NULL aaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaa1111a1aa-1111-1111-aa11-11aa11a11111aaaaaaaaaaaaaaaaaa-aaaa-aaaaaaaa-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaaaaaaaa NULL NULL NULL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa NULL aaaaaaa aaaaaaa 11a aa-aaaa-aaaaaaaa-aaa NULL NULL NULL NULL aa-aaa-1111a aaaa NULL NULL NULL aaaaa aaaaaaaaaa aaaaa 1a1 aaaaaaaa NULL NULL NULL aaaaaaaaaa +11111111 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 1111-11-11 1111-11-11 11111111 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaa111111aaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaa 1111a1aa-1111-1111-aa11-11aa11a11111 aaaaaaaaaaaaaaaaaaaaaaaaaa 1111-11-11 aaaaaa-aaaaaaaaaaaaaaaa a1a-11111 111aaaa1-111a-1111-a1aa-111aa11aa111 aaaaaaa aaaaaaa aaaaa NULL aaaaaaaaaaaaaaa 1a1111 1a111111111111111 1a111111111111111 1a11111 aaa aaaaaaaaaaaa NULL aaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaa1111a1aa-1111-1111-aa11-11aa11a11111aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa11 aaaaaaaaaaaaaaaaa11 NULL NULL NULL NULL NULL aaaaaaa aaaaaaa 11a aaaaaaaaaa NULL NULL NULL NULL aa-aaa-1111a aaaa NULL NULL NULL aaaaa aaaaaaaaaa aaaaa 1a1 aaaaaaaa NULL NULL NULL aaaaaaaaaa +11111111 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 1111-11-11 1111-11-11 11111111 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaa111111aaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaa 1111a1aa-1111-1111-aa11-11aa11a11111 aaaaaaaaaaaaaaaaaaaaaaaaaa 1111-11-11 aaaaaa-aaaaaaaaaaaaaaaa a1a-11111 111aaaa1-111a-1111-a1aa-111aa11aa111 aaaaaaa aaaaaaa aaaaa NULL aaaaaaaaaaaaaaa 1a1111 1a111111111111111 1a111111111111111 1a11111 aaa aaaaaaaaaaaa NULL aaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaa1111a1aa-1111-1111-aa11-11aa11a11111aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa11 aaaaaaaaaaaaaaaaa11 NULL NULL NULL NULL NULL aaaaaaa aaaaaaa 11a aaaaaaaaaa NULL NULL NULL NULL aa-aaa-1111a aaaa NULL NULL NULL aaaaa aaaaaaaaaa aaaaa 1a1 aaaaaaaa NULL NULL NULL aaaaaaaaaa +11111111 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 1111-11-11 1111-11-11 11111111 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 
aaaa111111aaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaa 1111a1aa-1111-1111-aa11-11aa11a11111 aaaaaaaaaaaaaaaaaaaaaaaaaa 1111-11-11 aaaaaa-aaaaaaaaaaaaaaaa a1a-11111 111aaaa1-111a-1111-a1aa-111aa11aa111 aaaaaaa aaaaaaa aaaaa NULL aaaaaaaaaaaaaaa 1a1111 1a111111111111111 1a111111111a-11 1a11111 aaa aaaaaaaaaaaa NULL aaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaa1111a1aa-1111-1111-aa11-11aa11a11111aaaaaaaaaaaaaaaaaa-aaa-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaaaaa NULL NULL NULL NULL NULL aaaaaaa aaaaaaa 11a aa-aaa-aaa NULL NULL NULL NULL aa-aaa-1111a aaaa NULL NULL NULL aaaaa aaaaaaaaaa aaaaa 1a1 aaaaaaaa NULL NULL NULL aaaaaaaaaa +11111111 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 1111-11-11 1111-11-11 11111111 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaa111111aaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaa 1111a1aa-1111-1111-aa11-11aa11a11111 aaaaaaaaaaaaaaaaaaaaaaaaaa 1111-11-11 aaaaaa-aaaaaaaaaaaaaaaa a1a-11111 aa1a111a-a1a1-1aa1-a1aa-1a1aa1aa11aa aaaaaaa aaaaaaa aaaaa NULL aaaaaaaaaaaaaaa 1a111111 1a111111111111111 1a111111111111111 1a1111 aaa aaaaaaaaaaaaa NULL aaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaa1111a1aa-1111-1111-aa11-11aa11a11111aaaaaaaaaaaaaaaaaa-a1a-aaa1aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1111111aaaa11a1aa1aaaa aa1111111aaaa11a1aa1aaaa NULL NULL NULL aaa-aaaaaaaa-aaaaaaaaaaaaaa-aaaaa-aaaaaa NULL aaaaaaa aaaaaaa 1aaaaaaaaa aa-a1a-aaa1 NULL NULL NULL NULL aa-aaa-1111a aaaa NULL NULL NULL aaaaa aaaaaaaaaa aaaaa 1a1 aaaaaaaa NULL NULL NULL aaaaaaaaaaaa +11111111 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 1111-11-11 1111-11-11 11111111 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaa111111aaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaa 1111a1aa-1111-1111-aa11-11aa11a11111 aaaaaaaaaaaaaaaaaaaaaaaaaa 1111-11-11 aaaaaa-aaaaaaaaaaaaaaaa a1a-11111 aa1a111a-a1a1-1aa1-a1aa-1a1aa1aa11aa aaaaaaa aaaaaaa aaaaa NULL aaaaaaaaaaaaaaa 1a111111 1a111111111111111 1a111111111111111 1a1111 aaa aaaaaaaaaaaaa NULL aaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaa1111a1aa-1111-1111-aa11-11aa11a11111aaaaaaaaaaaaaaaaaa-a1a-aaa1aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1111111aaaa11a1aa1aaaa aa1111111aaaa11a1aa1aaaa NULL NULL NULL aaa-aaaaaaaa-aaaaaaaaaaaaaa-aaaaa-aaaaaa NULL aaaaaaa aaaaaaa 1aaaaaaaaa aa-a1a-aaa1 NULL NULL NULL NULL aa-aaa-1111a aaaa NULL NULL NULL aaaaa aaaaaaaaaa aaaaa 1a1 aaaaaaaa NULL NULL NULL aaaaaaaaaaaa diff --git a/test/sql/copy/csv/test_sniff_httpfs.test b/test/sql/copy/csv/test_sniff_httpfs.test new file mode 100644 index 0000000..ea36b9b --- /dev/null +++ b/test/sql/copy/csv/test_sniff_httpfs.test @@ -0,0 +1,14 @@ +# name: test/sql/copy/csv/test_sniff_httpfs.test +# description: Test sniff_csv functions over httpfs with auto-detection on compression +# group: [csv] + +require httpfs + +statement ok +PRAGMA enable_verification + +statement ok +from sniff_csv('https://github.com/duckdb/duckdb/raw/main/data/csv/who.csv.gz'); + +statement ok +from sniff_csv('https://github.com/duckdb/duckdb/raw/main/data/csv/who.csv.gz?v=1'); diff --git a/test/sql/copy/csv/test_url_with_plus.test b/test/sql/copy/csv/test_url_with_plus.test new file mode 100644 index 0000000..cede25b --- /dev/null +++ b/test/sql/copy/csv/test_url_with_plus.test @@ -0,0 +1,11 @@ +# name: test/sql/copy/csv/test_url_with_plus.test +# description: Tests url with plus +# group: [csv] + +require httpfs + +statement ok +PRAGMA enable_verification + +statement ok +FROM read_csv('https://d37ci6vzurychx.cloudfront.net/misc/taxi+_zone_lookup.csv'); diff --git a/test/sql/copy/csv/unquoted_escape/human_eval.test 
b/test/sql/copy/csv/unquoted_escape/human_eval.test new file mode 100644 index 0000000..099dd3a --- /dev/null +++ b/test/sql/copy/csv/unquoted_escape/human_eval.test @@ -0,0 +1,86 @@ +# name: test/sql/copy/csv/unquoted_escape/human_eval.test +# description: Test the parsing of unquoted escape characters +# group: [unquoted_escape] + +# +# The data file is generated by the following workflow: +# +# duckdb -c "COPY (SELECT REPLACE(COLUMNS(*), ' ', E'\t') FROM read_ndjson_auto('https://raw.githubusercontent.com/openai/human-eval/refs/heads/master/data/HumanEval.jsonl.gz')) to 'HumanEval.csv'" +# +# docker run --rm -d --name tmp-gen-csv \ +# -e MYSQL_ROOT_PASSWORD=root \ +# -p 13316:3306 \ +# mysql:latest \ +# mysqld --secure-file-priv=/tmp +# +# mysql -h127.0.0.1 -uroot -proot -P13316 --local-infile <= 10; + +statement ok +CREATE TABLE human_eval_csv(task_id TEXT, prompt TEXT, entry_point TEXT, canonical_solution TEXT, test TEXT); + +statement ok +CREATE TABLE human_eval_tsv(task_id TEXT, prompt TEXT, entry_point TEXT, canonical_solution TEXT, test TEXT); + +loop buffer_size 10 25 + +statement ok +TRUNCATE human_eval_csv; + +statement ok +TRUNCATE human_eval_tsv; + +# replace the CRLF with LF to pass the test on Windows +statement ok +INSERT INTO human_eval_csv +SELECT replace(COLUMNS(*), E'\r\n', E'\n') +FROM read_csv('data/csv/unquoted_escape/human_eval.csv', quote = '', escape = '\', sep = ',', header = false, strict_mode = false); + +statement ok +INSERT INTO human_eval_tsv +SELECT replace(COLUMNS(*), E'\r\n', E'\n') +FROM read_csv('data/csv/unquoted_escape/human_eval.tsv', quote = '', escape = '\', sep = '\t', header = false, strict_mode = false); + +# Verify that the three copies are the same +query II +SELECT count(*), bool_and( + j.task_id = c.task_id AND j.task_id = t.task_id AND + j.prompt = c.prompt AND j.prompt = t.prompt AND + j.entry_point = c.entry_point AND j.entry_point = t.entry_point AND + j.canonical_solution = c.canonical_solution AND j.canonical_solution = t.canonical_solution AND + j.test = c.test AND j.test = t.test +)::int +FROM human_eval_jsonl j, human_eval_csv c, human_eval_tsv t +WHERE j.task_id = c.task_id AND j.task_id = t.task_id +---- +10 1 + +endloop \ No newline at end of file diff --git a/test/sql/copy/encryption/different_aes_engines.test b/test/sql/copy/encryption/different_aes_engines.test new file mode 100644 index 0000000..5d1e525 --- /dev/null +++ b/test/sql/copy/encryption/different_aes_engines.test @@ -0,0 +1,71 @@ +# name: test/sql/copy/encryption/different_aes_engines.test +# group: [encryption] + +foreach cipher GCM CTR + +statement ok +ATTACH '__TEST_DIR__/enc_test_${cipher}.db' as enc (ENCRYPTION_KEY 'asdf', ENCRYPTION_CIPHER '${cipher}'); + +statement ok +CREATE TABLE enc.test (a INTEGER, b INTEGER); + +statement ok +INSERT INTO enc.test VALUES (11, 22), (13, 22), (12, 21) + +statement ok +DETACH enc + +restart + +require httpfs + +statement ok +ATTACH '__TEST_DIR__/enc_test_${cipher}.db' as enc (ENCRYPTION_KEY 'asdf'); + + +query II +FROM enc.test +---- +11 22 +13 22 +12 21 + + +restart + +endloop + + +foreach cipher GCM CTR + +require httpfs + +statement ok +ATTACH '__TEST_DIR__/enc_test_${cipher}.db' as enc (ENCRYPTION_KEY 'asdf', ENCRYPTION_CIPHER '${cipher}'); + +statement ok +CREATE TABLE enc.test (a INTEGER, b INTEGER); + +statement ok +INSERT INTO enc.test VALUES (11, 22), (13, 22), (12, 21) + +statement ok +DETACH enc + +restart + +statement ok +ATTACH '__TEST_DIR__/enc_test_${cipher}.db' as enc (ENCRYPTION_KEY 'asdf'); + +query II 
+FROM enc.test +---- +11 22 +13 22 +12 21 + + +restart + + +endloop diff --git a/test/sql/copy/parquet/delta_byte_array_length_mismatch.test b/test/sql/copy/parquet/delta_byte_array_length_mismatch.test new file mode 100644 index 0000000..320c76f --- /dev/null +++ b/test/sql/copy/parquet/delta_byte_array_length_mismatch.test @@ -0,0 +1,10 @@ +# name: test/sql/copy/parquet/delta_byte_array_length_mismatch.test +# description: Test reading a delta +# group: [parquet] + +require parquet + +require httpfs + +statement ok +SELECT * FROM parquet_scan('https://github.com/duckdb/duckdb-data/releases/download/v1.0/delta_byte_array_length_mismatch.parquet') diff --git a/test/sql/copy/parquet/delta_byte_array_multiple_pages.test b/test/sql/copy/parquet/delta_byte_array_multiple_pages.test new file mode 100644 index 0000000..0e04ca5 --- /dev/null +++ b/test/sql/copy/parquet/delta_byte_array_multiple_pages.test @@ -0,0 +1,23 @@ +# name: test/sql/copy/parquet/delta_byte_array_multiple_pages.test +# description: Test delta byte array parquet file with multiple pages +# group: [parquet] + +require parquet + +require httpfs + +statement ok +CREATE TABLE delta_byte_array AS SELECT * FROM parquet_scan('https://github.com/duckdb/duckdb-data/releases/download/v1.0/delta_byte_array_multiple_pages.parquet') + +query I +SELECT COUNT(*) FROM delta_byte_array +---- +100000 + +query II +SELECT min(strlen(json_column)), max(strlen(json_column)) FROM delta_byte_array +---- +54 54 + + + diff --git a/test/sql/copy/parquet/parquet_2102.test_slow b/test/sql/copy/parquet/parquet_2102.test_slow new file mode 100644 index 0000000..5abce39 --- /dev/null +++ b/test/sql/copy/parquet/parquet_2102.test_slow @@ -0,0 +1,69 @@ +# name: test/sql/copy/parquet/parquet_2102.test_slow +# description: Missing Column Data After Adding Left Join To Query in DuckDB Version 0.2.8 +# group: [parquet] + +require parquet + +require httpfs + +statement ok +CREATE TABLE view_one AS SELECT * FROM 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/issue2102_one.parquet'; + +statement ok +CREATE TABLE view_two AS SELECT * FROM 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/issue2102_two.parquet'; + +query I +SELECT COUNT(*) FROM view_one WHERE date IS NULL +---- +6219 + +statement ok +CREATE TABLE tbl1 AS SELECT one.id id, one.date date +FROM + view_one AS one +JOIN + view_two two ON two.id = one.id AND two.line = 1; + +query I +SELECT COUNT(*) FROM tbl1 +---- +691951 + +query I +SELECT COUNT(*) FROM tbl1 WHERE date IS NULL +---- +4742 + +statement ok +CREATE TABLE tbl2 AS SELECT one.id id, one.date date +FROM + view_one AS one +LEFT JOIN + view_two two ON two.id = one.id AND two.line = 1; + +query I +SELECT COUNT(*) FROM tbl2 +---- +695434 + +query I +SELECT COUNT(*) FROM tbl2 WHERE date IS NULL +---- +6219 + +statement ok +CREATE TABLE tbl3 AS SELECT one.id id, one.date date +FROM + view_one AS one +LEFT JOIN + view_two two ON two.id = one.id; + +query I +SELECT COUNT(*) FROM tbl3 +---- +768666 + +query I +SELECT COUNT(*) FROM tbl3 WHERE date IS NULL +---- +7124 diff --git a/test/sql/copy/parquet/parquet_5968.test b/test/sql/copy/parquet/parquet_5968.test new file mode 100644 index 0000000..ed6f625 --- /dev/null +++ b/test/sql/copy/parquet/parquet_5968.test @@ -0,0 +1,24 @@ +# name: test/sql/copy/parquet/parquet_5968.test +# description: Issue #5968: Segmentation fault on reading parquet file +# group: [parquet] + +require parquet + +require httpfs + +statement ok +CREATE TABLE issue_5968 AS FROM 
'https://github.com/duckdb/duckdb-data/releases/download/v1.0/issue_5968.parquet'; + +query I +SELECT COUNT(*) FROM issue_5968 +---- +2028587 + +query I +SELECT * FROM issue_5968 LIMIT 5 +---- +B00001 +B00001 +B00009 +B00009 +B00009 diff --git a/test/sql/copy/parquet/parquet_boolean_page.test_slow b/test/sql/copy/parquet/parquet_boolean_page.test_slow new file mode 100644 index 0000000..ca305df --- /dev/null +++ b/test/sql/copy/parquet/parquet_boolean_page.test_slow @@ -0,0 +1,21 @@ +# name: test/sql/copy/parquet/parquet_boolean_page.test_slow +# description: Test that boolean values that cross column pages are correctly read +# group: [parquet] + +require parquet + +require httpfs + +statement ok +PRAGMA enable_verification + +query IIIII +SELECT + SUM(CASE WHEN is_successful THEN 1 ELSE 0 END), + SUM(CASE WHEN advanced_on_error_flag THEN 1 ELSE 0 END), + SUM(CASE WHEN safe_on_error_flag THEN 1 ELSE 0 END), + SUM(CASE WHEN rbi_flag THEN 1 ELSE 0 END), + SUM(CASE WHEN team_unearned_flag THEN 1 ELSE 0 END) +FROM read_parquet('https://github.com/duckdb/duckdb-data/releases/download/v1.0/event_baserunning_advance_attempt.parquet'); +---- +9252616 111041 7120 1609612 1860 diff --git a/test/sql/copy/parquet/parquet_encrypted_tpch_httpfs.test_slow b/test/sql/copy/parquet/parquet_encrypted_tpch_httpfs.test_slow new file mode 100644 index 0000000..d4697a4 --- /dev/null +++ b/test/sql/copy/parquet/parquet_encrypted_tpch_httpfs.test_slow @@ -0,0 +1,94 @@ +# name: test/sql/copy/parquet/parquet_encrypted_tpch_httpfs.test_slow +# description: Test Parquet encryption with OpenSSL for TPC-H +# group: [parquet] + +require parquet + +require httpfs + +require tpch + +statement ok +CALL dbgen(sf=1) + +statement ok +PRAGMA add_parquet_key('key128', '0123456789112345') + +statement ok +EXPORT DATABASE '__TEST_DIR__/tpch_encrypted' (FORMAT 'parquet', ENCRYPTION_CONFIG {footer_key: 'key128'}) + +load :memory: + +# re-add key upon loading the DB again +statement ok +PRAGMA add_parquet_key('key128', '0123456789112345') + +statement ok +IMPORT DATABASE '__TEST_DIR__/tpch_encrypted' + +loop i 1 9 + +query I +PRAGMA tpch(${i}) +---- +:extension/tpch/dbgen/answers/sf1/q0${i}.csv + +endloop + +loop i 10 23 + +query I +PRAGMA tpch(${i}) +---- +:extension/tpch/dbgen/answers/sf1/q${i}.csv + +endloop + +# now again without importing the DB, just with views, so we can test projection/filter pushdown +load :memory: + +# re-add key upon loading the DB again +statement ok +PRAGMA add_parquet_key('key128', '0123456789112345') + +statement ok +CREATE VIEW lineitem AS SELECT * FROM read_parquet('__TEST_DIR__/tpch_encrypted/lineitem.parquet', encryption_config={footer_key: 'key128'}); + +statement ok +CREATE VIEW orders AS SELECT * FROM read_parquet('__TEST_DIR__/tpch_encrypted/orders.parquet', encryption_config={footer_key: 'key128'}); + +statement ok +CREATE VIEW partsupp AS SELECT * FROM read_parquet('__TEST_DIR__/tpch_encrypted/partsupp.parquet', encryption_config={footer_key: 'key128'}); + +statement ok +CREATE VIEW part AS SELECT * FROM read_parquet('__TEST_DIR__/tpch_encrypted/part.parquet', encryption_config={footer_key: 'key128'}); + +statement ok +CREATE VIEW customer AS SELECT * FROM read_parquet('__TEST_DIR__/tpch_encrypted/customer.parquet', encryption_config={footer_key: 'key128'}); + +statement ok +CREATE VIEW supplier AS SELECT * FROM read_parquet('__TEST_DIR__/tpch_encrypted/supplier.parquet', encryption_config={footer_key: 'key128'}); + +statement ok +CREATE VIEW nation AS SELECT * FROM 
read_parquet('__TEST_DIR__/tpch_encrypted/nation.parquet', encryption_config={footer_key: 'key128'}); + +statement ok +CREATE VIEW region AS SELECT * FROM read_parquet('__TEST_DIR__/tpch_encrypted/region.parquet', encryption_config={footer_key: 'key128'}); + +loop i 1 9 + +query I +PRAGMA tpch(${i}) +---- +:extension/tpch/dbgen/answers/sf1/q0${i}.csv + +endloop + +loop i 10 23 + +query I +PRAGMA tpch(${i}) +---- +:extension/tpch/dbgen/answers/sf1/q${i}.csv + +endloop \ No newline at end of file diff --git a/test/sql/copy/parquet/parquet_encryption_httpfs.test b/test/sql/copy/parquet/parquet_encryption_httpfs.test new file mode 100644 index 0000000..370d2a9 --- /dev/null +++ b/test/sql/copy/parquet/parquet_encryption_httpfs.test @@ -0,0 +1,66 @@ +# name: test/sql/copy/parquet/parquet_encryption_httpfs.test +# description: Test Parquet encryption with OpenSSL +# group: [parquet] + +require parquet + +require httpfs + +# parquet keys are not persisted across restarts +statement ok +PRAGMA enable_verification + +# add keys of 3 different lengths +statement ok +PRAGMA add_parquet_key('key128', '0123456789112345') + +statement ok +PRAGMA add_parquet_key('key192', '012345678911234501234567') + +statement ok +PRAGMA add_parquet_key('key256', '01234567891123450123456789112345') + +# test all valid AES key lengths +foreach key_len 128 192 256 + +statement ok +COPY (SELECT 42 i) to '__TEST_DIR__/encrypted${key_len}_openssl.parquet' (ENCRYPTION_CONFIG {footer_key: 'key${key_len}'}) + +query I +SELECT * FROM read_parquet('__TEST_DIR__/encrypted${key_len}_openssl.parquet', encryption_config={footer_key: 'key${key_len}'}) +---- +42 + +statement ok +CREATE OR REPLACE TABLE test (i INTEGER) + +statement ok +COPY test FROM '__TEST_DIR__/encrypted${key_len}_openssl.parquet' (ENCRYPTION_CONFIG {footer_key: 'key${key_len}'}) + +query I +SELECT * FROM test +---- +42 + +endloop + +# what happens if we don't try to decrypt even if the file is encrypted? +statement error +SELECT * FROM read_parquet('__TEST_DIR__/encrypted128_openssl.parquet') +---- +Invalid Input Error + +# what if we try to decrypt with the wrong key? +statement error +SELECT * FROM read_parquet('__TEST_DIR__/encrypted128_openssl.parquet', encryption_config={footer_key: 'key192'}) +---- +Invalid Input Error: Computed AES tag differs from read AES tag, are you using the right key? + +# what if we don't encrypt, but try to decrypt? 
+statement ok +COPY (SELECT 42 i) to '__TEST_DIR__/unencrypted.parquet' + +statement error +SELECT * FROM read_parquet('__TEST_DIR__/unencrypted.parquet', encryption_config={footer_key: 'key256'}) +---- +Invalid Input Error diff --git a/test/sql/copy/parquet/parquet_encryption_mbedtls_openssl.test b/test/sql/copy/parquet/parquet_encryption_mbedtls_openssl.test new file mode 100644 index 0000000..cc493fb --- /dev/null +++ b/test/sql/copy/parquet/parquet_encryption_mbedtls_openssl.test @@ -0,0 +1,52 @@ +# name: test/sql/copy/parquet/parquet_encryption_mbedtls_openssl.test +# description: Test Parquet encryption with OpenSSL +# group: [parquet] + +require parquet + +require httpfs + +# parquet keys are not persisted across restarts +statement ok +PRAGMA enable_verification + +# add keys of 3 different lengths +statement ok +PRAGMA add_parquet_key('key128', '0123456789112345') + +statement ok +PRAGMA add_parquet_key('key192', '012345678911234501234567') + +statement ok +PRAGMA add_parquet_key('key256', '01234567891123450123456789112345') + +# test all valid AES key lengths +foreach key_len 128 192 256 + +# write files with OpenSSL enabled +statement error +COPY (SELECT 42 i) to '__TEST_DIR__/encrypted${key_len}_openssl.parquet' (ENCRYPTION_CONFIG {footer_key: 'key${key_len}'}, DEBUG_USE_OPENSSL randomval) +---- +BOOL + +# write files with OpenSSL enabled +statement ok +COPY (SELECT 42 i) to '__TEST_DIR__/encrypted${key_len}_openssl.parquet' (ENCRYPTION_CONFIG {footer_key: 'key${key_len}'}, DEBUG_USE_OPENSSL true) + +# read OpenSSL encrypted files by using mbedtls +query I +SELECT * FROM read_parquet('__TEST_DIR__/encrypted${key_len}_openssl.parquet', encryption_config={footer_key: 'key${key_len}'}, debug_use_openssl=false) +---- +42 + +# write files with default mbedtls +statement ok +COPY (SELECT 42 i) to '__TEST_DIR__/encrypted${key_len}_mbedtls.parquet' (ENCRYPTION_CONFIG {footer_key: 'key${key_len}'}, DEBUG_USE_OPENSSL false) + +# read mbedtls encrypted files using OpenSSL +query I +SELECT * FROM read_parquet('__TEST_DIR__/encrypted${key_len}_mbedtls.parquet', encryption_config={footer_key: 'key${key_len}'}, debug_use_openssl=true) +---- +42 + +endloop diff --git a/test/sql/copy/parquet/parquet_glob_s3.test b/test/sql/copy/parquet/parquet_glob_s3.test new file mode 100644 index 0000000..ea5df0c --- /dev/null +++ b/test/sql/copy/parquet/parquet_glob_s3.test @@ -0,0 +1,186 @@ +# name: test/sql/copy/parquet/parquet_glob_s3.test +# description: Test basic globbing of parquet files over s3 +# group: [parquet] + +require parquet + +require httpfs + +require-env S3_TEST_SERVER_AVAILABLE 1 + +# Require that these environment variables are also set + +require-env AWS_DEFAULT_REGION + +require-env AWS_ACCESS_KEY_ID + +require-env AWS_SECRET_ACCESS_KEY + +require-env DUCKDB_S3_ENDPOINT + +require-env DUCKDB_S3_USE_SSL + +# override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues +set ignore_error_messages + +# this test was written before we implemented the external file cache +# when it is enabled, the request counts are different +# we disable it so this test still makes sense +statement ok +set enable_external_file_cache=false; + +# Copy files to S3 before beginning tests +statement ok +COPY (select * from 'data/parquet-testing/glob/t1.parquet') to 's3://test-bucket/parquet_glob_s3/glob/t1.parquet'; +COPY (select * from 'data/parquet-testing/glob/t2.parquet') to 's3://test-bucket/parquet_glob_s3/glob/t2.parquet'; +COPY (select * from 
'data/parquet-testing/glob2/t1.parquet') to 's3://test-bucket/parquet_glob_s3/glob2/t1.parquet'; +COPY (select * from 'data/parquet-testing/glob/t1.parquet') to 's3://test-bucket/parquet_glob_s3/with+plus/t1.parquet'; +COPY (select * from 'data/parquet-testing/glob/t1.parquet') to 's3://test-bucket/parquet_glob_s3/with space/t1.parquet'; + +# parquet glob with COPY FROM +statement ok +CREATE TABLE vals (i INTEGER, j BLOB) + +statement ok +COPY vals FROM 's3://test-bucket/parquet_glob_s3/glob/t[0-9].parquet' (FORMAT PARQUET); + +query II +SELECT * FROM vals ORDER BY 1, 2 +---- +1 a +2 b + +# failed to copy: incorrect types found in parquet file +statement ok +CREATE TABLE vals2 (i INTEGER, j INTEGER) + +statement error +COPY vals2 FROM 's3://test-bucket/parquet_glob_s3/nonexistentfolderblablabla/t*.parquet' (FORMAT PARQUET); +---- + +# Test variety of urls with both url styles +foreach urlstyle path vhost + +statement ok +SET s3_url_style='${urlstyle}' + +# Begin tests +query I +select count(*) from parquet_scan('s3://test-bucket/parquet_glob_s3/glob/t[0-9].parquet') +---- +2 + +query I +select count(*) from parquet_scan('s3://test-bucket/parquet_glob_s3/glob/*') +---- +2 + +query I +select count(*) from parquet_scan('s3://test-bucket/parquet_glob_s3/glob/*.parquet') +---- +2 + +query I +select count(*) from parquet_scan('s3://test-bucket/parquet_glob_s3/g*/*.parquet') +---- +3 + +query I +select count(*) from parquet_scan('s3://test-bucket/parquet_glob_s3/g*/t1.parquet') +---- +2 + +query I +select count(*) from parquet_scan('s3://test-bucket/parquet_glob_s3/with*/*.parquet') +---- +2 + +# schema mismatch in parquet glob +statement error +select count(*) from parquet_scan('s3://test-bucket/parquet_glob_s3/notglob/*.parquet') +---- + +# parallel testing +statement ok +PRAGMA threads=4 + +query I +select count(*) from parquet_scan('s3://test-bucket/parquet_glob_s3/glob/*') +---- +2 + +query I +select count(*) from parquet_scan('s3://test-bucket/parquet_glob_s3/glob/*.parquet') +---- +2 + +query I +select count(*) from parquet_scan('s3://test-bucket/parquet_glob_s3/g*/*.parquet') +---- +3 + +query I +select count(*) from parquet_scan('s3://test-bucket/parquet_glob_s3/g*/t1.parquet') +---- +2 + +# Question mark is not supported for S3 due to our use of query parameters +statement error +select count(*) from parquet_scan('s3://test-bucket/parquet_glob_s3/glob/t?.parquet') +---- +Invalid query parameters found. + +statement error +select count(*) from parquet_scan('s3://test-bucket/parquet_glob_s3/?lob/t?.parquet') +---- +Invalid query parameters found. + +# Finally, enabling url compatibility mode will disable globs allowing a user to query files with special chars +statement ok +SET s3_url_compatibility_mode=true; + +# Note that this is actually a file called '?.*[1-0]parquet??' which S3 should theoretically accept; +statement ok +COPY vals TO 's3://test-bucket/the_horror/?.*[1-0]parquetta??' 
(FORMAT parquet); + +query I +select count(*) from parquet_scan('s3://test-bucket/the_horror/?.*[1-0]parquetta??'); +---- +2 + +statement ok +SET s3_url_compatibility_mode=false; + +endloop + +# S3 glob gives us information necessary to skip HEAD requests +query II +EXPLAIN ANALYZE SELECT COUNT(*) FROM 's3://test-bucket/parquet_glob_s3/g*/*.parquet'; +---- +analyzed_plan :.*HTTP Stats.*\#HEAD\: 0.*GET\: 3.*PUT\: 0.*\#POST\: 0.* + +statement ok +SET VARIABLE file_list = (SELECT LIST(file) FROM GLOB('s3://test-bucket/parquet_glob_s3/g*/*.parquet')) + +# sanity check for request count +# we expect 1 HEAD request per file for the open call, then 1 GET for the list call +# then for each file 1 for the metadata offset and 1 for the metadata +query II +EXPLAIN ANALYZE SELECT COUNT(*) FROM read_parquet(getvariable('file_list')); +---- +analyzed_plan :.*HTTP Stats.*\#HEAD\: 3.*GET\: 3.*PUT\: 0.*\#POST\: 0.* + +statement ok +SET enable_http_metadata_cache=true; + +# metadata cache was just enabled, its still cold +query II +EXPLAIN ANALYZE SELECT COUNT(*) FROM read_parquet(getvariable('file_list')); +---- +analyzed_plan :.*HTTP Stats.*\#HEAD\: 3.*GET\: 3.*PUT\: 0.*\#POST\: 0.* + +# now head request count should be 0 +query II +EXPLAIN ANALYZE SELECT COUNT(*) FROM read_parquet(getvariable('file_list')); +---- +analyzed_plan :.*HTTP Stats.*\#HEAD\: 0.*GET\: 3.*PUT\: 0.*\#POST\: 0.* diff --git a/test/sql/copy/parquet/parquet_http_prefetch.test b/test/sql/copy/parquet/parquet_http_prefetch.test new file mode 100644 index 0000000..09303e8 --- /dev/null +++ b/test/sql/copy/parquet/parquet_http_prefetch.test @@ -0,0 +1,41 @@ +# name: test/sql/copy/parquet/parquet_http_prefetch.test +# description: This test triggers the http prefetch mechanism. +# group: [parquet] + +require parquet + +require httpfs + +require-env S3_TEST_SERVER_AVAILABLE 1 + +# Require that these environment variables are also set + +require-env AWS_DEFAULT_REGION + +require-env AWS_ACCESS_KEY_ID + +require-env AWS_SECRET_ACCESS_KEY + +require-env DUCKDB_S3_ENDPOINT + +require-env DUCKDB_S3_USE_SSL + +# override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues +set ignore_error_messages + +statement ok +CREATE TABLE test_fetch_delay (a INT, b INT); + +statement ok +INSERT INTO test_fetch_delay (SELECT (i%2) * 2, (i%2) * 2 from range(0,2500000) as tbl(i)); + +statement ok +COPY test_fetch_delay to 's3://test-bucket/skip_delay.parquet'; + +statement ok +CREATE TABLE test as SELECT * from 's3://test-bucket/skip_delay.parquet' where a = 1; + +query I +SELECT COUNT(*) FROM test; +---- +0 diff --git a/test/sql/copy/parquet/snowflake_lineitem.test b/test/sql/copy/parquet/snowflake_lineitem.test new file mode 100644 index 0000000..b22b9ad --- /dev/null +++ b/test/sql/copy/parquet/snowflake_lineitem.test @@ -0,0 +1,10 @@ +# name: test/sql/copy/parquet/snowflake_lineitem.test +# description: Test parquet file exported from snowflake +# group: [parquet] + +require parquet + +require httpfs + +statement ok +CREATE TABLE snowflake_lineitem AS FROM 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/snowflake_lineitem_export.parquet' diff --git a/test/sql/copy/parquet/test_parquet_force_download.test b/test/sql/copy/parquet/test_parquet_force_download.test new file mode 100644 index 0000000..87bc984 --- /dev/null +++ b/test/sql/copy/parquet/test_parquet_force_download.test @@ -0,0 +1,139 @@ +# name: test/sql/copy/parquet/test_parquet_force_download.test +# description: Test Force 
download +# group: [parquet] + +require parquet + +require httpfs + +require tpch + +statement ok +SET force_download=true; + +# we query the same file multiple times, so we have to disable the cache to verify the GET request count +statement ok +set enable_external_file_cache=false; + +query II +explain analyze SELECT id, first_name, last_name, email FROM PARQUET_SCAN('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/userdata1.parquet') +---- +analyzed_plan :.*GET: 1.* + +query I +SELECT count(*) FROM PARQUET_SCAN('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/userdata1.parquet') +---- +1000 + +statement ok +SET force_download=false; + +query II +explain analyze SELECT id, first_name, last_name, email FROM PARQUET_SCAN('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/userdata1.parquet') +---- +analyzed_plan :.*GET: 2.* + +query I +SELECT count(*) FROM PARQUET_SCAN('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/userdata1.parquet') +---- +1000 + +require-env S3_TEST_SERVER_AVAILABLE 1 + +# Require that these environment variables are also set + +require-env AWS_DEFAULT_REGION + +require-env AWS_ACCESS_KEY_ID + +require-env AWS_SECRET_ACCESS_KEY + +require-env DUCKDB_S3_ENDPOINT + +require-env DUCKDB_S3_USE_SSL + +statement ok +SET force_download=true; + +statement ok +SET threads=1 + +statement ok +create table user_info as SELECT * FROM PARQUET_SCAN('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/userdata1.parquet') +UNION ALL SELECT * FROM PARQUET_SCAN('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/userdata1.parquet') +UNION ALL SELECT * FROM PARQUET_SCAN('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/userdata1.parquet') +UNION ALL SELECT * FROM PARQUET_SCAN('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/userdata1.parquet') +UNION ALL SELECT * FROM PARQUET_SCAN('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/userdata1.parquet') +UNION ALL SELECT * FROM PARQUET_SCAN('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/userdata1.parquet') +UNION ALL SELECT * FROM PARQUET_SCAN('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/userdata1.parquet') +UNION ALL SELECT * FROM PARQUET_SCAN('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/userdata1.parquet') +UNION ALL SELECT * FROM PARQUET_SCAN('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/userdata1.parquet') + +# ROW_GROUP_SIZE for these tests used to be 2048, but because the parquet writer checked '>' instead of '>=' +# the row group sizes were actually two chunks, so 4096. 
This has since been fixed, and this needed to be updated +statement ok +COPY (from user_info) TO 's3://test-bucket/row-user-data.parquet' (FORMAT PARQUET, ROW_GROUP_SIZE 4096); + +statement ok +COPY (from user_info limit 100) TO 's3://test-bucket/row-user-data_1.parquet' (FORMAT PARQUET, ROW_GROUP_SIZE 4096); + +statement ok +PRAGMA threads=10 + +statement ok +PRAGMA verify_parallelism + +query II +explain analyze SELECT id, first_name, last_name, email FROM PARQUET_SCAN('s3://test-bucket/row-user-data.parquet') +---- +analyzed_plan :.*GET: 1.* + + +query I +SELECT count(*) FROM PARQUET_SCAN('s3://test-bucket/row-user-data.parquet') +---- +9000 + +query I +SELECT count(*) FROM PARQUET_SCAN('s3://test-bucket/row-user-data_1.parquet') +---- +100 + +statement ok +SET force_download=false; + +query I +SELECT count(*) FROM PARQUET_SCAN('s3://test-bucket/row-user-data.parquet') +---- +9000 + +query II +explain analyze SELECT id, first_name, last_name, email FROM PARQUET_SCAN('s3://test-bucket/row-user-data.parquet') +---- +analyzed_plan :.*GET: 4.* + +statement ok +SET force_download=true; + +query I +SELECT count(*) FROM (SELECT * FROM PARQUET_SCAN('s3://test-bucket/row-user-data.parquet') union all select * from PARQUET_SCAN('s3://test-bucket/row-user-data_1.parquet')) as t +---- +9100 + +query II +explain analyze SELECT id, first_name, last_name, email FROM PARQUET_SCAN('s3://test-bucket/row-user-data.parquet') union all select id, first_name, last_name, email from PARQUET_SCAN('s3://test-bucket/row-user-data_1.parquet') +---- +analyzed_plan :.*GET: 2.* + +statement ok +SET force_download=false; + +query I +SELECT count(*) FROM (SELECT * FROM PARQUET_SCAN('s3://test-bucket/row-user-data.parquet') union all select * from PARQUET_SCAN('s3://test-bucket/row-user-data_1.parquet')) as t +---- +9100 + +query II +explain analyze SELECT id, first_name, last_name, email FROM PARQUET_SCAN('s3://test-bucket/row-user-data.parquet') union all select id, first_name, last_name, email from PARQUET_SCAN('s3://test-bucket/row-user-data_1.parquet') +---- +analyzed_plan :.*GET: 6.* diff --git a/test/sql/copy/parquet/test_parquet_remote.test b/test/sql/copy/parquet/test_parquet_remote.test new file mode 100644 index 0000000..d1c82e0 --- /dev/null +++ b/test/sql/copy/parquet/test_parquet_remote.test @@ -0,0 +1,83 @@ +# name: test/sql/copy/parquet/test_parquet_remote.test +# description: Parquet read from S3/HTTPS +# group: [parquet] + +require httpfs + +require parquet + +# non existent host +statement error +SELECT * FROM PARQUET_SCAN('https://this-host-does-not-exist-for-sure/test.parquet'); +---- + +# non existent file +statement error +SELECT * FROM PARQUET_SCAN('https://duckdb.org/test.parquet'); +---- + +# missing path +statement error +SELECT * FROM PARQUET_SCAN('https://duckdb.org'); +---- + +# empty path +statement error +SELECT * FROM PARQUET_SCAN('https://duckdb.org/'); +---- + +# straightforward +query IIII +SELECT id, first_name, last_name, email FROM PARQUET_SCAN('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/userdata1.parquet') LIMIT 10; +---- +1 Amanda Jordan ajordan0@com.com +2 Albert Freeman afreeman1@is.gd +3 Evelyn Morgan emorgan2@altervista.org +4 Denise Riley driley3@gmpg.org +5 Carlos Burns cburns4@miitbeian.gov.cn +6 Kathryn White kwhite5@google.com +7 Samuel Holmes sholmes6@foxnews.com +8 Harry Howell hhowell7@eepurl.com +9 Jose Foster jfoster8@yelp.com +10 Emily Stewart estewart9@opensource.org + + +# with redirects +query IIII +SELECT id, first_name, 
last_name, email FROM PARQUET_SCAN('https://github.com/duckdb/duckdb/blob/main/data/parquet-testing/userdata1.parquet?raw=true') LIMIT 10; +---- +1 Amanda Jordan ajordan0@com.com +2 Albert Freeman afreeman1@is.gd +3 Evelyn Morgan emorgan2@altervista.org +4 Denise Riley driley3@gmpg.org +5 Carlos Burns cburns4@miitbeian.gov.cn +6 Kathryn White kwhite5@google.com +7 Samuel Holmes sholmes6@foxnews.com +8 Harry Howell hhowell7@eepurl.com +9 Jose Foster jfoster8@yelp.com +10 Emily Stewart estewart9@opensource.org + +# with explicit port nr +query IIII +SELECT id, first_name, last_name, email FROM PARQUET_SCAN('https://github.com:443/duckdb/duckdb/blob/main/data/parquet-testing/userdata1.parquet?raw=true') LIMIT 10; +---- +1 Amanda Jordan ajordan0@com.com +2 Albert Freeman afreeman1@is.gd +3 Evelyn Morgan emorgan2@altervista.org +4 Denise Riley driley3@gmpg.org +5 Carlos Burns cburns4@miitbeian.gov.cn +6 Kathryn White kwhite5@google.com +7 Samuel Holmes sholmes6@foxnews.com +8 Harry Howell hhowell7@eepurl.com +9 Jose Foster jfoster8@yelp.com +10 Emily Stewart estewart9@opensource.org + +query IIII +SELECT id, first_name, last_name, email FROM PARQUET_SCAN('https://github.com/duckdb/duckdb-data/releases/download/v1.0/us+er+da+ta.parquet') LIMIT 1; +---- +1 Amanda Jordan ajordan0@com.com + +query IIII +SELECT id, first_name, last_name, email FROM PARQUET_SCAN('https://github.com/duckdb/duckdb-data/releases/download/v1.0/us%2Ber%2Bda%2Bta.parquet') LIMIT 1; +---- +1 Amanda Jordan ajordan0@com.com diff --git a/test/sql/copy/parquet/test_parquet_remote_foreign_files.test b/test/sql/copy/parquet/test_parquet_remote_foreign_files.test new file mode 100644 index 0000000..0a210b4 --- /dev/null +++ b/test/sql/copy/parquet/test_parquet_remote_foreign_files.test @@ -0,0 +1,88 @@ +# name: test/sql/copy/parquet/test_parquet_remote_foreign_files.test +# description: Test queries on tricky parquet files over http. 
Note: on GH connection issues, these tests fail silently +# group: [parquet] + +require parquet + +require httpfs + +# /data/parquet-testing/bug1554.parquet +query I +SELECT COUNT(backlink_count) FROM parquet_scan('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/bug1554.parquet') WHERE http_status_code=200 +---- +0 + +query II +SELECT http_status_code, COUNT(backlink_count) FROM parquet_scan('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/bug1554.parquet') GROUP BY http_status_code ORDER BY http_status_code +---- +200 0 +301 0 + +# /data/parquet-testing/bug1588.parquet + +query I +SELECT has_image_link FROM parquet_scan('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/bug1588.parquet') where has_image_link = 1 +---- +1 +1 +1 + +# /data/parquet-testing/bug1589.parquet +query I +SELECT backlink_count FROM parquet_scan('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/bug1589.parquet') LIMIT 1 +---- +NULL + +statement ok +SELECT * FROM parquet_scan('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/bug1589.parquet') + + +query I +SELECT "inner"['str_field'] FROM parquet_scan('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/bug1618_struct_strings.parquet') +---- +hello +NULL + +query I +SELECT "inner"['f64_field'] FROM parquet_scan('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/bug1618_struct_strings.parquet') +---- +NULL +1.23 + +query I +SELECT "inner" FROM parquet_scan('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/bug1618_struct_strings.parquet') +---- +{'str_field': hello, 'f64_field': NULL} +{'str_field': NULL, 'f64_field': 1.23} + +# /data/parquet-testing/struct.parquet +query I +select "inner"['f64_field'] from parquet_scan('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/struct.parquet'); +---- +NULL +1.23 + +# /data/parquet-testing/bug2267.parquet +query I +SELECT * FROM parquet_scan('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/bug2267.parquet') +---- +[{'disabledPlans': [bea4c11e-220a-4e6d-8eb8-8ea15d019f90], 'skuId': c7df2760-2c81-4ef7-b578-5b5392b571df}, {'disabledPlans': [8a256a2b-b617-496d-b51b-e76466e88db0, 41781fb2-bc02-4b7c-bd55-b576c07bb09d, eec0eb4f-6444-4f95-aba0-50c24d67f998], 'skuId': 84a661c4-e949-4bd2-a560-ed7766fcaf2b}, {'disabledPlans': [], 'skuId': b05e124f-c7cc-45a0-a6aa-8cf78c946968}, {'disabledPlans': [], 'skuId': f30db892-07e9-47e9-837c-80727f46fd3d}] + +query I +SELECT assignedLicenses[1] FROM parquet_scan('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/bug2267.parquet') +---- +{'disabledPlans': [bea4c11e-220a-4e6d-8eb8-8ea15d019f90], 'skuId': c7df2760-2c81-4ef7-b578-5b5392b571df} + +# multiple files +query II +select * from parquet_scan(['https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/glob/t1.parquet', 'https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/glob/t2.parquet']) +---- +1 a +2 b + +# Malformed parquet to test fallback from prefetch +query IIII +select * from parquet_scan('https://raw.githubusercontent.com/duckdb/duckdb/main/data/parquet-testing/arrow/nation.dict-malformed.parquet') limit 2; +---- +0 ALGERIA 0 haggle. carefully final deposits detect slyly agai +1 ARGENTINA 1 al foxes promise slyly according to the regular accounts. 
bold requests alon diff --git a/test/sql/copy/parquet/test_yellow_cab.test_slow b/test/sql/copy/parquet/test_yellow_cab.test_slow new file mode 100644 index 0000000..83ec356 --- /dev/null +++ b/test/sql/copy/parquet/test_yellow_cab.test_slow @@ -0,0 +1,34 @@ +# name: test/sql/copy/parquet/test_yellow_cab.test_slow +# description: Test yellow cab parquet file +# group: [parquet] + +require parquet + +require httpfs + +statement ok +CREATE TABLE yellow_cab AS SELECT * FROM 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/yellowcab.parquet' + +statement ok +PRAGMA enable_verification + +query IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +select min(VendorID::VARCHAR), max(VendorID::VARCHAR), min(tpep_pickup_datetime::VARCHAR), max(tpep_pickup_datetime::VARCHAR), min(tpep_dropoff_datetime::VARCHAR), max(tpep_dropoff_datetime::VARCHAR), min(passenger_count::VARCHAR), max(passenger_count::VARCHAR), min(trip_distance::VARCHAR), max(trip_distance::VARCHAR), min(pickup_longitude::VARCHAR), max(pickup_longitude::VARCHAR), min(pickup_latitude::VARCHAR), max(pickup_latitude::VARCHAR), min(RatecodeID::VARCHAR), max(RatecodeID::VARCHAR), min(store_and_fwd_flag::VARCHAR), max(store_and_fwd_flag::VARCHAR), min(dropoff_longitude::VARCHAR), max(dropoff_longitude::VARCHAR), min(dropoff_latitude::VARCHAR), max(dropoff_latitude::VARCHAR), min(payment_type::VARCHAR), max(payment_type::VARCHAR), min(fare_amount::VARCHAR), max(fare_amount::VARCHAR), min(extra::VARCHAR), max(extra::VARCHAR), min(mta_tax::VARCHAR), max(mta_tax::VARCHAR), min(tip_amount::VARCHAR), max(tip_amount::VARCHAR), min(tolls_amount::VARCHAR), max(tolls_amount::VARCHAR), min(improvement_surcharge::VARCHAR), max(improvement_surcharge::VARCHAR), min(total_amount::VARCHAR), max(total_amount::VARCHAR) from yellow_cab; +---- +1 2 2016-01-01 00:00:00 2016-01-29 12:08:57 2016-01-01 00:00:00 2016-01-30 12:05:11 0 8 .00 97.40 -0.13990700244903564 0 0 57.269275665283203 1 99 (empty) Y -73.210006713867188 0 0 41.317001342773437 1 4 -10 998 -0.5 2.0 -0.5 0.5 0 998.14 -10.5 9.75 -0.3 0.3 -10.8 998.3 + + +# writer round-trip +statement ok +COPY yellow_cab TO '__TEST_DIR__/yellowcab.parquet' (FORMAT PARQUET); + +query IIIIIIIIIIIIIIIIIII nosort yellowcab +SELECT * FROM yellow_cab +---- + +query IIIIIIIIIIIIIIIIIII nosort yellowcab +SELECT * FROM '__TEST_DIR__/yellowcab.parquet' +---- + + + diff --git a/test/sql/copy/s3/download_config.test b/test/sql/copy/s3/download_config.test new file mode 100644 index 0000000..7ff2c79 --- /dev/null +++ b/test/sql/copy/s3/download_config.test @@ -0,0 +1,128 @@ +# name: test/sql/copy/s3/download_config.test +# description: Test S3 configuration +# group: [s3] + +require parquet + +require httpfs + +require-env S3_TEST_SERVER_AVAILABLE 1 + +## Require that these environment variables are also set +require-env AWS_DEFAULT_REGION + +require-env AWS_ACCESS_KEY_ID + +require-env AWS_SECRET_ACCESS_KEY + +require-env DUCKDB_S3_ENDPOINT + +require-env DUCKDB_S3_USE_SSL + +# override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues +set ignore_error_messages + +statement ok +CREATE TABLE test as SELECT * FROM range(0,10) tbl(i); + +foreach url_style path vhost +# Have to set these because they get altered during the loop +statement ok +SET s3_secret_access_key='${AWS_SECRET_ACCESS_KEY}'; + +statement ok +SET s3_access_key_id='${AWS_ACCESS_KEY_ID}'; + +statement ok +SET s3_endpoint='${DUCKDB_S3_ENDPOINT}'; + +statement ok +SET http_retries=2; + +statement ok +SET 
http_retry_wait_ms=10; + +statement ok +SET http_retry_backoff=1; + +statement ok +SET http_timeout=50000; + +statement ok +SET http_keep_alive=false; + +# Test the vhost style urls (this is the default) +statement ok +SET s3_url_style='${url_style}'; + +statement ok +COPY test TO 's3://test-bucket-public/root-dir/test_${url_style}_url_style.parquet'; + +# vhost style access +query I +SELECT i FROM "http://test-bucket-public.${DUCKDB_S3_ENDPOINT}/root-dir/test_${url_style}_url_style.parquet" LIMIT 3 +---- +0 +1 +2 + +# path style access +query I +SELECT i FROM "http://${DUCKDB_S3_ENDPOINT}/test-bucket-public/root-dir/test_${url_style}_url_style.parquet" LIMIT 3 +---- +0 +1 +2 + +# Test public access through s3 url +statement ok +SET s3_secret_access_key='';SET s3_access_key_id=''; + +query I +SELECT i FROM "s3://test-bucket-public/root-dir/test_${url_style}_url_style.parquet" LIMIT 3 +---- +0 +1 +2 + +endloop + +# empty url style is also allowed to select the default +statement ok +SET s3_secret_access_key='${AWS_SECRET_ACCESS_KEY}';SET s3_access_key_id='${AWS_ACCESS_KEY_ID}';SET s3_region='${AWS_DEFAULT_REGION}'; SET s3_endpoint='${DUCKDB_S3_ENDPOINT}'; SET s3_use_ssl=${DUCKDB_S3_USE_SSL}; + +statement ok +COPY test TO 's3://test-bucket-public/root-dir/test_default_url_style.parquet'; + +query I +SELECT i FROM "http://test-bucket-public.${DUCKDB_S3_ENDPOINT}/root-dir/test_default_url_style.parquet" LIMIT 3 +---- +0 +1 +2 + +# Incorrect path style throws error +statement ok +SET s3_url_style='handwritten'; + +statement error +COPY test TO 's3://test-bucket-public/root-dir/test2.parquet'; +---- + +# 404 +statement error +SELECT i FROM "http://test-bucket-public.${DUCKDB_S3_ENDPOINT}/root-dir/non-existent-file-ljaslkjdas.parquet" LIMIT 3 +---- +Unable to connect to URL "http://test-bucket-public. + +# Connection error +statement error +SELECT i FROM "http://test-bucket-public.duckdb-minio-non-existent-host.com:9000/root-dir/non-existent-file-ljaslkjdas.parquet" LIMIT 3 +---- +Could not establish connection error for HTTP HEAD to 'http://test-bucket-public. 
+ +# S3 errors should throw on +statement error +SELECT * FROM parquet_scan('s3://this-aint-no-bucket/no-path/no-file'); +---- +Unable to connect to URL "http:// diff --git a/test/sql/copy/s3/fully_qualified_s3_url.test b/test/sql/copy/s3/fully_qualified_s3_url.test new file mode 100644 index 0000000..99b9113 --- /dev/null +++ b/test/sql/copy/s3/fully_qualified_s3_url.test @@ -0,0 +1,204 @@ +# name: test/sql/copy/s3/fully_qualified_s3_url.test +# description: Test S3, credentials override with query parameters +# group: [s3] + +require parquet + +require httpfs + +require-env S3_TEST_SERVER_AVAILABLE 1 + +# Require that these environment variables are also set + +require-env AWS_DEFAULT_REGION + +require-env AWS_ACCESS_KEY_ID + +require-env AWS_SECRET_ACCESS_KEY + +require-env DUCKDB_S3_ENDPOINT + +require-env DUCKDB_S3_USE_SSL + +# override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues +set ignore_error_messages + +statement ok +CREATE TABLE test as SELECT * FROM range(0,10) tbl(i); + +#set some false credentials to verify query param override +statement ok +SET s3_secret_access_key='false_pw';SET s3_access_key_id='false_key'; + +statement ok +SET s3_url_style='path'; + +statement error +COPY test TO 's3://test-bucket/s3_query_params/test.csv'; +---- +Unable to connect to URL + +#test with .csv file +statement ok +COPY test TO 's3://test-bucket/s3_query_params/test.csv?s3_access_key_id=${AWS_ACCESS_KEY_ID}&s3_secret_access_key=${AWS_SECRET_ACCESS_KEY}'; + +query I +SELECT i FROM "s3://test-bucket/s3_query_params/test.csv?s3_access_key_id=${AWS_ACCESS_KEY_ID}&s3_secret_access_key=${AWS_SECRET_ACCESS_KEY}" LIMIT 3 +---- +0 +1 +2 + +#test with .parquet file +statement ok +COPY test TO 's3://test-bucket/s3_query_params/test.parquet?s3_access_key_id=${AWS_ACCESS_KEY_ID}&s3_secret_access_key=${AWS_SECRET_ACCESS_KEY}' (FORMAT 'parquet'); + +query I +SELECT i FROM "s3://test-bucket/s3_query_params/test.parquet?s3_access_key_id=${AWS_ACCESS_KEY_ID}&s3_secret_access_key=${AWS_SECRET_ACCESS_KEY}" LIMIT 3 +---- +0 +1 +2 + +#test GLOB with .parquet file +query I +SELECT i FROM "s3://test-bucket/s3_query_params/*.parquet?s3_access_key_id=${AWS_ACCESS_KEY_ID}&s3_secret_access_key=${AWS_SECRET_ACCESS_KEY}" LIMIT 3 +---- +0 +1 +2 + +#global settings have not been modified by query parameters +query I +SELECT CURRENT_SETTING('s3_access_key_id'); +---- +false_key + +query I +SELECT CURRENT_SETTING('s3_secret_access_key'); +---- +false_pw + +#setting up a second s3 file with different credentials +statement ok +CREATE TABLE test_2 as SELECT * FROM range(100,110) tbl(j); + +statement ok +COPY test_2 TO 's3://test-bucket/s3_query_params/test_2.csv?s3_access_key_id=minio_duckdb_user_2&s3_secret_access_key=minio_duckdb_user_2_password' ; + +query I +SELECT j FROM "s3://test-bucket/s3_query_params/test_2.csv?s3_access_key_id=minio_duckdb_user_2&s3_secret_access_key=minio_duckdb_user_2_password" LIMIT 3 +---- +100 +101 +102 + +#test a joining of two tables with different credentials +query II +SELECT T1.i, T2.j FROM "s3://test-bucket/s3_query_params/test.parquet?s3_access_key_id=${AWS_ACCESS_KEY_ID}&s3_secret_access_key=${AWS_SECRET_ACCESS_KEY}" T1 + INNER JOIN "s3://test-bucket/s3_query_params/test_2.csv?s3_access_key_id=minio_duckdb_user_2&s3_secret_access_key=minio_duckdb_user_2_password" T2 + ON T1.i+100=T2.j LIMIT 3; +---- +0 100 +1 101 +2 102 + +statement ok +SET s3_secret_access_key='${AWS_SECRET_ACCESS_KEY}';SET 
s3_access_key_id='${AWS_ACCESS_KEY_ID}'; + +#test region param +statement ok +SET s3_region='false_region'; + +statement ok +SELECT i FROM "s3://test-bucket/s3_query_params/test.parquet?s3_region=${AWS_DEFAULT_REGION}" LIMIT 3 + +statement ok +SET s3_region='${AWS_DEFAULT_REGION}'; + +# test endpoint param +statement ok +SET s3_endpoint='false_endpoint'; + +statement ok +SELECT i FROM "s3://test-bucket/s3_query_params/test.parquet?s3_endpoint=${DUCKDB_S3_ENDPOINT}" LIMIT 3 + +statement ok +SET s3_endpoint='${DUCKDB_S3_ENDPOINT}'; + +#test secret_access_key +statement ok +SET s3_secret_access_key='false_acces_key'; + +statement ok +SELECT i FROM "s3://test-bucket/s3_query_params/test.parquet?s3_secret_access_key=${AWS_SECRET_ACCESS_KEY}" LIMIT 3 + +statement ok +SET s3_secret_access_key='${AWS_SECRET_ACCESS_KEY}'; + +#test access_key +statement ok +SET s3_access_key_id='false_acces_key_id'; + +statement ok +SELECT i FROM "s3://test-bucket/s3_query_params/test.parquet?s3_access_key_id=${AWS_ACCESS_KEY_ID}" LIMIT 3 + +statement ok +SET s3_access_key_id='${AWS_ACCESS_KEY_ID}'; + +#test use_ssl +statement ok +SELECT i FROM "s3://test-bucket/s3_query_params/test.parquet?s3_use_ssl=false" LIMIT 3 + +statement error +SELECT i FROM "s3://test-bucket/s3_query_params/test.parquet?s3_use_ssl=bla" LIMIT 3 +---- + +#test url_style +statement ok +SELECT i FROM "s3://test-bucket/s3_query_params/test.parquet?s3_url_style=vhost" LIMIT 3 + +statement ok +SET s3_url_style='path'; + +# test combinations +statement ok +SET s3_access_key_id='false_id'; SET s3_region='false_region'; + +statement ok +SELECT i FROM "s3://test-bucket/s3_query_params/test.parquet?s3_region=${AWS_DEFAULT_REGION}&s3_access_key_id=${AWS_ACCESS_KEY_ID}&s3_endpoint=${DUCKDB_S3_ENDPOINT}" LIMIT 3 + +statement ok +SET s3_access_key_id='${AWS_ACCESS_KEY_ID}'; SET s3_region='${AWS_DEFAULT_REGION}'; + +# test faulty input +statement error +SELECT i FROM "s3://test-bucket/s3_query_params/test.parquet?s3_region=${AWS_DEFAULT_REGION}&s3_access_key_id=incorrect_key_id" LIMIT 3 +---- + +statement error +SELECT i FROM "s3://test-bucket/s3_query_params/test.parquet?s3_region=${AWS_DEFAULT_REGION}&bla=bla" LIMIT 3 +---- + +# test endpoint valid path +statement ok +SET s3_endpoint='${DUCKDB_S3_ENDPOINT}/test-bucket'; SET s3_url_style='path'; + +statement ok +SELECT i FROM "s3://s3_query_params/test.parquet" LIMIT 3 + +# test endpoint invalid url style +statement ok +SET s3_url_style='vhost'; + +statement error +SELECT i FROM "s3://s3_query_params/test.parquet" LIMIT 3 +---- + +# test endpoint invalid path +statement ok +SET s3_endpoint='${DUCKDB_S3_ENDPOINT}/s3_query_params'; SET s3_url_style='path'; + +statement error +SELECT i FROM "s3://test-bucket/test.parquet" LIMIT 3 +---- diff --git a/test/sql/copy/s3/glob_s3_paging.test_slow b/test/sql/copy/s3/glob_s3_paging.test_slow new file mode 100644 index 0000000..db1b5b7 --- /dev/null +++ b/test/sql/copy/s3/glob_s3_paging.test_slow @@ -0,0 +1,94 @@ +# name: test/sql/copy/s3/glob_s3_paging.test_slow +# description: Test globbing of a large number of parquet files to test the paging mechanism +# group: [s3] + +require parquet + +require httpfs + +require-env S3_TEST_SERVER_AVAILABLE 1 + +# Require that these environment variables are also set + +require-env AWS_DEFAULT_REGION + +require-env AWS_ACCESS_KEY_ID + +require-env AWS_SECRET_ACCESS_KEY + +require-env DUCKDB_S3_ENDPOINT + +require-env DUCKDB_S3_USE_SSL + +# override the default behaviour of skipping HTTP errors and connection failures: this test 
fails on connection issues +set ignore_error_messages + +statement ok +set http_timeout=120000; + +# More retries (longest wait will be 25600ms) +statement ok +set http_retries=6; + +# Test should be a bit faster using the metadata cache +statement ok +SET enable_http_metadata_cache=true; + +foreach urlstyle path vhost + +statement ok +SET s3_url_style='${urlstyle}' + +## For both formats we generate 2000 files which we will glob to test the paging mechanism of aws ListObjectV2 call is handled properly +foreach format parquet csv + +foreach i 0 1 + +foreach j 0 1 2 3 4 5 6 7 8 9 + +foreach k 0 1 2 3 4 5 6 7 8 9 + +foreach l 0 1 2 3 4 5 6 7 8 9 + +statement ok +COPY (select (${i}${j}${k}${l})::INT as column0) to 's3://test-bucket/parquet_glob_s3_paging/paging/t${i}${j}${k}${l}-${urlstyle}-urls.${format}'; + +endloop + +endloop + +endloop + +endloop + +# Begin tests +query I +select sum(column0) from 's3://test-bucket/parquet_glob_s3_paging/paging/t*-${urlstyle}-urls.${format}' +---- +1999000 + +endloop + +endloop + +# test with parquet_metadata_cache = true +statement ok +SET parquet_metadata_cache=true; + +foreach urlstyle path vhost + +foreach format parquet + +loop i 0 2 + +# Begin tests +query I +select sum(column0) from 's3://test-bucket/parquet_glob_s3_paging/paging/t*-${urlstyle}-urls.${format}' +---- +1999000 + +endloop + +endloop + +endloop diff --git a/test/sql/copy/s3/hive_partitioned_write_s3.test b/test/sql/copy/s3/hive_partitioned_write_s3.test new file mode 100644 index 0000000..85ece68 --- /dev/null +++ b/test/sql/copy/s3/hive_partitioned_write_s3.test @@ -0,0 +1,183 @@ +# name: test/sql/copy/s3/hive_partitioned_write_s3.test +# description: basic tests for the hive partitioned write to s3 +# group: [s3] + +require parquet + +require httpfs + +require-env S3_TEST_SERVER_AVAILABLE 1 + +require-env AWS_DEFAULT_REGION + +require-env AWS_ACCESS_KEY_ID + +require-env AWS_SECRET_ACCESS_KEY + +require-env DUCKDB_S3_ENDPOINT + +require-env DUCKDB_S3_USE_SSL + +# override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues +set ignore_error_messages + +statement ok +CREATE SECRET ( + TYPE S3, + PROVIDER config, + KEY_ID '${AWS_ACCESS_KEY_ID}', + SECRET '${AWS_SECRET_ACCESS_KEY}', + REGION '${AWS_DEFAULT_REGION}', + ENDPOINT '${DUCKDB_S3_ENDPOINT}', + USE_SSL '${DUCKDB_S3_USE_SSL}' +) + +statement ok +set s3_endpoint='ensure.secret.is.used.instead.of.duckdb.setting.com' + +# Simple table that is easy to partition +statement ok +CREATE TABLE test as SELECT i%2 as part_col, (i+1)%5 as value_col, i as value2_col from range(0,10) tbl(i); + +statement ok +COPY test TO 's3://test-bucket/partitioned1' (FORMAT PARQUET, PARTITION_BY (part_col), OVERWRITE_OR_IGNORE TRUE); + +query III +SELECT part_col, value_col, value2_col FROM 's3://test-bucket/partitioned1/part_col=0/*.parquet' ORDER BY value2_col; +---- +0 1 0 +0 3 2 +0 0 4 +0 2 6 +0 4 8 + +query III +SELECT part_col, value_col, value2_col FROM 's3://test-bucket/partitioned1/part_col=1/*.parquet' ORDER BY value2_col; +---- +1 2 1 +1 4 3 +1 1 5 +1 3 7 +1 0 9 + +# Want a modified version of the partition_col? (for example to do custom string conversion?) 
No problem: +statement ok +COPY (SELECT * EXCLUDE (part_col), 'prefix-'::VARCHAR || part_col::VARCHAR as part_col FROM test) TO 's3://test-bucket/partitioned2' (FORMAT PARQUET, PARTITION_BY (part_col), OVERWRITE_OR_IGNORE TRUE); + +query III +SELECT part_col, value_col, value2_col FROM 's3://test-bucket/partitioned2/part_col=prefix-0/*.parquet' ORDER BY value2_col; +---- +prefix-0 1 0 +prefix-0 3 2 +prefix-0 0 4 +prefix-0 2 6 +prefix-0 4 8 + +query III +SELECT part_col, value_col, value2_col FROM 's3://test-bucket/partitioned2/part_col=prefix-1/*.parquet' ORDER BY value2_col; +---- +prefix-1 2 1 +prefix-1 4 3 +prefix-1 1 5 +prefix-1 3 7 +prefix-1 0 9 + +# Test partitioning by all +statement error +COPY test TO 's3://test-bucket/partitioned3' (FORMAT PARQUET, PARTITION_BY '*', OVERWRITE_OR_IGNORE TRUE); +---- +No column to write as all columns are specified as partition columns + +statement ok +COPY test TO 's3://test-bucket/partitioned3' (FORMAT PARQUET, PARTITION_BY '*', OVERWRITE_OR_IGNORE TRUE, WRITE_PARTITION_COLUMNS 1); + +query I +SELECT min(value2_col) as min_val +FROM parquet_scan('s3://test-bucket/partitioned3/part_col=*/value_col=*/value2_col=*/*.parquet', FILENAME=1) +GROUP BY filename +ORDER BY min_val +---- +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 + +# single col as param is also fine +statement ok +COPY test TO 's3://test-bucket/partitioned4' (FORMAT PARQUET, PARTITION_BY part_col, OVERWRITE_OR_IGNORE TRUE); + +query III +SELECT part_col, value_col, value2_col FROM parquet_scan('s3://test-bucket/partitioned4/*/*.parquet', HIVE_PARTITIONING=1) WHERE part_col=0 ORDER BY value2_col; +---- +0 1 0 +0 3 2 +0 0 4 +0 2 6 +0 4 8 + + +# a file already exists and OVERWRITE_OR_IGNORE is not set, so throw an error +statement error +COPY test TO 's3://test-bucket/partitioned4' (FORMAT PARQUET, PARTITION_BY part_col); +---- +Directory + +# a file already exists and OVERWRITE_OR_IGNORE is set to false, so throw an error +statement error +COPY test TO 's3://test-bucket/partitioned4' (FORMAT PARQUET, PARTITION_BY part_col, OVERWRITE_OR_IGNORE FALSE); +---- +Directory + +# A trailing slash is also fine! +statement ok +COPY test TO 's3://test-bucket/partitioned5/' (FORMAT PARQUET, PARTITION_BY part_col, OVERWRITE_OR_IGNORE TRUE); + +query III +SELECT part_col, value_col, value2_col FROM 's3://test-bucket/partitioned5/part_col=0/*.parquet' ORDER BY value2_col; +---- +0 1 0 +0 3 2 +0 0 4 +0 2 6 +0 4 8 + +# Cannot use the USE_TMP_FILE option simultaneously with partitioning +statement error +COPY test TO 's3://test-bucket/partitioned6' (FORMAT PARQUET, PARTITION_BY part_col, USE_TMP_FILE TRUE); +---- +Not implemented Error: Can't combine USE_TMP_FILE and PARTITION_BY for COPY + +# Technically it doesn't really matter, as currently our partition_by behaves similarly, but for clarity the user should just +# EITHER use partition_by or per_thread_output.
+statement error +COPY test TO 's3://test-bucket/partitioned6' (FORMAT PARQUET, PARTITION_BY part_col, PER_THREAD_OUTPUT TRUE); +---- +Not implemented Error: Can't combine PER_THREAD_OUTPUT and PARTITION_BY for COPY + +# partitioning csv files is also a thing +statement ok +COPY test TO 's3://test-bucket/partitioned7' (FORMAT CSV, PARTITION_BY part_col, OVERWRITE_OR_IGNORE TRUE); + +query III +SELECT part_col, value_col, value2_col FROM 's3://test-bucket/partitioned7/part_col=0/*.csv' ORDER BY value2_col; +---- +0 1 0 +0 3 2 +0 0 4 +0 2 6 +0 4 8 + +query III +SELECT part_col, value_col, value2_col FROM 's3://test-bucket/partitioned7/part_col=1/*.csv' ORDER BY value2_col; +---- +1 2 1 +1 4 3 +1 1 5 +1 3 7 +1 0 9 diff --git a/test/sql/copy/s3/hive_partitioned_write_s3.test_slow b/test/sql/copy/s3/hive_partitioned_write_s3.test_slow new file mode 100644 index 0000000..05c6d14 --- /dev/null +++ b/test/sql/copy/s3/hive_partitioned_write_s3.test_slow @@ -0,0 +1,72 @@ +# name: test/sql/copy/s3/hive_partitioned_write_s3.test_slow +# description: slow test for the hive partitioned write to s3 +# group: [s3] + +require parquet + +require httpfs + +require tpch + +require-env S3_TEST_SERVER_AVAILABLE 1 + +require-env AWS_DEFAULT_REGION + +require-env AWS_ACCESS_KEY_ID + +require-env AWS_SECRET_ACCESS_KEY + +require-env DUCKDB_S3_ENDPOINT + +require-env DUCKDB_S3_USE_SSL + +# override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues +set ignore_error_messages + +statement ok +pragma memory_limit='200MB' + +statement ok +set http_timeout=120000; + +# More retries (longest wait will be 25600ms) +statement ok +set http_retries=6; + +# around 200MB worth of data, will require the PartitionedColumnData to spill to disk +statement ok +COPY (SELECT i%2::INT32 as part_col, i::INT32 FROM range(0,25000000) tbl(i)) TO 's3://test-bucket/partitioned_memory_spill' (FORMAT parquet, PARTITION_BY part_col, overwrite_or_ignore TRUE); + +statement ok +pragma memory_limit='-1' + +statement ok +call dbgen(sf=1); + +# Partition by 2 columns +statement ok +COPY lineitem TO 's3://test-bucket/lineitem_sf1_partitioned' (FORMAT PARQUET, PARTITION_BY (l_returnflag, l_linestatus), overwrite_or_ignore TRUE); + +statement ok +DROP TABLE lineitem; + +statement ok +CREATE VIEW lineitem as SELECT * FROM parquet_scan('s3://test-bucket/lineitem_sf1_partitioned/*/*/*.parquet', HIVE_PARTITIONING=1); + +loop i 1 9 + +query I +PRAGMA tpch(${i}) +---- +:extension/tpch/dbgen/answers/sf1/q0${i}.csv + +endloop + +loop i 10 23 + +query I +PRAGMA tpch(${i}) +---- +:extension/tpch/dbgen/answers/sf1/q${i}.csv + +endloop diff --git a/test/sql/copy/s3/http_log.test b/test/sql/copy/s3/http_log.test new file mode 100644 index 0000000..854f772 --- /dev/null +++ b/test/sql/copy/s3/http_log.test @@ -0,0 +1,43 @@ +# name: test/sql/copy/s3/http_log.test +# description: Test http logger +# group: [s3] + +require parquet + +require httpfs + +require-env S3_TEST_SERVER_AVAILABLE 1 + +require-env AWS_DEFAULT_REGION + +require-env AWS_ACCESS_KEY_ID + +require-env AWS_SECRET_ACCESS_KEY + +require-env DUCKDB_S3_ENDPOINT + +require-env DUCKDB_S3_USE_SSL + +# override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues +set ignore_error_messages + +# Create some test data +statement ok +COPY (SELECT 'value-1' as value) TO 's3://test-bucket/http_log/test.parquet'; + +statement ok +CALL enable_logging('HTTP') + +statement ok +set logging_level='debug' + 
+query I +FROM 's3://test-bucket/http_log/test.parquet' +---- +value-1 + +query II rowsort +SELECT request.type, parse_filename(request.url) FROM duckdb_logs_parsed('HTTP'); +---- +GET test.parquet +HEAD test.parquet diff --git a/test/sql/copy/s3/http_proxy.test b/test/sql/copy/s3/http_proxy.test new file mode 100644 index 0000000..a7d81b1 --- /dev/null +++ b/test/sql/copy/s3/http_proxy.test @@ -0,0 +1,155 @@ +# name: test/sql/copy/s3/http_proxy.test +# description: Test http proxy +# group: [s3] + +require parquet + +require httpfs + +require-env S3_TEST_SERVER_AVAILABLE 1 + +require-env HTTP_PROXY_PUBLIC + +require-env AWS_DEFAULT_REGION + +require-env AWS_ACCESS_KEY_ID + +require-env AWS_SECRET_ACCESS_KEY + +require-env DUCKDB_S3_ENDPOINT + +require-env DUCKDB_S3_USE_SSL + +# override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues +set ignore_error_messages + +statement ok +PRAGMA enable_verification + +statement ok +COPY (SELECT 'value-1' as value) TO 's3://test-bucket/proxy-test/test.parquet'; + +query I +FROM 's3://test-bucket/proxy-test/test.parquet' +---- +value-1 + +# Lets try a faulty proxy first +statement ok +set http_proxy='blabla:1337' + +statement ok +set http_proxy_username='xxx' + +statement ok +set http_proxy_password='yyy' + +statement error +FROM 's3://test-bucket/proxy-test/test.parquet' +---- +Could not establish connection + +# Now a working one +statement ok +set http_proxy='${HTTP_PROXY_PUBLIC}' + +statement ok +RESET http_proxy_username + +statement ok +RESET http_proxy_password + +query I +FROM 's3://test-bucket/proxy-test/test.parquet' +---- +value-1 + +# And try the working one with an 'http://' prefix. +statement ok +set http_proxy='http://${HTTP_PROXY_PUBLIC}' + +query I +FROM 's3://test-bucket/proxy-test/test.parquet' +---- +value-1 + +# Now we revert to the failing one +statement ok +set http_proxy='blabla:1337' + +# But we create a HTTP secret with the proxy +statement ok +CREATE SECRET http1 ( + TYPE HTTP, + http_proxy '${HTTP_PROXY_PUBLIC}' +); + +# This works now, because it uses the secret +query I +FROM 's3://test-bucket/proxy-test/test.parquet' +---- +value-1 + +statement ok +DROP SECRET http1 + +require-env HTTP_PROXY + +statement error +FROM 's3://test-bucket/proxy-test/test.parquet' +---- +Could not establish connection + +statement ok +CREATE SECRET http1 ( + TYPE HTTP, + PROVIDER env +); + +# This works now, because it uses the secret +query I +FROM 's3://test-bucket/proxy-test/test.parquet' +---- +value-1 + +statement ok +DROP SECRET http1 + +require-env HTTP_PROXY_PRIVATE + +require-env HTTP_PROXY_PRIVATE_USERNAME + +require-env HTTP_PROXY_PRIVATE_PASSWORD + +# Let's try the private proxy +statement ok +CREATE SECRET http2 ( + TYPE HTTP, + http_proxy '${HTTP_PROXY_PRIVATE}', + http_proxy_username '${HTTP_PROXY_PRIVATE_USERNAME}', + http_proxy_password '${HTTP_PROXY_PRIVATE_PASSWORD}' +); + +# Correct auth means it works! 
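+# (That is, the authenticated proxy at ${HTTP_PROXY_PRIVATE} accepts these credentials and forwards
+# the request; with wrong credentials it answers 407 Proxy Authentication Required, as tested further down.)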
+query I +FROM 's3://test-bucket/proxy-test/test.parquet' +---- +value-1 + +statement ok +DROP SECRET http2 + +# Now lets try incorrect auth +statement ok +CREATE SECRET http3 ( + TYPE HTTP, + http_proxy '${HTTP_PROXY_PRIVATE}', + http_proxy_username 'malicious', + http_proxy_password 'intruder' +); + +# We get a tasty HTTP 407 +statement error +FROM 's3://test-bucket/proxy-test/test.parquet' +---- +HTTP GET error on 'http://test-bucket.duckdb-minio.com:9000/proxy-test/test.parquet' (HTTP 407) diff --git a/test/sql/copy/s3/http_secret.test b/test/sql/copy/s3/http_secret.test new file mode 100644 index 0000000..ea526c6 --- /dev/null +++ b/test/sql/copy/s3/http_secret.test @@ -0,0 +1,44 @@ +# name: test/sql/copy/s3/http_secret.test +# description: Test http secret +# group: [s3] + +require parquet + +require httpfs + +require-env S3_TEST_SERVER_AVAILABLE 1 + +require-env AWS_DEFAULT_REGION + +require-env AWS_ACCESS_KEY_ID + +require-env AWS_SECRET_ACCESS_KEY + +require-env DUCKDB_S3_ENDPOINT + +require-env DUCKDB_S3_USE_SSL + +# override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues +set ignore_error_messages + +# Create some test data +statement ok +COPY (SELECT 'value-1' as value) TO 's3://test-bucket/http-secret-test/test.parquet'; + +statement ok +PRAGMA enable_verification + +# Create some wonky headers +statement ok +CREATE SECRET http3 ( + TYPE HTTP, + EXTRA_HTTP_HEADERS MAP{ + 'Authorization': 'Im very important', + 'CustomHeader': 'fliepflap' + } +); + +query I +FROM 's3://test-bucket/http-secret-test/test.parquet' +---- +value-1 diff --git a/test/sql/copy/s3/metadata_cache.test b/test/sql/copy/s3/metadata_cache.test new file mode 100644 index 0000000..c3e7e8d --- /dev/null +++ b/test/sql/copy/s3/metadata_cache.test @@ -0,0 +1,90 @@ +# name: test/sql/copy/s3/metadata_cache.test +# description: Test metadata cache that caches reponses from the initial HEAD requests to open a file. +# group: [s3] + +require parquet + +require httpfs + +require-env S3_TEST_SERVER_AVAILABLE 1 + +# Require that these environment variables are also set + +require-env AWS_DEFAULT_REGION + +require-env AWS_ACCESS_KEY_ID + +require-env AWS_SECRET_ACCESS_KEY + +require-env DUCKDB_S3_ENDPOINT + +require-env DUCKDB_S3_USE_SSL + +# override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues +set ignore_error_messages + +# this test was written before we implemented the external file cache +# when it is enabled, the request counts are different +# we disable it so this test still makes sense +statement ok +set enable_external_file_cache=false; + +statement ok +CREATE TABLE test as SELECT * FROM range(0,10) tbl(i); + +statement ok +CREATE TABLE test1 as SELECT * FROM range(10,20) tbl(i); + +query II +EXPLAIN ANALYZE COPY test TO 's3://test-bucket-public/root-dir/metadata_cache/test.parquet'; +---- +analyzed_plan :.*HTTP Stats.*\#HEAD\: 0.*GET\: 0.*PUT\: 1.*\#POST\: 2.* + +query II +EXPLAIN ANALYZE COPY test TO 's3://test-bucket-public/root-dir/metadata_cache/test1.parquet'; +---- +analyzed_plan :.*HTTP Stats.*\#HEAD\: 0.*GET\: 0.*PUT\: 1.*\#POST\: 2.* + +# Now we query the file metadata without the global metadata cache: There should be 1 HEAD request for the file size, +# then a GET for the pointer to the parquet metadata, then a GET for the metadata. 
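+# (Aside, a hedged sketch: enable_http_metadata_cache appears to be off by default, which can be
+# checked with
+#   SELECT value FROM duckdb_settings() WHERE name = 'enable_http_metadata_cache';
+# the queries below first measure the uncached request counts, then turn the cache on.)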
+query II
+EXPLAIN ANALYZE SELECT COUNT(*) FROM 's3://test-bucket-public/root-dir/metadata_cache/test.parquet';
+----
+analyzed_plan :.*HTTP Stats.*\#HEAD\: 1.*GET\: 1.*PUT\: 0.*\#POST\: 0.*
+
+# Redoing the query should still result in the same request count
+query II
+EXPLAIN ANALYZE SELECT COUNT(*) FROM 's3://test-bucket-public/root-dir/metadata_cache/test.parquet';
+----
+analyzed_plan :.*HTTP Stats.*\#HEAD\: 1.*GET\: 1.*PUT\: 0.*\#POST\: 0.*
+
+# Now enable the global metadata cache to store the results of the HEAD requests, saving 1 HEAD per file
+statement ok
+SET enable_http_metadata_cache=true;
+
+query II
+EXPLAIN ANALYZE SELECT COUNT(*) FROM 's3://test-bucket-public/root-dir/metadata_cache/test1.parquet';
+----
+analyzed_plan :.*HTTP Stats.*\#HEAD\: 1.*GET\: 1.*PUT\: 0.*\#POST\: 0.*
+
+# Now, with the global metadata cache, we don't need to do the HEAD request again
+query II
+EXPLAIN ANALYZE SELECT COUNT(*) FROM 's3://test-bucket-public/root-dir/metadata_cache/test1.parquet';
+----
+analyzed_plan :.*HTTP Stats.*\#HEAD\: 0.*GET\: 1.*PUT\: 0.*\#POST\: 0.*
+
+# Now when we write a file to a cached URL, the cached metadata would become stale, so the cache entry should be invalidated
+statement ok
+COPY (SELECT * from range(0,100) tbl(i)) TO 's3://test-bucket-public/root-dir/metadata_cache/test1.parquet';
+
+# We need to do a new HEAD request here
+query II
+EXPLAIN ANALYZE SELECT COUNT(*) FROM 's3://test-bucket-public/root-dir/metadata_cache/test1.parquet';
+----
+analyzed_plan :.*HTTP Stats.*\#HEAD\: 1.*GET\: 1.*PUT\: 0.*\#POST\: 0.*
+
+# But now it's cached again
+query II
+EXPLAIN ANALYZE SELECT COUNT(*) FROM 's3://test-bucket-public/root-dir/metadata_cache/test1.parquet';
+----
+analyzed_plan :.*HTTP Stats.*\#HEAD\: 0.*GET\: 1.*PUT\: 0.*\#POST\: 0.*
diff --git a/test/sql/copy/s3/parquet_s3_tpcds.test_slow b/test/sql/copy/s3/parquet_s3_tpcds.test_slow
new file mode 100644
index 0000000..757a4b5
--- /dev/null
+++ b/test/sql/copy/s3/parquet_s3_tpcds.test_slow
@@ -0,0 +1,96 @@
+# name: test/sql/copy/s3/parquet_s3_tpcds.test_slow
+# description: Test all tpcds queries on tpcds sf0.01 over s3
+# group: [s3]
+
+require parquet
+
+require httpfs
+
+require tpcds
+
+require-env S3_TEST_SERVER_AVAILABLE 1
+
+# Require that these environment variables are also set
+
+require-env AWS_DEFAULT_REGION
+
+require-env AWS_ACCESS_KEY_ID
+
+require-env AWS_SECRET_ACCESS_KEY
+
+require-env DUCKDB_S3_ENDPOINT
+
+require-env DUCKDB_S3_USE_SSL
+
+# override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues
+set ignore_error_messages
+
+# answers are generated from postgres
+# hence check with NULLS LAST flag
+statement ok
+PRAGMA default_null_order='NULLS LAST'
+
+statement ok
+SET enable_http_metadata_cache=true;
+
+statement ok
+set http_timeout=120000;
+
+# More retries (longest wait will be 25600ms)
+statement ok
+set http_retries=6;
+
+statement ok
+CREATE SCHEMA tpcds;
+
+statement ok
+CALL dsdgen(sf=0.01, schema='tpcds');
+
+foreach tbl call_center catalog_page catalog_returns catalog_sales customer customer_demographics customer_address date_dim household_demographics inventory income_band item promotion reason ship_mode store store_returns store_sales time_dim warehouse web_page web_returns web_sales web_site
+
+statement ok
+COPY tpcds.${tbl} TO 's3://test-bucket/tpcds-sf0_01/${tbl}.parquet' (FORMAT 'PARQUET', COMPRESSION 'ZSTD');
+
+statement ok
+CREATE VIEW ${tbl} AS SELECT * FROM parquet_scan('s3://test-bucket/tpcds-sf0_01/${tbl}.parquet');
+
+endloop
+
+# too 
slow queries: +# 64, 85 + +loop i 1 9 + +query I +PRAGMA tpcds(${i}) +---- +:extension/tpcds/dsdgen/answers/sf0.01/0${i}.csv + +endloop + +loop i 10 64 + +query I +PRAGMA tpcds(${i}) +---- +:extension/tpcds/dsdgen/answers/sf0.01/${i}.csv + +endloop + +loop i 65 85 + +query I +PRAGMA tpcds(${i}) +---- +:extension/tpcds/dsdgen/answers/sf0.01/${i}.csv + +endloop + +loop i 86 99 + +query I +PRAGMA tpcds(${i}) +---- +:extension/tpcds/dsdgen/answers/sf0.01/${i}.csv + +endloop diff --git a/test/sql/copy/s3/parquet_s3_tpch.test_slow b/test/sql/copy/s3/parquet_s3_tpch.test_slow new file mode 100644 index 0000000..425e245 --- /dev/null +++ b/test/sql/copy/s3/parquet_s3_tpch.test_slow @@ -0,0 +1,92 @@ +# name: test/sql/copy/s3/parquet_s3_tpch.test_slow +# description: Test all tpch queries on tpch sf0.01 over s3 +# group: [s3] + +require parquet + +require httpfs + +require tpch + +require-env S3_TEST_SERVER_AVAILABLE 1 + +# Require that these environment variables are also set + +require-env AWS_DEFAULT_REGION + +require-env AWS_ACCESS_KEY_ID + +require-env AWS_SECRET_ACCESS_KEY + +require-env DUCKDB_S3_ENDPOINT + +require-env DUCKDB_S3_USE_SSL + +# override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues +set ignore_error_messages + +statement ok +SET enable_http_metadata_cache=true; + +statement ok +set http_timeout=120000; + +# More retries (longest wait will be 25600ms) +statement ok +set http_retries=6; + +# Copy files to S3 before beginning tests +statement ok +CALL DBGEN(sf=0.01); + +# copy tpch files to S3 +statement ok +COPY lineitem to 's3://test-bucket/tpch-sf0_01/lineitem.parquet'; +COPY nation to 's3://test-bucket/tpch-sf0_01/nation.parquet'; +COPY region to 's3://test-bucket/tpch-sf0_01/region.parquet'; +COPY part to 's3://test-bucket/tpch-sf0_01/part.parquet'; +COPY supplier to 's3://test-bucket/tpch-sf0_01/supplier.parquet'; +COPY partsupp to 's3://test-bucket/tpch-sf0_01/partsupp.parquet'; +COPY customer to 's3://test-bucket/tpch-sf0_01/customer.parquet'; +COPY orders to 's3://test-bucket/tpch-sf0_01/orders.parquet'; + +# clears tables +statement ok +DROP TABLE lineitem; +DROP TABLE nation; +DROP TABLE region; +DROP TABLE part; +DROP TABLE supplier; +DROP TABLE partsupp; +DROP TABLE customer; +DROP TABLE orders; + +statement ok +CREATE VIEW lineitem as SELECT * FROM 's3://test-bucket/tpch-sf0_01/lineitem.parquet'; +CREATE VIEW nation as SELECT * FROM 's3://test-bucket/tpch-sf0_01/nation.parquet'; +CREATE VIEW region as SELECT * FROM 's3://test-bucket/tpch-sf0_01/region.parquet'; +CREATE VIEW part as SELECT * FROM 's3://test-bucket/tpch-sf0_01/part.parquet'; +CREATE VIEW supplier as SELECT * FROM 's3://test-bucket/tpch-sf0_01/supplier.parquet'; +CREATE VIEW partsupp as SELECT * FROM 's3://test-bucket/tpch-sf0_01/partsupp.parquet'; +CREATE VIEW customer as SELECT * FROM 's3://test-bucket/tpch-sf0_01/customer.parquet'; +CREATE VIEW orders as SELECT * FROM 's3://test-bucket/tpch-sf0_01/orders.parquet'; + + +# Run TPCH SF1 +loop i 1 9 + +query I +PRAGMA tpch(${i}) +---- +:extension/tpch/dbgen/answers/sf0.01/q0${i}.csv + +endloop + +loop i 10 23 + +query I +PRAGMA tpch(${i}) +---- +:extension/tpch/dbgen/answers/sf0.01/q${i}.csv + +endloop diff --git a/test/sql/copy/s3/s3_hive_partition.test b/test/sql/copy/s3/s3_hive_partition.test new file mode 100644 index 0000000..535a3ca --- /dev/null +++ b/test/sql/copy/s3/s3_hive_partition.test @@ -0,0 +1,104 @@ +# name: test/sql/copy/s3/s3_hive_partition.test +# description: Test the 
automatic parsing of the hive partitioning scheme +# group: [s3] + +require parquet + +require httpfs + +require-env S3_TEST_SERVER_AVAILABLE 1 + +## Require that these environment variables are also set +require-env AWS_DEFAULT_REGION + +require-env AWS_ACCESS_KEY_ID + +require-env AWS_SECRET_ACCESS_KEY + +require-env DUCKDB_S3_ENDPOINT + +require-env DUCKDB_S3_USE_SSL + +# override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues +set ignore_error_messages + +# Parquet filename name conflict +statement ok +CREATE TABLE test AS SELECT 1 as id, 'value1' as value; +CREATE TABLE test2 AS SELECT 2 as id, 'value2' as value; + +statement ok +COPY test TO 's3://test-bucket/hive-partitioning/simple/key_!-_.*()=zisiswurking1/test.parquet'; +COPY test2 TO 's3://test-bucket/hive-partitioning/simple/key_!-_.*()=zisiswurking2/test.parquet'; + +# test parsing hive partitioning scheme, with some common special characters +query III +select id, value, "key_!-_.*()" from parquet_scan('s3://test-bucket/hive-partitioning/simple/*/test.parquet', HIVE_PARTITIONING=1) +---- +1 value1 zisiswurking1 +2 value2 zisiswurking2 + +# Test some medium sized files +statement ok +CREATE TABLE test3 as SELECT id FROM range(0,10000) tbl(id); +CREATE TABLE test4 as SELECT id FROM range(10000,20000) tbl(id); + +statement ok +COPY test3 TO 's3://test-bucket/hive-partitioning/medium/part=1/part2=1/test.parquet'; +COPY test4 TO 's3://test-bucket/hive-partitioning/medium/part=1/part2=2/test.parquet'; +COPY test3 TO 's3://test-bucket/hive-partitioning/medium/part=1/part2=1/test.csv'; +COPY test4 TO 's3://test-bucket/hive-partitioning/medium/part=1/part2=2/test.csv'; + +query II +select min(id), max(id) from parquet_scan('s3://test-bucket/hive-partitioning/medium/*/*/test.parquet', HIVE_PARTITIONING=1) where part2=2 +---- +10000 19999 + +query II +select min(id), max(id) from parquet_scan('s3://test-bucket/hive-partitioning/medium/*/*/test.parquet', HIVE_PARTITIONING=1) where part2=1 +---- +0 9999 + +query II +select min(id), max(id) from read_csv_auto('s3://test-bucket/hive-partitioning/medium/*/*/test.csv', HIVE_PARTITIONING=1) where part2=2 +---- +10000 19999 + +query II +select min(id), max(id) from read_csv_auto('s3://test-bucket/hive-partitioning/medium/*/*/test.csv', HIVE_PARTITIONING=1) where part2=1 +---- +0 9999 + +# check cases where there are file filters AND table filters +statement ok +Create table t1 (a int, b int, c int); + +foreach i 0 1 2 3 4 5 6 7 8 9 + +statement ok +insert into t1 (select range, ${i}*10, ${i}*100 from range(0,10)); + +endloop + +statement ok +COPY (SELECT * FROM t1) TO 's3://test-bucket/hive-partitioning/filter-test-parquet' (FORMAT PARQUET, PARTITION_BY c, OVERWRITE_OR_IGNORE); + +statement ok +COPY (SELECT * FROM t1) TO 's3://test-bucket/hive-partitioning/filter-test-csv' (FORMAT CSV, PARTITION_BY c, OVERWRITE_OR_IGNORE); + +# There should be Table Filters (id < 50) and file filters (c = 500) +query II +EXPLAIN select a from parquet_scan('s3://test-bucket/hive-partitioning/filter-test-parquet/*/*.parquet', HIVE_PARTITIONING=1, HIVE_TYPES_AUTOCAST=0) where c=500 and a < 4; +---- +physical_plan :.*PARQUET_SCAN.*Filters:.*a<4.*File Filters:.* \(CAST\(c AS.*INTEGER\) = 500\).* + +# There should be Table Filters (id < 50) and file filters (c = 500) +query II +EXPLAIN select a from read_csv_auto('s3://test-bucket/hive-partitioning/filter-test-csv/*/*.csv', HIVE_PARTITIONING=1, HIVE_TYPES_AUTOCAST=0) where c=500 and a < 4; +---- 
+physical_plan :.*FILTER.*(a < 4).*READ_CSV_AUTO.*File Filters:.* \(CAST\(c AS.*INTEGER\) = 500\).* + +statement error +COPY (SELECT * FROM t1) TO 's3://test-bucket/hive-partitioning/filter-test-parquet' (FORMAT PARQUET, PARTITION_BY c, OVERWRITE); +---- +OVERWRITE is not supported for remote file systems diff --git a/test/sql/copy/s3/s3_presigned_read.test b/test/sql/copy/s3/s3_presigned_read.test new file mode 100644 index 0000000..21c344f --- /dev/null +++ b/test/sql/copy/s3/s3_presigned_read.test @@ -0,0 +1,40 @@ +# name: test/sql/copy/s3/s3_presigned_read.test +# description: Read small csv/parquet files from S3 Presigned URL. +# group: [s3] + +require parquet + +require httpfs + +require-env S3_TEST_SERVER_AVAILABLE 1 + +# Require that these environment variables are also set + +require-env AWS_DEFAULT_REGION + +require-env AWS_ACCESS_KEY_ID + +require-env AWS_SECRET_ACCESS_KEY + +require-env DUCKDB_S3_ENDPOINT + +require-env DUCKDB_S3_USE_SSL + +require-env S3_SMALL_CSV_PRESIGNED_URL + +require-env S3_SMALL_PARQUET_PRESIGNED_URL + +# override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues +set ignore_error_messages + +query I +SELECT phone FROM read_csv_auto('${S3_SMALL_CSV_PRESIGNED_URL}'); +---- ++318855443322 ++552244331122 ++12233445567 + +query I +SELECT i FROM '${S3_SMALL_PARQUET_PRESIGNED_URL}'; +---- +1 diff --git a/test/sql/copy/s3/s3_presigned_read.test_slow b/test/sql/copy/s3/s3_presigned_read.test_slow new file mode 100644 index 0000000..9bd3da9 --- /dev/null +++ b/test/sql/copy/s3/s3_presigned_read.test_slow @@ -0,0 +1,48 @@ +# name: test/sql/copy/s3/s3_presigned_read.test_slow +# description: Read large csv/parquet files from S3 Presigned URL. +# group: [s3] + +require parquet + +require httpfs + +require-env S3_TEST_SERVER_AVAILABLE 1 + +# Require that these environment variables are also set + +require-env AWS_DEFAULT_REGION + +require-env AWS_ACCESS_KEY_ID + +require-env AWS_SECRET_ACCESS_KEY + +require-env DUCKDB_S3_ENDPOINT + +require-env DUCKDB_S3_USE_SSL + + +require-env S3_LARGE_PARQUET_PRESIGNED_URL + +# override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues +set ignore_error_messages + +statement ok +set http_timeout=120000; + +# More retries (longest wait will be 25600ms) +statement ok +set http_retries=6; + +query I +SELECT + sum(l_extendedprice * l_discount) AS revenue +FROM + '${S3_LARGE_PARQUET_PRESIGNED_URL}' +WHERE + l_shipdate >= CAST('1994-01-01' AS date) + AND l_shipdate < CAST('1995-01-01' AS date) + AND l_discount BETWEEN 0.05 + AND 0.07 + AND l_quantity < 24; +---- +123141078.2283 diff --git a/test/sql/copy/s3/starstar.test b/test/sql/copy/s3/starstar.test new file mode 100644 index 0000000..a8ade29 --- /dev/null +++ b/test/sql/copy/s3/starstar.test @@ -0,0 +1,362 @@ +# name: test/sql/copy/s3/starstar.test +# description: Test the glob "**" in s3 +# group: [s3] + +require parquet + +require httpfs + +require-env S3_TEST_SERVER_AVAILABLE 1 + +## Require that these environment variables are also set +require-env AWS_DEFAULT_REGION + +require-env AWS_ACCESS_KEY_ID + +require-env AWS_SECRET_ACCESS_KEY + +require-env DUCKDB_S3_ENDPOINT + +require-env DUCKDB_S3_USE_SSL + +# override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues +set ignore_error_messages + +# create a table +statement ok +CREATE TABLE mytable AS SELECT i as a, (i*2) as b, power(i,2) as c from range(0,10) 
tbl(i); + + + +# sanity check: the bucket is empty +query I +FROM GLOB('s3://test-bucket/glob_ss/*'); +---- + +query I +FROM GLOB('s3://test-bucket/glob_ss/**'); +---- + + + +# one file at the top +statement ok +COPY mytable TO 's3://test-bucket/glob_ss/t0.csv'; + +query I +FROM GLOB('s3://test-bucket/glob_ss/*'); +---- +s3://test-bucket/glob_ss/t0.csv + +query I +FROM GLOB('s3://test-bucket/glob_ss/**'); +---- +s3://test-bucket/glob_ss/t0.csv + + + +# and 1 file at depth 1 in dir 'a' +statement ok +COPY mytable TO 's3://test-bucket/glob_ss/a/t0.csv'; + +query I +FROM GLOB('s3://test-bucket/glob_ss/*/t0.csv'); +---- +s3://test-bucket/glob_ss/a/t0.csv + +query I +FROM GLOB('s3://test-bucket/glob_ss/**/t0.csv'); +---- +s3://test-bucket/glob_ss/a/t0.csv +s3://test-bucket/glob_ss/t0.csv + +query I +FROM GLOB('s3://test-bucket/glob_ss/**/*/t0.csv'); +---- +s3://test-bucket/glob_ss/a/t0.csv + +query I +FROM GLOB('s3://test-bucket/glob_ss/*/*/t0.csv'); +---- + +query I +FROM GLOB('s3://test-bucket/glob_ss/**'); +---- +s3://test-bucket/glob_ss/a/t0.csv +s3://test-bucket/glob_ss/t0.csv + +query I +FROM GLOB('s3://test-bucket/glob_ss/**/*'); +---- +s3://test-bucket/glob_ss/a/t0.csv +s3://test-bucket/glob_ss/t0.csv + +query I +FROM GLOB('s3://test-bucket/glob_ss/*/**'); +---- +s3://test-bucket/glob_ss/a/t0.csv + +query I +FROM GLOB('s3://test-bucket/glob_ss/a/**'); +---- +s3://test-bucket/glob_ss/a/t0.csv + +query I +FROM GLOB('s3://test-bucket/glob_ss/*/a/t0.csv'); +---- + +query I +FROM GLOB('s3://test-bucket/glob_ss/**/a/t0.csv'); +---- +s3://test-bucket/glob_ss/a/t0.csv + + + +# add 1 file at depth 2 in dir 'a/b/' +statement ok +COPY mytable TO 's3://test-bucket/glob_ss/a/b/t0.csv'; + +query I +FROM GLOB('s3://test-bucket/glob_ss/*/t0.csv'); +---- +s3://test-bucket/glob_ss/a/t0.csv + +query I +FROM GLOB('s3://test-bucket/glob_ss/*/*/t0.csv'); +---- +s3://test-bucket/glob_ss/a/b/t0.csv + +query I +FROM GLOB('s3://test-bucket/glob_ss/**/t0.csv'); +---- +s3://test-bucket/glob_ss/a/b/t0.csv +s3://test-bucket/glob_ss/a/t0.csv +s3://test-bucket/glob_ss/t0.csv + +query I +FROM GLOB('s3://test-bucket/glob_ss/**/*/t0.csv'); +---- +s3://test-bucket/glob_ss/a/b/t0.csv +s3://test-bucket/glob_ss/a/t0.csv + +query I +FROM GLOB('s3://test-bucket/glob_ss/*/*/*/t0.csv'); +---- + +query I +FROM GLOB('s3://test-bucket/glob_ss/**'); +---- +s3://test-bucket/glob_ss/a/b/t0.csv +s3://test-bucket/glob_ss/a/t0.csv +s3://test-bucket/glob_ss/t0.csv + +query I +FROM GLOB('s3://test-bucket/glob_ss/**/*'); +---- +s3://test-bucket/glob_ss/a/b/t0.csv +s3://test-bucket/glob_ss/a/t0.csv +s3://test-bucket/glob_ss/t0.csv + +query I +FROM GLOB('s3://test-bucket/glob_ss/*/**'); +---- +s3://test-bucket/glob_ss/a/b/t0.csv +s3://test-bucket/glob_ss/a/t0.csv + +query I +FROM GLOB('s3://test-bucket/glob_ss/a/**'); +---- +s3://test-bucket/glob_ss/a/b/t0.csv +s3://test-bucket/glob_ss/a/t0.csv + +query I +FROM GLOB('s3://test-bucket/glob_ss/*/a/t0.csv'); +---- + +query I +FROM GLOB('s3://test-bucket/glob_ss/**/a/t0.csv'); +---- +s3://test-bucket/glob_ss/a/t0.csv + +### these next 3 require 'first_wildcard_pos == string::npos' to not trigger a return +# # sanity check +# query I +# FROM GLOB('s3://test-bucket/glob_ss/partitioned'); +# ---- + +# # sanity check +# query I +# SELECT COUNT(*) FROM GLOB('s3://test-bucket/glob_ss/partitioned'); +# ---- +# 0 + +# # sanity check +# query I +# SELECT COUNT(*) FROM GLOB('s3://test-bucket/glob_ss/partitioned/'); +# ---- +# 0 + +# sanity check +query I +SELECT COUNT(*) FROM 
GLOB('s3://test-bucket/glob_ss/partitioned/*'); +---- +0 + +# partitioned mytable +statement ok +COPY mytable TO 's3://test-bucket/glob_ss/partitioned' (FORMAT PARQUET, PARTITION_BY (a, b)); + +query I +SELECT COUNT(*) FROM GLOB('s3://test-bucket/glob_ss/partitioned/**'); +---- +10 + +query I +SELECT COUNT(*) FROM GLOB('s3://test-bucket/glob_ss/partitioned/*/**'); +---- +10 + +query I +SELECT COUNT(*) FROM GLOB('s3://test-bucket/glob_ss/partitioned/**/*'); +---- +10 + +query I +FROM GLOB('s3://test-bucket/glob_ss/partitioned/**/*.parquet'); +---- +s3://test-bucket/glob_ss/partitioned/a=0/b=0/data_0.parquet +s3://test-bucket/glob_ss/partitioned/a=1/b=2/data_0.parquet +s3://test-bucket/glob_ss/partitioned/a=2/b=4/data_0.parquet +s3://test-bucket/glob_ss/partitioned/a=3/b=6/data_0.parquet +s3://test-bucket/glob_ss/partitioned/a=4/b=8/data_0.parquet +s3://test-bucket/glob_ss/partitioned/a=5/b=10/data_0.parquet +s3://test-bucket/glob_ss/partitioned/a=6/b=12/data_0.parquet +s3://test-bucket/glob_ss/partitioned/a=7/b=14/data_0.parquet +s3://test-bucket/glob_ss/partitioned/a=8/b=16/data_0.parquet +s3://test-bucket/glob_ss/partitioned/a=9/b=18/data_0.parquet + +query I +FROM GLOB('s3://test-bucket/glob_ss/partitioned/**/*2/*.parquet'); +---- +s3://test-bucket/glob_ss/partitioned/a=1/b=2/data_0.parquet +s3://test-bucket/glob_ss/partitioned/a=6/b=12/data_0.parquet + + + +# use multiple ** +query I +FROM GLOB('s3://test-bucket/glob_ss/partitioned/**/*2/**/*.parquet'); +---- +s3://test-bucket/glob_ss/partitioned/a=1/b=2/data_0.parquet +s3://test-bucket/glob_ss/partitioned/a=2/b=4/data_0.parquet +s3://test-bucket/glob_ss/partitioned/a=6/b=12/data_0.parquet + + + +# fun stuff +statement ok +COPY mytable TO 's3://test-bucket/glob_ss/a/b/a/t0.csv'; +COPY mytable TO 's3://test-bucket/glob_ss/a/b/a/b/t0.csv'; +COPY mytable TO 's3://test-bucket/glob_ss/a/b/a/b/a/t0.csv'; +COPY mytable TO 's3://test-bucket/glob_ss/a/b/a/b/a/b/t0.csv'; + +query I +SELECT COUNT(*) FROM GLOB('s3://test-bucket/glob_ss/*/**'); +---- +16 + +query I +SELECT COUNT(*) FROM GLOB('s3://test-bucket/glob_ss/**/*'); +---- +17 + +query I +SELECT COUNT(*) FROM GLOB('s3://test-bucket/glob_ss/**/a/*'); +---- +3 + +query I +SELECT COUNT(*) FROM GLOB('s3://test-bucket/glob_ss/**/b/*'); +---- +3 + +query I +SELECT COUNT(*) FROM GLOB('s3://test-bucket/glob_ss/**/[a-b]/*'); +---- +6 + +statement ok +COPY mytable TO 's3://test-bucket/glob_ss/a/b/a/b/a/b/c/d/e/t0.csv'; + +query I +FROM GLOB('s3://test-bucket/glob_ss/**/b/**/*'); +---- +s3://test-bucket/glob_ss/a/b/a/b/a/b/c/d/e/t0.csv +s3://test-bucket/glob_ss/a/b/a/b/a/b/t0.csv +s3://test-bucket/glob_ss/a/b/a/b/a/t0.csv +s3://test-bucket/glob_ss/a/b/a/b/t0.csv +s3://test-bucket/glob_ss/a/b/a/t0.csv +s3://test-bucket/glob_ss/a/b/t0.csv + +query I +FROM GLOB('s3://test-bucket/glob_ss/**/b*/**/*'); +---- +s3://test-bucket/glob_ss/a/b/a/b/a/b/c/d/e/t0.csv +s3://test-bucket/glob_ss/a/b/a/b/a/b/t0.csv +s3://test-bucket/glob_ss/a/b/a/b/a/t0.csv +s3://test-bucket/glob_ss/a/b/a/b/t0.csv +s3://test-bucket/glob_ss/a/b/a/t0.csv +s3://test-bucket/glob_ss/a/b/t0.csv +s3://test-bucket/glob_ss/partitioned/a=0/b=0/data_0.parquet +s3://test-bucket/glob_ss/partitioned/a=1/b=2/data_0.parquet +s3://test-bucket/glob_ss/partitioned/a=2/b=4/data_0.parquet +s3://test-bucket/glob_ss/partitioned/a=3/b=6/data_0.parquet +s3://test-bucket/glob_ss/partitioned/a=4/b=8/data_0.parquet +s3://test-bucket/glob_ss/partitioned/a=5/b=10/data_0.parquet +s3://test-bucket/glob_ss/partitioned/a=6/b=12/data_0.parquet 
+s3://test-bucket/glob_ss/partitioned/a=7/b=14/data_0.parquet +s3://test-bucket/glob_ss/partitioned/a=8/b=16/data_0.parquet +s3://test-bucket/glob_ss/partitioned/a=9/b=18/data_0.parquet + +query I +FROM GLOB('s3://test-bucket/glob_ss/**/b*/*/**/*'); +---- +s3://test-bucket/glob_ss/a/b/a/b/a/b/c/d/e/t0.csv +s3://test-bucket/glob_ss/a/b/a/b/a/b/t0.csv +s3://test-bucket/glob_ss/a/b/a/b/a/t0.csv +s3://test-bucket/glob_ss/a/b/a/b/t0.csv +s3://test-bucket/glob_ss/a/b/a/t0.csv + +query I +FROM GLOB('s3://test-bucket/glob_ss/**/a*/b*/*/**/*'); +---- +s3://test-bucket/glob_ss/a/b/a/b/a/b/c/d/e/t0.csv +s3://test-bucket/glob_ss/a/b/a/b/a/b/t0.csv +s3://test-bucket/glob_ss/a/b/a/b/a/t0.csv +s3://test-bucket/glob_ss/a/b/a/b/t0.csv +s3://test-bucket/glob_ss/a/b/a/t0.csv + +query I +FROM GLOB('s3://test-bucket/glob_ss/**/a*/b*/**/*'); +---- +s3://test-bucket/glob_ss/a/b/a/b/a/b/c/d/e/t0.csv +s3://test-bucket/glob_ss/a/b/a/b/a/b/t0.csv +s3://test-bucket/glob_ss/a/b/a/b/a/t0.csv +s3://test-bucket/glob_ss/a/b/a/b/t0.csv +s3://test-bucket/glob_ss/a/b/a/t0.csv +s3://test-bucket/glob_ss/a/b/t0.csv +s3://test-bucket/glob_ss/partitioned/a=0/b=0/data_0.parquet +s3://test-bucket/glob_ss/partitioned/a=1/b=2/data_0.parquet +s3://test-bucket/glob_ss/partitioned/a=2/b=4/data_0.parquet +s3://test-bucket/glob_ss/partitioned/a=3/b=6/data_0.parquet +s3://test-bucket/glob_ss/partitioned/a=4/b=8/data_0.parquet +s3://test-bucket/glob_ss/partitioned/a=5/b=10/data_0.parquet +s3://test-bucket/glob_ss/partitioned/a=6/b=12/data_0.parquet +s3://test-bucket/glob_ss/partitioned/a=7/b=14/data_0.parquet +s3://test-bucket/glob_ss/partitioned/a=8/b=16/data_0.parquet +s3://test-bucket/glob_ss/partitioned/a=9/b=18/data_0.parquet + +query I +SELECT COUNT(*) FROM GLOB('s3://test-bucket/glob_ss/**'); +---- +18 diff --git a/test/sql/copy/s3/upload_file_parallel.test_slow b/test/sql/copy/s3/upload_file_parallel.test_slow new file mode 100644 index 0000000..dc5086a --- /dev/null +++ b/test/sql/copy/s3/upload_file_parallel.test_slow @@ -0,0 +1,122 @@ +# name: test/sql/copy/s3/upload_file_parallel.test_slow +# description: Copy large parquet files from and to S3 in parallel. 
+# group: [s3] + +require tpch + +require parquet + +require httpfs + +require-env S3_TEST_SERVER_AVAILABLE 1 + +# Require that these environment variables are also set + +require-env AWS_DEFAULT_REGION + +require-env AWS_ACCESS_KEY_ID + +require-env AWS_SECRET_ACCESS_KEY + +require-env DUCKDB_S3_ENDPOINT + +require-env DUCKDB_S3_USE_SSL + +# override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues +set ignore_error_messages + +statement ok +CALL DBGEN(sf=1) + +statement ok +set http_timeout=120000; + +# More retries (longest wait will be 25600ms) +statement ok +set http_retries=6; + +query I +SELECT + sum(l_extendedprice * l_discount) AS revenue +FROM + lineitem +WHERE + l_shipdate >= CAST('1994-01-01' AS date) + AND l_shipdate < CAST('1995-01-01' AS date) + AND l_discount BETWEEN 0.05 + AND 0.07 + AND l_quantity < 24; +---- +123141078.2283 + +# We do this in parallel to also test synchronization of s3fs between 2 connections +concurrentloop threadid 0 2 + +statement ok +SET s3_endpoint='${DUCKDB_S3_ENDPOINT}';SET s3_use_ssl=${DUCKDB_S3_USE_SSL}; + +# Parquet file +statement ok +COPY lineitem TO 's3://test-bucket/multipart/export_large_${threadid}.parquet' (FORMAT 'parquet'); + +query I +SELECT + sum(l_extendedprice * l_discount) AS revenue +FROM + "s3://test-bucket/multipart/export_large_${threadid}.parquet" +WHERE + l_shipdate >= CAST('1994-01-01' AS date) + AND l_shipdate < CAST('1995-01-01' AS date) + AND l_discount BETWEEN 0.05 + AND 0.07 + AND l_quantity < 24; +---- +123141078.2283 + +endloop + +statement ok +CALL dbgen(sf=0.01, suffix='_small'); + +query I +SELECT + sum(l_extendedprice * l_discount) AS revenue +FROM + lineitem_small +WHERE + l_shipdate >= CAST('1994-01-01' AS date) + AND l_shipdate < CAST('1995-01-01' AS date) + AND l_discount BETWEEN 0.05 + AND 0.07 + AND l_quantity < 24; +---- +1193053.2253 + +# Upload and query 100 tiny files in parallel +concurrentloop threadid 0 100 + +statement ok +SET s3_secret_access_key='${AWS_SECRET_ACCESS_KEY}';SET s3_access_key_id='${AWS_ACCESS_KEY_ID}';SET s3_region='${AWS_DEFAULT_REGION}'; SET s3_endpoint='${DUCKDB_S3_ENDPOINT}';SET s3_use_ssl=${DUCKDB_S3_USE_SSL}; + +statement ok +SET s3_uploader_thread_limit=1 + +# Parquet file +statement ok +COPY lineitem_small TO 's3://test-bucket/multipart/export_small_${threadid}.parquet' (FORMAT 'parquet'); + +query I +SELECT + sum(l_extendedprice * l_discount) AS revenue +FROM + "s3://test-bucket/multipart/export_small_${threadid}.parquet" +WHERE + l_shipdate >= CAST('1994-01-01' AS date) + AND l_shipdate < CAST('1995-01-01' AS date) + AND l_discount BETWEEN 0.05 + AND 0.07 + AND l_quantity < 24; +---- +1193053.2253 + +endloop diff --git a/test/sql/copy/s3/upload_large_file.test_slow b/test/sql/copy/s3/upload_large_file.test_slow new file mode 100644 index 0000000..12a07c3 --- /dev/null +++ b/test/sql/copy/s3/upload_large_file.test_slow @@ -0,0 +1,79 @@ +# name: test/sql/copy/s3/upload_large_file.test_slow +# description: Copy large csv/parquet files from and to S3. 
+# group: [s3] + +require tpch + +require parquet + +require httpfs + +require-env S3_TEST_SERVER_AVAILABLE 1 + +# Require that these environment variables are also set + +require-env AWS_DEFAULT_REGION + +require-env AWS_ACCESS_KEY_ID + +require-env AWS_SECRET_ACCESS_KEY + +require-env DUCKDB_S3_ENDPOINT + +require-env DUCKDB_S3_USE_SSL + +# override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues +set ignore_error_messages + +# confirm we use a reasonable amount of memory +statement ok +SET memory_limit='2.5GB'; + +statement ok +set http_timeout=120000; + +# More retries (longest wait will be 25600ms) +statement ok +set http_retries=6; + +# disable tmp dir to force OOM if we exceed our set limit +statement ok +PRAGMA temp_directory='' + +statement ok +SET s3_uploader_thread_limit = 5; + +statement ok +CALL DBGEN(sf=1) + +query I +SELECT + sum(l_extendedprice * l_discount) AS revenue +FROM + lineitem +WHERE + l_shipdate >= CAST('1994-01-01' AS date) + AND l_shipdate < CAST('1995-01-01' AS date) + AND l_discount BETWEEN 0.05 + AND 0.07 + AND l_quantity < 24; +---- +123141078.2283 + +# Parquet file ~300MB +statement ok +COPY lineitem TO 's3://test-bucket/multipart/export_large.parquet' (FORMAT 'parquet'); + +query I +SELECT + sum(l_extendedprice * l_discount) AS revenue +FROM + "s3://test-bucket/multipart/export_large.parquet" +WHERE + l_shipdate >= CAST('1994-01-01' AS date) + AND l_shipdate < CAST('1995-01-01' AS date) + AND l_discount BETWEEN 0.05 + AND 0.07 + AND l_quantity < 24; +---- +123141078.2283 diff --git a/test/sql/copy/s3/upload_large_json_file.test_slow b/test/sql/copy/s3/upload_large_json_file.test_slow new file mode 100644 index 0000000..b7d7741 --- /dev/null +++ b/test/sql/copy/s3/upload_large_json_file.test_slow @@ -0,0 +1,87 @@ +# name: test/sql/copy/s3/upload_large_json_file.test_slow +# description: Copy large json files from and to S3. 
+# group: [s3] + +require tpch + +require json + +require parquet + +require httpfs + +require-env S3_TEST_SERVER_AVAILABLE 1 + +# Require that these environment variables are also set + +require-env AWS_DEFAULT_REGION + +require-env AWS_ACCESS_KEY_ID + +require-env AWS_SECRET_ACCESS_KEY + +require-env DUCKDB_S3_ENDPOINT + +require-env DUCKDB_S3_USE_SSL + +# override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues +set ignore_error_messages + +statement ok +set http_timeout=120000; + +# More retries (longest wait will be 25600ms) +statement ok +set http_retries=6; + +statement ok +CALL DBGEN(sf=0.1) + +query I +SELECT + sum(l_extendedprice * l_discount) AS revenue +FROM + lineitem +WHERE + l_shipdate >= CAST('1994-01-01' AS date) + AND l_shipdate < CAST('1995-01-01' AS date) + AND l_discount BETWEEN 0.05 + AND 0.07 + AND l_quantity < 24; +---- +11803420.2534 + +statement ok +COPY lineitem TO 's3://test-bucket/multipart/export_large.json' (FORMAT 'json'); + +query I +SELECT + sum(l_extendedprice * l_discount) AS revenue +FROM + "s3://test-bucket/multipart/export_large.json" +WHERE + l_shipdate >= CAST('1994-01-01' AS date) + AND l_shipdate < CAST('1995-01-01' AS date) + AND l_discount BETWEEN 0.05 + AND 0.07 + AND l_quantity < 24; +---- +11803420.2534 + +# This query triggers an edge case where we apply an S3-specific optimization using multiple cached filehandles +query I +SELECT + sum(l_extendedprice * l_discount)/3 AS revenue +FROM + read_json_auto([ + 's3://test-bucket/multipart/export_large.json', + 's3://test-bucket/multipart/export_large.json', + 's3://test-bucket/multipart/export_large.json',]) +WHERE + l_shipdate >= CAST('1994-01-01' AS date) + AND l_shipdate < CAST('1995-01-01' AS date) + AND l_discount BETWEEN 0.05 + AND 0.07 + AND l_quantity < 24; +---- +11803420.2534 \ No newline at end of file diff --git a/test/sql/copy/s3/upload_small_file.test b/test/sql/copy/s3/upload_small_file.test new file mode 100644 index 0000000..b2e8cb3 --- /dev/null +++ b/test/sql/copy/s3/upload_small_file.test @@ -0,0 +1,77 @@ +# name: test/sql/copy/s3/upload_small_file.test +# description: Copy small csv/parquet files from and to S3. 
+# group: [s3] + +require parquet + +require httpfs + +require-env S3_TEST_SERVER_AVAILABLE 1 + +# Require that these environment variables are also set + +require-env AWS_DEFAULT_REGION + +require-env AWS_ACCESS_KEY_ID + +require-env AWS_SECRET_ACCESS_KEY + +require-env DUCKDB_S3_ENDPOINT + +require-env DUCKDB_S3_USE_SSL + +# override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues +set ignore_error_messages + +statement ok +CREATE TABLE web_page as (SELECT * FROM "data/csv/real/web_page.csv"); + +query IIIIIIIIIIIIII +SELECT * FROM web_page LIMIT 10; +---- +1 AAAAAAAABAAAAAAA 1997-09-03 NULL 2450810 2452620 Y 98539 http://www.foo.com welcome 2531 8 3 4 +2 AAAAAAAACAAAAAAA 1997-09-03 2000-09-02 2450814 2452580 N NULL http://www.foo.com protected 1564 4 3 1 +3 AAAAAAAACAAAAAAA 2000-09-03 NULL 2450814 2452611 N NULL http://www.foo.com feedback 1564 4 3 4 +4 AAAAAAAAEAAAAAAA 1997-09-03 1999-09-03 2450812 2452579 N NULL http://www.foo.com general 3732 18 7 1 +5 AAAAAAAAEAAAAAAA 1999-09-04 2001-09-02 2450812 2452597 N NULL http://www.foo.com welcome 3732 18 3 1 +6 AAAAAAAAEAAAAAAA 2001-09-03 NULL 2450814 2452597 N NULL http://www.foo.com ad 3732 18 7 4 +7 AAAAAAAAHAAAAAAA 1997-09-03 NULL 2450815 2452574 N NULL http://www.foo.com feedback 3034 18 7 4 +8 AAAAAAAAIAAAAAAA 1997-09-03 2000-09-02 2450815 2452646 Y 1898 http://www.foo.com protected 3128 12 2 4 +9 AAAAAAAAIAAAAAAA 2000-09-03 NULL 2450807 2452579 Y 84146 http://www.foo.com welcome 3128 13 5 3 +10 AAAAAAAAKAAAAAAA 1997-09-03 1999-09-03 NULL 2452623 N NULL http://www.foo.com NULL NULL NULL NULL NULL + +# Parquet file +statement ok +COPY web_page TO 's3://test-bucket/multipart/web_page.parquet' (FORMAT 'parquet'); + +query IIIIIIIIIIIIII +SELECT * FROM "s3://test-bucket/multipart/web_page.parquet" LIMIT 10; +---- +1 AAAAAAAABAAAAAAA 1997-09-03 NULL 2450810 2452620 Y 98539 http://www.foo.com welcome 2531 8 3 4 +2 AAAAAAAACAAAAAAA 1997-09-03 2000-09-02 2450814 2452580 N NULL http://www.foo.com protected 1564 4 3 1 +3 AAAAAAAACAAAAAAA 2000-09-03 NULL 2450814 2452611 N NULL http://www.foo.com feedback 1564 4 3 4 +4 AAAAAAAAEAAAAAAA 1997-09-03 1999-09-03 2450812 2452579 N NULL http://www.foo.com general 3732 18 7 1 +5 AAAAAAAAEAAAAAAA 1999-09-04 2001-09-02 2450812 2452597 N NULL http://www.foo.com welcome 3732 18 3 1 +6 AAAAAAAAEAAAAAAA 2001-09-03 NULL 2450814 2452597 N NULL http://www.foo.com ad 3732 18 7 4 +7 AAAAAAAAHAAAAAAA 1997-09-03 NULL 2450815 2452574 N NULL http://www.foo.com feedback 3034 18 7 4 +8 AAAAAAAAIAAAAAAA 1997-09-03 2000-09-02 2450815 2452646 Y 1898 http://www.foo.com protected 3128 12 2 4 +9 AAAAAAAAIAAAAAAA 2000-09-03 NULL 2450807 2452579 Y 84146 http://www.foo.com welcome 3128 13 5 3 +10 AAAAAAAAKAAAAAAA 1997-09-03 1999-09-03 NULL 2452623 N NULL http://www.foo.com NULL NULL NULL NULL NULL + +# CSV file +statement ok +COPY web_page TO 's3://test-bucket/multipart/web_page.csv'; + +query IIIIIIIIIIIIII +SELECT * FROM "s3://test-bucket/multipart/web_page.csv" LIMIT 10; +---- +1 AAAAAAAABAAAAAAA 1997-09-03 NULL 2450810 2452620 Y 98539 http://www.foo.com welcome 2531 8 3 4 +2 AAAAAAAACAAAAAAA 1997-09-03 2000-09-02 2450814 2452580 N NULL http://www.foo.com protected 1564 4 3 1 +3 AAAAAAAACAAAAAAA 2000-09-03 NULL 2450814 2452611 N NULL http://www.foo.com feedback 1564 4 3 4 +4 AAAAAAAAEAAAAAAA 1997-09-03 1999-09-03 2450812 2452579 N NULL http://www.foo.com general 3732 18 7 1 +5 AAAAAAAAEAAAAAAA 1999-09-04 2001-09-02 2450812 2452597 N NULL http://www.foo.com welcome 
3732 18 3 1 +6 AAAAAAAAEAAAAAAA 2001-09-03 NULL 2450814 2452597 N NULL http://www.foo.com ad 3732 18 7 4 +7 AAAAAAAAHAAAAAAA 1997-09-03 NULL 2450815 2452574 N NULL http://www.foo.com feedback 3034 18 7 4 +8 AAAAAAAAIAAAAAAA 1997-09-03 2000-09-02 2450815 2452646 Y 1898 http://www.foo.com protected 3128 12 2 4 +9 AAAAAAAAIAAAAAAA 2000-09-03 NULL 2450807 2452579 Y 84146 http://www.foo.com welcome 3128 13 5 3 +10 AAAAAAAAKAAAAAAA 1997-09-03 1999-09-03 NULL 2452623 N NULL http://www.foo.com NULL NULL NULL NULL NULL diff --git a/test/sql/copy/s3/url_encode.test b/test/sql/copy/s3/url_encode.test new file mode 100644 index 0000000..66cbd5c --- /dev/null +++ b/test/sql/copy/s3/url_encode.test @@ -0,0 +1,145 @@ +# name: test/sql/copy/s3/url_encode.test +# description: S3 Url encoding +# group: [s3] + +require parquet + +require httpfs + +require-env S3_TEST_SERVER_AVAILABLE 1 + +# Require that these environment variables are also set + +require-env AWS_DEFAULT_REGION + +require-env AWS_ACCESS_KEY_ID + +require-env AWS_SECRET_ACCESS_KEY + +require-env DUCKDB_S3_ENDPOINT + +require-env DUCKDB_S3_USE_SSL + +# override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues +set ignore_error_messages + +statement ok +CREATE TABLE test_1 as (SELECT 1 FROM range(0,5)); +CREATE TABLE test_2 as (SELECT 2 FROM range(0,5)); +CREATE TABLE test_3 as (SELECT 3 FROM range(0,5)); +CREATE TABLE test_4 as (SELECT 4 FROM range(0,5)); + +foreach prefix s3:// r2:// s3a:// s3n:// + +statement ok +COPY test_1 TO '${prefix}test-bucket-public/url_encode/just because you can doesnt mean you should.parquet' (FORMAT 'parquet'); + +statement ok +COPY test_2 TO '${prefix}test-bucket-public/url_encode/just+dont+use+plus+or+spaces+please.parquet' (FORMAT 'parquet'); + +statement ok +COPY test_3 TO '${prefix}test-bucket-public/url_encode/should:avoid:using:colon:in:paths.parquet' (FORMAT 'parquet'); + +# For S3 urls spaces are fine +query I +SELECT * FROM "${prefix}test-bucket-public/url_encode/just because you can doesnt mean you should.parquet" LIMIT 1; +---- +1 + +# In S3 urls, + means a plus symbol +query I +SELECT * FROM "${prefix}test-bucket-public/url_encode/just+dont+use+plus+or+spaces+please.parquet" LIMIT 1; +---- +2 + +# Colons in S3 urls are encoded by duckdb internaly like boto3 (issue #5502) +query I +SELECT * FROM "${prefix}test-bucket-public/url_encode/should:avoid:using:colon:in:paths.parquet" LIMIT 1; +---- +3 + +# NOTE! For HTTP(s) urls, the + symbol is not encoded by duckdb, leaving it up to the server to decide if it should be interpreted +# as a space or a plus. 
In the case of AWS S3, they are interpreted as encoded spaces, however Minio does not +#query I +#SELECT * FROM "http://test-bucket-public.${DUCKDB_S3_ENDPOINT}/url_encode/just+because+you+can+doesnt+mean+you+should.parquet" LIMIT 1; +#---- +#1 + +# For HTTP urls, we also allow regular spaces, which will get encoded to %20 by duckdb +query I +SELECT * FROM "http://test-bucket-public.${DUCKDB_S3_ENDPOINT}/url_encode/just because you can doesnt mean you should.parquet" LIMIT 1; +---- +1 + +# For HTTP urls from AWS with + symbols, encoding them with %2B is required +query I +SELECT * FROM "http://test-bucket-public.${DUCKDB_S3_ENDPOINT}/url_encode/just%2Bdont%2Buse%2Bplus%2Bor%2Bspaces%2Bplease.parquet" LIMIT 1; +---- +2 + +# However Minio interprets them as spaces so this works too +query I +SELECT * FROM "http://test-bucket-public.${DUCKDB_S3_ENDPOINT}/url_encode/just+dont+use+plus+or+spaces+please.parquet" LIMIT 1; +---- +2 + +# Due to our support for query parameters, this will fail +statement error +COPY test_4 TO '${prefix}test-bucket-public/url_encode/question?marks?are?even?worse.parquet' (FORMAT 'parquet'); +---- +Invalid query parameters found. + +# Enabling url compatibility mode will disable both Globs and query params +# allowing a user to query those hard-to-reach files +statement ok +SET s3_url_compatibility_mode=true; + +statement ok +COPY test_4 TO '${prefix}test-bucket-public/url_encode/question?marks?and*stars[and]brackets.parquet' (FORMAT 'parquet'); + +query I +SELECT * FROM "${prefix}test-bucket-public/url_encode/question?marks?and*stars[and]brackets.parquet" LIMIT 1; +---- +4 + +# HTTP urls will be encoded here +query I +SELECT * FROM "http://test-bucket-public.${DUCKDB_S3_ENDPOINT}/url_encode/question%3Fmarks%3Fand%2Astars%5Band%5Dbrackets.parquet" LIMIT 1; +---- +4 + +statement ok +SET s3_url_compatibility_mode=false; + +# Check that the generated urls are correct +statement ok +set s3_endpoint='s3.some.random.endpoint.com'; + +statement error +SELECT * FROM '${prefix}test-bucket/whatever.parquet'; +---- +Could not establish connection error for HTTP HEAD to 'http://test-bucket.s3.some.random.endpoint.com/whatever.parquet' + +statement ok +set s3_endpoint='${DUCKDB_S3_ENDPOINT}' + +endloop + +# Check that the generated urls are correct for an empty endpoint +statement ok +set s3_endpoint=''; + +statement error +SELECT * FROM 's3://test-bucket/whatever.parquet'; +---- +:.*Unknown error for HTTP HEAD to 'http://test-bucket.s3.eu-west-1.amazonaws.com/whatever.parquet'.* + +statement error +SELECT * FROM 'r2://test-bucket/whatever.parquet'; +---- +:.*Unknown error for HTTP HEAD to 'http://test-bucket.s3.eu-west-1.amazonaws.com/whatever.parquet'.* + +statement error +SELECT * FROM 'gcs://test-bucket/whatever.parquet'; +---- +HTTP GET error on 'http://storage.googleapis.com/test-bucket/whatever.parquet' diff --git a/test/sql/copy/test_remote_head_forbidden.test b/test/sql/copy/test_remote_head_forbidden.test new file mode 100644 index 0000000..67d2e14 --- /dev/null +++ b/test/sql/copy/test_remote_head_forbidden.test @@ -0,0 +1,10 @@ +# name: test/sql/copy/test_remote_head_forbidden.test +# description: Test Force download with server that doesn't want to give us the head +# group: [copy] + +require httpfs + +require json + +statement ok +FROM read_json('https://api.spring.io/projects/spring-boot/generations') diff --git a/test/sql/delete/test_issue_1834.test_slow b/test/sql/delete/test_issue_1834.test_slow new file mode 100644 index 0000000..029f6a7 --- /dev/null +++ 
b/test/sql/delete/test_issue_1834.test_slow @@ -0,0 +1,26 @@ +# name: test/sql/delete/test_issue_1834.test_slow +# description: Deleting with DELETE USING causes a segmentation fault +# group: [delete] + +require httpfs + +statement ok +CREATE TABLE Person_likes_Comment (creationDate timestamp without time zone not null, id bigint not null, likes_Comment bigint not null); + +statement ok +CREATE TABLE Person_Delete_candidates (deletionDate timestamp without time zone not null, id bigint); + +statement ok +COPY Person_likes_Comment FROM 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/Person_likes_Comment.csv' (DELIMITER '|', TIMESTAMPFORMAT '%Y-%m-%dT%H:%M:%S.%g+00:00'); + +statement ok +COPY Person_Delete_candidates FROM 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/Person_Delete_candidates.csv' (DELIMITER '|', HEADER, TIMESTAMPFORMAT '%Y-%m-%dT%H:%M:%S.%g+00:00'); + +statement ok +DELETE FROM Person_likes_Comment USING Person_Delete_candidates WHERE Person_Delete_candidates.id = Person_likes_Comment.id; + +# all tuples fulfilling this predicate should have been deleted +query I +SELECT COUNT(*) FROM Person_likes_Comment, Person_Delete_candidates WHERE Person_Delete_candidates.id = Person_likes_Comment.id; +---- +0 diff --git a/test/sql/extensions/version_is_valid_httpfs.test b/test/sql/extensions/version_is_valid_httpfs.test new file mode 100644 index 0000000..4fc2319 --- /dev/null +++ b/test/sql/extensions/version_is_valid_httpfs.test @@ -0,0 +1,21 @@ +# name: test/sql/extensions/version_is_valid_httpfs.test +# description: Test version metadata on load +# group: [extensions] + +require-env LOCAL_EXTENSION_REPO + +require httpfs + +statement ok +SET autoinstall_known_extensions=true; + +statement ok +SET autoload_known_extensions=true; + +statement ok +SET enable_server_cert_verification = true; + +query I +SELECT count(*) FROM duckdb_extensions() WHERE extension_version != '' AND extension_name == 'httpfs'; +---- +1 diff --git a/test/sql/httpfs/hffs.test b/test/sql/httpfs/hffs.test new file mode 100644 index 0000000..82231fb --- /dev/null +++ b/test/sql/httpfs/hffs.test @@ -0,0 +1,42 @@ +# name: test/sql/httpfs/hffs.test +# description: Ensure the HuggingFace filesystem works as expected +# group: [httpfs] + +require parquet + +require httpfs + +statement error +FROM parquet_scan('hf://') +---- +IO Error: Failed to parse 'hf://'. Please format url like: 'hf://datasets/my-username/my-dataset/path/to/file.parquet' + +statement error +FROM 'hf://file.parquet' +---- +IO Error: Failed to parse 'hf://file.parquet'. Please format url like: 'hf://datasets/my-username/my-dataset/path/to/file.parquet' + +statement error +FROM 'hf://yepthisdoesntwork/file.parquet' +---- +IO Error: Failed to parse: 'hf://yepthisdoesntwork/file.parquet'. Currently DuckDB only supports querying datasets or spaces, so the url should start with 'hf://datasets' or 'hf://spaces' + +statement error +FROM 'hf://stil/not/file.parquet' +---- +IO Error: Failed to parse: 'hf://stil/not/file.parquet'. Currently DuckDB only supports querying datasets or spaces, so the url should start with 'hf://datasets' or 'hf://spaces' + +statement error +FROM 'hf://datasets/file.parquet' +---- +IO Error: Failed to parse 'hf://datasets/file.parquet'. Please format url like: 'hf://datasets/my-username/my-dataset/path/to/file.parquet' + +statement error +FROM 'hf://datasets/myname/file.parquet' +---- +IO Error: Failed to parse 'hf://datasets/myname/file.parquet'. 
Please format url like: 'hf://datasets/my-username/my-dataset/path/to/file.parquet' + +statement error +FROM 'hf://datasets/**/file.parquet' +---- +IO Error: Failed to parse 'hf://datasets/**/file.parquet'. Please format url like: 'hf://datasets/my-username/my-dataset/path/to/file.parquet' diff --git a/test/sql/httpfs/hffs.test_slow b/test/sql/httpfs/hffs.test_slow new file mode 100644 index 0000000..5870366 --- /dev/null +++ b/test/sql/httpfs/hffs.test_slow @@ -0,0 +1,180 @@ +# name: test/sql/httpfs/hffs.test_slow +# description: Ensure the HuggingFace filesystem works as expected +# group: [httpfs] + +require parquet + +require httpfs + +# FIXME: currently this will not fail the Linux HTTPFS ci job if it fails, because it might do so due to networking issues +# however having a CI job dedicated to remote tests that may spuriously fail would solve this + +# Non existent repos get a 401 +statement error +FROM parquet_scan('hf://datasets/samansmink/non-existent/*.parquet'); +---- + +# Globbing non-existent repo is also 401 +statement error +FROM parquet_scan('hf://datasets/samansmink/non-existent/**/*.parquet'); +---- + +query III rowsort +FROM parquet_scan('hf://datasets/samansmink/duckdb_ci_tests/hive_data/**/*.parquet', FILENAME=1, hive_partitioning=0); +---- +1 value1 hf://datasets/samansmink/duckdb_ci_tests/hive_data/part=a/date=2012-01-01/test.parquet +2 value2 hf://datasets/samansmink/duckdb_ci_tests/hive_data/part=b/date=2013-01-01/test.parquet + + +query III rowsort +FROM parquet_scan('hf://datasets/samansmink/duckdb_ci_tests/hive_data/*/*/**/*.parquet', FILENAME=1, hive_partitioning=0); +---- +1 value1 hf://datasets/samansmink/duckdb_ci_tests/hive_data/part=a/date=2012-01-01/test.parquet +2 value2 hf://datasets/samansmink/duckdb_ci_tests/hive_data/part=b/date=2013-01-01/test.parquet + +query III rowsort +FROM parquet_scan('hf://datasets/samansmink/duckdb_ci_tests/hive_data/part=[ab]/**/*.parquet', FILENAME=1, hive_partitioning=0); +---- +1 value1 hf://datasets/samansmink/duckdb_ci_tests/hive_data/part=a/date=2012-01-01/test.parquet +2 value2 hf://datasets/samansmink/duckdb_ci_tests/hive_data/part=b/date=2013-01-01/test.parquet + +# This ensures the next query is forced to use pagination, testing our support for it +statement ok +set hf_max_per_page=1; + +query III rowsort +FROM parquet_scan('hf://datasets/samansmink/duckdb_ci_tests/hive_data/part=[b]/**/*.parquet', FILENAME=1, hive_partitioning=0); +---- +2 value2 hf://datasets/samansmink/duckdb_ci_tests/hive_data/part=b/date=2013-01-01/test.parquet + +statement ok +reset hf_max_per_page; + +# Ensure we only open 1 of the files here to confirm filter pushdown has eliminated the other paths +query II rowsort +explain analyze SELECT id, part FROM parquet_scan('hf://datasets/samansmink/duckdb_ci_tests/hive_data/**/*.parquet') where part='a'; +---- +analyzed_plan :.*HTTP Stats.*\#HEAD\: 1 .* + +statement ok +set hf_max_per_page=1; + +# Branches can be specified, including the special branch types with '~' +query III rowsort +FROM parquet_scan('hf://datasets/samansmink/duckdb_ci_tests@~parquet/**/*.parquet', FILENAME=1, hive_partitioning=0); +---- +1 value1 hf://datasets/samansmink/duckdb_ci_tests@~parquet/default/test/0000.parquet +2 value2 hf://datasets/samansmink/duckdb_ci_tests@~parquet/default/test/0001.parquet + +# Secret provider 'config' (default) allows setting the token directly +statement ok +CREATE SECRET hf_token (TYPE HUGGINGFACE, token 'some_hf_token'); + +# Secret provider 'credential chain' scans several places for a 
token +statement ok +CREATE SECRET hf_token_from_credential_chain (TYPE HUGGINGFACE, PROVIDER credential_chain); + +statement ok +DROP SECRET hf_token + +statement ok +DROP SECRET hf_token_from_credential_chain + +# Private bucket is not allowed without credentials +statement error +FROM parquet_scan('hf://datasets/samansmink/duckdb_ci_private/hive_data/**/*.parquet', FILENAME=1, hive_partitioning=0); +---- +401 + +# Ensure spaces work too +query I +select size from read_text('hf://spaces/samansmink/duckdb_ci_tests/README.md'); +---- +199 + +# FIXME: push auth key into CI for this to ensure it is tested in CI properly +require-env HUGGING_FACE_TOKEN + +statement ok +CREATE SECRET hf1 (TYPE HUGGINGFACE, TOKEN '${HUGGING_FACE_TOKEN}'); + +query III rowsort +FROM parquet_scan('hf://datasets/samansmink/duckdb_ci_private/hive_data/**/*.parquet', FILENAME=1, hive_partitioning=0); +---- +1 value1 hf://datasets/samansmink/duckdb_ci_private/hive_data/part=a/date=2012-01-01/test.parquet +2 value2 hf://datasets/samansmink/duckdb_ci_private/hive_data/part=b/date=2013-01-01/test.parquet + +statement ok +DROP SECRET hf1 + +# Same can be achieved with an http secret setting the bearer token manually + +statement ok +CREATE SECRET http1 (TYPE HTTP, BEARER_TOKEN '${HUGGING_FACE_TOKEN}'); + +query III rowsort +FROM parquet_scan('hf://datasets/samansmink/duckdb_ci_private/hive_data/**/*.parquet', FILENAME=1, hive_partitioning=0); +---- +1 value1 hf://datasets/samansmink/duckdb_ci_private/hive_data/part=a/date=2012-01-01/test.parquet +2 value2 hf://datasets/samansmink/duckdb_ci_private/hive_data/part=b/date=2013-01-01/test.parquet + +statement ok +DROP SECRET http1 + +# Note that the huggingface secret takes precedence over the http secret + +statement ok +CREATE SECRET hf2 (TYPE HUGGINGFACE, TOKEN '${HUGGING_FACE_TOKEN}'); + +statement ok +CREATE SECRET http2 (TYPE HTTP, BEARER_TOKEN 'hocus pocus this token is bogus'); + +# Works because hf secret is selected +query III rowsort +FROM parquet_scan('hf://datasets/samansmink/duckdb_ci_private/hive_data/**/*.parquet', FILENAME=1, hive_partitioning=0); +---- +1 value1 hf://datasets/samansmink/duckdb_ci_private/hive_data/part=a/date=2012-01-01/test.parquet +2 value2 hf://datasets/samansmink/duckdb_ci_private/hive_data/part=b/date=2013-01-01/test.parquet + +statement ok +DROP SECRET hf2; + +# the http secret does not work +statement error +FROM parquet_scan('hf://datasets/samansmink/duckdb_ci_private/hive_data/**/*.parquet', FILENAME=1, hive_partitioning=0); +---- +401 + +statement ok +DROP SECRET http2 + +# Finally we can also manually set the bearer token header +statement ok +CREATE SECRET http3 ( + TYPE HTTP, + EXTRA_HTTP_HEADERS MAP{ + 'Authorization': 'Bearer ${HUGGING_FACE_TOKEN}', + } +); + +# Works because hf secret is selected +query III rowsort +FROM parquet_scan('hf://datasets/samansmink/duckdb_ci_private/hive_data/**/*.parquet', FILENAME=1, hive_partitioning=0); +---- +1 value1 hf://datasets/samansmink/duckdb_ci_private/hive_data/part=a/date=2012-01-01/test.parquet +2 value2 hf://datasets/samansmink/duckdb_ci_private/hive_data/part=b/date=2013-01-01/test.parquet + +statement ok +DROP SECRET http3 + +# FIXME: test this from CI as well +require-env HUGGING_FACE_TOKEN_IN_CACHE + +statement ok +CREATE SECRET hf1 (TYPE HUGGINGFACE, PROVIDER credential_chain); + +query III rowsort +FROM parquet_scan('hf://datasets/samansmink/duckdb_ci_private/hive_data/**/*.parquet', FILENAME=1, hive_partitioning=0); +---- +1 value1 
hf://datasets/samansmink/duckdb_ci_private/hive_data/part=a/date=2012-01-01/test.parquet +2 value2 hf://datasets/samansmink/duckdb_ci_private/hive_data/part=b/date=2013-01-01/test.parquet diff --git a/test/sql/httpfs/internal_issue_2490.test b/test/sql/httpfs/internal_issue_2490.test new file mode 100644 index 0000000..4aa11f7 --- /dev/null +++ b/test/sql/httpfs/internal_issue_2490.test @@ -0,0 +1,10 @@ +# name: test/sql/httpfs/internal_issue_2490.test +# description: Internal issue 2490 - Wrong URL encoding leads to 404 for redirects with httplib v0.14.3 +# group: [httpfs] + +require httpfs + +require parquet + +statement ok +FROM 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/us+er+da+ta.parquet' LIMIT 1; diff --git a/test/sql/json/table/read_json.test b/test/sql/json/table/read_json.test new file mode 100644 index 0000000..8ece129 --- /dev/null +++ b/test/sql/json/table/read_json.test @@ -0,0 +1,414 @@ +# name: test/sql/json/table/read_json.test +# description: Read json files straight to columnar data +# group: [table] + +require json + +statement ok +pragma enable_verification + +statement error +SELECT * FROM read_json('data/json/example_n.ndjson', auto_detect=false) +---- +Binder Error + +# can't read ndjson with array +statement error +SELECT * FROM read_json('data/json/example_n.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'}, format='array') +---- +Invalid Input Error: Expected top-level JSON array + +# read_ndjson works +query II +SELECT * FROM read_ndjson('data/json/example_n.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'}) +---- +1 O Brother, Where Art Thou? +2 Home for the Holidays +3 The Firm +4 Broadcast News +5 Raising Arizona + +# We can also read only one of the columns +query I +SELECT * FROM read_ndjson('data/json/example_n.ndjson', columns={id: 'INTEGER'}) +---- +1 +2 +3 +4 +5 + +query I +SELECT * FROM read_ndjson('data/json/example_n.ndjson', columns={name: 'VARCHAR'}) +---- +O Brother, Where Art Thou? +Home for the Holidays +The Firm +Broadcast News +Raising Arizona + +# what about a broken JSON file +query II +SELECT * FROM read_ndjson('data/json/unterminated_quotes.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'}, ignore_errors=true) +---- +1 O Brother, Where Art Thou? +2 Home for the Holidays +NULL NULL +4 Broadcast News +5 Raising Arizona + +# some of these values don't have "name" +query II +SELECT * FROM read_ndjson('data/json/different_schemas.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'}) +---- +1 O Brother, Where Art Thou? +2 NULL +3 The Firm +4 NULL +5 Raising Arizona + +# test projection pushdown (unstructured json) +query I +SELECT id FROM read_json('data/json/example_n.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'}, format='unstructured') +---- +1 +2 +3 +4 +5 + +query I +SELECT name FROM read_json('data/json/example_n.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'}, format='unstructured') +---- +O Brother, Where Art Thou? +Home for the Holidays +The Firm +Broadcast News +Raising Arizona + +# test projection pushdown (newline-delimited json) +query I +SELECT id FROM read_json('data/json/example_n.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'}, format='newline_delimited') +---- +1 +2 +3 +4 +5 + +query I +SELECT name FROM read_ndjson('data/json/example_n.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'}, format='nd') +---- +O Brother, Where Art Thou? 
+Home for the Holidays +The Firm +Broadcast News +Raising Arizona + +# auto-detect +query II +SELECT * FROM read_json_auto('data/json/example_n.ndjson') +---- +1 O Brother, Where Art Thou? +2 Home for the Holidays +3 The Firm +4 Broadcast News +5 Raising Arizona + +query II +SELECT * FROM 'data/json/example_n.ndjson' +---- +1 O Brother, Where Art Thou? +2 Home for the Holidays +3 The Firm +4 Broadcast News +5 Raising Arizona + +# we can detect at varying levels, level 0 is just JSON +query I +SELECT * FROM read_json_auto('data/json/with_list.json', maximum_depth=0) +---- +{"id":1,"name":["O","Brother,","Where","Art","Thou?"]} +{"id":2,"name":["Home","for","the","Holidays"]} +{"id":3,"name":["The","Firm"]} +{"id":4,"name":["Broadcast","News"]} +{"id":5,"name":["Raising","Arizona"]} + +# at level one we get JSON and JSON +query II +SELECT * FROM read_json_auto('data/json/with_list.json', maximum_depth=1) +---- +1 ["O","Brother,","Where","Art","Thou?"] +2 ["Home","for","the","Holidays"] +3 ["The","Firm"] +4 ["Broadcast","News"] +5 ["Raising","Arizona"] + +# at level 2 we get BIGINT and JSON[] +query II +SELECT * FROM read_json_auto('data/json/with_list.json', maximum_depth=2) +---- +1 ["O", "Brother,", "Where", "Art", "Thou?"] +2 ["Home", "for", "the", "Holidays"] +3 ["The", "Firm"] +4 ["Broadcast", "News"] +5 ["Raising", "Arizona"] + +# at level 3 it's fully detected, and we get BIGINT and VARCHAR[] +query II +SELECT * FROM read_json_auto('data/json/with_list.json', maximum_depth=3) +---- +1 [O, 'Brother,', Where, Art, Thou?] +2 [Home, for, the, Holidays] +3 [The, Firm] +4 [Broadcast, News] +5 [Raising, Arizona] + +# we can detect lists too +query III +SELECT id, typeof(name), unnest(name) FROM 'data/json/with_list.json' +---- +1 VARCHAR[] O +1 VARCHAR[] Brother, +1 VARCHAR[] Where +1 VARCHAR[] Art +1 VARCHAR[] Thou? +2 VARCHAR[] Home +2 VARCHAR[] for +2 VARCHAR[] the +2 VARCHAR[] Holidays +3 VARCHAR[] The +3 VARCHAR[] Firm +4 VARCHAR[] Broadcast +4 VARCHAR[] News +5 VARCHAR[] Raising +5 VARCHAR[] Arizona + +# with depth 2 we don't bother detecting inside of the list - defaults to JSON +query III +SELECT id, typeof(name), unnest(name) FROM read_json_auto('data/json/with_list.json', maximum_depth=2) +---- +1 JSON[] "O" +1 JSON[] "Brother," +1 JSON[] "Where" +1 JSON[] "Art" +1 JSON[] "Thou?" 
+2 JSON[] "Home" +2 JSON[] "for" +2 JSON[] "the" +2 JSON[] "Holidays" +3 JSON[] "The" +3 JSON[] "Firm" +4 JSON[] "Broadcast" +4 JSON[] "News" +5 JSON[] "Raising" +5 JSON[] "Arizona" + +# with depth 0 we don't bother detecting anything, everything defaults to JSON (even the "id" column in this case) +query II +SELECT typeof(id), typeof(name) FROM read_json_auto('data/json/with_list.json', maximum_depth=1) +---- +JSON JSON +JSON JSON +JSON JSON +JSON JSON +JSON JSON + +# we can detect UUID's +query II +SELECT id, typeof(id) FROM 'data/json/with_uuid.json' +---- +bbd05ae7-76e5-4f1a-a31f-247408251fc9 UUID +d5c52052-5f8e-473f-bc8d-176342643ef5 UUID +3b6a6de3-0732-4591-93ed-8df6091eb00d UUID +ae24e69e-e0bf-4e85-9848-27d35df85b8b UUID +63928b16-1814-436f-8b30-b3c40cc31d51 UUID + +# top-level array of values +query I +select * from read_json('data/json/top_level_array.json', columns={conclusion: 'VARCHAR'}) +---- +cancelled +cancelled + +query I +select * from read_json('data/json/top_level_array.json', auto_detect=true) +---- +cancelled +cancelled + +# if we try to read it as 'unstructured' records +statement error +select * from read_json('data/json/top_level_array.json', columns={conclusion: 'VARCHAR'}, format='unstructured', records=true) +---- +Invalid Input Error: JSON transform error in file "data/json/top_level_array.json", in record/value 1: Expected OBJECT, but got ARRAY + +# if we try to read an ndjson file as if it is an array of values, we get an error +statement error +select * from read_json_auto('data/json/example_n.ndjson', format='array') +---- +Invalid Input Error: Expected top-level JSON array + +# test that we can read a list of longer than STANDARD_VECTOR_SIZE properly +statement ok +copy (select 42 duck from range(10000)) to '__TEST_DIR__/my_file.json' (array true) + +query T +select count(*) from read_json('__TEST_DIR__/my_file.json', columns={duck: 'INTEGER'}, format='array') +---- +10000 + +query T +select sum(duck) = 42*10000 from read_json('__TEST_DIR__/my_file.json', columns={duck: 'INTEGER'}, format='array') +---- +true + +# read_json_auto also understands ARRAY format +query T +select count(*) from '__TEST_DIR__/my_file.json' +---- +10000 + +query T +select sum(duck) = 42*10000 from '__TEST_DIR__/my_file.json' +---- +true + +# what if we do an array of non-records? 
+statement ok +copy (select list(range) from range(10)) to '__TEST_DIR__/my_file.json' (format csv, quote '', HEADER 0) + +query T +select * from '__TEST_DIR__/my_file.json' +---- +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 + +# fails because it's not records +statement error +select * from read_json('__TEST_DIR__/my_file.json', format='array', columns={range: 'INTEGER'}, records=true) +---- +Invalid Input Error: JSON transform error + +# fails because it's not records +statement error +select * from read_json_auto('__TEST_DIR__/my_file.json', format='array', records=true) +---- +Binder Error: json_read expected records + +query T +select * from read_json('__TEST_DIR__/my_file.json', format='auto', records=false, auto_detect=true) +---- +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 + +# need to supply columns +statement error +select * from read_json('__TEST_DIR__/my_file.json', format='auto', records='false', auto_detect=false) +---- +Binder Error + +# read as unstructured values, so we just get the array +query T +select * from read_json('__TEST_DIR__/my_file.json', format='unstructured', records='false', auto_detect=true) +---- +[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + +# array of non-records +query T +select * from read_json('__TEST_DIR__/my_file.json', format='array', records='false', auto_detect=true) +---- +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 + +# also works with auto +query T +select * from read_json('__TEST_DIR__/my_file.json', format='array', records='auto', auto_detect=true) +---- +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 + +# lower thread count so the next tests don't OOM on many-core machines +statement ok +SET threads=2 + +# issue 6646, this is not an array, but we try to read it as one +statement error +select json_structure(json ->> '$.metadata') as structure, +from read_json('data/json/issue.json', format='array', columns={'json': 'JSON'}, maximum_object_size=104857600) +limit 1; +---- +Invalid Input Error: Expected top-level JSON array + +# let's try a variation +statement error +select json_structure(json ->> '$.metadata') as structure, +from read_json('data/json/issue.json', format='array', records='false', columns={'json': 'JSON'}, maximum_object_size=104857600) +limit 1; +---- +Invalid Input Error: Expected top-level JSON array + +# we can parse it as unstructured values, and give it a different col name +query I +select json_structure(my_json ->> '$.metadata') as structure, +from read_json('data/json/issue.json', format='unstructured', records='false', columns={'my_json': 'JSON'}, maximum_object_size=104857600) +limit 1; +---- +{"argv":["VARCHAR"],"dag":{"dag_size":"VARCHAR","tasks":{"load_oscar":{"status":"VARCHAR","type":"VARCHAR","upstream":"VARCHAR","products":{"nb":"VARCHAR"}},"load_weather":{"status":"VARCHAR","type":"VARCHAR","upstream":"VARCHAR","products":{"nb":"VARCHAR"}},"compress":{"status":"VARCHAR","type":"VARCHAR","upstream":{"load_oscar":"VARCHAR"},"products":{"nb":"VARCHAR"}}}}} + +statement ok +pragma disable_verification + +# test that we can read a JSON list that spans more than one buffer size +# the JSON is 55 bytes, and the minimum buffer size is 32MB +# let's do 50k to be safe +statement ok +copy (select 42 this_is_a_very_long_field_name_yes_very_much_so from range(50000)) to '__TEST_DIR__/my_file.json' (array true) + +query T +select sum(this_is_a_very_long_field_name_yes_very_much_so) = 42 * 50000 from '__TEST_DIR__/my_file.json' +---- +true + +require httpfs + +query II +select * from read_json_auto('https://github.com/duckdb/duckdb-data/releases/download/v1.0/example_rn.ndjson'); +---- +1 O 
Brother, Where Art Thou? +2 Home for the Holidays +3 The Firm +4 Broadcast News +5 Raising Arizona diff --git a/test/sql/json/table/read_json_auto.test_slow b/test/sql/json/table/read_json_auto.test_slow new file mode 100644 index 0000000..efed7ee --- /dev/null +++ b/test/sql/json/table/read_json_auto.test_slow @@ -0,0 +1,380 @@ +# name: test/sql/json/table/read_json_auto.test_slow +# description: Read json files - schema detection +# group: [table] + +require json + +statement ok +pragma enable_verification + +# some arrow tests (python/pyarrow/tests/test_json.py) on their github +# these are very similar to the pandas tests, so let's not copy those +# instead of adding all of these files to data/test we just create them on the fly here +# whenever we add a '' at the end it's just to check we skip the newline at the end that's sometimes there +statement ok +copy (select * from (values ('{"a": 1, "b": 2}'), (''))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0); + +query II +select * from '__TEST_DIR__/my_file.json' +---- +1 2 + +statement ok +copy (select * from (values ('{"a": 1}'), ('{"a": 2}'), ('{"a": 3}'))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0) + +query I +select * from '__TEST_DIR__/my_file.json' +---- +1 +2 +3 + +query I +select count(*) from '__TEST_DIR__/my_file.json' +---- +3 + +statement ok +copy (select * from (values ('{"a": 1,"b": 2, "c": 3}'), ('{"a": 4,"b": 5, "c": 6}'))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0) + +query III +select * from '__TEST_DIR__/my_file.json' +---- +1 2 3 +4 5 6 + +statement ok +copy (select * from (values ('{"a": 1,"b": 2, "c": "3", "d": false}'), ('{"a": 4.0, "b": -5, "c": "foo", "d": true}'), (''))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0) + +query IIII +select * from '__TEST_DIR__/my_file.json' +---- +1.0 2 3 false +4.0 -5 foo true + +# mixed types that cannot be resolved, defaults to JSON (column 3) +statement ok +copy (select * from (values ('{"a": 1, "b": 2, "c": null, "d": null, "e": null}'), ('{"a": null, "b": -5, "c": "foo", "d": null, "e": true}'), ('{"a": 4.5, "b": null, "c": "nan", "d": null,"e": false}'), (''))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0) + +query IIIII +select * from '__TEST_DIR__/my_file.json' +---- +1.0 2 NULL NULL NULL +NULL -5 foo NULL true +4.5 NULL nan NULL false + +# mixed types are resolved to DOUBLE here +statement ok +copy (select * from (values ('{"a": 1}'), ('{"a": 1.45}'), ('{"a": -23.456}'), ('{}'), (''))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0) + +query II +select typeof(a), a from '__TEST_DIR__/my_file.json' +---- +DOUBLE 1.0 +DOUBLE 1.45 +DOUBLE -23.456 +DOUBLE NULL + +statement ok +copy (select * from (values ('{"foo": "bar", "num": 0}'), ('{"foo": "baz", "num": 1}'), (''))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0) + +query II +select * from '__TEST_DIR__/my_file.json' +---- +bar 0 +baz 1 + +# we can read values from a top-level list +query I +select * from 'data/json/top_level_array.json' +---- +cancelled +cancelled + +query I +select count(*) from 'data/json/top_level_array.json' +---- +2 + +# for maximum_depth=0 this is two records of JSON +query I +select * from read_json_auto('data/json/top_level_array.json', maximum_depth=0) +---- +{"conclusion":"cancelled"} +{"conclusion":"cancelled"} + +# for 1 it's 1 column of JSON +query I +select * from read_json_auto('data/json/top_level_array.json', maximum_depth=1) +---- +"cancelled" +"cancelled" + 
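# (Editorial aside, not part of the test file being added:) the same file can also be read
# without any detection at all by handing read_json an explicit schema; this mirrors the
# read_json.test block earlier in this patch and uses only a statement and result that
# already appear there.
query I
select * from read_json('data/json/top_level_array.json', columns={conclusion: 'VARCHAR'})
----
cancelled
cancelled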
+# if we read this with records='false', we get the struct instead of the unpacked columns +query I +select typeof(json) from read_json_auto('data/json/top_level_array.json', records='false') +---- +STRUCT(conclusion VARCHAR) +STRUCT(conclusion VARCHAR) + +# however, if there are multiple top-level arrays, we default to reading them as lists +query I +select * from 'data/json/top_level_two_arrays.json' +---- +[{'conclusion': cancelled}, {'conclusion': cancelled}] +[{'conclusion': cancelled}, {'conclusion': cancelled}] + +# if we read a top-level array as if it is a record, then we get an error +statement error +select * from read_json_auto('data/json/top_level_array.json', format='unstructured', records='true') +---- +Binder Error: json_read expected records + +# issue Mark found when analyzing a JSON dump of our CI - projection pushdown wasn't working properly +statement ok +select * from 'data/json/projection_pushdown_example.json' WHERE status <> 'completed' + +# different schema's - this one should work regardless of sampling 1 or all lines +query II +select * from read_json_auto('data/json/different_schemas.ndjson', sample_size=1) +---- +1 O Brother, Where Art Thou? +2 NULL +3 The Firm +4 NULL +5 Raising Arizona + +query II +select * from read_json_auto('data/json/different_schemas.ndjson', sample_size=-1) +---- +1 O Brother, Where Art Thou? +2 NULL +3 The Firm +4 NULL +5 Raising Arizona + +# if we require fields to appear in all objects by setting field_appearance_threshold=1, we default to MAP +query I +select typeof(COLUMNS(*)) from read_json_auto('data/json/different_schemas.ndjson', field_appearance_threshold=1) limit 1 +---- +MAP(VARCHAR, JSON) + +query I +select * from read_json_auto('data/json/different_schemas.ndjson', field_appearance_threshold=1) +---- +{id=1, name='"O Brother, Where Art Thou?"'} +{id=2} +{id=3, name='"The Firm"'} +{id=4} +{id=5, name='"Raising Arizona"'} + +# if we set it to 0.5 it should work already since "name" appears in 3/5 objects, which is greater than 0.5 +query II +select * from read_json_auto('data/json/different_schemas.ndjson', field_appearance_threshold=0.5) +---- +1 O Brother, Where Art Thou? +2 NULL +3 The Firm +4 NULL +5 Raising Arizona + +# can't set it to less than 0 or more than 1 +statement error +select * from read_json_auto('data/json/different_schemas.ndjson', field_appearance_threshold=-1) +---- +Binder Error: read_json_auto "field_appearance_threshold" parameter must be between 0 and 1 + +statement error +select * from read_json_auto('data/json/different_schemas.ndjson', field_appearance_threshold=2) +---- +Binder Error: read_json_auto "field_appearance_threshold" parameter must be between 0 and 1 + +# inconsistent schema's - if we only sample 1 row, we get an error, because we only see a NULL value for the 2nd column +statement error +select * from read_json_auto('data/json/inconsistent_schemas.ndjson', sample_size=1, convert_strings_to_integers=true) +---- +Invalid Input Error: JSON transform error in file "data/json/inconsistent_schemas.ndjson", in line 3 + +# if we increase the sample size to 2, we can read it just fine +query II +select * from read_json_auto('data/json/inconsistent_schemas.ndjson', sample_size=2) +---- +"1" NULL +2 Home for the Holidays +[3] The Firm +4 Broadcast News +5 Raising Arizona + +# we can also find bigint in strings (happens a lot in JSON for some reason ...) 
+statement ok +copy (select * from (values ('{"id": "26941143801"}'), ('{"id": "26941143807"}'))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0) + +# but only if we set the parameter to true +query T +select typeof(id) from read_json('__TEST_DIR__/my_file.json', convert_strings_to_integers=true) +---- +BIGINT +BIGINT + +# empty array and the example file works +query II +select * from read_json_auto(['data/json/empty_array.json', 'data/json/example_n.ndjson']); +---- +1 O Brother, Where Art Thou? +2 Home for the Holidays +3 The Firm +4 Broadcast News +5 Raising Arizona + +# Simple map inference with default threshold +query T +select distinct typeof(a) from read_json_auto('data/json/simple_map.jsonl') +---- +MAP(VARCHAR, BIGINT) + +# Test setting map_inference_threshold high +query T +select distinct typeof(a) from read_json_auto('data/json/simple_map.jsonl', map_inference_threshold=1000) +---- +MAP(VARCHAR, BIGINT) + +# Map inference can be disabled +query T +select distinct typeof(a) from read_json_auto('data/json/simple_map.jsonl', map_inference_threshold=-1, field_appearance_threshold=0) +---- +STRUCT("1" JSON, "2" BIGINT, "3" BIGINT, "4" BIGINT, "5" BIGINT, "6" BIGINT, "7" BIGINT, "8" BIGINT, "9" BIGINT, "10" BIGINT, "11" BIGINT, "12" BIGINT, "13" BIGINT, "14" BIGINT, "15" BIGINT, "16" JSON, "17" BIGINT, "18" BIGINT, "19" BIGINT, "20" BIGINT, "21" BIGINT, "22" BIGINT, "23" BIGINT, "24" BIGINT, "25" BIGINT, "26" BIGINT, "27" BIGINT, "28" BIGINT, "29" BIGINT, "30" BIGINT, "31" BIGINT, "32" BIGINT, "33" BIGINT, "34" BIGINT, "35" BIGINT, "36" BIGINT, "37" BIGINT, "38" BIGINT, "39" BIGINT, "40" BIGINT, "41" BIGINT, "42" BIGINT, "43" BIGINT, "44" BIGINT, "45" BIGINT, "46" BIGINT, "47" BIGINT, "48" BIGINT, "49" BIGINT, "50" BIGINT, "51" BIGINT, "52" BIGINT, "53" BIGINT, "54" BIGINT, "55" BIGINT, "56" BIGINT, "57" BIGINT, "58" BIGINT, "59" BIGINT, "60" BIGINT, "61" BIGINT, "62" BIGINT, "63" BIGINT, "64" BIGINT, "65" BIGINT, "66" BIGINT, "67" BIGINT, "68" BIGINT, "69" BIGINT, "70" BIGINT, "71" BIGINT, "72" BIGINT, "73" BIGINT, "74" BIGINT, "75" BIGINT, "76" BIGINT, "77" BIGINT, "78" BIGINT, "79" BIGINT, "80" BIGINT, "81" BIGINT, "82" BIGINT, "83" BIGINT, "84" BIGINT, "85" BIGINT, "86" BIGINT, "87" BIGINT, "88" BIGINT, "89" BIGINT, "90" BIGINT, "91" BIGINT, "92" BIGINT, "93" BIGINT, "94" BIGINT, "95" BIGINT, "96" BIGINT, "97" BIGINT, "98" BIGINT, "99" BIGINT, "100" BIGINT) + +# Map inference with max_depth works as expected +query T +select distinct typeof(a) from read_json_auto('data/json/simple_map.jsonl', maximum_depth=2) +---- +MAP(VARCHAR, JSON) + +query T +select distinct typeof(a) from read_json_auto('data/json/simple_map.jsonl', maximum_depth=1) +---- +JSON + +# Map where all values are null +query T +select distinct typeof(a) from read_json_auto('data/json/map_of_nulls.jsonl') +---- +MAP(VARCHAR, JSON) + +# Map type can be inferred at the top level +query T +select distinct typeof(json) from read_json_auto('data/json/top_level_map.jsonl') +---- +MAP(VARCHAR, BIGINT) + +# Map type can be inferred for struct value type +query T +select distinct typeof(a) from read_json_auto('data/json/map_of_structs.jsonl') +---- +MAP(VARCHAR, STRUCT(b BIGINT)) + +# Map 80% similarity check works +query T +select distinct typeof(a) from read_json_auto('data/json/map_50_50.jsonl', map_inference_threshold=10) +---- +STRUCT(s1 STRUCT(f1 BIGINT[]), s2 STRUCT(f2 BIGINT[]), s3 STRUCT(f1 BIGINT[]), s4 STRUCT(f2 BIGINT[]), s5 STRUCT(f1 BIGINT[]), s6 STRUCT(f2 BIGINT[]), s7 STRUCT(f1 BIGINT[]), 
s8 STRUCT(f2 BIGINT[]), s9 STRUCT(f1 BIGINT[]), s10 STRUCT(f2 BIGINT[])) + +# Map of maps +query T +select distinct typeof(a) from read_json_auto('data/json/map_of_map.jsonl', map_inference_threshold=10) +---- +MAP(VARCHAR, MAP(VARCHAR, BIGINT)) + +# All NULL types get converted to JSON if we do map inference +query T +select distinct typeof(a) from read_json_auto('data/json/map_of_struct_with_nulls.jsonl', map_inference_threshold=10) +---- +MAP(VARCHAR, STRUCT(a JSON[])) + +# Candidate types are properly handled for map inference +query I +SELECT distinct typeof(a) FROM read_json_auto('data/json/map_of_dates.jsonl', map_inference_threshold=25) +---- +MAP(VARCHAR, DATE) + +# Mixed candidate types are also handled +query I +SELECT distinct typeof(a) FROM read_json_auto('data/json/map_of_mixed_date_timestamps.jsonl', map_inference_threshold=25) +---- +MAP(VARCHAR, VARCHAR) + +# Incompatible types are handled correctly +query T +select distinct typeof(a) from read_json_auto('data/json/map_incompatible.jsonl', map_inference_threshold=10) +---- +STRUCT(s1 STRUCT("1" JSON), s2 STRUCT("1" MAP(VARCHAR, JSON)), s3 STRUCT("1" VARCHAR), s4 STRUCT("1" BIGINT[]), s5 STRUCT("1" BIGINT), s6 STRUCT("1" VARCHAR), s7 STRUCT("1" BIGINT[]), s8 STRUCT("1" BIGINT), s9 STRUCT("1" VARCHAR), s10 STRUCT("1" BIGINT[])) + +# Can't set map_inference_threshold to a negative value (except -1) +statement error +select * from read_json_auto('data/json/simple_map.jsonl', map_inference_threshold=-10) +---- +Binder Error: read_json_auto "map_inference_threshold" parameter must be 0 or positive, or -1 to disable map inference for consistent objects. + +# if we only sample the first file, we default to a single JSON column +query I +select * from read_json_auto(['data/json/empty_array.json', 'data/json/example_n.ndjson'], maximum_sample_files=1); +---- +{"id":1,"name":"O Brother, Where Art Thou?"} +{"id":2,"name":"Home for the Holidays"} +{"id":3,"name":"The Firm"} +{"id":4,"name":"Broadcast News"} +{"id":5,"name":"Raising Arizona"} + +# -1 is unlimited +query II +select * from read_json_auto(['data/json/empty_array.json', 'data/json/example_n.ndjson'], maximum_sample_files=-1); +---- +1 O Brother, Where Art Thou? 
+2 Home for the Holidays +3 The Firm +4 Broadcast News +5 Raising Arizona + +# can't be -2 or lower +statement error +select * from read_json_auto(['data/json/empty_array.json', 'data/json/example_n.ndjson'], maximum_sample_files=-2); +---- +Binder Error + +# can't be 0 +statement error +select * from read_json_auto(['data/json/empty_array.json', 'data/json/example_n.ndjson'], maximum_sample_files=0); +---- +Binder Error + +# cannot be NULL either +statement error +select * from read_json_auto(['data/json/empty_array.json', 'data/json/example_n.ndjson'], maximum_sample_files=NULL); +---- +Binder Error + +statement ok +pragma disable_verification + +require httpfs + +# this is one big object - yyjson uses it as a benchmark +query II +select typeof("type"), typeof(features) from read_json_auto('https://github.com/duckdb/duckdb-data/releases/download/v1.0/canada.json', maximum_depth=3); +---- +VARCHAR STRUCT("type" JSON, properties JSON, geometry JSON)[] + +# let's crank up maximum_depth and see if we can fully unnest this big object +query II +select typeof("type"), typeof(features) from read_json_auto('https://github.com/duckdb/duckdb-data/releases/download/v1.0/canada.json', maximum_depth=8); +---- +VARCHAR STRUCT("type" VARCHAR, properties STRUCT("name" VARCHAR), geometry STRUCT("type" VARCHAR, coordinates DOUBLE[][][]))[] + +# ^ fully unnested, no more JSON type in there + +# the "coordinates" array in "features.geometry" is huge, let's just check the length - not all the values +query IIIII +select type, features[1].type, features[1].properties.name, features[1].geometry.type, length(features[1].geometry.coordinates) +from read_json_auto('https://github.com/duckdb/duckdb-data/releases/download/v1.0/canada.json', maximum_depth=8); +---- +FeatureCollection Feature Canada Polygon 480 diff --git a/test/sql/json/table/read_json_objects.test b/test/sql/json/table/read_json_objects.test new file mode 100644 index 0000000..ecf73b6 --- /dev/null +++ b/test/sql/json/table/read_json_objects.test @@ -0,0 +1,252 @@ +# name: test/sql/json/table/read_json_objects.test +# description: Read ndjson files +# group: [table] + +require json + +# we cannot check the error output for the specific byte, because on Windows the \n are replaced with \r\n +# therefore, the byte count is different. So, we cut off the error message here +statement error +select * from read_json_objects('data/json/unterminated_quotes.ndjson') +---- +Invalid Input Error: Malformed JSON + +# now it should work! 
+query I +SELECT * FROM read_csv('data/json/example_n.ndjson', columns={'json': 'JSON'}, delim=NULL, header=0, quote=NULL, escape=NULL, auto_detect = false) +---- +{"id":1,"name":"O Brother, Where Art Thou?"} +{"id":2,"name":"Home for the Holidays"} +{"id":3,"name":"The Firm"} +{"id":4,"name":"Broadcast News"} +{"id":5,"name":"Raising Arizona"} + +# example_n is with regular \n newlines +query I +SELECT * FROM read_ndjson_objects('data/json/example_n.ndjson') +---- +{"id":1,"name":"O Brother, Where Art Thou?"} +{"id":2,"name":"Home for the Holidays"} +{"id":3,"name":"The Firm"} +{"id":4,"name":"Broadcast News"} +{"id":5,"name":"Raising Arizona"} + +# this one does not have the 'records' param +statement error +SELECT * FROM read_ndjson_objects('data/json/example_n.ndjson', records='false') +---- +Binder Error: Invalid named parameter + +query I +SELECT * FROM read_ndjson_objects('data/json/example_n.ndjson') +---- +{"id":1,"name":"O Brother, Where Art Thou?"} +{"id":2,"name":"Home for the Holidays"} +{"id":3,"name":"The Firm"} +{"id":4,"name":"Broadcast News"} +{"id":5,"name":"Raising Arizona"} + +# we can auto-detect that it's newline-delimited +query I +SELECT * FROM read_json_objects('data/json/example_n.ndjson', format='auto') +---- +{"id":1,"name":"O Brother, Where Art Thou?"} +{"id":2,"name":"Home for the Holidays"} +{"id":3,"name":"The Firm"} +{"id":4,"name":"Broadcast News"} +{"id":5,"name":"Raising Arizona"} + +# example_r is with \r newlines - works with unstructured +query I +SELECT * FROM read_json_objects('data/json/example_r.ndjson', format='unstructured') +---- +{"id":1,"name":"O Brother, Where Art Thou?"} +{"id":2,"name":"Home for the Holidays"} +{"id":3,"name":"The Firm"} +{"id":4,"name":"Broadcast News"} +{"id":5,"name":"Raising Arizona"} + +# we can detect that it's not newline-delimited +query I +SELECT * FROM read_json_objects('data/json/example_r.ndjson', format='auto') +---- +{"id":1,"name":"O Brother, Where Art Thou?"} +{"id":2,"name":"Home for the Holidays"} +{"id":3,"name":"The Firm"} +{"id":4,"name":"Broadcast News"} +{"id":5,"name":"Raising Arizona"} + +# \r newlines are NOT valid according to ndjson spec - this does not work, all a single line +statement error +SELECT * FROM read_ndjson_objects('data/json/example_r.ndjson') +---- +Invalid Input Error: Malformed JSON in file "data/json/example_r.ndjson" + +# example_rn is with \r\n newlines +query I +SELECT * FROM read_ndjson_objects('data/json/example_rn.ndjson') +---- +{"id":1,"name":"O Brother, Where Art Thou?"} +{"id":2,"name":"Home for the Holidays"} +{"id":3,"name":"The Firm"} +{"id":4,"name":"Broadcast News"} +{"id":5,"name":"Raising Arizona"} + +query I +SELECT * FROM read_ndjson_objects('data/json/example_rn.ndjson') +---- +{"id":1,"name":"O Brother, Where Art Thou?"} +{"id":2,"name":"Home for the Holidays"} +{"id":3,"name":"The Firm"} +{"id":4,"name":"Broadcast News"} +{"id":5,"name":"Raising Arizona"} + +# same but gzipped +query I +SELECT * FROM read_ndjson_objects('data/json/example_rn.ndjson.gz') +---- +{"id":1,"name":"O Brother, Where Art Thou?"} +{"id":2,"name":"Home for the Holidays"} +{"id":3,"name":"The Firm"} +{"id":4,"name":"Broadcast News"} +{"id":5,"name":"Raising Arizona"} + +query I +SELECT * FROM read_json_objects('data/json/example_rn.ndjson.gz', format='nd') +---- +{"id":1,"name":"O Brother, Where Art Thou?"} +{"id":2,"name":"Home for the Holidays"} +{"id":3,"name":"The Firm"} +{"id":4,"name":"Broadcast News"} +{"id":5,"name":"Raising Arizona"} + +# multi-file scan +query I +SELECT 
count(*) from read_json_objects(['data/json/example_n.ndjson', 'data/json/example_r.ndjson', 'data/json/example_rn.ndjson'], format='auto') +---- +15 + +query I +SELECT count(*) from read_ndjson_objects(['data/json/example_n.ndjson', 'data/json/example_rn.ndjson']) +---- +10 + +# globbing +query I +SELECT count(*) from read_json_objects('data/json/example_*.ndjson', format='auto') +---- +15 + +query I +SELECT count(*) from read_ndjson_objects('data/json/example_*n.ndjson') +---- +10 + +require httpfs + +# same file but hosted on github +query I +select * from read_json_objects('https://github.com/duckdb/duckdb-data/releases/download/v1.0/example_rn.ndjson', format='nd') +---- +{"id":1,"name":"O Brother, Where Art Thou?"} +{"id":2,"name":"Home for the Holidays"} +{"id":3,"name":"The Firm"} +{"id":4,"name":"Broadcast News"} +{"id":5,"name":"Raising Arizona"} + +query I +select * from read_ndjson_objects('https://github.com/duckdb/duckdb-data/releases/download/v1.0/example_rn.ndjson') +---- +{"id":1,"name":"O Brother, Where Art Thou?"} +{"id":2,"name":"Home for the Holidays"} +{"id":3,"name":"The Firm"} +{"id":4,"name":"Broadcast News"} +{"id":5,"name":"Raising Arizona"} + +# empty file +query I +select * from read_json_objects('data/json/empty.ndjson') +---- + +query I +select * from read_ndjson_objects('data/json/empty.ndjson') +---- + +# invalid json stuff +statement error +select * from read_json_objects('data/json/unterminated_quotes.ndjson', format='nd') +---- +Invalid Input Error: Malformed JSON in file "data/json/unterminated_quotes.ndjson" + +statement error +select * from read_ndjson_objects('data/json/unterminated_quotes.ndjson') +---- +Invalid Input Error: Malformed JSON in file "data/json/unterminated_quotes.ndjson" + +# we can auto-detect and ignore the error (becomes NULL) +query I +select * from read_json_objects('data/json/unterminated_quotes.ndjson', format='auto', ignore_errors=true) +---- +{"id":1,"name":"O Brother, Where Art Thou?"} +{"id":2,"name":"Home for the Holidays"} +NULL +{"id":4,"name":"Broadcast News"} +{"id":5,"name":"Raising Arizona"} + +# multiple values per line (works for read_json_objects) +query I +select * from read_json_objects('data/json/multiple_objects_per_line.ndjson', format='unstructured') +---- +{"id":1,"name":"O Brother, Where Art Thou?"} +{"id":2,"name":"Home for the Holidays"} +{"id":3,"name":"The Firm"} +{"id":4,"name":"Broadcast News"} +{"id":5,"name":"Raising Arizona"} + +# does not work for read_ndjson_objects +statement error +select * from read_ndjson_objects('data/json/multiple_objects_per_line.ndjson') +---- +Invalid Input Error: Malformed JSON in file "data/json/multiple_objects_per_line.ndjson" + +# what if we try to read a CSV? +statement error +select * from read_json_objects('data/csv/tpcds_14.csv') +---- +Invalid Input Error: Malformed JSON + +statement error +select * from read_ndjson_objects('data/csv/tpcds_14.csv') +---- +Invalid Input Error: Malformed JSON in file "data/csv/tpcds_14.csv" + +# how about parquet? 
+statement error +select * from read_json_objects('data/parquet-testing/blob.parquet') +---- +Invalid Input Error: Malformed JSON + +statement error +select * from read_ndjson_objects('data/parquet-testing/blob.parquet') +---- +Invalid Input Error: Malformed JSON in file "data/parquet-testing/blob.parquet" + +# we can also read the objects from a JSON array (not newline-delimited) +query I +select * from read_json_objects('data/json/top_level_array.json') +---- +{"conclusion":"cancelled"} +{"conclusion":"cancelled"} + +# and auto-detect it +query I +select * from read_json_objects('data/json/top_level_array.json', format='auto') +---- +{"conclusion":"cancelled"} +{"conclusion":"cancelled"} + +# the file only has one line, so if we read this as ndjson, we just get the array +query I +select * from read_json_objects('data/json/top_level_array.json', format='nd') +---- +[{"conclusion":"cancelled"}, {"conclusion":"cancelled"}] diff --git a/test/sql/logging/file_system_logging.test b/test/sql/logging/file_system_logging.test new file mode 100644 index 0000000..6aa2ed0 --- /dev/null +++ b/test/sql/logging/file_system_logging.test @@ -0,0 +1,56 @@ +# name: test/sql/logging/file_system_logging.test +# group: [logging] + +require parquet + +require noforcestorage + +statement ok +set enable_logging = true; + +statement ok +set logging_level='trace'; + +statement ok +COPY (SELECT 1 as a) TO '__TEST_DIR__/test.csv' + +statement ok +FROM '__TEST_DIR__/test.csv' + +statement ok +pragma threads=1 + +# Note: regex for test stability +query IIII +SELECT scope, type, log_level, regexp_replace(message, '\"path\":.*test.csv"', '"test.csv"') +FROM duckdb_logs +WHERE type = 'FileSystem' +ORDER BY timestamp +---- +CONNECTION FileSystem TRACE {"fs":"LocalFileSystem","test.csv","op":"OPEN"} +CONNECTION FileSystem TRACE {"fs":"LocalFileSystem","test.csv","op":"WRITE","bytes":"4","pos":"0"} +CONNECTION FileSystem TRACE {"fs":"LocalFileSystem","test.csv","op":"CLOSE"} +CONNECTION FileSystem TRACE {"fs":"LocalFileSystem","test.csv","op":"OPEN"} +CONNECTION FileSystem TRACE {"fs":"LocalFileSystem","test.csv","op":"READ","bytes":"4","pos":"0"} +CONNECTION FileSystem TRACE {"fs":"LocalFileSystem","test.csv","op":"READ","bytes":"0","pos":"4"} +CONNECTION FileSystem TRACE {"fs":"LocalFileSystem","test.csv","op":"CLOSE"} + +statement ok +CALL truncate_duckdb_logs(); + +require httpfs + +statement ok +FROM 'https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv' + +# FIXME: investigate why we call READ twice? 
+query IIII +SELECT scope, type, log_level, regexp_replace(message, '\"path\":.*test.csv"', '"test.csv"') +FROM duckdb_logs +WHERE type = 'FileSystem' AND message NOT LIKE '%duckdb_extension%' +ORDER BY timestamp +---- +CONNECTION FileSystem TRACE {"fs":"HTTPFileSystem","path":"https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv","op":"OPEN"} +CONNECTION FileSystem TRACE {"fs":"HTTPFileSystem","path":"https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv","op":"READ","bytes":"1276","pos":"0"} +CONNECTION FileSystem TRACE {"fs":"HTTPFileSystem","path":"https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv","op":"READ","bytes":"0","pos":"1276"} +CONNECTION FileSystem TRACE {"fs":"HTTPFileSystem","path":"https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv","op":"CLOSE"} diff --git a/test/sql/logging/http_logging.test b/test/sql/logging/http_logging.test new file mode 100644 index 0000000..031e43e --- /dev/null +++ b/test/sql/logging/http_logging.test @@ -0,0 +1,45 @@ +# name: test/sql/logging/http_logging.test +# group: [logging] + +require parquet + +require httpfs + +statement ok +CALL enable_logging('HTTP'); + +statement ok +FROM 'https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv' + +query IIII +SELECT + request.type, + request.url, + response.status, + response.reason, +FROM duckdb_logs_parsed('HTTP') WHERE response.status != 'ServiceUnavailable_503' +---- +HEAD https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv OK_200 OK +GET https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv PartialContent_206 Partial Content + +query II +SELECT request.headers['Range'], response.headers['Content-Range'] +FROM duckdb_logs_parsed('HTTP') +WHERE request.type='GET' +---- +bytes=0-1275 bytes 0-1275/1276 + +statement ok +CALL truncate_duckdb_logs() + +# This old option still exists, however it now logs to the duckdb log instead of printing straight to stdout +statement ok +set enable_http_logging=false; + +statement ok +FROM 'https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv' + +query I +select count(*) FROM duckdb_logs_parsed('HTTP'); +---- +0 diff --git a/test/sql/secrets/create_secret.test_slow b/test/sql/secrets/create_secret.test_slow new file mode 100644 index 0000000..602ee12 --- /dev/null +++ b/test/sql/secrets/create_secret.test_slow @@ -0,0 +1,76 @@ +# name: test/sql/secrets/create_secret.test_slow +# description: Test secret creation using the default s3 secret provider +# group: [secrets] + +statement ok +PRAGMA enable_verification; + +require httpfs + +# Ensure any currently stored secrets don't interfere with the test +statement ok +set allow_persistent_secrets=false; + +statement ok +reset s3_use_ssl + +# Create an S3 secret using the default provider (for s3, this will be the "config" provider, requiring the user to set all) +statement ok +CREATE SECRET default_provider_secret ( + TYPE S3, + KEY_ID 'my_key', + SECRET 'my_secret', + REGION 'my_region', + ENDPOINT 'invalid-on-purpose' +) + +# The secret will be created for the default scope for this type +query III +SELECT name, type, scope FROM duckdb_secrets() WHERE name='default_provider_secret'; +---- +default_provider_secret s3 ['s3://', 's3n://', 's3a://'] + +# Note the endpoint is now using the one in the default_provider_secret +statement error +FROM 's3://test-bucket/test.csv' +---- +HTTP HEAD to 'https://test-bucket.invalid-on-purpose/test.csv' + +# Now create an S3 secret using the default (config) provider by explicitly passing it 
+statement ok +CREATE SECRET secret_scope_1 ( + TYPE S3, + PROVIDER config, + SCOPE 's3://b1', + ENDPOINT 'invalid-on-purpose-2' +) + +query III +SELECT name, type, scope FROM duckdb_secrets() WHERE name='secret_scope_1'; +---- +secret_scope_1 s3 ['s3://b1'] + +# Longest match of credential scope takes the win so, this is will grab the secret_scope_1 secret +statement error +FROM 's3://b1/test.csv' +---- +Could not establish connection error for HTTP HEAD to 'https://b1.invalid-on-purpose-2/test.csv' + +# Now confirm we can also set multiple scopes +statement ok +CREATE SECRET secret_scope_2 ( + TYPE S3, + PROVIDER config, + SCOPE ['s3://b2', 's3://b3'], + ENDPOINT 'invalid-on-purpose-3' +) + +query III +SELECT name, type, scope FROM duckdb_secrets() WHERE name='secret_scope_2'; +---- +secret_scope_2 s3 ['s3://b2', 's3://b3'] + +statement error +FROM 's3://b2/test.csv' +---- +Could not establish connection error for HTTP HEAD to 'https://b2.invalid-on-purpose-3/test.csv' diff --git a/test/sql/secrets/create_secret_binding.test b/test/sql/secrets/create_secret_binding.test new file mode 100644 index 0000000..bf1aa42 --- /dev/null +++ b/test/sql/secrets/create_secret_binding.test @@ -0,0 +1,92 @@ +# name: test/sql/secrets/create_secret_binding.test +# description: Test secret binding & types +# group: [secrets] + +statement ok +PRAGMA enable_verification; + +require httpfs + +# Ensure any currently stored secrets don't interfere with the test +statement ok +set allow_persistent_secrets=false; + +# Binder autocasts options, also both with single quotes and without is allowed +statement ok +CREATE SECRET s1 ( + TYPE R2, + PROVIDER config, + SCOPE ['s3://my_r2_scope', 's3://my_r2_scope2'], + ACCOUNT_ID 'some_bogus_account', + KEY_ID '123', + USE_SSL 1, + URL_COMPATIBILITY_MODE false +) + +query I nosort s1 +FROM duckdb_secrets(); +---- + +statement ok +DROP SECRET s1 + +# Create the secret again but in a different way to demonstrate casting and case insensitivity of param names +statement ok +CREATE SECRET s1 ( + TYPE R2, + PROVIDER config, + SCOPE ['s3://my_r2_scope', 's3://my_r2_scope2'], + account_id 'some_bogus_account', + key_id 123, + USE_SSL 'true', + URL_COMPATIBILITY_MODE '0' +) + +query I nosort s1 +FROM duckdb_secrets(); +---- + +### Now let's try some incorrect inputs + +# Incorrect type +statement error +CREATE SECRET incorrect_type ( + TYPE R2, + PROVIDER config, + USE_SSL 'fliepflap' +) +---- +Binder Error: Failed to cast option 'use_ssl' to type 'BOOLEAN': 'Could not convert string 'fliepflap' to BOOL' + +# Incorrect param altogether +statement error +CREATE SECRET incorrect_type ( + TYPE R2, + PROVIDER config, + FLIEPFLAP true +) +---- +Binder Error: Unknown parameter 'fliepflap' for secret type 'r2' with provider 'config' + +# Incorrect param for this type, but correct for other +statement error +CREATE SECRET incorrect_type ( + TYPE S3, + PROVIDER config, + ACCOUNT_ID 'my_acount' +) +---- +Binder Error: Unknown parameter 'account_id' for secret type 's3' with provider 'config' + +# Params can only occur once +statement error +CREATE SECRET duplicate_param ( + TYPE R2, + PROVIDER config, + account_id 'some_bogus_account', + key_id 123, + KEY_ID 12098, + account_id blablabla +) +---- +Binder Error: Duplicate query param found while parsing create secret: 'key_id' diff --git a/test/sql/secrets/create_secret_cascading.test_slow b/test/sql/secrets/create_secret_cascading.test_slow new file mode 100644 index 0000000..8dd8cb8 --- /dev/null +++ 
b/test/sql/secrets/create_secret_cascading.test_slow @@ -0,0 +1,58 @@ +# name: test/sql/secrets/create_secret_cascading.test_slow +# description: Test the cascading mechanism of secret settings +# group: [secrets] + +statement ok +PRAGMA enable_verification; + +require httpfs + +# Ensure any currently stored secrets don't interfere with the test +statement ok +set allow_persistent_secrets=false; + +statement ok +set s3_endpoint = 'invalid-on-purpose-setting' + +statement ok +set s3_url_style = 'path' + +statement ok +set s3_use_ssl = false + +# This secret overrides only the url style, not the endpoint +statement ok +CREATE SECRET s1 ( + TYPE S3, + REGION 'my_region', + URL_STYLE 'vhost', + SCOPE 's3://url-style-only' +) + +# This secret overrides both the url style and the endpoint +statement ok +CREATE SECRET s2 ( + TYPE S3, + REGION 'my_region', + URL_STYLE 'vhost', + ENDPOINT 'invalid-on-purpose-secret', + SCOPE 's3://url-style-and-endpoint' +) + +# Only the url style from the secret is used +statement error +FROM 's3://url-style-only/test.csv' +---- +Could not establish connection error for HTTP HEAD to 'http://url-style-only.invalid-on-purpose-setting/test.csv' + +# Both Url style and endpoint are used now +statement error +FROM 's3://url-style-and-endpoint/test.csv' +---- +Could not establish connection error for HTTP HEAD to 'http://url-style-and-endpoint.invalid-on-purpose-secret/test.csv' + +# This request matches none of the secrets, we use the settings +statement error +FROM 's3://test-bucket/test.csv' +---- +Could not establish connection error for HTTP HEAD to 'http://invalid-on-purpose-setting/test-bucket/test.csv' diff --git a/test/sql/secrets/create_secret_defaults.test b/test/sql/secrets/create_secret_defaults.test new file mode 100644 index 0000000..fd15aed --- /dev/null +++ b/test/sql/secrets/create_secret_defaults.test @@ -0,0 +1,60 @@ +# name: test/sql/secrets/create_secret_defaults.test +# description: Test default values during secret creation +# group: [secrets] + +statement ok +PRAGMA enable_verification; + +require httpfs + +# Ensure any currently stored secrets don't interfere with the test +statement ok +set allow_persistent_secrets=false; + +statement ok +DROP SECRET IF EXISTS s1; + +# Without name we use the __default_ name. The default config for for the S3 type is config +statement ok +CREATE SECRET ( + TYPE S3, + KEY_ID 'my_key', + SECRET 'my_secret' +) + +query IIII +SELECT name, provider, type, scope FROM duckdb_secrets(); +---- +__default_s3 config s3 ['s3://', 's3n://', 's3a://'] + +# Without name we use the __default_ name. The default config for for the R2 type is config +statement ok +CREATE SECRET ( + TYPE R2, + KEY_ID 'my_key', + SECRET 'my_secret', + ACCOUNT_ID 'my_account_id' +) + +query IIII +SELECT name, provider, type, scope FROM duckdb_secrets() ORDER BY name; +---- +__default_r2 config r2 ['r2://'] +__default_s3 config s3 ['s3://', 's3n://', 's3a://'] + + +# Without name we use the __default_ name. 
The default provider for the GCS type is config
+statement ok
+CREATE SECRET (
+    TYPE GCS,
+    KEY_ID 'my_key',
+    SECRET 'my_secret'
+)
+
+# duckdb_secrets with all defaults looks like this now
+query IIIIII
+SELECT name, persistent, storage, provider, type, scope FROM duckdb_secrets() ORDER BY name;
+----
+__default_gcs 0 memory config gcs ['gcs://', 'gs://']
+__default_r2 0 memory config r2 ['r2://']
+__default_s3 0 memory config s3 ['s3://', 's3n://', 's3a://']
\ No newline at end of file
diff --git a/test/sql/secrets/create_secret_gcs.test_slow b/test/sql/secrets/create_secret_gcs.test_slow
new file mode 100644
index 0000000..3d21639
--- /dev/null
+++ b/test/sql/secrets/create_secret_gcs.test_slow
@@ -0,0 +1,34 @@
+# name: test/sql/secrets/create_secret_gcs.test_slow
+# description: Test secret creation using the default gcs secret provider
+# group: [secrets]
+
+statement ok
+PRAGMA enable_verification;
+
+require httpfs
+
+# Ensure any currently stored secrets don't interfere with the test
+statement ok
+set allow_persistent_secrets=false;
+
+statement ok
+reset s3_use_ssl;
+
+# GCS Secrets automatically default to the correct endpoint for Google Cloud Storage
+statement ok
+CREATE SECRET (
+    TYPE GCS,
+    KEY_ID 'my_key',
+    SECRET 'my_secret'
+)
+
+# The secret will be created for the default scope
+query IIII
+SELECT name, type, provider, scope FROM duckdb_secrets();
+----
+__default_gcs gcs config ['gcs://', 'gs://']
+
+statement error
+FROM 'gcs://test-bucket/test.csv'
+----
+https://storage.googleapis.com/test-bucket/test.csv
diff --git a/test/sql/secrets/create_secret_hffs.test b/test/sql/secrets/create_secret_hffs.test
new file mode 100644
index 0000000..0224ead
--- /dev/null
+++ b/test/sql/secrets/create_secret_hffs.test
@@ -0,0 +1,31 @@
+# name: test/sql/secrets/create_secret_hffs.test
+# description: Test huggingface secrets
+# group: [secrets]
+
+statement ok
+PRAGMA enable_verification;
+
+require httpfs
+
+statement ok
+set allow_persistent_secrets=false;
+
+# Manually setting the token is simplest
+statement ok
+CREATE SECRET hf1 (
+    TYPE HUGGINGFACE,
+    TOKEN 'bla'
+)
+
+# The credential_chain provider will automatically try to fetch the token from the local cache
+statement ok
+CREATE SECRET hf2 (
+    TYPE HUGGINGFACE,
+    PROVIDER 'credential_chain'
+)
+
+query IIII
+SELECT name, type, provider, scope FROM duckdb_secrets() order by name;
+----
+hf1 huggingface config ['hf://']
+hf2 huggingface credential_chain ['hf://']
diff --git a/test/sql/secrets/create_secret_invalid_map.test b/test/sql/secrets/create_secret_invalid_map.test
new file mode 100644
index 0000000..fb51270
--- /dev/null
+++ b/test/sql/secrets/create_secret_invalid_map.test
@@ -0,0 +1,24 @@
+# name: test/sql/secrets/create_secret_invalid_map.test
+# description: Test throwing input errors on invalid map input (duplicate or NULL keys).
+# group: [secrets] + +require httpfs + +statement ok +PRAGMA enable_verification; + +statement error +CREATE PERSISTENT SECRET http_multimap ( + TYPE HTTP, + EXTRA_HTTP_HEADERS MAP{123: 'quack1', 123 : 'quack2'} +); +---- +:Invalid Input Error.*Map keys must be unique.* + +statement error +CREATE PERSISTENT SECRET http_multimap ( + TYPE HTTP, + EXTRA_HTTP_HEADERS MAP{NULL: 'quack1', 123 : 'quack2'} +); +---- +:Invalid Input Error.*Map keys can not be NULL.* \ No newline at end of file diff --git a/test/sql/secrets/create_secret_minio.test b/test/sql/secrets/create_secret_minio.test new file mode 100644 index 0000000..11dcb0e --- /dev/null +++ b/test/sql/secrets/create_secret_minio.test @@ -0,0 +1,78 @@ +# name: test/sql/secrets/create_secret_minio.test +# description: Test s3 secrets actually work using minio +# group: [secrets] + +require parquet + +require httpfs + +require-env S3_TEST_SERVER_AVAILABLE 1 + +# Require that these environment variables are also set + +require-env AWS_DEFAULT_REGION + +require-env AWS_ACCESS_KEY_ID + +require-env AWS_SECRET_ACCESS_KEY + +require-env DUCKDB_S3_ENDPOINT + +require-env DUCKDB_S3_USE_SSL + +set ignore_error_messages + +load __TEST_DIR__/persistent_secrets.db + +statement ok +PRAGMA enable_verification; + +statement ok +set secret_directory='__TEST_DIR__/create_secret_minio' + +# first need to unset the duckdb settings: currently the env variables are loaded automatically making all queries auth +statement ok +set s3_access_key_id=''; + +statement ok +set s3_secret_access_key=''; + +statement error +copy (select 1 as a) to 's3://test- /test-file.parquet' +---- + +# Now we create a scoped secret with correct credentials +statement ok +CREATE PERSISTENT SECRET ( + TYPE S3, + PROVIDER config, + SCOPE 's3://test-bucket/only-this-file-gets-auth.parquet', + KEY_ID '${AWS_ACCESS_KEY_ID}', + SECRET '${AWS_SECRET_ACCESS_KEY}', + REGION '${AWS_DEFAULT_REGION}', + ENDPOINT '${DUCKDB_S3_ENDPOINT}', + USE_SSL '${DUCKDB_S3_USE_SSL}' +) + +# scope doesn't match! query still fails +statement error +copy (select 1 as a) to 's3://test-bucket/test-file.parquet' +---- + +# scope matches, the secret is chosen and the query will succeed +statement ok +copy (select 1 as a) to 's3://test-bucket/only-this-file-gets-auth.parquet' + +restart + +statement ok +set secret_directory='__TEST_DIR__/create_secret_minio' + +# persistent secrets survive restart +statement ok +copy (select 1 as a) to 's3://test-bucket/only-this-file-gets-auth.parquet' + +# Its still scoped +statement error +copy (select 1 as a) to 's3://test-bucket/no-auth-here.parquet' +---- \ No newline at end of file diff --git a/test/sql/secrets/create_secret_name_conflicts.test b/test/sql/secrets/create_secret_name_conflicts.test new file mode 100644 index 0000000..8c48080 --- /dev/null +++ b/test/sql/secrets/create_secret_name_conflicts.test @@ -0,0 +1,89 @@ +# name: test/sql/secrets/create_secret_name_conflicts.test +# description: Test name conflict behaviour for secrets +# group: [secrets] + +statement ok +PRAGMA enable_verification; + +load __TEST_DIR__/persistent_secrets.db + +require httpfs + +statement ok +set secret_directory='__TEST_DIR__/create_secret_name_conflicts' + +statement ok +CREATE TEMPORARY SECRET s1 ( TYPE S3 ) + +statement error +CREATE TEMPORARY SECRET s1 ( TYPE S3 ) +---- +Invalid Input Error: Temporary secret with name 's1' already exists! 
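# (Editorial aside, not part of the test file being added:) when overwriting is intended
# rather than an error, CREATE OR REPLACE is the way out of the conflict shown above; a
# minimal sketch reusing the temporary secret s1, with the same syntax that
# create_secret_overwriting.test exercises later in this patch.
statement ok
CREATE OR REPLACE SECRET s1 ( TYPE S3 )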
+ +statement ok +CREATE PERSISTENT SECRET s1 ( TYPE S3 ) + +statement error +CREATE PERSISTENT SECRET s1 ( TYPE S3 ) +---- +Persistent secret with name 's1' already exists in secret storage 'local_file'! + +statement error +DROP SECRET s1; +---- +Invalid Input Error: Ambiguity found for secret name 's1', secret occurs in multiple storages + +statement error +DROP SECRET s1 FROM bogus; +---- +Invalid Input Error: Unknown storage type found for drop secret: 'bogus' + +statement ok +DROP TEMPORARY SECRET s1; + +# Re-dropping the temp s1 is now erroneous +statement error +DROP TEMPORARY SECRET s1; +---- +Invalid Input Error: Failed to remove non-existent secret with name 's1' + +query II +SELECT name, storage FROM duckdb_secrets() +---- +s1 local_file + +# Now we will do it again but while the permanent secret is still lazily loaded +restart + +statement ok +set secret_directory='__TEST_DIR__/create_secret_name_conflicts' + +statement ok +CREATE TEMPORARY SECRET s1 ( TYPE S3 ) + +# Now the drop should be ambiguous again: but the persistent secret will be lazily loaded now +statement error +DROP SECRET s1; +---- +Invalid Input Error: Ambiguity found for secret name 's1', secret occurs in multiple storages + +# Fully specified drop statement this time +statement ok +DROP PERSISTENT SECRET s1 FROM LOCAL_FILE; + +# Now a semi-weird case: this will create if not exists only within its own storage: therefore this does actually create +# the secret +statement ok +CREATE PERSISTENT SECRET IF NOT EXISTS s1 ( TYPE S3 ) + +query II +SELECT name, storage FROM duckdb_secrets() ORDER BY storage +---- +s1 local_file +s1 memory + +statement ok +DROP PERSISTENT SECRET s1; + +statement ok +DROP SECRET s1; \ No newline at end of file diff --git a/test/sql/secrets/create_secret_non_writable_persistent_dir.test b/test/sql/secrets/create_secret_non_writable_persistent_dir.test new file mode 100644 index 0000000..1091eb6 --- /dev/null +++ b/test/sql/secrets/create_secret_non_writable_persistent_dir.test @@ -0,0 +1,46 @@ +# name: test/sql/secrets/create_secret_non_writable_persistent_dir.test +# description: Test persistent secrets when the secret dir is non-writable +# group: [secrets] + +statement ok +PRAGMA enable_verification; + +load __TEST_DIR__/create_secret_non_writable_persistent_dir.db + +require httpfs + +# First we create any file +statement ok +COPY (SELECT 1 as a) to '__TEST_DIR__/file_to_prevent_the_secret_dir_from_being_created.csv' + +# Then we set the secret dir to this. +statement ok +set secret_directory='__TEST_DIR__/file_to_prevent_the_secret_dir_from_being_created.csv' + +# Now on creation of a tmp secret, the secret manager is initialized, but the persistent secret directory creation is impossible +statement ok +CREATE SECRET my_tmp_secret ( + TYPE S3, + SCOPE 's3://bucket1' +) + +# This now fails with the message that we could not create the persistent secret directory +statement error +CREATE PERSISTENT SECRET my_tmp_secret ( + TYPE S3, + SCOPE 's3://bucket2' +) +---- + +restart + +# Try with a correct, deeply nested path: AOK? 
+statement ok +set secret_directory='__TEST_DIR__/create_secret_non_writable_persistent_dir/a/deeply/nested/folder/will/be/created' + +statement maybe +CREATE PERSISTENT SECRET my_tmp_secret ( + TYPE S3, + SCOPE 's3://bucket2' +) +---- diff --git a/test/sql/secrets/create_secret_overwriting.test b/test/sql/secrets/create_secret_overwriting.test new file mode 100644 index 0000000..39e7314 --- /dev/null +++ b/test/sql/secrets/create_secret_overwriting.test @@ -0,0 +1,73 @@ +# name: test/sql/secrets/create_secret_overwriting.test +# description: Test secret overwriting and deleting +# group: [secrets] + +statement ok +PRAGMA enable_verification; + +require httpfs + +# Ensure any currently stored secrets don't interfere with the test +statement ok +set allow_persistent_secrets=false; + +# Create some s3 secret +statement ok +CREATE SECRET my_secret ( + TYPE S3, + SCOPE 's3://bucket1' +) + +query II +SELECT name, scope FROM duckdb_secrets(); +---- +my_secret ['s3://bucket1'] + +statement error +CREATE SECRET my_secret ( + TYPE S3, + KEY_ID 'my_key', + SECRET 'my_secret', + SCOPE 's3://bucket1' +) +---- +Invalid Input Error: Temporary secret with name 'my_secret' already exists! + +# We should be able to replace the secret though +statement ok +CREATE OR REPLACE SECRET my_secret ( + TYPE S3, + SCOPE 's3://bucket2' +) + +query II +SELECT name, scope FROM duckdb_secrets(); +---- +my_secret ['s3://bucket2'] + +# We can also ignore if we want to +statement ok +CREATE SECRET IF NOT EXISTS my_secret ( + TYPE S3, + SCOPE 's3://bucket5' +) + +query II +SELECT name, scope FROM duckdb_secrets(); +---- +my_secret ['s3://bucket2'] + +# Now try dropping a secret that does not exist +statement error +DROP SECRET my_secret_does_not_exist; +---- +Failed to remove non-existent secret with name 'my_secret_does_not_exist' + +# Drop one that does exist +statement ok +DROP SECRET my_secret; + +# Secret be gone! 
+query II +SELECT name, scope FROM duckdb_secrets(); +---- diff --git a/test/sql/secrets/create_secret_persistence.test b/test/sql/secrets/create_secret_persistence.test new file mode 100644 index 0000000..bc44ab7 --- /dev/null +++ b/test/sql/secrets/create_secret_persistence.test @@ -0,0 +1,195 @@ +# name: test/sql/secrets/create_secret_persistence.test +# description: Test secret persistence +# group: [secrets] + +statement ok +PRAGMA enable_verification; + +load __TEST_DIR__/persistent_secrets.db + +require httpfs + +statement ok +set secret_directory='__TEST_DIR__/create_secret_persistence' + +# Create some s3 secret, the normally the default is TEMPORARY +statement ok +CREATE SECRET my_tmp_secret ( + TYPE S3, + SCOPE 's3://bucket1' +) + +# Explicitly stating +statement ok +CREATE TEMPORARY SECRET my_tmp_secret_2 ( + TYPE S3, + SCOPE 's3://bucket2' +) + +statement ok +CREATE OR REPLACE PERSISTENT SECRET my_tmp_secret_3 ( + TYPE S3, + SCOPE 's3://bucket3' +) + +query III +SELECT name, storage, scope FROM duckdb_secrets() where storage='memory' order by name; +---- +my_tmp_secret memory ['s3://bucket1'] +my_tmp_secret_2 memory ['s3://bucket2'] + +query II +SELECT name, scope FROM duckdb_secrets() where storage != 'memory'; +---- +my_tmp_secret_3 ['s3://bucket3'] + +restart + +statement ok +set secret_directory='__TEST_DIR__/create_secret_persistence' + +# Persistent secrets are restored automatically +query II +SELECT name, scope FROM duckdb_secrets(); +---- +my_tmp_secret_3 ['s3://bucket3'] + +restart + +statement ok +set secret_directory='__TEST_DIR__/create_secret_persistence' + +# Trying to create same name TMP secret fails +statement error +CREATE PERSISTENT SECRET my_tmp_secret_3 ( + TYPE S3, + SCOPE 's3://bucket3_not_used' +) +---- +Invalid Input Error: Persistent secret with name 'my_tmp_secret_3' already exists in secret storage 'local_file'! + +restart + +statement ok +set secret_directory='__TEST_DIR__/create_secret_persistence' + +# Trying to create same name PERSISTENT secret fails +statement error +CREATE PERSISTENT SECRET my_tmp_secret_3 ( + TYPE S3, + SCOPE 's3://bucket3_not_used' +) +---- +Invalid Input Error: Persistent secret with name 'my_tmp_secret_3' already exists in secret storage 'local_file'! 
+ +restart + +statement ok +set secret_directory='__TEST_DIR__/create_secret_persistence' + +# Note: this will be a temporary secret: there are now 2 secrets with the same name +statement ok +CREATE SECRET IF NOT EXISTS my_tmp_secret_3 ( + TYPE S3, + SCOPE 's3://bucket3_not_used' +) + +# Secret is unmodified +query III +SELECT name, storage, scope FROM duckdb_secrets() where name='my_tmp_secret_3' order by storage; +---- +my_tmp_secret_3 local_file ['s3://bucket3'] +my_tmp_secret_3 memory ['s3://bucket3_not_used'] + +restart + +statement ok +set secret_directory='__TEST_DIR__/create_secret_persistence' + +# Ignoring already existing persistent secret is fine +statement ok +CREATE PERSISTENT SECRET IF NOT EXISTS my_tmp_secret_3 ( + TYPE S3, + SCOPE 's3://bucket3_not_used' +) + +# Running second time, code path slightly different as the secret is lazy loaded in previous step +statement ok +CREATE PERSISTENT SECRET IF NOT EXISTS my_tmp_secret_3 ( + TYPE S3, + SCOPE 's3://bucket3_not_used' +) + +# Secret is unmodified +query II +SELECT name, scope FROM duckdb_secrets(); +---- +my_tmp_secret_3 ['s3://bucket3'] + +restart + +statement ok +set secret_directory='__TEST_DIR__/create_secret_persistence' + +# Secret is still unmodified after restart +query II +SELECT name, scope FROM duckdb_secrets() ; +---- +my_tmp_secret_3 ['s3://bucket3'] + +# Now we do actually update the persistent secret +statement ok +CREATE OR REPLACE PERSISTENT SECRET my_tmp_secret_3 ( + TYPE S3, + SCOPE 's3://bucket3_updated' +) + +# Its updated! +query II +SELECT name, scope FROM duckdb_secrets(); +---- +my_tmp_secret_3 ['s3://bucket3_updated'] + +restart + +statement ok +set secret_directory='__TEST_DIR__/create_secret_persistence' + +# Survives restart! +query II +SELECT name, scope FROM duckdb_secrets(); +---- +my_tmp_secret_3 ['s3://bucket3_updated'] + +# Now we add another secret, to reliably test deletion of the other one +statement ok +CREATE PERSISTENT SECRET IF NOT EXISTS my_tmp_secret_4 ( + TYPE S3, + SCOPE 's3://another_secret' +) + +query II +SELECT name, scope FROM duckdb_secrets() order by name; +---- +my_tmp_secret_3 ['s3://bucket3_updated'] +my_tmp_secret_4 ['s3://another_secret'] + +statement ok +DROP SECRET my_tmp_secret_3; + +# my_tmp_secret_3 is deleted +query II +SELECT name, scope FROM duckdb_secrets(); +---- +my_tmp_secret_4 ['s3://another_secret'] + +restart + +statement ok +set secret_directory='__TEST_DIR__/create_secret_persistence' + +# Secret is actually deleted +query III +SELECT name, storage, scope FROM duckdb_secrets() order by name; +---- +my_tmp_secret_4 local_file ['s3://another_secret'] diff --git a/test/sql/secrets/create_secret_persistence_error_handling.test b/test/sql/secrets/create_secret_persistence_error_handling.test new file mode 100644 index 0000000..93c102e --- /dev/null +++ b/test/sql/secrets/create_secret_persistence_error_handling.test @@ -0,0 +1,46 @@ +# name: test/sql/secrets/create_secret_persistence_error_handling.test +# description: Test secret persistence with buggy secrets +# group: [secrets] + +statement ok +PRAGMA enable_verification; + +load __TEST_DIR__/create_secret_persistence_error_handling.db + +require httpfs + +statement ok +set secret_directory='__TEST_DIR__/create_secret_persistence_error_handling' + +# Hacky way to make duckdb create the create_secret_persistence_error_handling dir +statement ok +COPY (select 1 as a, 2 as b ) to '__TEST_DIR__/create_secret_persistence_error_handling/' (FORMAT csv, PARTITION_BY a) + +# Now write a corrupt secret file 
+statement ok +COPY (select 1 as a ) to '__TEST_DIR__/create_secret_persistence_error_handling/s1.duckdb_secret' (FORMAT csv) + +statement error +FROM duckdb_secrets(); +---- + +restart + +statement ok +set secret_directory='__TEST_DIR__/create_secret_persistence_error_handling2' + +statement ok +CREATE PERSISTENT SECRET s1 (TYPE S3); + +restart no_extension_load + +statement ok +set secret_directory='__TEST_DIR__/create_secret_persistence_error_handling2' + +# Disable autoloading +statement ok +SET autoload_known_extensions=false; + +# Force persistent deserialization; we can deserialize generic key/value secrets +statement ok +from duckdb_secrets(); diff --git a/test/sql/secrets/create_secret_r2.test b/test/sql/secrets/create_secret_r2.test new file mode 100644 index 0000000..972fe21 --- /dev/null +++ b/test/sql/secrets/create_secret_r2.test @@ -0,0 +1,65 @@ +# name: test/sql/secrets/create_secret_r2.test +# description: Test secret creation using the default r2 secret provider +# group: [secrets] + +statement ok +PRAGMA enable_verification; + +require httpfs + +statement ok +set secret_directory='__TEST_DIR__/create_secret_r2' + +# R2 is secrets will instead of requiring manually constructing the endpoint of .r2.cloudflarestorage.com, +# use the account_id to configure it. Also the region is not required at all. Also the scope defaults to r2:// +statement ok +CREATE SECRET ( + TYPE R2, + ACCOUNT_ID 'some_bogus_account', + KEY_ID 'my_key', + SECRET 'my_secret' +) + +# The secret will be created for the default scope +query IIII +SELECT name, type, provider, scope FROM duckdb_secrets(); +---- +__default_r2 r2 config ['r2://'] + +# +statement error +FROM 's3://test-bucket/test.csv' +---- +:.*HTTP Error.*HTTP GET error on.* + +# Account ID is only for R2, trying to set this for S3 will fail +statement error +CREATE SECRET ( + TYPE S3, + ACCOUNT_ID 'some_bogus_account', + KEY_ID 'my_key', + SECRET 'my_secret' +) +---- +Binder Error: Unknown parameter 'account_id' for secret type 's3' with default provider 'config' + +# Account ID is only for R2, trying to set this for GCS will fail +statement error +CREATE SECRET ( + TYPE GCS, + PROVIDER config, + ACCOUNT_ID 'some_bogus_account', + KEY_ID 'my_key', + SECRET 'my_secret' +) +---- +Binder Error: Unknown parameter 'account_id' for secret type 'gcs' with provider 'config' + +# Ensure secret lookup works correctly; +statement ok +CREATE SECRET test( + TYPE R2, + ACCOUNT_ID 'some_bogus_account', + KEY_ID 'my_key', + SECRET 'my_secret' +) diff --git a/test/sql/secrets/create_secret_r2_serialization.test b/test/sql/secrets/create_secret_r2_serialization.test new file mode 100644 index 0000000..6ba8d54 --- /dev/null +++ b/test/sql/secrets/create_secret_r2_serialization.test @@ -0,0 +1,70 @@ +# name: test/sql/secrets/create_secret_r2_serialization.test +# description: Demo of secret serialization +# group: [secrets] + +# NOTE: this is a testing feature that will be removed / replaced with actual persistent secrets. 
+ +require httpfs + +require parquet + +load __TEST_DIR__/test_serialize_secrets.db + +statement ok +PRAGMA enable_verification; + +statement ok +set secret_directory='__TEST_DIR__/create_secret_r2_serialization' + +statement ok +CREATE OR REPLACE PERSISTENT SECRET s1 ( + TYPE S3, + PROVIDER config, + SCOPE 's3://my_scope', + KEY_ID 'mekey', + SECRET 'mesecret', + REGION 'meregion', + SESSION_TOKEN 'mesesh', + ENDPOINT 'meendpoint', + URL_STYLE 'mahstyle', + USE_SSL true, + URL_COMPATIBILITY_MODE true +) + +query IIII +select name, type, provider, scope FROM duckdb_secrets(); +---- +s1 s3 config ['s3://my_scope'] + +query I nosort secret_to_string +select * from duckdb_secrets(); +---- + +restart + +# Now setting the secret dir somehwere nonexistent will yield no persistent secrets +statement ok +set secret_directory='__TEST_DIR__/does_not_exist2' + +query I +select count(*) FROM duckdb_secrets(); +---- +0 + +restart + +# However setting it to the dir that does, we can suddenly see our persisted secrets +statement ok +set secret_directory='__TEST_DIR__/create_secret_r2_serialization' + +# After restart secret is still there +query IIII +select name, type, provider, scope FROM duckdb_secrets(); +---- +s1 s3 config ['s3://my_scope'] + +# Even more: it matches the exact string note that we don't disable redaction here to ensure we cover +# redaction set serialization with this test +query I nosort secret_to_string +select * from duckdb_secrets(); +---- \ No newline at end of file diff --git a/test/sql/secrets/create_secret_s3_serialization.test b/test/sql/secrets/create_secret_s3_serialization.test new file mode 100644 index 0000000..4d127e6 --- /dev/null +++ b/test/sql/secrets/create_secret_s3_serialization.test @@ -0,0 +1,99 @@ +# name: test/sql/secrets/create_secret_s3_serialization.test +# description: Test serialization of the S3/GCS/r2 secrets +# group: [secrets] + +require httpfs + +require parquet + +load __TEST_DIR__/test_serialize_secrets.db + +statement ok +PRAGMA enable_verification; + +statement ok +set secret_directory='__TEST_DIR__/create_secret_s3_serialization' + +statement ok +CREATE OR REPLACE PERSISTENT SECRET s1 ( + TYPE S3, + PROVIDER config, + SCOPE 's3://my_s3_scope', + KEY_ID 'mekey', + SECRET 'mesecret', + REGION 'meregion', + SESSION_TOKEN 'mesesh', + ENDPOINT 'meendpoint', + URL_STYLE 'mahstyle', + USE_SSL true, + URL_COMPATIBILITY_MODE true +) + +statement ok +CREATE OR REPLACE PERSISTENT SECRET s2 ( + TYPE R2, + PROVIDER config, + SCOPE 's3://my_r2_scope', + ACCOUNT_ID 'some_bogus_account', + KEY_ID 'mekey', + SECRET 'mesecret', + SESSION_TOKEN 'mesesh', + URL_STYLE 'mahstyle', + USE_SSL 1, + URL_COMPATIBILITY_MODE 1 +) + +statement ok +CREATE OR REPLACE PERSISTENT SECRET s3 ( + TYPE GCS, + PROVIDER config, + SCOPE 's3://my_gcs_scope', + KEY_ID 'mekey', + SECRET 'mesecret', + SESSION_TOKEN 'mesesh', + URL_STYLE 'mahstyle', + USE_SSL true, + URL_COMPATIBILITY_MODE true +) + +query IIII +select name, type, provider, scope FROM duckdb_secrets() order by name; +---- +s1 s3 config ['s3://my_s3_scope'] +s2 r2 config ['s3://my_r2_scope'] +s3 gcs config ['s3://my_gcs_scope'] + +# Note: this query prints the tokens as an unredacted string +query I nosort secret_to_string +select secret_string from duckdb_secrets(redact=false) order by type; +---- + +restart + +# Now setting the secret dir somehwere nonexistent will yield no persistent secrets +statement ok +set secret_directory='__TEST_DIR__/does_not_exist1' + +query I +select count(*) FROM duckdb_secrets(redact=false); 
+---- +0 + +restart + +# However setting it to the dir that does, we can suddenly see our persisted secrets +statement ok +set secret_directory='__TEST_DIR__/create_secret_s3_serialization' + +# After restart secrets are still there +query IIII +select name, type, provider, scope FROM duckdb_secrets() order by name; +---- +s1 s3 config ['s3://my_s3_scope'] +s2 r2 config ['s3://my_r2_scope'] +s3 gcs config ['s3://my_gcs_scope'] + +# Note: this query prints the tokens as an unredacted string +query I nosort secret_to_string +select secret_string from duckdb_secrets(redact=false) order by type; +---- \ No newline at end of file diff --git a/test/sql/secrets/create_secret_scope_matching.test b/test/sql/secrets/create_secret_scope_matching.test new file mode 100644 index 0000000..3d5dd2a --- /dev/null +++ b/test/sql/secrets/create_secret_scope_matching.test @@ -0,0 +1,61 @@ +# name: test/sql/secrets/create_secret_scope_matching.test +# description: Test scope matching behaviour is correct +# group: [secrets] + +load __TEST_DIR__/create_secret_scope_matching.db + +statement ok +PRAGMA enable_verification; + +require httpfs + +statement ok +set secret_directory='__TEST_DIR__/create_secret_scope_matching' + +# No match +query I +SELECT name FROM which_secret('s3://', 's3') +---- + +statement ok +CREATE TEMPORARY SECRET t1 ( TYPE S3 ) + +statement ok +CREATE TEMPORARY SECRET t2 ( TYPE S3 ) + +statement ok +CREATE SECRET p1 IN LOCAL_FILE ( TYPE S3 ) + +# This ties within the same storage: the two temporary secrets s1 and s2 both score identically. We solve this by +# tie-breaking on secret name alphabetical ordering +query I +SELECT name FROM which_secret('s3://', 's3') +---- +t1 + +query III +FROM which_secret('s3://', 's3') +---- +t1 TEMPORARY memory + +statement ok +DROP SECRET t1 + +# Temporary secrets take preference over temporary ones +query I +SELECT name FROM which_secret('s3://', 's3') +---- +t2 + +statement ok +DROP SECRET t2 + +query I +SELECT name FROM which_secret('s3://', 's3') +---- +p1 + +statement maybe +DROP SECRET p1 +---- +Invalid Input Error: Failed to remove non-existent secret diff --git a/test/sql/secrets/create_secret_settings.test b/test/sql/secrets/create_secret_settings.test new file mode 100644 index 0000000..6200424 --- /dev/null +++ b/test/sql/secrets/create_secret_settings.test @@ -0,0 +1,71 @@ +# name: test/sql/secrets/create_secret_settings.test +# description: Test setting secret settings +# group: [secrets] + +statement ok +PRAGMA enable_verification; + +load __TEST_DIR__/secrets_settings.db + +require httpfs + +statement ok +set secret_directory='__TEST_DIR__/create_secret_settings1' + +statement ok +set allow_persistent_secrets=true; + +# Create some s3 secret, the normally the default is TEMPORARY +statement ok +CREATE PERSISTENT SECRET my_perm_secret ( + TYPE S3, + SCOPE 's3://bucket1' +) + +query II +SELECT name, scope from duckdb_secrets(); +---- +my_perm_secret ['s3://bucket1'] + +statement error +set secret_directory='__TEST_DIR__/create_secret_settings2' +---- +Invalid Input Error: Changing Secret Manager settings after the secret manager is used is not allowed! + +statement error +set allow_persistent_secrets=false; +---- +Invalid Input Error: Changing Secret Manager settings after the secret manager is used is not allowed! 
+ +# This setting CAN be modified after init +statement ok +set default_secret_storage = 'local_file' + +statement ok +reset default_secret_storage; + +restart + +# When disabling secrets, we won't read the one that we wrote earlier +statement ok +set allow_persistent_secrets=false + +query I +select count(*) from duckdb_secrets(); +---- +0 + +restart + +# Switch settings back and it works again +statement ok +set allow_persistent_secrets=true + +# setting the path right it will work +statement ok +set secret_directory='__TEST_DIR__/create_secret_settings1' + +query II +SELECT name, scope from duckdb_secrets(); +---- +my_perm_secret ['s3://bucket1'] diff --git a/test/sql/secrets/create_secret_storage_backends.test b/test/sql/secrets/create_secret_storage_backends.test new file mode 100644 index 0000000..5daa06d --- /dev/null +++ b/test/sql/secrets/create_secret_storage_backends.test @@ -0,0 +1,111 @@ +# name: test/sql/secrets/create_secret_storage_backends.test +# description: Test different storage backends +# group: [secrets] + +load __TEST_DIR__/create_secret_storage_backends.db + +statement ok +PRAGMA enable_verification; + +require httpfs + +# Ensure any currently stored secrets don't interfere with the test +statement ok +set allow_persistent_secrets=false; + +statement error +CREATE TEMPORARY SECRET s1 IN LOCAL_FILE ( TYPE S3 ) +---- +Invalid Input Error: Persistent secrets are disabled. Restart DuckDB and enable persistent secrets through 'SET allow_persistent_secrets=true' + +statement error +CREATE PERSISTENT SECRET s1 IN NON_EXISTENT_SECRET_STORAGE ( TYPE S3 ) +---- +Invalid Input Error: Persistent secrets are disabled. Restart DuckDB and enable persistent secrets through 'SET allow_persistent_secrets=true' + +# We have disabled the permanent secrets, so this should fail +statement error +CREATE PERSISTENT SECRET perm_s1 ( TYPE S3 ) +---- +Invalid Input Error: Persistent secrets are disabled. Restart DuckDB and enable persistent secrets through 'SET allow_persistent_secrets=true' + +restart + +# Enable persistent secrets so we can set a 'secret_directory' +statement ok +set allow_persistent_secrets=true; + +statement ok +set secret_directory='__TEST_DIR__/create_secret_storages' + +# Default for persistent secret is currently LOCAL_FILE (only native persistent storage method currently) +statement ok +CREATE PERSISTENT SECRET perm_s1 ( TYPE S3 ) + +# Specifying IN ... implies persistent, hence this is okay +statement ok +CREATE SECRET perm_s2 IN LOCAL_FILE ( TYPE S3 ) + +# Explicitly stating temporary is cool +statement ok +CREATE TEMPORARY SECRET temp_s1 ( TYPE s3 ); + +# Not specifying it will use the system default (which is temp) +statement ok +CREATE SECRET temp_s2 ( TYPE s3 ); + +query IIIIII +SELECT * EXCLUDE (secret_string) FROM duckdb_secrets() ORDER BY name +---- +perm_s1 s3 config true local_file ['s3://', 's3n://', 's3a://'] +perm_s2 s3 config true local_file ['s3://', 's3n://', 's3a://'] +temp_s1 s3 config false memory ['s3://', 's3n://', 's3a://'] +temp_s2 s3 config false memory ['s3://', 's3n://', 's3a://'] + +restart + +# Since extensions can add secret storage backends, we allow switching the default backend +statement ok +set default_secret_storage='currently-non-existent' + +statement ok +set secret_directory='__TEST_DIR__/create_secret_storages' + +statement error +CREATE PERSISTENT SECRET s1 ( TYPE S3 ) +---- +Secret storage 'currently-non-existent' not found! 
+ +# We can still work around this broken default by specifying the storage explicitly +statement ok +CREATE PERSISTENT SECRET s1 IN LOCAL_FILE ( TYPE S3 ) + +restart + +statement ok +set secret_directory='__TEST_DIR__/create_secret_storages' + +# Let's restore and now things work again +statement ok +reset default_secret_storage + +statement ok +CREATE PERSISTENT SECRET s2 ( TYPE S3 ) + +query IIIIII +SELECT * EXCLUDE (secret_string) FROM duckdb_secrets() ORDER BY name +---- +perm_s1 s3 config true local_file ['s3://', 's3n://', 's3a://'] +perm_s2 s3 config true local_file ['s3://', 's3n://', 's3a://'] +s1 s3 config true local_file ['s3://', 's3n://', 's3a://'] +s2 s3 config true local_file ['s3://', 's3n://', 's3a://'] + +statement maybe +DROP SECRET perm_s1; +---- +Invalid Input Error: Failed to remove non-existent secret + +statement maybe +DROP SECRET perm_s2; +---- +Invalid Input Error: Failed to remove non-existent secret diff --git a/test/sql/secrets/create_secret_transactional.test b/test/sql/secrets/create_secret_transactional.test new file mode 100644 index 0000000..79a4157 --- /dev/null +++ b/test/sql/secrets/create_secret_transactional.test @@ -0,0 +1,146 @@ +# name: test/sql/secrets/create_secret_transactional.test +# description: Test secret transactional safety +# group: [secrets] + +statement ok +PRAGMA enable_verification; + +require httpfs + +load __TEST_DIR__/create_secret_transactional.db + +statement ok +set secret_directory='__TEST_DIR__/create_secret_transactional' + +statement ok +PRAGMA threads=1 + +foreach secret_type TEMPORARY PERSISTENT + +statement ok con1 +BEGIN TRANSACTION + +statement ok con1 +CREATE ${secret_type} SECRET s1 (TYPE S3) + +statement ok con2 +BEGIN TRANSACTION + +statement ok con2 +CREATE ${secret_type} SECRET s2 (TYPE S3) + +query I con1 +SELECT name FROM duckdb_secrets(); +---- +s1 + +query I con2 +SELECT name FROM duckdb_secrets(); +---- +s2 + +statement ok con1 +COMMIT + +# Transaction 2 still only sees own secret: it has not commited yet +query I con2 +SELECT name FROM duckdb_secrets(); +---- +s2 + +# New transaction will see only committed secret +query I con3 +SELECT name FROM duckdb_secrets(); +---- +s1 + +statement ok con2 +COMMIT + +# Now both are visible +query I con3 +SELECT name FROM duckdb_secrets() ORDER BY name; +---- +s1 +s2 + +statement ok con1 +BEGIN TRANSACTION + +statement ok con1 +DROP SECRET s1; + +# Drop not yet commited: con3 will not see it yet +query I con3 +SELECT name FROM duckdb_secrets() ORDER BY name; +---- +s1 +s2 + +# Commit the drop +statement ok con1 +COMMIT + +# Drop now visible to con3 +query I con3 +SELECT name FROM duckdb_secrets(); +---- +s2 + +# Clean up for loop end +statement ok +DROP SECRET s2 + +endloop + +# Now lets test transactional safety of lazily loaded persistent secrets + +statement ok +CREATE PERSISTENT SECRET perm_s1 (TYPE S3) + +restart + +statement ok +set secret_directory='__TEST_DIR__/create_secret_transactional' + +# After restart, we create 2 connections that each add their own tmp secret; the perm secret is now lazily loaded! 
+statement ok con1 +BEGIN TRANSACTION + +statement ok con1 +CREATE SECRET tmp_s1 (TYPE S3) + +statement ok con2 +BEGIN TRANSACTION + +statement ok con2 +CREATE SECRET tmp_s2 (TYPE S3) + +# Now con1 drops the lazily loaded perm secret +statement ok con1 +DROP SECRET perm_s1; + +query I con1 +SELECT name FROM duckdb_secrets(); +---- +tmp_s1 + +# con2 still has both secrets +query I con2 +SELECT name FROM duckdb_secrets() ORDER BY name; +---- +perm_s1 +tmp_s2 + +statement ok con1 +COMMIT + +statement ok con2 +COMMIT + +# Now the deletion is visible to con2 +query I con2 +SELECT name FROM duckdb_secrets() ORDER BY name; +---- +tmp_s1 +tmp_s2 \ No newline at end of file diff --git a/test/sql/secrets/persistent_key_value_secret.test b/test/sql/secrets/persistent_key_value_secret.test new file mode 100644 index 0000000..89448fa --- /dev/null +++ b/test/sql/secrets/persistent_key_value_secret.test @@ -0,0 +1,28 @@ +# name: test/sql/secrets/persistent_key_value_secret.test +# group: [secrets] + +load __TEST_DIR__/persistent_extra_headers + +require httpfs + +require json + +statement ok +CREATE PERSISTENT SECRET http ( + TYPE HTTP, + EXTRA_HTTP_HEADERS MAP { + 'Authorization': 'Bearer sk_test_not_valid_key' + } +); + +restart + +# Because this is an https host, the 'EXTRA_HTTP_HEADERS' will be used, as long as this doesn't crash anything +# we are happy with this test throwing an IO error. +statement error +select + unnest(data) as customers +from + read_json('https://non.existant/endpoint'); +---- +IO Error: Could not establish connection error for HTTP HEAD to 'https://non.existant/endpoint' diff --git a/test/sql/secrets/secret_compatibility_httpfs.test b/test/sql/secrets/secret_compatibility_httpfs.test new file mode 100644 index 0000000..6ae522d --- /dev/null +++ b/test/sql/secrets/secret_compatibility_httpfs.test @@ -0,0 +1,21 @@ +# name: test/sql/secrets/secret_compatibility_httpfs.test +# description: Test secret compatibility across versions +# group: [secrets] + +require httpfs + +require-env TEST_PERSISTENT_SECRETS_AVAILABLE + +# Ensure any currently stored secrets don't interfere with the test +statement ok +set secret_directory='./data/secrets/httpfs' + +query IIIIIII +from duckdb_secrets() order by name; +---- +s3_config_secret_v1_1_2 s3 config true local_file ['s3://', 's3n://', 's3a://'] name=s3_config_secret_v1_1_2;type=s3;provider=config;serializable=true;scope=s3://,s3n://,s3a://;region=us-east-2;use_ssl=false +s3_config_secret_v1_1_3 s3 config true local_file ['s3://', 's3n://', 's3a://'] name=s3_config_secret_v1_1_3;type=s3;provider=config;serializable=true;scope=s3://,s3n://,s3a://;region=us-east-2;use_ssl=false +s3_config_secret_v_1_0_0 s3 config true local_file ['s3://', 's3n://', 's3a://'] name=s3_config_secret_v_1_0_0;type=s3;provider=config;serializable=true;scope=s3://,s3n://,s3a://;endpoint=s3.amazonaws.com;key_id=;region=us-east-2;s3_url_compatibility_mode=0;secret=redacted;session_token=redacted;url_style=;use_ssl=0 +s3_secret_chain_v_1_0_0 s3 credential_chain true local_file ['s3://', 's3n://', 's3a://'] name=s3_secret_chain_v_1_0_0;type=s3;provider=credential_chain;serializable=true;scope=s3://,s3n://,s3a://;endpoint=s3.amazonaws.com;region=us-east-2;use_ssl=false +s3_secret_chain_v_1_1_2 s3 credential_chain true local_file ['s3://', 's3n://', 's3a://'] name=s3_secret_chain_v_1_1_2;type=s3;provider=credential_chain;serializable=true;scope=s3://,s3n://,s3a://;endpoint=s3.amazonaws.com;region=us-east-2;use_ssl=false +s3_secret_chain_v_1_1_3 s3 credential_chain true 
local_file ['s3://', 's3n://', 's3a://'] name=s3_secret_chain_v_1_1_3;type=s3;provider=credential_chain;serializable=true;scope=s3://,s3n://,s3a://;endpoint=s3.amazonaws.com;region=us-east-2;use_ssl=false diff --git a/test/sql/secrets/secret_types_function.test b/test/sql/secrets/secret_types_function.test new file mode 100644 index 0000000..c1fd676 --- /dev/null +++ b/test/sql/secrets/secret_types_function.test @@ -0,0 +1,20 @@ +# name: test/sql/secrets/secret_types_function.test +# description: Test duckdb_secret_types function +# group: [secrets] + +query III +FROM duckdb_secret_types() WHERE type IN ['s3', 'r2', 'gcs', 'http'] ORDER BY type +---- +http config (empty) + +require httpfs + +require no_extension_autoloading "EXPECTED: The duckdb_secret_types() function does not trigger autoloading httpfs" + +query III +FROM duckdb_secret_types() WHERE type IN ['s3', 'r2', 'gcs', 'http'] ORDER BY type +---- +gcs config httpfs +http config (empty) +r2 config httpfs +s3 config httpfs diff --git a/test/sql/settings/test_disabled_file_system_httpfs.test b/test/sql/settings/test_disabled_file_system_httpfs.test new file mode 100644 index 0000000..9cc1ee8 --- /dev/null +++ b/test/sql/settings/test_disabled_file_system_httpfs.test @@ -0,0 +1,28 @@ +# name: test/sql/settings/test_disabled_file_system_httpfs.test +# description: Test disabled file systems with HTTPFS +# group: [settings] + +require skip_reload + +require no_extension_autoloading "EXPECTED: Test disable loading from local file system" + +statement ok +PRAGMA enable_verification + +require httpfs + +statement ok +SET disabled_filesystems='LocalFileSystem'; + +# httpfs works +statement ok +from read_csv_auto('https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv'); + +statement ok +SET disabled_filesystems='LocalFileSystem,HTTPFileSystem'; + +# not if we disable it +statement error +from read_csv_auto('https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv'); +---- +File system HTTPFileSystem has been disabled by configuration diff --git a/test/sql/storage/encryption/temp_files/encrypted_out_of_core.test_slow b/test/sql/storage/encryption/temp_files/encrypted_out_of_core.test_slow new file mode 100644 index 0000000..5b09f29 --- /dev/null +++ b/test/sql/storage/encryption/temp_files/encrypted_out_of_core.test_slow @@ -0,0 +1,68 @@ +# name: test/sql/storage/encryption/temp_files/encrypted_out_of_core.test_slow +# description: Encrypted large joins in persistent databases have a leftover temporary directory. 
+# group: [temp_files] + +foreach cipher GCM CTR + + +require httpfs + +require tpch + +load __TEST_DIR__/leftover_temp_files.db + +statement ok +ATTACH '__TEST_DIR__/encrypted_temp_files_${cipher}.db' AS enc_${cipher} (ENCRYPTION_KEY 'asdf', ENCRYPTION_CIPHER '${cipher}'); + +statement ok +SET temp_file_encryption=true; + +statement ok +USE enc_${cipher}; + +statement ok +SET threads=8 + +statement ok +SET memory_limit='1GB'; + +statement ok +CALL dbgen(sf=1); + +statement ok +ALTER TABLE lineitem RENAME TO lineitem1 + +statement ok +CREATE TABLE lineitem2 AS FROM lineitem1 + +# creating and dropping a table with an ORDER BY +statement ok +CREATE OR REPLACE TEMPORARY TABLE ans as select l1.*, l1.* from lineitem1 l1 ORDER BY l_orderkey, l_returnflag + +statement ok +DROP TABLE ans; + +# performing a small hash join +statement ok +CREATE OR REPLACE TEMPORARY TABLE ans as select l1.*, l2.* from lineitem1 l1 JOIN (FROM lineitem2 l2 WHERE l_orderkey<10000) AS l2 USING (l_orderkey, l_linenumber) + +statement ok +DROP TABLE ans; + +# performing a large window function +statement ok +CREATE OR REPLACE TEMPORARY TABLE ans as select l1.*, row_number() OVER (PARTITION BY l_orderkey, l_linenumber ORDER BY l_orderkey) from lineitem1 l1 + +statement ok +DROP TABLE ans; + +# performing a large hash join +statement ok +CREATE OR REPLACE TEMPORARY TABLE ans as select l1.*, l2.* from lineitem1 l1 JOIN lineitem2 l2 USING (l_orderkey, l_linenumber) + +statement ok +DROP TABLE ans; + +restart + +endloop \ No newline at end of file diff --git a/test/sql/storage/external_file_cache/external_file_cache_httpfs.test b/test/sql/storage/external_file_cache/external_file_cache_httpfs.test new file mode 100644 index 0000000..2efa361 --- /dev/null +++ b/test/sql/storage/external_file_cache/external_file_cache_httpfs.test @@ -0,0 +1,18 @@ +# name: test/sql/storage/external_file_cache/external_file_cache_httpfs.test +# description: Test the external file cache for HTTPFS reads +# group: [external_file_cache] + +require parquet + +require httpfs + +# first query caches the data +statement ok +from 's3://duckdb-blobs/data/shakespeare.parquet'; + + +# second query should only have a head request, no gets +query II +explain analyze from 's3://duckdb-blobs/data/shakespeare.parquet'; +---- +analyzed_plan :.*GET: 0.* diff --git a/test/sql/storage/external_file_cache/external_file_cache_read_blob.test_slow b/test/sql/storage/external_file_cache/external_file_cache_read_blob.test_slow new file mode 100644 index 0000000..9edf162 --- /dev/null +++ b/test/sql/storage/external_file_cache/external_file_cache_read_blob.test_slow @@ -0,0 +1,25 @@ +# name: test/sql/storage/external_file_cache/external_file_cache_read_blob.test_slow +# description: Test the external file cache for read_blob HTTPFS reads +# group: [external_file_cache] + +require parquet + +require httpfs + +# first read_blob should do 1 GET +query II +explain analyze from read_blob('s3://duckdb-blobs/data/shakespeare.parquet'); +---- +analyzed_plan :.*GET: 1.* + +# second one should do 0 +query II +explain analyze from read_blob('s3://duckdb-blobs/data/shakespeare.parquet'); +---- +analyzed_plan :.*GET: 0.* + +# although the read was cached using read_blob, the parquet reader can read from cache +query II +explain analyze from 's3://duckdb-blobs/data/shakespeare.parquet'; +---- +analyzed_plan :.*GET: 0.* diff --git a/test/sql/storage/invalid_unicode_scrambled.test_slow b/test/sql/storage/invalid_unicode_scrambled.test_slow new file mode 100644 index 0000000..2af3750 --- 
/dev/null +++ b/test/sql/storage/invalid_unicode_scrambled.test_slow @@ -0,0 +1,14 @@ +# name: test/sql/storage/invalid_unicode_scrambled.test_slow +# description: Issue #1650 - "invalid unicode detected in segment statistics" when inserting structs with strings and NULL values +# group: [storage] + +require httpfs + +require parquet + +statement ok +create or replace table blah as (with +us as (select distinct * from 'https://github.com/duckdb/duckdb-data/releases/download/v1.0/invalid_unicode_scrambled.parquet') select Address from +us); + + From ab4aa2765b3cc714cef8874b64e2895b1b1ac03c Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 11:57:17 +0200 Subject: [PATCH 02/32] Fix tests --- data/secrets/README.md | 10 + .../s3_config_secret_v1_1_2.duckdb_secret | Bin 0 -> 435 bytes .../s3_config_secret_v1_1_3.duckdb_secret | Bin 0 -> 435 bytes .../s3_config_secret_v_1_0_0.duckdb_secret | Bin 0 -> 1071 bytes .../s3_secret_chain_v_1_0_0.duckdb_secret | Bin 0 -> 561 bytes .../s3_secret_chain_v_1_1_2.duckdb_secret | Bin 0 -> 561 bytes .../s3_secret_chain_v_1_1_3.duckdb_secret | Bin 0 -> 561 bytes .../copy/csv/parallel/test_parallel_csv.test | 139 ------ test/sql/copy/no_head_on_write.test | 2 +- test/sql/json/table/read_json.test | 398 ------------------ test/sql/json/table/read_json_auto.test_slow | 352 ---------------- test/sql/json/table/read_json_objects.test | 225 ---------- 12 files changed, 11 insertions(+), 1115 deletions(-) create mode 100755 data/secrets/README.md create mode 100755 data/secrets/httpfs/s3_config_secret_v1_1_2.duckdb_secret create mode 100755 data/secrets/httpfs/s3_config_secret_v1_1_3.duckdb_secret create mode 100755 data/secrets/httpfs/s3_config_secret_v_1_0_0.duckdb_secret create mode 100755 data/secrets/httpfs/s3_secret_chain_v_1_0_0.duckdb_secret create mode 100755 data/secrets/httpfs/s3_secret_chain_v_1_1_2.duckdb_secret create mode 100755 data/secrets/httpfs/s3_secret_chain_v_1_1_3.duckdb_secret diff --git a/data/secrets/README.md b/data/secrets/README.md new file mode 100755 index 0000000..dccb3ed --- /dev/null +++ b/data/secrets/README.md @@ -0,0 +1,10 @@ +# Test secrets +DuckDB only allows persistent secrets with the x00 permission (e.g. 600 or 700). Therefore to use these +secrets, the permissions need to be set before running any tests that uses them. + +The recommended way to add tests that touch these persistent secret files is to put them behind a +```shell +require-env TEST_PERSISTENT_SECRETS_AVAILABLE +``` +statement, which ensures the tests only run in CI jobs where the permissions are set correctly. 
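+
+As a sketch of what that setup could look like (the secret-file paths are the ones added in this directory; the exact mode and the way the CI workflow exports the variable may differ):
+
+```shell
+# give the checked-in secret files owner-only permissions (an "x00" mode such as 600)
+chmod 600 data/secrets/httpfs/*.duckdb_secret
+
+# signal to the test runner that the persistent secret files are usable
+export TEST_PERSISTENT_SECRETS_AVAILABLE=1
+```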
+ diff --git a/data/secrets/httpfs/s3_config_secret_v1_1_2.duckdb_secret b/data/secrets/httpfs/s3_config_secret_v1_1_2.duckdb_secret new file mode 100755 index 0000000000000000000000000000000000000000..07998250b52387cb7390909a1852d5feb16ed7b2 GIT binary patch literal 435 zcmb7B+X{m)42?QFHrVsnM~G2#F)G za_D7KOE?l2t`>ylh?7_{k%_#9C?Pl|B+7s9>|2#+LC4AwbmPCD7PO z=>UkAqMPvn7|;X)NIeu9qW`vEH?@Xs*@$&B^hy~z5AJ44u%Ic)o#f9_dpG}X(z9-7 pLP69yEj?-tfL~GSW^M%dd>s1_-^mVyMB2ERh892UGxrmo_zv2lrQ!ep literal 0 HcmV?d00001 diff --git a/data/secrets/httpfs/s3_config_secret_v1_1_3.duckdb_secret b/data/secrets/httpfs/s3_config_secret_v1_1_3.duckdb_secret new file mode 100755 index 0000000000000000000000000000000000000000..a3e161bc17bd642f1dd781e7e6ae061ae3ac68bd GIT binary patch literal 435 zcma)3Q3``F42?P+8|-`R5HeiqB*MvmpO3vj8C8t6NO4u!Cux-3R2oN>+2pGF) z4uE*h+X)R28NdeuD8@n~w25uy4EvYKpdFI|tGe&BHgX<3Oq60yV|sH-H?KObne?jr qiBJ-?ZWkZ52EbFaewiBqejn%Ke9iZXkcf@Tv2XCte&T+@3%>!t+NI3^ literal 0 HcmV?d00001 diff --git a/data/secrets/httpfs/s3_config_secret_v_1_0_0.duckdb_secret b/data/secrets/httpfs/s3_config_secret_v_1_0_0.duckdb_secret new file mode 100755 index 0000000000000000000000000000000000000000..7ff366232729c5307c949252d0183bb08f0aa6e1 GIT binary patch literal 1071 zcmb`G-EM<0424}fSbI@vm+M3HXX9=!6J^9yQ6Z^t=)|6|+e)zHDQyyeSzXHN0=yhIQdI= zj86~&O!wGKIJRtg0mMgH)OZc}pkD}(^P`#n(Xi}uX5=%O(OqX~)WK@w+V*ewiPE(! z<)=0BgVvcX1A821!gUI*`a~F&P~lEn(33ad*ND<840KNJgdVrP&}{wsMkOo=iC9A6>>mFKIVx!G#Us}gXT2_sITUi&q{o_2 d5EVBq@$P>6VFLV}%Wej5JAO}^#6uU3bd@6*VhL=+a zx@&fV1R!1Gp~u+BvjJo~)wFm40vHSdDo#p6p~u+2%p@>bqYL|J^rx+j^Zq|PBr#SS zb+kt9XqNhJyelC#9j#YwQa%2~5^#!yX#Fv6XS&|AmJn#%isO z)~G#=3%icm@6pV+xTvgoRO~l2LPc&gdr|5$=HC|cFkZJIltgvZ;?#C%xKh0T4O;}f aKS;a$|BDR>iMVm;>s{}%iDe?iW(OxN;2V;Y5PL9pURwuii>yv3Hu@Ok&ln< zL_0BKPu*U92F@NPH}pISCUF#m6sb_6lL3VR#5Z zcg+rv0HhoI9xyidYyjC_H7y1p4M1-QPz5X$x{KYF3+y8^2}}mN@CA+jw6$^GpTkoU z3$;~0)~GL9ly)1{pQD-Ya8+A#srcGbfr?!5{-D%2=HDJ~)DlXfCTekNH#GDV@BhLU c0Z$Kd+}5pKO$dp&ap~*^uiDS!C+G|R17A+o_5c6? literal 0 HcmV?d00001 diff --git a/test/sql/copy/csv/parallel/test_parallel_csv.test b/test/sql/copy/csv/parallel/test_parallel_csv.test index b70d1df..48b00fe 100644 --- a/test/sql/copy/csv/parallel/test_parallel_csv.test +++ b/test/sql/copy/csv/parallel/test_parallel_csv.test @@ -2,145 +2,6 @@ # description: Test parallel read CSV function on ghub bugs # group: [parallel] -statement ok -PRAGMA enable_verification - -query IIIIIIIIIIIIIIIIIIIIIIIIII -FROM read_csv('data/csv/14512_og.csv', buffer_size = 473, strict_mode = false, delim = ',', quote = '"', escape = '"') ----- -00000579000098 13.99 EA PINE RIDGE CHENIN VOIGNIER 750.0 ML 1 13 NULL 1 NULL NULL NULL NULL NULL NULL DEFAULT BRAND NULL NULL NULL NULL BEER & WINE NULL NULL 7.25 {"sales_tax":{ "tax_type": "rate_percent", "value" :0.0725}} -00000609082001 3.99 EA MADELAINE MINI MILK CHOCOLATE TURKEY 1.0 OZ 1 13 NULL NULL NULL NULL NULL NULL NULL NULL MADELEINE NULL NULL NULL NULL CANDY NULL NULL 7.25 {"sales_tax":{ "tax_type": "rate_percent", "value" :0.0725}} -00817566020096 9.99 EA COTSWOLD EW 5.3 OZ 1 13 NULL NULL NULL NULL NULL NULL NULL NULL LONG CLAWSON NULL NULL NULL NULL DELI INGREDIENTS: DOUBLE GLOUCESTER CHEESE (PASTEURIZED MILK SALT ENZYMES DAIRY CULTURES ANNATTO EXTRACT AS A COLOR) RECONSTITUTED MINCED ONIONS (2%) DRIED CHIVES. CONTAINS: MILK THIS PRODUCT WAS PRODUCED IN AN ENVIRONMENT THAT ALSO USES PEANUTS TREE NUTS EGGS MILK WHEAT SOY FISH SHELLFISH AND SESAME. 
NULL 2.0 {"sales_tax":{ "tax_type": "rate_percent", "value" :0.02}} - - -query III -select * from read_csv_auto('data/csv/dirty_line.csv', skip = 1) ----- -1.5 a 3 -2.5 b 4 - -query II -select * from read_csv_auto('data/csv/null_string.csv', nullstr="null") ----- -1 NULL -NULL 2 - -# We need to add header = false here. Because with vector_size=2 the sniffer will think we have a header, since the -# row 1 null has types INTEGER;VARCHAR at that point -query II -select * from read_csv_auto('data/csv/null_string.csv', header = false) ----- -a b -1 null -null 2 - -query IIIIIIIIII -select * from read_csv_auto('data/csv/aws_locations.csv') ----- -IAD Washington District of Columbia United States US 20 38.94449997 -77.45580292 North America United States, Mexico, & Canada -ORD Chicago Illinois United States US 20 41.978611 -87.904722 North America United States, Mexico, & Canada -JFK New York New York United States US 8 40.639801 -73.7789 North America United States, Mexico, & Canada -ATL Atlanta Georgia United States US 17 33.6367 -84.428101 North America United States, Mexico, & Canada -LAX Los Angeles California United States US 15 33.942501 -118.407997 North America United States, Mexico, & Canada -MIA Miami Florida United States US 11 25.79319953918457 -80.29060363769531 North America United States, Mexico, & Canada -DFW Dallas-Fort Worth Texas United States US 18 32.896801 -97.038002 North America United States, Mexico, & Canada -IAH Houston Texas United States US 6 29.984399795532227 -95.34140014648438 North America United States, Mexico, & Canada -SFO San Francisco California United States US 8 37.61899948120117 -122.375 North America United States, Mexico, & Canada -BOS Boston Massachusetts United States US 5 42.36429977 -71.00520325 North America United States, Mexico, & Canada -DEN Denver Colorado United States US 6 39.861698150635 -104.672996521 North America United States, Mexico, & Canada -PDX Portland Oregon United States US 2 45.58869934 -122.5979996 North America United States, Mexico, & Canada -SEA Seattle Washington United States US 6 47.448889 -122.309444 North America United States, Mexico, & Canada -MSP Minneapolis Minnesota United States US 4 44.882 -93.221802 North America United States, Mexico, & Canada -PHX Phoenix Arizona United States US 3 33.43429946899414 -112.01200103759766 North America United States, Mexico, & Canada -PHL Philadelphia Pennsylvania United States US 2 39.87189865112305 -75.24109649658203 North America United States, Mexico, & Canada -SLC Salt Lake City Utah United States US 1 40.78839874267578 -111.97799682617188 North America United States, Mexico, & Canada -BNA Nashville Tennessee United States US 2 36.1245002746582 -86.6781997680664 North America United States, Mexico, & Canada -DTW Detroit Michigan United States US 2 42.212398529052734 -83.35340118408203 North America United States, Mexico, & Canada -TPA Tampa Florida United States US 2 27.975500106811523 -82.533203125 North America United States, Mexico, & Canada -EWR Newark New Jersey United States US 10 40.692501068115234 -74.168701171875 North America United States, Mexico, & Canada -CMH Columbus Ohio United States US 2 39.998001 -82.891899 North America United States, Mexico, & Canada -MCI Kansas City Missouri United States US 2 39.2976 -94.713898 North America United States, Mexico, & Canada -QRO Queretaro NULL North America MX 1 20.6173 -100.185997 undefined null -FRA Frankfurt am Main NULL Germany DE 17 50.033333 8.570556 Europe Europe & Israel -DUS Düsseldorf NULL Germany DE 3 51.289501 
6.76678 Europe Europe & Israel -HAM Hamburg NULL Germany DE 6 53.630401611328 9.9882297515869 Europe Europe & Israel -MUC Munich NULL Germany DE 4 48.353802 11.7861 Europe Europe & Israel -TXL Berlin NULL Germany DE 5 52.559722 13.287778 Europe Europe & Israel -CDG Paris NULL France FR 11 49.012798 2.55 Europe Europe & Israel -MRS Marseille NULL France FR 6 43.439271922 5.22142410278 Europe Europe & Israel -MXP Milan NULL Italy IT 9 45.6306 8.72811 Europe Europe & Israel -FCO Rome NULL Italy IT 6 41.8002778 12.2388889 Europe Europe & Israel -PMO Palermo NULL Italy IT 1 38.175999 13.091 Europe Europe & Israel -AMS Amsterdam NULL Netherlands NL 5 52.308601 4.76389 Europe Europe & Israel -MAN Manchester NULL UK GB 5 53.35369873046875 -2.2749500274658203 Europe Europe & Israel -LHR London NULL UK GB 25 51.4775 -0.461389 Europe Europe & Israel -DUB Dublin NULL Ireland IE 2 53.421299 -6.27007 Europe Europe & Israel -VIE Vienna NULL Austria AT 3 48.110298156738 16.569700241089 Europe Europe & Israel -ARN Stockholm NULL Sweden SE 4 59.651901245117 17.918600082397 Europe Europe & Israel -CPH Copenhagen NULL Denmark DK 3 55.617900848389 12.656000137329 Europe Europe & Israel -HEL Helsinki NULL Finland FI 4 60.317199707031 24.963300704956 Europe Europe & Israel -ATH Athens NULL Greece GR 1 37.9364013672 23.9444999695 Europe Europe & Israel -BRU Brussels NULL Belgium BE 1 50.901401519800004 4.48443984985 Europe Europe & Israel -BUD Budapest NULL Hungary HU 1 47.42976 19.261093 Europe Europe & Israel -LIS Lisbon NULL Portugal PT 1 38.7813 -9.13592 Europe Europe & Israel -OSL Oslo NULL Norway NO 2 60.193901062012 11.100399971008 Europe Europe & Israel -OTP Bucharest NULL Romania RO 1 44.5711111 26.085 Europe Europe & Israel -PRG Prague NULL Czech Republic CZ 1 50.1008 14.26 Europe Europe & Israel -SOF Sofia NULL Bulgaria BG 1 42.696693420410156 23.411436080932617 Europe Europe & Israel -WAW Warsaw NULL Poland PL 3 52.165833 20.967222 Europe Europe & Israel -ZAG Zagreb NULL Croatia HR 1 45.7429008484 16.0687999725 Europe Europe & Israel -ZRH Zurich NULL Switzerland CH 2 47.464699 8.54917 Europe Europe & Israel -BCN Barcelona NULL Spain ES 2 41.2971 2.07846 Europe Europe & Israel -MAD Madrid NULL Spain ES 10 40.471926 -3.56264 Europe Europe & Israel -DEL New Delhi NULL India IN 14 28.5665 77.103104 Asia India -MAA Chennai NULL India IN 8 12.990005493164062 80.16929626464844 Asia India -BOM Mumbai NULL India IN 8 19.0886993408 72.8678970337 Asia India -PNQ Pune NULL India IN 4 18.58209991455078 73.9197006225586 Asia India -BLR Bangalore NULL India IN 5 13.1979 77.706299 Asia India -HYD Hyderabad NULL India IN 5 17.231318 78.429855 Asia India -SIN Singapore NULL Singapore SG 7 1.35019 103.994003 Asia Hong Kong, Indonesia, Philippines, Singapore, South Korea, Taiwan, & Thailand -KIX Osaka NULL Japan JP 5 34.42729949951172 135.24400329589844 Asia Japan -NRT Tokyo NULL Japan JP 22 35.764702 140.386002 Asia Japan -TPE Taoyuan NULL Taiwan TW 3 25.0777 121.233002 Asia Hong Kong, Indonesia, Philippines, Singapore, South Korea, Taiwan, & Thailand -ICN Seoul NULL Korea KR 8 37.46910095214844 126.45099639892578 Asia Hong Kong, Indonesia, Philippines, Singapore, South Korea, Taiwan, & Thailand -BKK Bangkok NULL Thailand TH 2 13.689999 100.750114 Asia Hong Kong, Indonesia, Philippines, Singapore, South Korea, Taiwan, & Thailand -CCU Kolkata NULL India IN 2 22.654699325561523 88.44670104980469 Asia India -CGK Jakarta NULL Indonesia ID 5 -6.1255698204 106.65599823 Asia Hong Kong, Indonesia, Philippines, Singapore, South 
Korea, Taiwan, & Thailand -KUL Kuala Lumpur NULL Malaysia MY 2 2.745579957962 101.70999908447 Asia Hong Kong, Indonesia, Philippines, Singapore, South Korea, Taiwan, & Thailand -MNL Manila NULL Philippines PH 1 14.5086 121.019997 Asia Hong Kong, Indonesia, Philippines, Singapore, South Korea, Taiwan, & Thailand -HAN Hanoi NULL Vietnam VN 1 21.221200942993164 105.80699920654297 Asia Hong Kong, Indonesia, Philippines, Singapore, South Korea, Taiwan, & Thailand -SGN Ho Chi Minh City NULL Vietnam VN 1 10.8187999725 106.652000427 Asia Hong Kong, Indonesia, Philippines, Singapore, South Korea, Taiwan, & Thailand -SYD Sydney NULL Australia AU 4 -33.94609832763672 151.177001953125 Australia & New Zealand Australia & New Zealand -AKL Auckland NULL New Zealand NZ 2 -37.008098602299995 174.792007446 Australia & New Zealand Australia & New Zealand -MEL Melbourne NULL Australia AU 3 -37.673302 144.843002 Australia & New Zealand Australia & New Zealand -PER Perth NULL Australia AU 1 -31.94029998779297 115.96700286865234 Australia & New Zealand Australia & New Zealand -GRU Sao Paulo NULL Brazil BR 8 -23.435556 -46.473056 South America South America -GIG Rio De Janeiro NULL Brazil BR 5 -22.8099994659 -43.2505569458 South America South America -FOR Fortaleza NULL Brazil BR 4 -3.776279926300049 -38.53260040283203 South America South America -BOG Bogota NULL Colombia CO 3 4.70159 -74.1469 South America South America -EZE Buenos Aires NULL Argentina AR 2 -34.8222 -58.5358 South America South America -SCL Santiago NULL Chile CL 3 -33.393001556396484 -70.78579711914062 South America South America -LIM Lima NULL Peru PE 2 -12.0219 -77.114305 South America South America -TLV Tel Aviv NULL Israel IL 2 32.01139831542969 34.88669967651367 Middle East Europe & Israel -BAH Manama NULL Bahrain BH 2 26.27079963684082 50.63359832763672 Middle East South Africa, Kenya, & Middle East -DXB Dubai NULL UAE AE 1 25.2527999878 55.3643989563 Middle East South Africa, Kenya, & Middle East -FJR Fujairah NULL UAE AE 3 25.112222 56.324167 Middle East South Africa, Kenya, & Middle East -MCT Muscat NULL Oman OM 1 23.593299865722656 58.284400939941406 Middle East South Africa, Kenya, & Middle East -CPT Cape Town NULL South Africa ZA 1 -33.9648017883 18.6016998291 Africa South Africa, Kenya, & Middle East -JNB Johannesburg NULL South Africa ZA 1 -26.1392 28.246 Africa South Africa, Kenya, & Middle East -NBO Nairobi NULL Kenya KE 1 -1.31923997402 36.9277992249 Africa South Africa, Kenya, & Middle East -PVG Shanghai NULL China CN 1 31.143400192260742 121.80500030517578 China China -SZX Shenzhen NULL China CN 1 22.639299392700195 113.81099700927734 China China -ZHY Zhongwei NULL China CN 1 37.572778 105.154444 China China -PEK Beijing NULL China CN 1 40.080101013183594 116.58499908447266 China China -HKG Hong Kong NULL China HK 4 22.308901 113.915001 China China -CMH Columbus Ohio United States US 1 39.998056 -82.891944 North America United States, Mexico, & Canada -HIO Hillsboro Oregon United States US 1 45.540394 -122.949825 North America United States, Mexico, & Canada -TPA Tampa Florida United States US 1 27.979722 -82.534722 North America United States, Mexico, & Canada -PNQ Pune Maharashtra India IN 1 18.582222 73.919722 Asia India -MCT Muscat Muscat Oman OM 1 23.6015386 58.2899376 Middle East South Africa, Kenya, & Middle East - - require httpfs query II diff --git a/test/sql/copy/no_head_on_write.test b/test/sql/copy/no_head_on_write.test index e301051..6caab7a 100644 --- a/test/sql/copy/no_head_on_write.test +++ 
b/test/sql/copy/no_head_on_write.test @@ -1,6 +1,6 @@ # name: test/sql/copy/no_head_on_write.test # description: Confirm that we don't send head requests for writes -# group: [secret] +# group: [copy] require-env S3_TEST_SERVER_AVAILABLE 1 diff --git a/test/sql/json/table/read_json.test b/test/sql/json/table/read_json.test index 8ece129..d20b5ea 100644 --- a/test/sql/json/table/read_json.test +++ b/test/sql/json/table/read_json.test @@ -4,404 +4,6 @@ require json -statement ok -pragma enable_verification - -statement error -SELECT * FROM read_json('data/json/example_n.ndjson', auto_detect=false) ----- -Binder Error - -# can't read ndjson with array -statement error -SELECT * FROM read_json('data/json/example_n.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'}, format='array') ----- -Invalid Input Error: Expected top-level JSON array - -# read_ndjson works -query II -SELECT * FROM read_ndjson('data/json/example_n.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'}) ----- -1 O Brother, Where Art Thou? -2 Home for the Holidays -3 The Firm -4 Broadcast News -5 Raising Arizona - -# We can also read only one of the columns -query I -SELECT * FROM read_ndjson('data/json/example_n.ndjson', columns={id: 'INTEGER'}) ----- -1 -2 -3 -4 -5 - -query I -SELECT * FROM read_ndjson('data/json/example_n.ndjson', columns={name: 'VARCHAR'}) ----- -O Brother, Where Art Thou? -Home for the Holidays -The Firm -Broadcast News -Raising Arizona - -# what about a broken JSON file -query II -SELECT * FROM read_ndjson('data/json/unterminated_quotes.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'}, ignore_errors=true) ----- -1 O Brother, Where Art Thou? -2 Home for the Holidays -NULL NULL -4 Broadcast News -5 Raising Arizona - -# some of these values don't have "name" -query II -SELECT * FROM read_ndjson('data/json/different_schemas.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'}) ----- -1 O Brother, Where Art Thou? -2 NULL -3 The Firm -4 NULL -5 Raising Arizona - -# test projection pushdown (unstructured json) -query I -SELECT id FROM read_json('data/json/example_n.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'}, format='unstructured') ----- -1 -2 -3 -4 -5 - -query I -SELECT name FROM read_json('data/json/example_n.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'}, format='unstructured') ----- -O Brother, Where Art Thou? -Home for the Holidays -The Firm -Broadcast News -Raising Arizona - -# test projection pushdown (newline-delimited json) -query I -SELECT id FROM read_json('data/json/example_n.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'}, format='newline_delimited') ----- -1 -2 -3 -4 -5 - -query I -SELECT name FROM read_ndjson('data/json/example_n.ndjson', columns={id: 'INTEGER', name: 'VARCHAR'}, format='nd') ----- -O Brother, Where Art Thou? -Home for the Holidays -The Firm -Broadcast News -Raising Arizona - -# auto-detect -query II -SELECT * FROM read_json_auto('data/json/example_n.ndjson') ----- -1 O Brother, Where Art Thou? -2 Home for the Holidays -3 The Firm -4 Broadcast News -5 Raising Arizona - -query II -SELECT * FROM 'data/json/example_n.ndjson' ----- -1 O Brother, Where Art Thou? 
-2 Home for the Holidays -3 The Firm -4 Broadcast News -5 Raising Arizona - -# we can detect at varying levels, level 0 is just JSON -query I -SELECT * FROM read_json_auto('data/json/with_list.json', maximum_depth=0) ----- -{"id":1,"name":["O","Brother,","Where","Art","Thou?"]} -{"id":2,"name":["Home","for","the","Holidays"]} -{"id":3,"name":["The","Firm"]} -{"id":4,"name":["Broadcast","News"]} -{"id":5,"name":["Raising","Arizona"]} - -# at level one we get JSON and JSON -query II -SELECT * FROM read_json_auto('data/json/with_list.json', maximum_depth=1) ----- -1 ["O","Brother,","Where","Art","Thou?"] -2 ["Home","for","the","Holidays"] -3 ["The","Firm"] -4 ["Broadcast","News"] -5 ["Raising","Arizona"] - -# at level 2 we get BIGINT and JSON[] -query II -SELECT * FROM read_json_auto('data/json/with_list.json', maximum_depth=2) ----- -1 ["O", "Brother,", "Where", "Art", "Thou?"] -2 ["Home", "for", "the", "Holidays"] -3 ["The", "Firm"] -4 ["Broadcast", "News"] -5 ["Raising", "Arizona"] - -# at level 3 it's fully detected, and we get BIGINT and VARCHAR[] -query II -SELECT * FROM read_json_auto('data/json/with_list.json', maximum_depth=3) ----- -1 [O, 'Brother,', Where, Art, Thou?] -2 [Home, for, the, Holidays] -3 [The, Firm] -4 [Broadcast, News] -5 [Raising, Arizona] - -# we can detect lists too -query III -SELECT id, typeof(name), unnest(name) FROM 'data/json/with_list.json' ----- -1 VARCHAR[] O -1 VARCHAR[] Brother, -1 VARCHAR[] Where -1 VARCHAR[] Art -1 VARCHAR[] Thou? -2 VARCHAR[] Home -2 VARCHAR[] for -2 VARCHAR[] the -2 VARCHAR[] Holidays -3 VARCHAR[] The -3 VARCHAR[] Firm -4 VARCHAR[] Broadcast -4 VARCHAR[] News -5 VARCHAR[] Raising -5 VARCHAR[] Arizona - -# with depth 2 we don't bother detecting inside of the list - defaults to JSON -query III -SELECT id, typeof(name), unnest(name) FROM read_json_auto('data/json/with_list.json', maximum_depth=2) ----- -1 JSON[] "O" -1 JSON[] "Brother," -1 JSON[] "Where" -1 JSON[] "Art" -1 JSON[] "Thou?" 
-2 JSON[] "Home" -2 JSON[] "for" -2 JSON[] "the" -2 JSON[] "Holidays" -3 JSON[] "The" -3 JSON[] "Firm" -4 JSON[] "Broadcast" -4 JSON[] "News" -5 JSON[] "Raising" -5 JSON[] "Arizona" - -# with depth 0 we don't bother detecting anything, everything defaults to JSON (even the "id" column in this case) -query II -SELECT typeof(id), typeof(name) FROM read_json_auto('data/json/with_list.json', maximum_depth=1) ----- -JSON JSON -JSON JSON -JSON JSON -JSON JSON -JSON JSON - -# we can detect UUID's -query II -SELECT id, typeof(id) FROM 'data/json/with_uuid.json' ----- -bbd05ae7-76e5-4f1a-a31f-247408251fc9 UUID -d5c52052-5f8e-473f-bc8d-176342643ef5 UUID -3b6a6de3-0732-4591-93ed-8df6091eb00d UUID -ae24e69e-e0bf-4e85-9848-27d35df85b8b UUID -63928b16-1814-436f-8b30-b3c40cc31d51 UUID - -# top-level array of values -query I -select * from read_json('data/json/top_level_array.json', columns={conclusion: 'VARCHAR'}) ----- -cancelled -cancelled - -query I -select * from read_json('data/json/top_level_array.json', auto_detect=true) ----- -cancelled -cancelled - -# if we try to read it as 'unstructured' records -statement error -select * from read_json('data/json/top_level_array.json', columns={conclusion: 'VARCHAR'}, format='unstructured', records=true) ----- -Invalid Input Error: JSON transform error in file "data/json/top_level_array.json", in record/value 1: Expected OBJECT, but got ARRAY - -# if we try to read an ndjson file as if it is an array of values, we get an error -statement error -select * from read_json_auto('data/json/example_n.ndjson', format='array') ----- -Invalid Input Error: Expected top-level JSON array - -# test that we can read a list of longer than STANDARD_VECTOR_SIZE properly -statement ok -copy (select 42 duck from range(10000)) to '__TEST_DIR__/my_file.json' (array true) - -query T -select count(*) from read_json('__TEST_DIR__/my_file.json', columns={duck: 'INTEGER'}, format='array') ----- -10000 - -query T -select sum(duck) = 42*10000 from read_json('__TEST_DIR__/my_file.json', columns={duck: 'INTEGER'}, format='array') ----- -true - -# read_json_auto also understands ARRAY format -query T -select count(*) from '__TEST_DIR__/my_file.json' ----- -10000 - -query T -select sum(duck) = 42*10000 from '__TEST_DIR__/my_file.json' ----- -true - -# what if we do an array of non-records? 
-statement ok -copy (select list(range) from range(10)) to '__TEST_DIR__/my_file.json' (format csv, quote '', HEADER 0) - -query T -select * from '__TEST_DIR__/my_file.json' ----- -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 - -# fails because it's not records -statement error -select * from read_json('__TEST_DIR__/my_file.json', format='array', columns={range: 'INTEGER'}, records=true) ----- -Invalid Input Error: JSON transform error - -# fails because it's not records -statement error -select * from read_json_auto('__TEST_DIR__/my_file.json', format='array', records=true) ----- -Binder Error: json_read expected records - -query T -select * from read_json('__TEST_DIR__/my_file.json', format='auto', records=false, auto_detect=true) ----- -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 - -# need to supply columns -statement error -select * from read_json('__TEST_DIR__/my_file.json', format='auto', records='false', auto_detect=false) ----- -Binder Error - -# read as unstructured values, so we just get the array -query T -select * from read_json('__TEST_DIR__/my_file.json', format='unstructured', records='false', auto_detect=true) ----- -[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - -# array of non-records -query T -select * from read_json('__TEST_DIR__/my_file.json', format='array', records='false', auto_detect=true) ----- -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 - -# also works with auto -query T -select * from read_json('__TEST_DIR__/my_file.json', format='array', records='auto', auto_detect=true) ----- -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 - -# lower thread count so the next tests don't OOM on many-core machines -statement ok -SET threads=2 - -# issue 6646, this is not an array, but we try to read it as one -statement error -select json_structure(json ->> '$.metadata') as structure, -from read_json('data/json/issue.json', format='array', columns={'json': 'JSON'}, maximum_object_size=104857600) -limit 1; ----- -Invalid Input Error: Expected top-level JSON array - -# let's try a variation -statement error -select json_structure(json ->> '$.metadata') as structure, -from read_json('data/json/issue.json', format='array', records='false', columns={'json': 'JSON'}, maximum_object_size=104857600) -limit 1; ----- -Invalid Input Error: Expected top-level JSON array - -# we can parse it as unstructured values, and give it a different col name -query I -select json_structure(my_json ->> '$.metadata') as structure, -from read_json('data/json/issue.json', format='unstructured', records='false', columns={'my_json': 'JSON'}, maximum_object_size=104857600) -limit 1; ----- -{"argv":["VARCHAR"],"dag":{"dag_size":"VARCHAR","tasks":{"load_oscar":{"status":"VARCHAR","type":"VARCHAR","upstream":"VARCHAR","products":{"nb":"VARCHAR"}},"load_weather":{"status":"VARCHAR","type":"VARCHAR","upstream":"VARCHAR","products":{"nb":"VARCHAR"}},"compress":{"status":"VARCHAR","type":"VARCHAR","upstream":{"load_oscar":"VARCHAR"},"products":{"nb":"VARCHAR"}}}}} - -statement ok -pragma disable_verification - -# test that we can read a JSON list that spans more than one buffer size -# the JSON is 55 bytes, and the minimum buffer size is 32MB -# let's do 50k to be safe -statement ok -copy (select 42 this_is_a_very_long_field_name_yes_very_much_so from range(50000)) to '__TEST_DIR__/my_file.json' (array true) - -query T -select sum(this_is_a_very_long_field_name_yes_very_much_so) = 42 * 50000 from '__TEST_DIR__/my_file.json' ----- -true - require httpfs query II diff --git a/test/sql/json/table/read_json_auto.test_slow b/test/sql/json/table/read_json_auto.test_slow index efed7ee..8b607cd 
100644 --- a/test/sql/json/table/read_json_auto.test_slow +++ b/test/sql/json/table/read_json_auto.test_slow @@ -4,358 +4,6 @@ require json -statement ok -pragma enable_verification - -# some arrow tests (python/pyarrow/tests/test_json.py) on their github -# these are very similar to the pandas tests, so let's not copy those -# instead of adding all of these files to data/test we just create them on the fly here -# whenever we add a '' at the end it's just to check we skip the newline at the end that's sometimes there -statement ok -copy (select * from (values ('{"a": 1, "b": 2}'), (''))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0); - -query II -select * from '__TEST_DIR__/my_file.json' ----- -1 2 - -statement ok -copy (select * from (values ('{"a": 1}'), ('{"a": 2}'), ('{"a": 3}'))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0) - -query I -select * from '__TEST_DIR__/my_file.json' ----- -1 -2 -3 - -query I -select count(*) from '__TEST_DIR__/my_file.json' ----- -3 - -statement ok -copy (select * from (values ('{"a": 1,"b": 2, "c": 3}'), ('{"a": 4,"b": 5, "c": 6}'))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0) - -query III -select * from '__TEST_DIR__/my_file.json' ----- -1 2 3 -4 5 6 - -statement ok -copy (select * from (values ('{"a": 1,"b": 2, "c": "3", "d": false}'), ('{"a": 4.0, "b": -5, "c": "foo", "d": true}'), (''))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0) - -query IIII -select * from '__TEST_DIR__/my_file.json' ----- -1.0 2 3 false -4.0 -5 foo true - -# mixed types that cannot be resolved, defaults to JSON (column 3) -statement ok -copy (select * from (values ('{"a": 1, "b": 2, "c": null, "d": null, "e": null}'), ('{"a": null, "b": -5, "c": "foo", "d": null, "e": true}'), ('{"a": 4.5, "b": null, "c": "nan", "d": null,"e": false}'), (''))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0) - -query IIIII -select * from '__TEST_DIR__/my_file.json' ----- -1.0 2 NULL NULL NULL -NULL -5 foo NULL true -4.5 NULL nan NULL false - -# mixed types are resolved to DOUBLE here -statement ok -copy (select * from (values ('{"a": 1}'), ('{"a": 1.45}'), ('{"a": -23.456}'), ('{}'), (''))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0) - -query II -select typeof(a), a from '__TEST_DIR__/my_file.json' ----- -DOUBLE 1.0 -DOUBLE 1.45 -DOUBLE -23.456 -DOUBLE NULL - -statement ok -copy (select * from (values ('{"foo": "bar", "num": 0}'), ('{"foo": "baz", "num": 1}'), (''))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0) - -query II -select * from '__TEST_DIR__/my_file.json' ----- -bar 0 -baz 1 - -# we can read values from a top-level list -query I -select * from 'data/json/top_level_array.json' ----- -cancelled -cancelled - -query I -select count(*) from 'data/json/top_level_array.json' ----- -2 - -# for maximum_depth=0 this is two records of JSON -query I -select * from read_json_auto('data/json/top_level_array.json', maximum_depth=0) ----- -{"conclusion":"cancelled"} -{"conclusion":"cancelled"} - -# for 1 it's 1 column of JSON -query I -select * from read_json_auto('data/json/top_level_array.json', maximum_depth=1) ----- -"cancelled" -"cancelled" - -# if we read this with records='false', we get the struct instead of the unpacked columns -query I -select typeof(json) from read_json_auto('data/json/top_level_array.json', records='false') ----- -STRUCT(conclusion VARCHAR) -STRUCT(conclusion VARCHAR) - -# however, if there are multiple top-level arrays, we default to reading 
them as lists -query I -select * from 'data/json/top_level_two_arrays.json' ----- -[{'conclusion': cancelled}, {'conclusion': cancelled}] -[{'conclusion': cancelled}, {'conclusion': cancelled}] - -# if we read a top-level array as if it is a record, then we get an error -statement error -select * from read_json_auto('data/json/top_level_array.json', format='unstructured', records='true') ----- -Binder Error: json_read expected records - -# issue Mark found when analyzing a JSON dump of our CI - projection pushdown wasn't working properly -statement ok -select * from 'data/json/projection_pushdown_example.json' WHERE status <> 'completed' - -# different schema's - this one should work regardless of sampling 1 or all lines -query II -select * from read_json_auto('data/json/different_schemas.ndjson', sample_size=1) ----- -1 O Brother, Where Art Thou? -2 NULL -3 The Firm -4 NULL -5 Raising Arizona - -query II -select * from read_json_auto('data/json/different_schemas.ndjson', sample_size=-1) ----- -1 O Brother, Where Art Thou? -2 NULL -3 The Firm -4 NULL -5 Raising Arizona - -# if we require fields to appear in all objects by setting field_appearance_threshold=1, we default to MAP -query I -select typeof(COLUMNS(*)) from read_json_auto('data/json/different_schemas.ndjson', field_appearance_threshold=1) limit 1 ----- -MAP(VARCHAR, JSON) - -query I -select * from read_json_auto('data/json/different_schemas.ndjson', field_appearance_threshold=1) ----- -{id=1, name='"O Brother, Where Art Thou?"'} -{id=2} -{id=3, name='"The Firm"'} -{id=4} -{id=5, name='"Raising Arizona"'} - -# if we set it to 0.5 it should work already since "name" appears in 3/5 objects, which is greater than 0.5 -query II -select * from read_json_auto('data/json/different_schemas.ndjson', field_appearance_threshold=0.5) ----- -1 O Brother, Where Art Thou? -2 NULL -3 The Firm -4 NULL -5 Raising Arizona - -# can't set it to less than 0 or more than 1 -statement error -select * from read_json_auto('data/json/different_schemas.ndjson', field_appearance_threshold=-1) ----- -Binder Error: read_json_auto "field_appearance_threshold" parameter must be between 0 and 1 - -statement error -select * from read_json_auto('data/json/different_schemas.ndjson', field_appearance_threshold=2) ----- -Binder Error: read_json_auto "field_appearance_threshold" parameter must be between 0 and 1 - -# inconsistent schema's - if we only sample 1 row, we get an error, because we only see a NULL value for the 2nd column -statement error -select * from read_json_auto('data/json/inconsistent_schemas.ndjson', sample_size=1, convert_strings_to_integers=true) ----- -Invalid Input Error: JSON transform error in file "data/json/inconsistent_schemas.ndjson", in line 3 - -# if we increase the sample size to 2, we can read it just fine -query II -select * from read_json_auto('data/json/inconsistent_schemas.ndjson', sample_size=2) ----- -"1" NULL -2 Home for the Holidays -[3] The Firm -4 Broadcast News -5 Raising Arizona - -# we can also find bigint in strings (happens a lot in JSON for some reason ...) 
-statement ok -copy (select * from (values ('{"id": "26941143801"}'), ('{"id": "26941143807"}'))) to '__TEST_DIR__/my_file.json' (format csv, quote '', header 0) - -# but only if we set the parameter to true -query T -select typeof(id) from read_json('__TEST_DIR__/my_file.json', convert_strings_to_integers=true) ----- -BIGINT -BIGINT - -# empty array and the example file works -query II -select * from read_json_auto(['data/json/empty_array.json', 'data/json/example_n.ndjson']); ----- -1 O Brother, Where Art Thou? -2 Home for the Holidays -3 The Firm -4 Broadcast News -5 Raising Arizona - -# Simple map inference with default threshold -query T -select distinct typeof(a) from read_json_auto('data/json/simple_map.jsonl') ----- -MAP(VARCHAR, BIGINT) - -# Test setting map_inference_threshold high -query T -select distinct typeof(a) from read_json_auto('data/json/simple_map.jsonl', map_inference_threshold=1000) ----- -MAP(VARCHAR, BIGINT) - -# Map inference can be disabled -query T -select distinct typeof(a) from read_json_auto('data/json/simple_map.jsonl', map_inference_threshold=-1, field_appearance_threshold=0) ----- -STRUCT("1" JSON, "2" BIGINT, "3" BIGINT, "4" BIGINT, "5" BIGINT, "6" BIGINT, "7" BIGINT, "8" BIGINT, "9" BIGINT, "10" BIGINT, "11" BIGINT, "12" BIGINT, "13" BIGINT, "14" BIGINT, "15" BIGINT, "16" JSON, "17" BIGINT, "18" BIGINT, "19" BIGINT, "20" BIGINT, "21" BIGINT, "22" BIGINT, "23" BIGINT, "24" BIGINT, "25" BIGINT, "26" BIGINT, "27" BIGINT, "28" BIGINT, "29" BIGINT, "30" BIGINT, "31" BIGINT, "32" BIGINT, "33" BIGINT, "34" BIGINT, "35" BIGINT, "36" BIGINT, "37" BIGINT, "38" BIGINT, "39" BIGINT, "40" BIGINT, "41" BIGINT, "42" BIGINT, "43" BIGINT, "44" BIGINT, "45" BIGINT, "46" BIGINT, "47" BIGINT, "48" BIGINT, "49" BIGINT, "50" BIGINT, "51" BIGINT, "52" BIGINT, "53" BIGINT, "54" BIGINT, "55" BIGINT, "56" BIGINT, "57" BIGINT, "58" BIGINT, "59" BIGINT, "60" BIGINT, "61" BIGINT, "62" BIGINT, "63" BIGINT, "64" BIGINT, "65" BIGINT, "66" BIGINT, "67" BIGINT, "68" BIGINT, "69" BIGINT, "70" BIGINT, "71" BIGINT, "72" BIGINT, "73" BIGINT, "74" BIGINT, "75" BIGINT, "76" BIGINT, "77" BIGINT, "78" BIGINT, "79" BIGINT, "80" BIGINT, "81" BIGINT, "82" BIGINT, "83" BIGINT, "84" BIGINT, "85" BIGINT, "86" BIGINT, "87" BIGINT, "88" BIGINT, "89" BIGINT, "90" BIGINT, "91" BIGINT, "92" BIGINT, "93" BIGINT, "94" BIGINT, "95" BIGINT, "96" BIGINT, "97" BIGINT, "98" BIGINT, "99" BIGINT, "100" BIGINT) - -# Map inference with max_depth works as expected -query T -select distinct typeof(a) from read_json_auto('data/json/simple_map.jsonl', maximum_depth=2) ----- -MAP(VARCHAR, JSON) - -query T -select distinct typeof(a) from read_json_auto('data/json/simple_map.jsonl', maximum_depth=1) ----- -JSON - -# Map where all values are null -query T -select distinct typeof(a) from read_json_auto('data/json/map_of_nulls.jsonl') ----- -MAP(VARCHAR, JSON) - -# Map type can be inferred at the top level -query T -select distinct typeof(json) from read_json_auto('data/json/top_level_map.jsonl') ----- -MAP(VARCHAR, BIGINT) - -# Map type can be inferred for struct value type -query T -select distinct typeof(a) from read_json_auto('data/json/map_of_structs.jsonl') ----- -MAP(VARCHAR, STRUCT(b BIGINT)) - -# Map 80% similarity check works -query T -select distinct typeof(a) from read_json_auto('data/json/map_50_50.jsonl', map_inference_threshold=10) ----- -STRUCT(s1 STRUCT(f1 BIGINT[]), s2 STRUCT(f2 BIGINT[]), s3 STRUCT(f1 BIGINT[]), s4 STRUCT(f2 BIGINT[]), s5 STRUCT(f1 BIGINT[]), s6 STRUCT(f2 BIGINT[]), s7 STRUCT(f1 BIGINT[]), 
s8 STRUCT(f2 BIGINT[]), s9 STRUCT(f1 BIGINT[]), s10 STRUCT(f2 BIGINT[])) - -# Map of maps -query T -select distinct typeof(a) from read_json_auto('data/json/map_of_map.jsonl', map_inference_threshold=10) ----- -MAP(VARCHAR, MAP(VARCHAR, BIGINT)) - -# All NULL types get converted to JSON if we do map inference -query T -select distinct typeof(a) from read_json_auto('data/json/map_of_struct_with_nulls.jsonl', map_inference_threshold=10) ----- -MAP(VARCHAR, STRUCT(a JSON[])) - -# Candidate types are properly handled for map inference -query I -SELECT distinct typeof(a) FROM read_json_auto('data/json/map_of_dates.jsonl', map_inference_threshold=25) ----- -MAP(VARCHAR, DATE) - -# Mixed candidate types are also handled -query I -SELECT distinct typeof(a) FROM read_json_auto('data/json/map_of_mixed_date_timestamps.jsonl', map_inference_threshold=25) ----- -MAP(VARCHAR, VARCHAR) - -# Incompatible types are handled correctly -query T -select distinct typeof(a) from read_json_auto('data/json/map_incompatible.jsonl', map_inference_threshold=10) ----- -STRUCT(s1 STRUCT("1" JSON), s2 STRUCT("1" MAP(VARCHAR, JSON)), s3 STRUCT("1" VARCHAR), s4 STRUCT("1" BIGINT[]), s5 STRUCT("1" BIGINT), s6 STRUCT("1" VARCHAR), s7 STRUCT("1" BIGINT[]), s8 STRUCT("1" BIGINT), s9 STRUCT("1" VARCHAR), s10 STRUCT("1" BIGINT[])) - -# Can't set map_inference_threshold to a negative value (except -1) -statement error -select * from read_json_auto('data/json/simple_map.jsonl', map_inference_threshold=-10) ----- -Binder Error: read_json_auto "map_inference_threshold" parameter must be 0 or positive, or -1 to disable map inference for consistent objects. - -# if we only sample the first file, we default to a single JSON column -query I -select * from read_json_auto(['data/json/empty_array.json', 'data/json/example_n.ndjson'], maximum_sample_files=1); ----- -{"id":1,"name":"O Brother, Where Art Thou?"} -{"id":2,"name":"Home for the Holidays"} -{"id":3,"name":"The Firm"} -{"id":4,"name":"Broadcast News"} -{"id":5,"name":"Raising Arizona"} - -# -1 is unlimited -query II -select * from read_json_auto(['data/json/empty_array.json', 'data/json/example_n.ndjson'], maximum_sample_files=-1); ----- -1 O Brother, Where Art Thou? -2 Home for the Holidays -3 The Firm -4 Broadcast News -5 Raising Arizona - -# can't be -2 or lower -statement error -select * from read_json_auto(['data/json/empty_array.json', 'data/json/example_n.ndjson'], maximum_sample_files=-2); ----- -Binder Error - -# can't be 0 -statement error -select * from read_json_auto(['data/json/empty_array.json', 'data/json/example_n.ndjson'], maximum_sample_files=0); ----- -Binder Error - -# cannot be NULL either -statement error -select * from read_json_auto(['data/json/empty_array.json', 'data/json/example_n.ndjson'], maximum_sample_files=NULL); ----- -Binder Error - -statement ok -pragma disable_verification - require httpfs # this is one big object - yyjson uses it as a benchmark diff --git a/test/sql/json/table/read_json_objects.test b/test/sql/json/table/read_json_objects.test index ecf73b6..ec49c45 100644 --- a/test/sql/json/table/read_json_objects.test +++ b/test/sql/json/table/read_json_objects.test @@ -4,144 +4,6 @@ require json -# we cannot check the error output for the specific byte, because on Windows the \n are replaced with \r\n -# therefore, the byte count is different. So, we cut off the error message here -statement error -select * from read_json_objects('data/json/unterminated_quotes.ndjson') ----- -Invalid Input Error: Malformed JSON - -# now it should work! 
-query I -SELECT * FROM read_csv('data/json/example_n.ndjson', columns={'json': 'JSON'}, delim=NULL, header=0, quote=NULL, escape=NULL, auto_detect = false) ----- -{"id":1,"name":"O Brother, Where Art Thou?"} -{"id":2,"name":"Home for the Holidays"} -{"id":3,"name":"The Firm"} -{"id":4,"name":"Broadcast News"} -{"id":5,"name":"Raising Arizona"} - -# example_n is with regular \n newlines -query I -SELECT * FROM read_ndjson_objects('data/json/example_n.ndjson') ----- -{"id":1,"name":"O Brother, Where Art Thou?"} -{"id":2,"name":"Home for the Holidays"} -{"id":3,"name":"The Firm"} -{"id":4,"name":"Broadcast News"} -{"id":5,"name":"Raising Arizona"} - -# this one does not have the 'records' param -statement error -SELECT * FROM read_ndjson_objects('data/json/example_n.ndjson', records='false') ----- -Binder Error: Invalid named parameter - -query I -SELECT * FROM read_ndjson_objects('data/json/example_n.ndjson') ----- -{"id":1,"name":"O Brother, Where Art Thou?"} -{"id":2,"name":"Home for the Holidays"} -{"id":3,"name":"The Firm"} -{"id":4,"name":"Broadcast News"} -{"id":5,"name":"Raising Arizona"} - -# we can auto-detect that it's newline-delimited -query I -SELECT * FROM read_json_objects('data/json/example_n.ndjson', format='auto') ----- -{"id":1,"name":"O Brother, Where Art Thou?"} -{"id":2,"name":"Home for the Holidays"} -{"id":3,"name":"The Firm"} -{"id":4,"name":"Broadcast News"} -{"id":5,"name":"Raising Arizona"} - -# example_r is with \r newlines - works with unstructured -query I -SELECT * FROM read_json_objects('data/json/example_r.ndjson', format='unstructured') ----- -{"id":1,"name":"O Brother, Where Art Thou?"} -{"id":2,"name":"Home for the Holidays"} -{"id":3,"name":"The Firm"} -{"id":4,"name":"Broadcast News"} -{"id":5,"name":"Raising Arizona"} - -# we can detect that it's not newline-delimited -query I -SELECT * FROM read_json_objects('data/json/example_r.ndjson', format='auto') ----- -{"id":1,"name":"O Brother, Where Art Thou?"} -{"id":2,"name":"Home for the Holidays"} -{"id":3,"name":"The Firm"} -{"id":4,"name":"Broadcast News"} -{"id":5,"name":"Raising Arizona"} - -# \r newlines are NOT valid according to ndjson spec - this does not work, all a single line -statement error -SELECT * FROM read_ndjson_objects('data/json/example_r.ndjson') ----- -Invalid Input Error: Malformed JSON in file "data/json/example_r.ndjson" - -# example_rn is with \r\n newlines -query I -SELECT * FROM read_ndjson_objects('data/json/example_rn.ndjson') ----- -{"id":1,"name":"O Brother, Where Art Thou?"} -{"id":2,"name":"Home for the Holidays"} -{"id":3,"name":"The Firm"} -{"id":4,"name":"Broadcast News"} -{"id":5,"name":"Raising Arizona"} - -query I -SELECT * FROM read_ndjson_objects('data/json/example_rn.ndjson') ----- -{"id":1,"name":"O Brother, Where Art Thou?"} -{"id":2,"name":"Home for the Holidays"} -{"id":3,"name":"The Firm"} -{"id":4,"name":"Broadcast News"} -{"id":5,"name":"Raising Arizona"} - -# same but gzipped -query I -SELECT * FROM read_ndjson_objects('data/json/example_rn.ndjson.gz') ----- -{"id":1,"name":"O Brother, Where Art Thou?"} -{"id":2,"name":"Home for the Holidays"} -{"id":3,"name":"The Firm"} -{"id":4,"name":"Broadcast News"} -{"id":5,"name":"Raising Arizona"} - -query I -SELECT * FROM read_json_objects('data/json/example_rn.ndjson.gz', format='nd') ----- -{"id":1,"name":"O Brother, Where Art Thou?"} -{"id":2,"name":"Home for the Holidays"} -{"id":3,"name":"The Firm"} -{"id":4,"name":"Broadcast News"} -{"id":5,"name":"Raising Arizona"} - -# multi-file scan -query I -SELECT 
count(*) from read_json_objects(['data/json/example_n.ndjson', 'data/json/example_r.ndjson', 'data/json/example_rn.ndjson'], format='auto') ----- -15 - -query I -SELECT count(*) from read_ndjson_objects(['data/json/example_n.ndjson', 'data/json/example_rn.ndjson']) ----- -10 - -# globbing -query I -SELECT count(*) from read_json_objects('data/json/example_*.ndjson', format='auto') ----- -15 - -query I -SELECT count(*) from read_ndjson_objects('data/json/example_*n.ndjson') ----- -10 - require httpfs # same file but hosted on github @@ -163,90 +25,3 @@ select * from read_ndjson_objects('https://github.com/duckdb/duckdb-data/release {"id":4,"name":"Broadcast News"} {"id":5,"name":"Raising Arizona"} -# empty file -query I -select * from read_json_objects('data/json/empty.ndjson') ----- - -query I -select * from read_ndjson_objects('data/json/empty.ndjson') ----- - -# invalid json stuff -statement error -select * from read_json_objects('data/json/unterminated_quotes.ndjson', format='nd') ----- -Invalid Input Error: Malformed JSON in file "data/json/unterminated_quotes.ndjson" - -statement error -select * from read_ndjson_objects('data/json/unterminated_quotes.ndjson') ----- -Invalid Input Error: Malformed JSON in file "data/json/unterminated_quotes.ndjson" - -# we can auto-detect and ignore the error (becomes NULL) -query I -select * from read_json_objects('data/json/unterminated_quotes.ndjson', format='auto', ignore_errors=true) ----- -{"id":1,"name":"O Brother, Where Art Thou?"} -{"id":2,"name":"Home for the Holidays"} -NULL -{"id":4,"name":"Broadcast News"} -{"id":5,"name":"Raising Arizona"} - -# multiple values per line (works for read_json_objects) -query I -select * from read_json_objects('data/json/multiple_objects_per_line.ndjson', format='unstructured') ----- -{"id":1,"name":"O Brother, Where Art Thou?"} -{"id":2,"name":"Home for the Holidays"} -{"id":3,"name":"The Firm"} -{"id":4,"name":"Broadcast News"} -{"id":5,"name":"Raising Arizona"} - -# does not work for read_ndjson_objects -statement error -select * from read_ndjson_objects('data/json/multiple_objects_per_line.ndjson') ----- -Invalid Input Error: Malformed JSON in file "data/json/multiple_objects_per_line.ndjson" - -# what if we try to read a CSV? -statement error -select * from read_json_objects('data/csv/tpcds_14.csv') ----- -Invalid Input Error: Malformed JSON - -statement error -select * from read_ndjson_objects('data/csv/tpcds_14.csv') ----- -Invalid Input Error: Malformed JSON in file "data/csv/tpcds_14.csv" - -# how about parquet? 
-statement error -select * from read_json_objects('data/parquet-testing/blob.parquet') ----- -Invalid Input Error: Malformed JSON - -statement error -select * from read_ndjson_objects('data/parquet-testing/blob.parquet') ----- -Invalid Input Error: Malformed JSON in file "data/parquet-testing/blob.parquet" - -# we can also read the objects from a JSON array (not newline-delimited) -query I -select * from read_json_objects('data/json/top_level_array.json') ----- -{"conclusion":"cancelled"} -{"conclusion":"cancelled"} - -# and auto-detect it -query I -select * from read_json_objects('data/json/top_level_array.json', format='auto') ----- -{"conclusion":"cancelled"} -{"conclusion":"cancelled"} - -# the file only has one line, so if we read this as ndjson, we just get the array -query I -select * from read_json_objects('data/json/top_level_array.json', format='nd') ----- -[{"conclusion":"cancelled"}, {"conclusion":"cancelled"}] From 976360168eb69c60d59b21df9ba17d2ecda9bad8 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 11:58:00 +0200 Subject: [PATCH 03/32] Add CSV --- data/csv/unquoted_escape/human_eval.csv | 339 ++++++++++++++++++++++++ 1 file changed, 339 insertions(+) create mode 100644 data/csv/unquoted_escape/human_eval.csv diff --git a/data/csv/unquoted_escape/human_eval.csv b/data/csv/unquoted_escape/human_eval.csv new file mode 100644 index 0000000..2880643 --- /dev/null +++ b/data/csv/unquoted_escape/human_eval.csv @@ -0,0 +1,339 @@ +HumanEval/0,from typing import List\ +\ +\ +def has_close_elements(numbers: List[float]\, threshold: float) -> bool:\ + """ Check if in given list of numbers\, are any two numbers closer to each other than\ + given threshold.\ + >>> has_close_elements([1.0\, 2.0\, 3.0]\, 0.5)\ + False\ + >>> has_close_elements([1.0\, 2.8\, 3.0\, 4.0\, 5.0\, 2.0]\, 0.3)\ + True\ + """\ +,has_close_elements, for idx\, elem in enumerate(numbers):\ + for idx2\, elem2 in enumerate(numbers):\ + if idx != idx2:\ + distance = abs(elem - elem2)\ + if distance < threshold:\ + return True\ +\ + return False\ +,\ +\ +METADATA = {\ + 'author': 'jt'\,\ + 'dataset': 'test'\ +}\ +\ +\ +def check(candidate):\ + assert candidate([1.0\, 2.0\, 3.9\, 4.0\, 5.0\, 2.2]\, 0.3) == True\ + assert candidate([1.0\, 2.0\, 3.9\, 4.0\, 5.0\, 2.2]\, 0.05) == False\ + assert candidate([1.0\, 2.0\, 5.9\, 4.0\, 5.0]\, 0.95) == True\ + assert candidate([1.0\, 2.0\, 5.9\, 4.0\, 5.0]\, 0.8) == False\ + assert candidate([1.0\, 2.0\, 3.0\, 4.0\, 5.0\, 2.0]\, 0.1) == True\ + assert candidate([1.1\, 2.2\, 3.1\, 4.1\, 5.1]\, 1.0) == True\ + assert candidate([1.1\, 2.2\, 3.1\, 4.1\, 5.1]\, 0.5) == False\ +\ + +HumanEval/1,from typing import List\ +\ +\ +def separate_paren_groups(paren_string: str) -> List[str]:\ + """ Input to this function is a string containing multiple groups of nested parentheses. 
Your goal is to\ + separate those group into separate strings and return the list of those.\ + Separate groups are balanced (each open brace is properly closed) and not nested within each other\ + Ignore any spaces in the input string.\ + >>> separate_paren_groups('( ) (( )) (( )( ))')\ + ['()'\, '(())'\, '(()())']\ + """\ +,separate_paren_groups, result = []\ + current_string = []\ + current_depth = 0\ +\ + for c in paren_string:\ + if c == '(':\ + current_depth += 1\ + current_string.append(c)\ + elif c == ')':\ + current_depth -= 1\ + current_string.append(c)\ +\ + if current_depth == 0:\ + result.append(''.join(current_string))\ + current_string.clear()\ +\ + return result\ +,\ +\ +METADATA = {\ + 'author': 'jt'\,\ + 'dataset': 'test'\ +}\ +\ +\ +def check(candidate):\ + assert candidate('(()()) ((())) () ((())()())') == [\ + '(()())'\, '((()))'\, '()'\, '((())()())'\ + ]\ + assert candidate('() (()) ((())) (((())))') == [\ + '()'\, '(())'\, '((()))'\, '(((())))'\ + ]\ + assert candidate('(()(())((())))') == [\ + '(()(())((())))'\ + ]\ + assert candidate('( ) (( )) (( )( ))') == ['()'\, '(())'\, '(()())']\ + +HumanEval/2,\ +\ +def truncate_number(number: float) -> float:\ + """ Given a positive floating point number\, it can be decomposed into\ + and integer part (largest integer smaller than given number) and decimals\ + (leftover part always smaller than 1).\ +\ + Return the decimal part of the number.\ + >>> truncate_number(3.5)\ + 0.5\ + """\ +,truncate_number, return number % 1.0\ +,\ +\ +METADATA = {\ + 'author': 'jt'\,\ + 'dataset': 'test'\ +}\ +\ +\ +def check(candidate):\ + assert candidate(3.5) == 0.5\ + assert abs(candidate(1.33) - 0.33) < 1e-6\ + assert abs(candidate(123.456) - 0.456) < 1e-6\ + +HumanEval/3,from typing import List\ +\ +\ +def below_zero(operations: List[int]) -> bool:\ + """ You're given a list of deposit and withdrawal operations on a bank account that starts with\ + zero balance. Your task is to detect if at any point the balance of account fallls below zero\, and\ + at that point function should return True. 
Otherwise it should return False.\ + >>> below_zero([1\, 2\, 3])\ + False\ + >>> below_zero([1\, 2\, -4\, 5])\ + True\ + """\ +,below_zero, balance = 0\ +\ + for op in operations:\ + balance += op\ + if balance < 0:\ + return True\ +\ + return False\ +,\ +\ +METADATA = {\ + 'author': 'jt'\,\ + 'dataset': 'test'\ +}\ +\ +\ +def check(candidate):\ + assert candidate([]) == False\ + assert candidate([1\, 2\, -3\, 1\, 2\, -3]) == False\ + assert candidate([1\, 2\, -4\, 5\, 6]) == True\ + assert candidate([1\, -1\, 2\, -2\, 5\, -5\, 4\, -4]) == False\ + assert candidate([1\, -1\, 2\, -2\, 5\, -5\, 4\, -5]) == True\ + assert candidate([1\, -2\, 2\, -2\, 5\, -5\, 4\, -4]) == True\ + +HumanEval/4,from typing import List\ +\ +\ +def mean_absolute_deviation(numbers: List[float]) -> float:\ + """ For a given list of input numbers\, calculate Mean Absolute Deviation\ + around the mean of this dataset.\ + Mean Absolute Deviation is the average absolute difference between each\ + element and a centerpoint (mean in this case):\ + MAD = average | x - x_mean |\ + >>> mean_absolute_deviation([1.0\, 2.0\, 3.0\, 4.0])\ + 1.0\ + """\ +,mean_absolute_deviation, mean = sum(numbers) / len(numbers)\ + return sum(abs(x - mean) for x in numbers) / len(numbers)\ +,\ +\ +METADATA = {\ + 'author': 'jt'\,\ + 'dataset': 'test'\ +}\ +\ +\ +def check(candidate):\ + assert abs(candidate([1.0\, 2.0\, 3.0]) - 2.0/3.0) < 1e-6\ + assert abs(candidate([1.0\, 2.0\, 3.0\, 4.0]) - 1.0) < 1e-6\ + assert abs(candidate([1.0\, 2.0\, 3.0\, 4.0\, 5.0]) - 6.0/5.0) < 1e-6\ +\ + +HumanEval/5,from typing import List\ +\ +\ +def intersperse(numbers: List[int]\, delimeter: int) -> List[int]:\ + """ Insert a number 'delimeter' between every two consecutive elements of input list `numbers'\ + >>> intersperse([]\, 4)\ + []\ + >>> intersperse([1\, 2\, 3]\, 4)\ + [1\, 4\, 2\, 4\, 3]\ + """\ +,intersperse, if not numbers:\ + return []\ +\ + result = []\ +\ + for n in numbers[:-1]:\ + result.append(n)\ + result.append(delimeter)\ +\ + result.append(numbers[-1])\ +\ + return result\ +,\ +\ +METADATA = {\ + 'author': 'jt'\,\ + 'dataset': 'test'\ +}\ +\ +\ +def check(candidate):\ + assert candidate([]\, 7) == []\ + assert candidate([5\, 6\, 3\, 2]\, 8) == [5\, 8\, 6\, 8\, 3\, 8\, 2]\ + assert candidate([2\, 2\, 2]\, 2) == [2\, 2\, 2\, 2\, 2]\ + +HumanEval/6,from typing import List\ +\ +\ +def parse_nested_parens(paren_string: str) -> List[int]:\ + """ Input to this function is a string represented multiple groups for nested parentheses separated by spaces.\ + For each of the group\, output the deepest level of nesting of parentheses.\ + E.g. 
(()()) has maximum two levels of nesting while ((())) has three.\ +\ + >>> parse_nested_parens('(()()) ((())) () ((())()())')\ + [2\, 3\, 1\, 3]\ + """\ +,parse_nested_parens, def parse_paren_group(s):\ + depth = 0\ + max_depth = 0\ + for c in s:\ + if c == '(':\ + depth += 1\ + max_depth = max(depth\, max_depth)\ + else:\ + depth -= 1\ +\ + return max_depth\ +\ + return [parse_paren_group(x) for x in paren_string.split(' ') if x]\ +,\ +\ +METADATA = {\ + 'author': 'jt'\,\ + 'dataset': 'test'\ +}\ +\ +\ +def check(candidate):\ + assert candidate('(()()) ((())) () ((())()())') == [2\, 3\, 1\, 3]\ + assert candidate('() (()) ((())) (((())))') == [1\, 2\, 3\, 4]\ + assert candidate('(()(())((())))') == [4]\ + +HumanEval/7,from typing import List\ +\ +\ +def filter_by_substring(strings: List[str]\, substring: str) -> List[str]:\ + """ Filter an input list of strings only for ones that contain given substring\ + >>> filter_by_substring([]\, 'a')\ + []\ + >>> filter_by_substring(['abc'\, 'bacd'\, 'cde'\, 'array']\, 'a')\ + ['abc'\, 'bacd'\, 'array']\ + """\ +,filter_by_substring, return [x for x in strings if substring in x]\ +,\ +\ +METADATA = {\ + 'author': 'jt'\,\ + 'dataset': 'test'\ +}\ +\ +\ +def check(candidate):\ + assert candidate([]\, 'john') == []\ + assert candidate(['xxx'\, 'asd'\, 'xxy'\, 'john doe'\, 'xxxAAA'\, 'xxx']\, 'xxx') == ['xxx'\, 'xxxAAA'\, 'xxx']\ + assert candidate(['xxx'\, 'asd'\, 'aaaxxy'\, 'john doe'\, 'xxxAAA'\, 'xxx']\, 'xx') == ['xxx'\, 'aaaxxy'\, 'xxxAAA'\, 'xxx']\ + assert candidate(['grunt'\, 'trumpet'\, 'prune'\, 'gruesome']\, 'run') == ['grunt'\, 'prune']\ + +HumanEval/8,from typing import List\, Tuple\ +\ +\ +def sum_product(numbers: List[int]) -> Tuple[int\, int]:\ + """ For a given list of integers\, return a tuple consisting of a sum and a product of all the integers in a list.\ + Empty sum should be equal to 0 and empty product should be equal to 1.\ + >>> sum_product([])\ + (0\, 1)\ + >>> sum_product([1\, 2\, 3\, 4])\ + (10\, 24)\ + """\ +,sum_product, sum_value = 0\ + prod_value = 1\ +\ + for n in numbers:\ + sum_value += n\ + prod_value *= n\ + return sum_value\, prod_value\ +,\ +\ +METADATA = {\ + 'author': 'jt'\,\ + 'dataset': 'test'\ +}\ +\ +\ +def check(candidate):\ + assert candidate([]) == (0\, 1)\ + assert candidate([1\, 1\, 1]) == (3\, 1)\ + assert candidate([100\, 0]) == (100\, 0)\ + assert candidate([3\, 5\, 7]) == (3 + 5 + 7\, 3 * 5 * 7)\ + assert candidate([10]) == (10\, 10)\ + +HumanEval/9,from typing import List\, Tuple\ +\ +\ +def rolling_max(numbers: List[int]) -> List[int]:\ + """ From a given list of integers\, generate a list of rolling maximum element found until given moment\ + in the sequence.\ + >>> rolling_max([1\, 2\, 3\, 2\, 3\, 4\, 2])\ + [1\, 2\, 3\, 3\, 3\, 4\, 4]\ + """\ +,rolling_max, running_max = None\ + result = []\ +\ + for n in numbers:\ + if running_max is None:\ + running_max = n\ + else:\ + running_max = max(running_max\, n)\ +\ + result.append(running_max)\ +\ + return result\ +,\ +\ +METADATA = {\ + 'author': 'jt'\,\ + 'dataset': 'test'\ +}\ +\ +\ +def check(candidate):\ + assert candidate([]) == []\ + assert candidate([1\, 2\, 3\, 4]) == [1\, 2\, 3\, 4]\ + assert candidate([4\, 3\, 2\, 1]) == [4\, 4\, 4\, 4]\ + assert candidate([3\, 2\, 3\, 100\, 3]) == [3\, 3\, 3\, 100\, 100]\ + From 1a3d991269f8ea938d2b78f15bc989637e047b00 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 12:06:10 +0200 Subject: [PATCH 04/32] More CSV files --- data/csv/lineitem1k.tbl.gz | Bin 0 -> 38116 bytes 
data/csv/real/web_page.csv | 60 +++++ data/csv/unquoted_escape/human_eval.tsv | 339 ++++++++++++++++++++++++ 3 files changed, 399 insertions(+) create mode 100644 data/csv/lineitem1k.tbl.gz create mode 100644 data/csv/real/web_page.csv create mode 100644 data/csv/unquoted_escape/human_eval.tsv diff --git a/data/csv/lineitem1k.tbl.gz b/data/csv/lineitem1k.tbl.gz new file mode 100644 index 0000000000000000000000000000000000000000..df6e512bcd152e00b591cd678fc87c30b03543e7 GIT binary patch literal 38116 zcmV(Qj#zE`=} zzOWxCU=y5aVK0&fYEIJ_KIeOH; z`J8=sZ?(Ge=HGmM3*MJ@3-Qg}T*60q``5RBd-J`QA6@*>NBJ6l^kMxP!rOoQ(?9>y z|MRDx9{%ZHAO7c`e)?bk_OEaM{?q^d$N%y6^y}A;k1r4Jp1;0)d3|{N`T4I;4_|(J zdi#TaD>Zr&9ld`GZ?U|k+*`c)0;wGz#fVNSFZ>wV`Zc(>fB)nE_|wCG{o_CWkB6WB z^zZ-sr+mp}f~Kfita_VCMxPmdoT-ambQ`t<(k)4LB(umAbs_2W1CPkw&>`2K;% z5UX=<^x@y)8~xQ7UB4Aa(!D#*-v1c-nzGC5*XYkf_~}po*TWzG>8H1s56@po8Pk(*-M6y^^a-q_W$MlxzAKpS}A>C42lTF)JF9EdcX^nsu zie>%xfBUa*4_Md_pMHJ#<@xUrI@b1uh51}!rsZvKG9of507tupe+oYMqfMqVLHukIl0^*mG_Yc z@zdLXhomHu##c`ZyMCpsw>I@P9*e^?$P7Bch3+@H<+$Z zA#rb5X*y1|ms_f9W0zdoN)p!PYryT9ztVxYp7+DoPhVfZ(nW%}ub(h^I#et-v_#@! zs)=q7ovqkgcemEogzMWM*MvRJ>5`^%4_CYVzy9xk`k!yOy?nGD{`UCir`rQ$h8`OY zx;4HqZ=v{ht1Qb8sefEE4Z5sej@9@oM|dcxZ|1Vh^ZcG?*GL!85zY?RrO_$wt=w{0 zn$fOmkRvqa>8z~Ns^aQA{z|j@^!TeBU=a+lCHNHIdMSZ!PN4s5HO8S4rb^N&I#Eeu z1WmX2-eyAV>&K_3&kv8ke0h4|WAzTV>Fp2ctu;q?HfPd*bbm-KxsWZM(E% znpr15er*@qIo08h4}bak;oYBEON@u%^W(ceKdw>T9zMMP`0ecvq+DIgbeZV`m)X+} z#8TTW=dlF52kSD?Iz%sP-qu$fGpCp7%Q!~n7O|WVJD*%AU#xd_HN;yFOH;K?&N#y) zr~eqT{@PV5l6rXf`uZe0JLmiQ`RUyUI?o?Iy?%N5`VOLF%`{Y6@km1H9ZZ3amm4B^ z#r5tq`acyG*RM3*^I~JNPtWh4k57Tzz?a_1UcQoH^g*w?EKdYQfPp>IHZ54bOTku8miBEO|8NEbEJ4%n$t2_06KE z_49{MU!EU+etP^QTmJa#Bi5ZxJIhZw&=spSW6SCO_)O2lc$?bsmdiwi2y5MrQbU=?E z&3iqTg;WWhu~7P3rG+V}mYdI)rgac?I`%{d<)d!I2h!F{b<$fA-dd>ir)tF}Vy{yT zp~_9l+fmN^Sj6!anxpEk<=!hFJtJzjoYd-re~=&R1i2F7!5zUCxbe zOp5`xi=@@=au+8dZ%)FPThp&@iK1P&p7HCq-#>qO{v!6saiaE`jm@XEr%`zF70Kp& z>mEZX>rC5OS5Yo3f1SOGhtqETurh!B@Cz;WyGL9xy;L`nNj4GrWu!G5z5euC(aCec zS8-DN5Pi=knqNJ%o0TexWzAC`ESa%bCSw4O++?zhu<7j1&}*IK%!RQHo88$Dmi;ic z+gX8+gHH12d(Gl}tmL}9(UTlu9K4aC5N=U#b-a)^L}dMX@jha^Nt=Ipdi?n1w-3_~ zCUwSAW9+1Y>7(O3dy+~8bVBiX!d z*n1HgJf~Y}%XwRGWK&hvxjpe;2t$|9%h_s{ZEf4_ zV!>nDO>M#PHK>{%N=yuU7U4_J&=R?lrHM;Zn!g3rm%D`F4D-~2)2U^ZKu2cyMMFW8 z47!nI%7vc!As}}wBvOjaomKL9>gEINp@Nom)vjN?AC^Y`@?q#cI%Z@s(rce+PPh`J z{+cUU^j|qU3n8e)qQBNFX934$Y?66gmkCE#b&MdmM&eq+(WUp*$K+?^svPYcFPeb8 z6*HC~hWoIlmN&#A=cHL$%XA>$YH4g$kvr0zOShnAMK&+3mlr(iog82NJy&=633O6F zjOo#TiF9o0!=~c*g)T%-=X04wz3ukn9LGuLkOuhh@#)i-AC~o~!HnnU*Doxd?8gFC zSD@tCvWT5jZd+DlxG<8(B-NrjXuO{!E_X1LNVU^sYau(C))%3ZK)T1J>4o+4`E|xr zGq&VzA&-TX{gepgZCSA-x56}|+m=0aF1;#nzR8ja&OGAk2F>Rz@O!RbecqMMv{mN$ z(V&ri+$DHJ=QfaIODD29zDZ;%x}3r-g{AJpHi+Sb49QK0c5!!kz4^khL>~}hVU!XD?GJB@_qPB8syDWkNqmm9>~fWkp;qr#NCQSh*014w zqK&UDhmFPg28)m$Qk*kH7D!Ri@=h0XS5SUh3WXmxw|=#bm^_@1?_QqCK*h$2W5(B+ zh0;-?2Rlc442GvkHY)iX?NTfEjt;ZD_^xHt@k+M7O zF4g5XbCo}^DDrJdPnA{?+=hBi32|n3ehYY_Q#0f5^Yp7k29N&Fd;`P)rTM-6fvo(N z@vPHFz}pTzRmec;+2{uiD|ay;_^a2T(dC{_jrHy~tD1hGqxIp_!{?8WU!Fcbk-nf& zWRhR#?MW4h4?fL-^u0VsaZ{C90toA?JVyu~MH_lu;nw;5zyaZe#WWI!sv_w%HoApm zB04eBddxGx;116(RMqtK%5uE$Yu}cfQyBCLJ?g7KmJW;akxnrE3-KCSaq_asbMLAu z>L$$o6ZA&cpX_-ff zw2Z{UMW&30^L1EN$1wkl6OaG&=jSmLa(u8VsdKJ@yl^^=Ud;4#4Eaw(lZWE;t8b^X zZ=8M-gghhylpV2f59c~WCr?G+RA=SXU|rX01b?fIVNE-LJxd&I&KSYVACjCWEkViy z(Ljzy#@xix=@(g}DQiw{d=0w|uy3shXK}QGg*L5VN8#I%*VAMd<94*FY})+lgB5)5 z&D9rh3IFoj%j4^B5*6h!kb^@9l|C{&JzCx3N|6B0Zdz8^t~L9vi+&YC8Mgb=GG`yR zv##0C8LyOpT-JB zJX)x)ynYRmx8gOH^yi*c;DECnw%d$ouynG$$iO5;#t{rM49FFY^OWzCWwD{?2&#@N 
zTW@K_f#IpisZ&>R9k&K;L9%OVNsJP*GyAfE4zimB`G>%^ex07$FuB%luGId+BU#;l zm$&nA#bd_^daB6pgk0dgk-b8G>wK~6W*m&!&nah(GqB-(xZ@HBrkOtd`a-XzjZ>0T zGQM(q%Vgi=n?GKeSmIz347o>gR3;6c^2c)d)hLA2O}(^z9w!(t;j>7-4vL%a|Uq;HbH?1%g; zv`ATkLS4W5O`+J6gs22dp@k%sTuEcVu1D~n9xJgs>rqbCJa82qmLw#8|9SZG^zMt| zbNG0TWo;RX*f|b?(8Cm}uQJ1YSXH<-nwFiDh25RZF&-cp$oJzBLdwjZS7UJx#mNKkz4**Qm<$t$=21ee;$f8Ik&Qaj zpt0~j|_p89t66kvEVKAfO9laN%HL5DX)E;sH-ICoojP;rK^sK?=2=U zW}e8_AVVm@d4&RKiRSIpXUyBY*ZthS(l+QB-a=b2drS`N>kG?_)knsCHN^D*Js_QR zYyR@$Y%9F52W$bJ9Mw;HvS`C^4}W2CIhRawd?j0*{_6mBxNwU$ANt3u#SS{hfCGJ< z?N%$3(`55}t(ZFv!2?{t*D|sLn-NP4QQpDbooa01x%GAU%i~vna^A)^^D8vuSBHWJFdb(f)wDL}jA4)Wd@D7r-~K=f+6M&5k(|V_CYzq@oOUa? z$le{bW>v4e$n2BkLhnLnKmYjSo{kt|V~*r~(+3+Eq{ISjoT@fX*kz@C0EyBPJ4|<1kDr<%;@@>^y_A2J${3yBu@I4>``;JBOQ{- z!VEyTz_Ow{?o+>2O~e`i;P6q$*9m7GzuvuPbj6le!?_T%{#XUjWglEan*tUAiY}S9RTJ^%?!I_2IUw6 zv&FQhdn3wRT{^vjEZe)z0Eke5#AM7(xYe`{f!k!y@Q)A6h8dytd&ls$-tEo1m9sHs zpMYDZ54L-06p_YceVeciVMaz5{xpjRe+ov)Ib54y5)85(!{{IDP%12b`aq-#uNS#L z-r6&9GZaHy!&s<3PN$zi5sHI&I0T1TK8Z&@#naSNiF_G>e|Fw4tSIXQRYQ5jgb$h_ zo~27v5*LC6k-md^kI2*oM6P;jHpN_x+R1A0X91*)ulItGeC6h{&sXMeECcEKSk^Gip09AxK64EX>O9$@U4rfH8)6;u8gDJJuR~|9rBdah^`q3P zz~iJ=RK0#$*t>%XGt+~B&m%w>TtIq-W4;Jp)Jn^p6={<SQGU@?i`i6#!Bj8>}3paw!B0 zKu-IdjdkeOuUFVT9>Au<7{vZ*h>gtwGT|cM--I((W^8PS3{C-S1YVc=0-pKpLXQu{ z@$1VFoyl57)+!HQAp7k5t$X8QZWc??377TjRsD&eo8I&CE(|hc(ujQvbcJ%m-$1=U zflD$T(OEB`Ny4HT_N;K!I~%=R>Q^#d7KqINyiK@aElz&zb0RmD^S&)nIKv)qsRDME-qF~-4`asli-@ZF<-IlA&z!|mzv1(dF0u#GHoGnfFe293nOizrf_ z$ZK$6Lu4dTs<)iXdu^cr3u96BmiPPRbssRzgH5tbXOut8dp8CXTB7Z1aFQd((tIf8 z*81A&v=7GX*QSFh1`BQWSVVM(M+-G>j6{fzqzOvaALm=Oi9RA4JJox(I>C5!+O~h=)(}sBRc|5HxePpTLOucklie*qserC^t z0^))|SMQciu7_g+0f*p7#~3q6(9s=a>rLoXdxK407<3=MIZ zoEt7IqTew@Ka%`@5C2705@pE=b^Us`4C`7s56m>9wOxcQnLFddP{2znvW(#S3_&l0 z-cP8O$-RtUFWjev`DDv|R3+(y$c`)I(}k_{Ist7vP-7*Ocv;iIl2)if+s<>qKU2Ap=h`FsaCIwNxzL zq8OP)wCB+#m`ovKdmn#)9@^w#L*j}+QqB(klw(y5|B=r4Lfh`f=h9op>VWC0E*#5$ zG6!leUOuK=#xTh0CZm%+@Io^>lN_HF0d=S1h>NAJ$K0_5#{{`H@;W@_7~x3f!nKW~ zFg->o2XW^qo8#lEPWi%R#vA?70?E0w(?{kx2s9D;;9wlNbkzlC&(dCv?zPNkg}}mP zMWtwLewxIt|M^*j(s@M%L-Z4@`@0$jPye6ggob(}1Yc7L1BB&UiUEq>25tKrm< zf7R9l;!eflS~#{skzrY#<5)?G3{itEkGUt1WbUy9vao%qi2u^T3AwZ+Ih9jb#A_*; z*PtUi(W*!`k9}&dxzs*82O-dyoVSgh+&Jf!nP5t%{hY2Tnafg)v+T}K28XAf3`v74 zk!qv0tql89ZKo#(Iw~WO z#gPXEI?q&Xy@-Ze2xvWD9HmK61xjhwkM?DrL(dK_>KfxZ6r^!QgL zl$k?7WVe&^Om=VJViB^>auVM?Y&)OTr{Q9aRkzWP<=X7E)9W#g+3WM~j|O)Osga#3 zur9gI1!j@FyBPZ|=(^jGdS6KLHS;PkzAFaFxa{JA)QbRonQ zC-H~#Hj1UE*m?9u80T(9-CoBxiVSva(iliA%_APkSyF}`$J%eL8gG6xkINd9F%pjl zUzTF1al>QA>mdsO{}S(Yfkw@UxX%YP9Cl#U^;Ipa9`nDC33&MZE8PWtROW({V}*O} zmVFSr6xkheSyefbs#HX+>4yvZV0j-Fu*qiHoGLxtblh6u`T@tzK#`Ywe?c>37M>iV@NmQNyAuY!%@Rdxxj1QI8O)hAp>ws?=+hW zKzK**Z2?W$({7XFKVQ} zSl%L$lt#p8u=hc^k+z#?XGw@k`2~aE3qI*PBCD% z8-O9u>m5<_PntAv(N2tko-uzJ!-*R4pLzNy8R7e$t(wFet>6$&Z+W2w@4l81L#m;8 zCtNFSyJ=D3;88&@QLx(gl&OvQFV>eKJGV>z5|D_nW=|~;%aKyfT z`RyZ}26O1=I7q>?^j1-&$7MUwLBsiC9&tPQweC^Tjaum{bd{u6-WxidAI8)D{K9&K zY-7@?^jvo*V1UwcE@Ckq$%Op8T3PFuvsiK}5)=C6AmN+{joi*no|% z*G1K1RylRrj|QbqwL;x8%SV8ew1%!J@FDXfpmVTlM&y&i2AS9O3ql zGpKPwe*E%0O57=m=m%B^XL&~0GV6^M?S zxyzRbBR0qx&Bb+h%fJ8e$927F2tAMo=unabT{2+Oq(+1AwComI6K{HbRgBY>nIxs1 z7g6!4*)42+42F~{QsxcRDuF%jmH`C0t^|yVr86PiYs8gS6RpIHh(rlqKoMLX$`pyU zDA4$R6lp@UK~ZhjbWUUAm7{AX=6f3MsSKi*1f(0_P0X8052Wbx9o)3z%qG*yEKj%! 
zk!}tV66hN0S?R_xh!#3czoHH#k2(+_QmRsV#_c4m`0#X;npEDLZS~vPhLL$Ej{s)O zp1B$_${(8^Jh#IrqY9cTZx)ECW?gUfK=O@81>YetBEbf$)FZ9~a>)d;1_SG|kJ!wz zF>Be%Tc1cy1R-xh@kh(_{SfXm?JxjN2=m~qMN&Ima85f&*U<1rb-XOo_qV3ApK?UW}_I!6wW{9>_1@-mkfThfiiEWp--5Ek%TZAX85t8SK4-(>$jX zY(=@TSpjNH`ZcVthdnrO6pX?b$nq`b0EV~}1_~bKi5nb6WhJ@XLE*?j(ARrq@iEo% zWdO0IL{Z`m9h^|TbBr7mSco^|wA_5*nfarc>55VpE8QECT#aa9S!ITaPIJMLt^$4r zGGdV+AF||GS(5fSUTAQBFt_r+Sx9CxZfvfNp&&GmLdAxP>W2Jkm1y41$h@Z^iUKlD zxd2AOOno&SPbuu4({H`u-#QpYQDBBbA2F5o3c8^69y>CE7v+xrz9NcJkOBq(eki!y z8J>^_^2i4XYlVa7w}AIS(Dr8QFT+~%D!qT=GT8ZIzJLDp*C)1J=n}OAjvwLcEpG-# zn6?Z|QnfiNBj>M2FIU8W`(YB)OBnaa^2%=67zJq;RF%h?2lGg!vp&;&c}uz_C6-N^ zzDN>Jwx!)V@j-a=ZbTwv+N3rLJy%)6xT%%Wq%yZ{{$J8C?k+Ga4^-k;$HJ0fB7RVT z0RsIXV1yTzml|<- z!Cu=nAFu@ukD}%jCIX9OIC`|mQ}|^%h@=BmM{h@tZvyQR*B-iRB6N{TG=SsO?T_I+0hC<@VA--PvTxTzZ6!&S8cjojhw~ z9eV-M5R@8FF(#U~u2vF*UJOR=MVXUlOJpxN3K^RH0ncfd8mm^Gdjf zEf~TRJ!z!MJNaapYd*+LncAdpN;T{0%4#kQwx@f@FXex?3~fT9%X4?RfhEE*2prk{ zd`X%%c&oBb*Bm&1fh-&>JIvp;+2yaFSTGr;WS)>rS2(1Fm{u-9sIB(K95V&5%zw9j z^%p5_$$l;~T!>vE)91h#QpU=_=OX)IvaiIW2WW61(+6#m`@F_-s>rd=Pp|JDKR>=l zQcT3B=1|q)S``P3%qAyFB^~E%s+oe);BZy+DHFNnSdV08uOi*W>bZ*pj3@9n(=#Ue zv>k;9Wk89xNc>8YPXUhp>W`&$#mN;+|Ih6ZJp1|$Vw`XZ?s`E{*NbRN*9)~w?Bly-k66);D>tv z=zOgb`4O{q>-3-uCQ%`fml}Rx)t1>h7G8w9R$z*G+&38pjYq&~P3(NN}f1|LZ)i2Z>?33AtZ3Tk;dDrjeasa zeu3}3Ly&JP=)U7Od>q5b&Y{5;QdHo*BKuhtp7oX{>Cuv`W#X6DZqH9bRz{$)^T7#> zVR)6^r^?j6~fNvJCz3c}uNN&(P=>i@(#m=vbL6h;%*Gb~#9}gtJ zx`wbn9$%hi42e89k1hhtewGCPY!;+!cnoXGO~8>mvILAGFk5IR$>Pn}8Uv^R31{T# z8Dx)A^(ZDsI*KpWyk4eOz3gbjcHRshEVfd`Y|hhw5Dnc)dhi+5M_0pn@#A(>--$3( z4KH^=-hToxl2Hw(eJr{UK*v_n0hQ^7nPbswk?mp8gPsA+?i^_)O#%yIz$D)t2x%D$ zfTUH3!gh9S{ADBhX=B6KT;g}~iYV&fii3R!CttjgD#R;J5#EF42Vd45ZZ%Pre}vl@VbE1HxMm9+-RkqHK_Vv*1~O?fa3Q6&t%fbt+Y! z(+8Jf>WGA)A`5kC)@j?5St~2+`l3q6XknnsN_*O5SVw-AaYuH1pT3mm&QR=Oay;)J-t z(+kvTa~juO1J4Y|#ZkD!?4@)P#)^BlMVa?yPa2fT*NDQwW2sgYgdzTr8(P10X0>rB z7Os=TQSw9^C7xejqkjD^;We)JR@cH0rmlvG9E>6P(7Hpf5H>032U6#tJkpJ@MtoE= zHmXT?qR1ODsULQF{voe6SZ(xCdAWg#2*pAuZG~9S3awzpUBYg-HcnLA*?=4xK;~Qv zs%!xkXM#tvB9ia|grfrywWDLy(Ch!9u){Fp9V}YOd&oHQ;I8$4c&*7*FuQ9tWZ4Ij zby@rxi0z}beDr@17R?C;*(Tr*bDj zPz*57P%-ADp1YS+Edu1wJ2a|Do$pnWm=Dd+FMP`5CGH2`D(*5Z42xN zGas|W$JEv9!AVgI`STE4BXgCOHmDy%P1Z3i+j5#Vp@wwwDnB{sAC6rhW|h!QHkup7 z(il2p+e#7s>r{j9%Pl^PVr%ELID|M=tV!V&^-;S?BY$&;(P9YkY|dTzRF_NKHQ?L*5k^5Zw1auLLC->AM;q#stRz*6WPJi9>@K6RYTgxz8 zU4TB$q|OU;`l#YY&y^rOmQR6*RoIZUkJmt{-@$)e_3ctT^lc2G(SuE|N=w5^Egnot zUI$?4MlB#-b?NAEdgu8X_?LO~#z`~W*sI3s7g-Nv?@+OpM&HlkJ(&+qt=0oP-`gRW zF&e9Nqlc3WntG^1Y%ZiU-wHUH!6QIMJC6Lu7x7243zKNoE6uF;mwxEr&D&djN_jVD za{of|vHuP3P#raTXou2D6(^w|-?LeHGP|Z34RrJr{`_IzoWSGoTV-0A_ON`SLL}&xp4y ztOso4095|^^8EGPZ%+%4G8TPS!oUZ{#Y+XRSz$dywlk@sA}QPx{rtoRG&S5OH}`S% zcWNg&_>UidA8_rEnHU8r>{z)Hq#=py1s&mzfiS)Mt4?&P-|dN+0K=~bEQ}^umI*6^ z=%_tlP-ofP9;th7&t3yzS2a?bxoR$8Nx4KA^{2EC@#l& zx|1^TR1(@>^u}cP@T5~q*2+qo$zy;6rj__5OYQ15xI&V*J@>~qvA^|JL9&N_z=Ko- z$lqJF!AaZOapA+ZHjnWFS#7v6N52iz$8?BB%RV%uN}L6O>{fi1y0mz<0leBB)&#lV z@IvB{;#UlzA)?htM5_^|Yb-9-yi?PwOt%TrcIG>XLZhA+FFtb#@@%{K>7z3h`W2DW z(9b4blsNaYycCjmm3Er}X#viU#WsA`Lv{fDe+}Hbub|a*w`Ih3or5*7f_e8YQ!mS| zV3kYKP-vNR10bq#rDaB=X*4o%s#Pq5TU7G2u&S=^#D!LWD1FyNWD6C!`?`9YRg`6S zZ6{*l5{vP*DyVn%RWsYV(@jYV@0X85VDcfLIVg8ahg0m+>H?52rfBLYvrh3|x_%JL zo?84uFdd?b3M)j;2OAN2W%V4yt?KnxeZi}BJ=sxM`gkB2jkcH<_HU0va?iA+Z_(w^ z^f#ASi?p0Q6_LhO&9;2Q*(~QyHtPbQKM5*F32#6tX=HZD(IlSSq>#P0989(VrfF2Q zYk0RED|q!tU8Zr~KMu)-OtS#{EDp#3*u}R>{Qdf_!nTP?iPBN;4oH81#6OUk@bVy2 zt&yK4##18Vk_%m8aZ}nBbc&jqh?=OmqENv^kMzyl!esEhZpZ~0hHoh|wZy1_fn(Yi zYIRo~$d)RR%M5Rin!?lbFg(INmM%-}&}+E%hJFMP>Ck#JD{o%g5X-B;r?2SVpsqWc 
zrWdUFk|{j~LB>dW0i$>yKuc^oDYEG)92-=Qbc)ZRmEajv2ZS7-85Kn%$-wEdRj21>AjWWRpB zg4HsxQ!;^vaLi3#keTNAd5i{(K^YBD{m7=|+G-arnO(yc1%wpkw<{M-8b&43=(i#K zpC6BuBeN|?z*UbOPAW`8_7al^)YK~nYuobXE6!NdF73-*-~yFNi8)j&3V==$2|-5_k~O!;(Q8VAV^VM{!VUZvZb48hXb@FZ^g6Q zfeh;9X|+C(;Q2V;BoO!5fe@#84!I!X+9NQ9mJw%rwq`rCq?E4d5=A*Km5H14V+Fgz zm)N*Y6tP6S8m;zhJdT?=tlWh5YN6R;g@t`J&VLIb(vm{k_3AkY_=i|kBQz;4w(2=nDW z#}N%YlYMzD$CmXjz5)N=Ihx2WT#4b9H78@4Ewb&~x6HqCpN-u_50R5WpNAR)k}|W0 z3wqfGYh&)_$YRpS%A}F5+5T})r!PO_7M6Te% z5WiI>JdRY*nFwA+QF><;fRr=0F!og}qL~TXXoQXJIU70FV~?4Ck*?U<_4QyA4bicK zDcpDNRg~IWDZV62Z`-cV+U$HWP5*m68*S!$6zQ7RTd1;SFH#9DlQGU&Xl@tfoaBN1 zNxc-^fF#C$l_1mo8ajjGFr8+;yxNMgz)etqtKr7D3;^gg2+YbKgNjK}W?_Xl2XtDm z-?WKGMs6hir9eO|7rhjnDMAs`_X)lb=i|X%u1?7wO^BMLvq6o!A^jU^oR{ApKaH-( zauQePOqSo6WrZohxF18e>6^UAGP;V?SjOJG%Cb$|4!))|uo}~j6=l02r@{S~Tt-7j z<$%~zgC;2!G$S|PX-oHa>|Kg3C!r%t6oBaBTsW7&M0A<1>WSFekF0^BteMerq>h}` zHwTKh1M37IgfB9RSR+{g^ua~br0;X}L0GKY@xT?GchZd61pwk^i_v$iy6rNB5fq2+ z>Fn-D;8=5ZXzzVZ9BOj1Ne|y0B{0&$r?SHJ9F0}NC{ULX(Nhx0aSrMtx?KiU?7aV4 zkQB5CENb~%{7$c@*AMrv{Ui$EL>`KZdr0UJn4RDwI$3qL*`h7=i(2XzwM;4>&7`A;}{>;qgmBe}X6SMknKr-^}d4^ltU z4PnIOVyeE$bCkAgc_jXtz@)*%(tD7V25B^~}VBaHOXf!-6f7fp}^Y%|j!otIFel+50d_ z?QNvWiStKfM)c#IeKJMGN7MD8VsbP zV`uiZO7B2>Ww@^N>%FA9?Yeg8=PZZr9sH2UOAaq+Fj2uT$nC(8v(XMK?9znKt}fnu z8-II#_vfc?KqWCzkR@;d$Q>g9yEAt>EI~Tby9An?xSjx0^1k53Q9D}?XzJlnemzzt zzy9|4@#Dj9kGKHOquOKKJW#CBQAg0KW=^l-bhYAVd#JW4fuQ~WW|_Iy%H(Ab9W9ZE z&o9rvf8fUILnN7;pidbAQc%QK^jj?LtagH8F@rKOPZsr)@|^-#L9k4bbUjJ}8IR_; zpJ=qzB(?@QDLs2uqWJus3&!gG%k$qJUr=+*^2oj9Hsy@bffW~#qcpszag(@6ZfF0i zaJN>8vy{uU`1R%aZzmQc4jRfp$h(1Vb?85gxL9<}TgTV(?izOCtX#C$#i6#5`H^AH zRM?D1Z;T~^pq6}a=BJIws`p#hgPymMEb-F_4$`w{9kSWq@5H6KQ|O57ioU9;a}_#i z+*X3+Gq*InNqZq=-I#PbZYyIfmu*5BM7gA(X&S4lQY{?UbFdUnT;wCsb{N%bGix^$? ztP#AbywL37xZ?yYDDG}*yh1u`i&o(dAvwpXsRB>@)-DqHQ9361Ba+8ozJD_sv-uyl z8E%}ff9C!-kyJi+bHaV_pwH3@uN^zNPJkk-1~cQe{M`qg0+~q{{`ZK__Zq9%8+B9o zBdM%tO{dtU?}DNY0|!JF6ljA>HE}x@JgsEw_aYp8dtNQdT}ATk z#C>SkijJvnRCzpo`SAOb1k@vY4>>{UujfAv*m>dZmE=#ieeM1) zLoj03bf5!Q2!pALPMPpT_syKxX%DIxlUX9%>tt``n5YDV>6ioBOTtvGoYh0wF|?t+atfrrq6HCH*k90^ji|t{yG>hn z)&Xsb!J_GH1eFV}P#94G)|?j3e+Ou0;Aje0LB!} znfeW4xmTks`FH6Ie?383hg|kI$Oht?QWV0q;JuBx>^7JIw63+y_aT+bB%QsR=8l$z ztI`4rt2_4uYnEO*>^M%{Py=DSOgwP1-rUgtv?%;ot_x`S9V+_r0w4hP%q)FcPk>2p z1)HG1vjqbmpI@KM>$mZm(USzZ0D#C7m?gyMoS#JUZPP|YeW0B=A+J8`99299yO+uR zw-4mrK0SUh94#0DU7^P8!wi`YiTU%IO*`!!T3uUTS3}8i&1la2PYWHnDv*}Vxh=^U z4Vn1?b=_?C;$ccCKy-@HInX$m#G+M%Wp4_ufGj@?^ikJr7$YOOIatn zXNjeE(YbTA>nwWQ>Ln-5=!h}dI5d1Y18tCwwip8a8Z^%yB^kv_jeb97mo};l1J$_9 zXF!*&<^5sRkw%7ip|v-CYa?t$N3B$$Ma{<-qNz%$`a<;UXstv`)p;=nE@XoTGte^A zN9L+=GIxsY+~s%pixNXPg`<48uksLyF{)ghAVG?W#0 zcuZJBv+x+x8}^(<+)V?9dZD=yLzCf5#Z0Q*DiY(%4;=7OB#hfD2bzTK^Be**7s*F9 z=V6IN-)9M9J-#1Zm%Q6@s4H0C*_2nT)89p{%alV3ARun8jb7ua_K*Y-EuO`8%CoL* z(rv#o6u1&*bs{0l@Abo{cNU<85kQAVG+Lw&2^mD^a+kMccPEQh{uE`iuaYUyLe6uk z{4%k*qE`fyaYS3#Vw5)HrGbIM+p5Dmf>%Qpf1NgE$LgR>`Qg$BO~z2@DQBuVY0;N^ z>s7Eqc~f##(1dF^6T72LVTAZcHQJE`%f6fVOLW>B*g?e-CTA`-spUNqH5{P#3XGqj z!*S4=Iqv1j)bAecagp6R7!SO*D0pQhk`oV0=A&z2cUnrxjib2}XkEdL>~7r+XAu4ObC4-s&%pt!<) zrmN;4SQ_A~8eTCVt{O?b+gE)IB(#ERFT^W{4MA3OwG8%QFMg;xb43wf6{}Bke0EOQ z4H>Tb8cXky`=bw3O@n4VI$wlx89Box ztEEAfQn<_9977-{B#9eUuuN`mU(o)EW7D2gdlG)Ly#-E|lmOKVh?5Jz&oFf{v%Vfs2V6?0qMD*uDT-P{q9qsX*)Bf^t;Px9@ zWOwe)O*TY!soQjOGwJ4N6W(jh(>2AQc)=TjT8_Y|?H+qx1G$Cdc_S7KZ$8D+ZaJ4ppX^!3xTp}8|w)l|ncB+#PN1|r*-I6;HZEw@;x*SUci@d@fZ1xZ!hvu#I=N#YUO z+QL@9{S_UlFqjfydF6%I@Wb;JeL-ovDe@O1I}yG zXj}03MS%6N^fmcraUyLWhe@01kgiB5;Y_VT3}HQ>`)~#ABKKsahp*>b&L;4+HPbdJ zT5YC%*%a%O0b-9BpW%711w}Gz)M)`J#SOMc1fzLdRkM%8UykHE4Z$Wp>gC+KUSI+o 
zdc|nK`&rVU)`(mM4HDM6A_uz}xL9U~e?J}Da_w@RBfT>eIwa@l5V&xR6GgNvA&Sy9 zkW=@-g}T&!i!AJ9V57hI#0^7IVJHIf)gX}`WX1)io~d!KTUgh-r38ZExXmVJG?dSM zex%WL4#G?G0sT`46+RM3DbGmnZf0hUS7p8T`7SZmtRxwP#f-<2`9_?mHv}x$sqr4I zy)W{BJ8^JS_GF&zwaGP0R7NB7pI@FH-~G1GU{fD|9-0Q!pQ_VT>eaE{N!i14GF_ zy*ybG<84bN9SbrhC~E`=w|lmru!dHEmp!#e{$b-t9UHlsUKVqy%f8`3r|osauTHyA zx&Vql`BWHu&7Cy@$oT!>2|{wEr&&LqH-#D%ZymLIK1yKxOy#hprcB*KPkn+bKK1mPQn&rMVup~PoG)ubY}Ab$gvIl!&|7Z_muC{U4ucAA~4dW-r)cMGbHY;LV++^jno{`ih;P~rOcgrrj;}Qqj^XDhV zWivK8j^Y4>xmxEAG-%yN9)b8@eY+J+Qyibz%>j zQ??xmx*Yku9I^>~Z{At74ErT>FKR(VqDr9-0_#o!+u^tulm51;SIg-K5Cp$=IvKNW?8rjnuJq;LVLEz7cXWc-^PKaR- zR7hDl?6PHd90f#o(*1`2{+f#I?GK4evrdl{Q~E`z=WE?h(1F_`(p!Tj(^+(s-QRn> zF06&wj|}2H_{qTn!<~}3)RzV{9?#jgcZ(1y56U7$@|V3%UPvt%pKy64^M1Z!bAFdm zfL_yMFZw|DfZ%jCRoxxcC>+QuZWMJDOd-?jVy;j}WFrK-L2?CJoGBj4w9;E@L9qHe zo^l65WKH0g)kRkteVpLTt*>FBH8W06Rd$ zztGEeG`|b6*l4KDFNN)`)?P-JrK^KU)wJ?~JO*Q+B+=z0^(dMBOm)BG*4V@VdX^h> z!e(QI&-S-c{IOhg_27GrL?&6~tGwKLn0vdHVlhnNs=ee0c9@VCXnOJX4xurjWlTkG z1^-L7hrcBid*JpqNVTxO9%Ag$*YR9}@Ha>V+BFnXq_qa=VUlZ3bd2j+eHRTWk5o!t zzg`7cr|Q~b8AE8{GmOB>$YR8Tb_&DJ_vaLnOIKg7Iw&qxbtYq7MF?}XLCyj4s3Q$f)KrP#v zV@p*sn;Wbeph**bubZlMf6UPw(o;o(E94?9hNYrulIdjHHsRjP(d&@TitXT8*nGXF zvtQV##xQEC9lUY9j7A>;13paY1~ z-SY@lwSAhy0_OTVU^~_YFF%jM#+^ZQFFO1v2Z5%HddENpT;c4dBNI-%zTL6NYPC)7 z@%8miV+k8Zox~z6HFvTW=mYRrU}S*~yEX0QYe2z^vd)~oEbMjnBd9C4MApj})O>A0 z0gM85*wK+o6M;bF|Me~|27H_G*aOM4Rrkm_bemA71sA$TWt>l3*hLOLEpME(7l~_J zgHrw5to>}sR<)By6>RQ3M{@6Sur!=Vudd*#c}B*-1fwl4I}y4iT3GaBP5DLYB~i_( z%FpDohl-se$mo4Z4t*JpG>SgD+_HXDt3{(|Rby-AduTLa4gSklk%yl50X^@%^n?qp zOEvA8)}$$yNmVMlzR_(nRm`H`UF~ra3?N!qas~@}L3`Bi(q@zKGC9TFPQcU@L)cWE zwfljSrOZw*{HUTy0AqxVZg82qb1|<+o3sY|Y{wneZY(ovId1lK^!k&6?G_*GVk7=p z22cZ%h`oC+$uw_IA4934 zPR`wlZ&##eJcwAiP8wJVKt@Qd;gWAW0^16d%Mn%M3PQ|8+5P(A7odPrg5B)F_g>I& zk*id}&Dq38I#$$p159Xo^86)ynx#s!)(x8?f*l!Wcs3$Ok3s9Vu`b(&8BP=8U35qm zLh@_i8OL{4N=9Ez8}=E)=m26-cE&jGqS{fP=B*|Dl`KYAq=%bIixf~|cHg0i`tXc4^`LZn0^gFH_ z2YO~)SOlg3EHzuM19<1c>RNLuVyh}L+0I~hna?#G4v6cu`sVs2jI@BBzlPX7M65KdgvYA-6sotR5dv&4~0A+(&XhaqAp~!kh_AL*# zyMl#Ah+Xl+d&Xf!bDL%yuBp&C^6@JpO+KAEQ!fztYL$(_!~mcr+2VO*Vv|D;LPH5< zJ8VunqG_wOCUune!m3lrX!7BpaX)|fG)B@G{s-*Y=(E9u81bADZVG8{S~lM9V!RJ6 zs*d!gug@R)F>Mdx1%9zm0H)`rDwhN}5f$E=IFP0?8cH$DKj?NWa4ITiq ztS(3#{Ztb*IY5fianC|5%lZ9eT4rd>VN|Qabk|Z23<8D!CAAE-I?b0tsTE%Y3(m8LO2!zg~r&Bzbp$ zVQe(_+fz?#&|*PbpVDV%(N>EDfcDV)9_E}F&=HZnpg`ceD2}7S`JUU}iV1*tD+&|I z#wuw74m*y95&`W9Ag=%B2*inl+{7jdE|O=O8~tN9-gI7U7F3HjBhDI zw?V+yYZKM%-xz^}i)%p*j{F=qvu60u0w_#y)fN<9`O zXuPpVKRkZ<^7ug}Nyi&$4}?Jn`K|{6YSJ15ze~t#W~&M59glWE{MX*Dm*D58j~||3 z?+bB4Lakz~F~zqUxtDpdF4cE@R9zlb&IL~CZ{9%?hcS)45N@;hlS4^H$vve!$~`XXH5Y-auS@i0HC#~V;ZXby$#BsDpp}p);$zN7+cK(c zQ4-TP+0wpTD3DVz>@Q=Z(cQ74daEKj!x6aN7styfiCt}DZW|lAa+Y|>4IblFBeABJ zGRDwqO=yRdgwQGXB>Q|!9*!W;3JnGQH5x7sQ;w*93Php6+`~)v><}SP5L~A#tW8Z0 za9!;z5)`g`S^Ta197zKqv`?9p}mj z2qy0rZn_@RJ4x$6O}CePNn#}gviw#~FXqr1(lQW@;nWfx_Xb05GjTOpDz21rI(N@oCV#*XvBXeE?|h;m2N___sI_aR-z#fR-U)_D+kuDH!Hp7qNZOCVho zVXC#!sje0mXNbNX9ZYj1vPIQj1uMsO!8Mt$s!To>G{#IRaNnDO4(oek(93mCdMolZ zDsy(Ku+#kX{N?%O(}z_-s-R5_r!ZqqeM5z?#PVEAGqt}v`TWYQx5hl{C-m?jznUh^ z*}K~un=y=19jDZjmxh5Ztg@w@fU9Wqbe0FLuNNTZFMCEx&DQ~5PF&HF(Ved)W;uZY zLUw$j%n%tboa1bH5hJ_f zf^G~hE?CLOLv<(7qS!TvuW#KqG>vH7&YZk4^3$Ue(1Zj}6tB=1BP~Rp%sh+Em^FdC2=ci&u!dtle2I1 zLm74UUw)hZ|9{c7|5@fo6q7;5;I}T;YtBCJia%-iH?Kdn(JYF_tSXHJBr3pqVCg zXXz-$kX=QbEmW}IqO>`7@kw^vTb2$F%R$f)>^Io<2PXqjV@OXge|>uR8&}X~`fnFB z-a&%{#5-}cVk0Ni)YjGO8e&{|0LhvW1rwF$kFRfnw5f;%zyGGb{?Gg~`LX zQ84!_7`;Q`PEs&?$mkXu`%oc0dMx%O#SqfCE9+w{?YBz+>%j%&{|^~Z3m&OLunP`q zfS71NS?v=;*Rs$cV-!Juy~m|^MUJ%Z{apP9bBsRlrqJSK^wF?Xc5_DN7 
zVVERLrb)z-?biU~?~+q1o;O`%Ty?L(adxNI{$Kc=Or!De`uG*MYz78y7a*BYC!?#- zVKqSa4g@0DXiYH4G(FwQ+byMoSuQoUnG1Dt=5iHAyEb~|8_K8c{j@a~Df03!j0Qe`0I3!gO0f%z^{K{3eRI3KG zXKH!pg+OeV+}vM(o5*Pp1>{kSoBCU0Doq4BLnkdL3_Yh5`qre;sju&^I`;he!>9Lq z+I|PI|NfO<6<{t?^>BlH80<)yj4E$Ya25&@6sy z5*paVO^_!8-70&GZK12(#tGv6Qs}E;!|c{{--d#g9q7_C zbZ3mABW#&m3euf3#O%abE$4+da-AN#s|qn;TskzS(x9Sfx!31;_*_TyvggsQCIjwT z%lV0IQhK)zU)C#nk=)=w0i0xwz#K4;ye~8ee}BY?*1IicC~;NC>Tz=^`xjT^2?n>kEdulF6Dw0id}9&b^? z&wqV-5&4yzsP>jah*I<&=m{2fWp^04=8nlK2dkElQMoj>`>yYl<*xl*2Insj1EOj7=R}an*Xf|!Eq_1k!1vXJhCVtsAw72tXDo~69(bw zj8j2KBj+){l{g5o$a|?O#xT6VtPQ6OGRMd@vboF(RV$KFf$ zQ^ufcJj-X2R-UK3mOj{$TvRej5x7sl&ZD;ey~o7Z#Wt~Nr`8r5Zj zaI}6KG)E(A4;xCBJk!g`?IVRkHskA}FLYajiKQ)*KB`HmllOXR>xMxPp8D*-!oh@E z+}5kPS>Y)D;8o6KL2850L|++62B&r;rtvKb5~I8e0I;@CxQ+>vP(_U|mtLHXIYkwLpP^L*LB2;b62P0ikIK^Tc!b^tOj*iWIH!*EDIJ9@)~EkSURM5MuO0v zizE-ZG|5_O-a*}D$?QQmskjILXW^FjjKKmHBc$`wy`_{^i_ejKIrN`0eN369+d}Mz zGc#n9(?X9JEqY?EmcigXfz5l(bylGNUZ&`-ee{=EZ!G8>V*JtOy>Q*lvFkJR<47c3vXibX@j;KYpy}rM;S&zfA{Vf z%ua8*a}$HI)1y{Zb$d9jUz>^pdAK+Q05|&ZnS9k3(1k-VS!GZyp`f`X-Srm8MRpfH zdet_nRj{LTEhbiYxo84MH%K&t5=Z+jdd$ckZVhC#UYG@P=+(Y&J6Gyzs26Ny;8oWz zjNq>`y*QKOgQ$O1f#`c7?4+^kx9r4(lFi=)q6ab6`uxFcduY z8SSz%U>a~;!D{KeB|2!Eudqrvq6VJB>Ypn}=9Olfx&?wzvXX&}4-)^&bO?m6=v2R9 zu#Zt=!0|3?#uni{u=7Jc`^3e=q~p>A{%2orsV~*G*TJpuW0Eu91m$~Y&CPpyB*#D0 zLt`+J(@xb7m_!HHf@`uC;TV{k3H+u>&UV#8SgwG$K*!SLmmxVb7ALtpZhLWQzO8%j z>ZYxto5tarz}@ZyC~R%S3{d^}7eE^RR7h}VnNT^sb&Fc@OnMwiZ%sQmI zv9TY{xrf)UKjSD)g&;K9d@Mpime+fZ%cDH9v}uFx0Hr3aj@G!6Jr=6)S8AUw;D?V7 zOHe756ju*unF?)(>ZP2|r+-f~C5_4ClXWSRK47}9Lc%nL#^j5f@k)NMca;D*)Y$oW zcypFfSv3<|g@NJn3VypR!#`qK9>z#~jgcW0fn8UaSiT2g@J$D8u=35LZn5c5F6gUX zpUwa!(2lTFaOcwUgN2;Kk%4Z5vmL%{v1V0v=&tlCZG+Y;h;Zv#4sqNgx1smVK%aS1 zvVm@!VXCd$6K@c$PM`R@&9#`|L~ObtuLo{l=3*VsN&rQXSWM-pVY5D?$uh=l;eHcd zdmS>!S@S>+`oN5W9gX0+z>v-m_Mk}BWKrFBCpAr#1q?N?sK<9NV+=LXOy9`QWbh=) z`}^$Yb099MiV3O;P^!Z5dJjCg7le$sBCZjn2fQG(d$BW80@1}>EgQ4jqRi7sf^TfX z>~~%E|_qx%4ANe~NO=)a9Zyp4yE40f;WO0w7+ICg)C48A!0rYTHsO zU01&9C@z%OpitT9|E8-1(P$}3UJ!6NB@fc8M2~e;M&pB_YEHb#mv0@M&h5R$I~wVQ zpI;tDvhM{(ljkdqh-!tY>b1E_*%a2g$N5K7b$m&5zP5iOaWUYT(Mn6k5m5e2e=u=m z0;nun{#(&FYS>kaKi~R0-pYtM>VrL(+y}ImuN?@eh_hC_x9nfLBewjKq`dly`@>vq zS@&#MvDivQXlogmRoBBLZT8XMT3^pY*>>7A4_G^W2S>_Y_YSy|P%c~`pCg+HexR>V z^5p=Nj_2IfsoqU13on;g zlKURigf;kJYw=K8CSMN<3*NnFs7-eEj2&1xb;^_8>>Nbn7Svj-`Uzm4_iE+YxEA67 zGLn?hh>kvBITad+*?EzoZh*xHRzZq%EQ--ET+%uA6 z2xiiG{d%?ZtNWl~<%Gc4dV(3uw1t>+C}nG3;5Lp0uXA?FL~wT8i@l^ zalBYtm$DtfW&)(Nr3gZkzN$Ho*?mgs;6C3ha_A*Hs6BrmFx z5rD59cp>$x@}`qleNxkaaJ!6!Ns{5%BNSz7LoPjy4vjK%c8r&%u8^W8Acqot3t!AVJF6dO-Y{C zt$W7eAnHR`CtD;~+|WR?Bk1bV8KubhUU}`L<(jz;@hE0IhR4tfDgitCPcRy!XOvZ# z=7@FE)K14RD-rGi;|7Pc*hJIvoEW)Ta62(vP8CApAO|mmnO6%8fVfW9% z+z)%plRb1fOouTP@^LF}Rn3DyuS9yv#Tc%~Go2)C7PUcP6h52#*i=JQ50QTGTcbG| zbH;JUnNE9~G-T8E;vVgTy*duL>&}D+7OIV0V$mdF4CJh%4=tb^gRck)zj}4J4sCt3 z5&4zVGwL1=?YHK#4+N;lC*Q||vu}1s3_u^7y5O=E6wS-LNWP;OtKWkJ?>!Sv9dU5! 
z$6%^+@IqZ~gI_)5xTr2p6?PLrw)cb&msA%V_RvV;`ynpU=|}twrCp$F>agOX1{~5p z^{!K|b_{oV6fF2P>(PZ^le@>Ii(s`VtXL3-;DDA_hDw8+8}GW89zf2oUk}*dW4FNL z#b-K|lDKpg9iD1Qqn0X$7G@sggi}mfC_ICvlkWG;eXP8{R6JI{9Jb?UK0y zj|hSBP-`6;_28upu!L#!l-948w1+Sy8(gC5S3j( zHvy2Q)xmg7mW3oFPmRU1-F&aN!rg0~an;50_2cv34BNXb1x5o}+78k>dfDA&T*CvkJVjVz+AiJ=XOR zNNI>KJ%9-@BsY4&WaNbHbLux7{AyaT%`sf(fcI7bQbMtrwNOoC96cuoo+>V9BwgQ9 z4U+vm@#uTS#hQi|VKF~SV$3?ELrAI)O*tH*^`vtP8YbAn2HU3~=#w2ZQW7tZ*9D)p zw!K=1UxIgFl%MQG+F>%po3g+yf)_c|&3jX`zBpnLf}EmD`I2 zp<{{%JINdPpV{M6Ba}sMeJGW@85Ft{;$sMfSz3{o$lN}SNmcV|>*Nk8xdk!U&-p1; z!~`~B$Lr^bLQidQ3n%{+1o zS~Az><8^VsOijb;yA!_?i8|ri#UXzG{OgcU?hxP;HBYh!K=Py%+;5TkvFBPw6ShS- zVJJ2=X=|WC(`+@%7)K+Q98E#H^zHL>2#U>i@0&3jp_(>kqr2R9NGG;JwNo1hITAjB z*|%qxMmx3K+iWj>=utW)u7e-->m}&?&>(?%XO?Dzc$#%VD!er=iU&x`1&|;5^_sOE z(PDaC39@0-&0YfY4%7ql0cU`Zi1)!)n6FG5h8~7V2pA5HkS5T2eVyPQciGHb*a-G8 z=wTCN1UWPmS#>`fTXq*V=CH#U;u7o>6*$Ru|7CPXd6-!%;hr3*?6CnPwt}<^0SQDU zye$0C1xmnB3=W{IYqTA2J{k(Y`u`1!fmhvLx7PjYT5xO?Gp3)I!opT z*7MHAKb`Nb$V7%f1_aHz_tCR8!&i(^3gyFg*pqe#NlGh}B%u0xV=R>~ZS34kBD&^AE8Q3nj0X|KAlW_uX;;Bj6;QPc zv;rn4e*UkY{`TS1d&@vNMAPn(i&V&u98GjRtG;ey(;FbVsTOQHo-01)Q)3N5kU}at zS%L!D4Q6pNh}y&x9)+52Dx-zUn)%(TQms2!6-V016TYn=H8&(Z0-9rEUIMw?xE^zWMZik>}Mk_g}U>^<|Tsv zHvCui*=7}GN?z{5L9mfOSo?NPmLb`Jx0YG?OV3$M>npLB)Z&-e#f@%7;@OxcSS3I$ z;U(e_TF(-3I343?cXjn+iQjomAEkInh*y^4c=r(yf6=Ql_k=dYqnxHPmlIqzL1onA zVUEje*{h81x)rlifKVg?WRzP6U2y7Qw0|I{Ep}T+(99#M75A$FOpJteKHhslB=>LUzmpcI{x}}nYUPWB0_#+Ab~6dlz|VxKf2AK z@vyhh-z|+3bR0o{EoX~mC@0h0+!CV?0Q^j$VLeiXM~<$*(AA7DyA@^f&HE^H2^LLU z_)eP}TYf*~b;gL$GmXd|k|?=I|KaHr|Ir#s!EWp_l&;eAJ8cx5P@SYqm<0uM zWF3HzJ2N3S)F$0=XQM49fwzjpqVh;Z3AtUL>N8E^DoG#D1n?>x@U3WeMZ+Ldg=-Ml zXslJevpsi(ke^#aSvU@@ESVZy%@b3FyBmy*^WwD#2}~l+rBMtV5+W2ut;9p3S_j}Y zf%;u?ADgfPG(n%03+{RzQwQh}$iv3%gK4LF2+{+l9rq__gQK)^dxL0eTa+!tyg%GS zY_Hta4h~} z_VcFT>DCXK%9--^YIDun}J|>a?b4JyYKZfq(%DMD6Mx= zoRwnlQbDmJz>N`9G}=;ys_XsU1rdU>^s=@a@0`G~kAu#6-? 
[... remainder of the base85-encoded GIT binary patch data for data/csv/lineitem1k.tbl.gz omitted ...]

literal 0
HcmV?d00001

diff --git a/data/csv/real/web_page.csv b/data/csv/real/web_page.csv
new file mode 100644
index 0000000..0798017
--- /dev/null
+++ b/data/csv/real/web_page.csv
@@ -0,0 +1,60 @@
+1|AAAAAAAABAAAAAAA|1997-09-03||2450810|2452620|Y|98539|http://www.foo.com|welcome|2531|8|3|4
+2|AAAAAAAACAAAAAAA|1997-09-03|2000-09-02|2450814|2452580|N||http://www.foo.com|protected|1564|4|3|1
+3|AAAAAAAACAAAAAAA|2000-09-03||2450814|2452611|N||http://www.foo.com|feedback|1564|4|3|4
+4|AAAAAAAAEAAAAAAA|1997-09-03|1999-09-03|2450812|2452579|N||http://www.foo.com|general|3732|18|7|1
+5|AAAAAAAAEAAAAAAA|1999-09-04|2001-09-02|2450812|2452597|N||http://www.foo.com|welcome|3732|18|3|1
+6|AAAAAAAAEAAAAAAA|2001-09-03||2450814|2452597|N||http://www.foo.com|ad|3732|18|7|4
+7|AAAAAAAAHAAAAAAA|1997-09-03||2450815|2452574|N||http://www.foo.com|feedback|3034|18|7|4
+8|AAAAAAAAIAAAAAAA|1997-09-03|2000-09-02|2450815|2452646|Y|1898|http://www.foo.com|protected|3128|12|2|4
+9|AAAAAAAAIAAAAAAA|2000-09-03||2450807|2452579|Y|84146|http://www.foo.com|welcome|3128|13|5|3
+10|AAAAAAAAKAAAAAAA|1997-09-03|1999-09-03||2452623|N||http://www.foo.com|||||
+11|AAAAAAAAKAAAAAAA|1999-09-04|2001-09-02|2450814|2452611|N||http://www.foo.com|welcome|7046|23|4|4
+12|AAAAAAAAKAAAAAAA|2001-09-03||2450815|2452611|N||http://www.foo.com|protected|7046|17|4|4
+13|AAAAAAAANAAAAAAA|1997-09-03||2450807|2452629|N||http://www.foo.com|protected|2281|6|4|1
+14|AAAAAAAAOAAAAAAA|1997-09-03|2000-09-02|2450810|2452639|N||http://www.foo.com|dynamic|5676|19|6|0
+15|AAAAAAAAOAAAAAAA|2000-09-03||2450810|2452639|N||http://www.foo.com|dynamic|2469|10|5|2
+16|AAAAAAAAABAAAAAA|1997-09-03|1999-09-03|2450814|2452601|Y|33463|http://www.foo.com|feedback|701|2|1|4
+17|AAAAAAAAABAAAAAA|1999-09-04|2001-09-02|2450812|2452645|N||http://www.foo.com|general|701|11|1|3 +18|AAAAAAAAABAAAAAA|2001-09-03||2450812|2452608|N||http://www.foo.com|ad|4080|11|6|3 +19|AAAAAAAADBAAAAAA|1997-09-03||2450808|2452648|Y|57610|http://www.foo.com|general|2347|9|7|4 +20|AAAAAAAAEBAAAAAA|1997-09-03|2000-09-02|2450809|2452555|Y|46487|http://www.foo.com|ad|1147|3|6|0 +21|AAAAAAAAEBAAAAAA|2000-09-03||2450809|2452555|Y|10897|http://www.foo.com|general|1147|3|6|4 +22|AAAAAAAAGBAAAAAA|1997-09-03|1999-09-03|2450812|2452565|Y|20213|http://www.foo.com|general|5663|25|3|4 +23|AAAAAAAAGBAAAAAA|1999-09-04|2001-09-02|2450812|2452623|Y|20213|http://www.foo.com|order|4729|23|6|4 +24|AAAAAAAAGBAAAAAA|2001-09-03||2450812|2452646|Y|20213|http://www.foo.com|dynamic|5918|23|6|1 +25|AAAAAAAAJBAAAAAA|1997-09-03||2450811|2452620|N||http://www.foo.com|feedback|1526|9|4|2 +26|AAAAAAAAKBAAAAAA|1997-09-03|2000-09-02|2450812|2452636|Y|98376|http://www.foo.com|ad|1826|9|3|1 +27|AAAAAAAAKBAAAAAA|2000-09-03||2450812|2452607|Y|98376|http://www.foo.com|protected|1553|9|1|1 +28|AAAAAAAAMBAAAAAA|1997-09-03|1999-09-03|2450807|2452572|N||http://www.foo.com|protected|1308|4|1|2 +29|AAAAAAAAMBAAAAAA|1999-09-04|2001-09-02|2450808|2452611|N||http://www.foo.com|order|1308|4|1|2 +30|AAAAAAAAMBAAAAAA|2001-09-03||2450808|2452611|N||http://www.foo.com|general|3872|18|1|4 +31|AAAAAAAAPBAAAAAA|1997-09-03||2450810|2452596|N||http://www.foo.com|general|1732|3|6|0 +32|AAAAAAAAACAAAAAA|1997-09-03|2000-09-02|2450808|2452585|N||http://www.foo.com|welcome|5104|20|7|4 +33|AAAAAAAAACAAAAAA|2000-09-03||2450808|2452585|N||http://www.foo.com|protected|2129|7|1|0 +34|AAAAAAAACCAAAAAA|1997-09-03|1999-09-03|2450808|2452616|N||http://www.foo.com|welcome|2726|12|5|2 +35|AAAAAAAACCAAAAAA|1999-09-04|2001-09-02|2450808|2452591|N||http://www.foo.com|protected|2726|12|1|2 +36|AAAAAAAACCAAAAAA|2001-09-03||2450812|2452613|N||http://www.foo.com|dynamic|2726|3|1|2 +37|AAAAAAAAFCAAAAAA|1997-09-03||2450809|2452556|N||http://www.foo.com|ad|3076|15|3|0 +38|AAAAAAAAGCAAAAAA|1997-09-03|2000-09-02|2450811|2452583|Y|37285|http://www.foo.com|general|3096|18|3|0 +39|AAAAAAAAGCAAAAAA|2000-09-03||2450815|2452583|N||http://www.foo.com|general|3096|18|3|0 +40|AAAAAAAAICAAAAAA|1997-09-03|1999-09-03|2450813|2452576|N||http://www.foo.com|general|4402|18|4|2 +41|AAAAAAAAICAAAAAA|1999-09-04|2001-09-02|2450813|2452579|Y|16769|http://www.foo.com|welcome|784|3|4|4 +42|AAAAAAAAICAAAAAA|2001-09-03||2450813|2452579|Y|60150|http://www.foo.com|dynamic|1451|3|4|4 +43|AAAAAAAALCAAAAAA|1997-09-03||2450814|2452580|Y|64793|http://www.foo.com|ad|3760|12|3|2 +44|AAAAAAAAMCAAAAAA|1997-09-03|2000-09-02|2450811|2452602|Y|92078|http://www.foo.com|ad|4179|19|7|1 +45|AAAAAAAAMCAAAAAA|2000-09-03||2450811|2452575|Y|98633|http://www.foo.com|feedback|4584|19|7|4 +46|AAAAAAAAOCAAAAAA|1997-09-03|1999-09-03|2450809|2452574|N||http://www.foo.com|protected|1711|4|5|1 +47|AAAAAAAAOCAAAAAA|1999-09-04|2001-09-02|2450815|2452574|N||http://www.foo.com|welcome|1711|4|5|1 +48|AAAAAAAAOCAAAAAA|2001-09-03||2450815|2452622|N||http://www.foo.com|ad|1732|9|5|1 +49|AAAAAAAABDAAAAAA|1997-09-03||2450809|2452618|N||http://www.foo.com|order|4894|20|3|2 +50|AAAAAAAACDAAAAAA|1997-09-03|2000-09-02|2450808|2452615|N||http://www.foo.com|welcome|5262|16|5|2 +51|AAAAAAAACDAAAAAA|2000-09-03||2450811|2452564|N||http://www.foo.com|general|3423|19|7|1 +52|AAAAAAAAEDAAAAAA|1997-09-03|1999-09-03|2450815|2452606|N||http://www.foo.com|welcome|3306|21|7|1 
+53|AAAAAAAAEDAAAAAA|1999-09-04|2001-09-02|2450808|2452636|N||http://www.foo.com|dynamic|3306|21|7|1 +54|AAAAAAAAEDAAAAAA|2001-09-03||2450808|2452629|N||http://www.foo.com|protected|1931|7|2|2 +55|AAAAAAAAHDAAAAAA|1997-09-03||2450811|2452549|N||http://www.foo.com|order|3788|19|1|0 +56|AAAAAAAAIDAAAAAA|1997-09-03|2000-09-02|2450815|2452554|N||http://www.foo.com|protected|5733|24|2|2 +57|AAAAAAAAIDAAAAAA|2000-09-03||2450811|2452568|N||http://www.foo.com|ad|5733|16|2|2 +58|AAAAAAAAKDAAAAAA|1997-09-03|1999-09-03|2450813|2452619|Y|7625|http://www.foo.com|ad|6577|24|4|3 +59|AAAAAAAAKDAAAAAA|1999-09-04|2001-09-02|2450813|2452624|Y|80555|http://www.foo.com|general|6577|24|2|3 +60|AAAAAAAAKDAAAAAA|2001-09-03||2450813|2452566|Y|80555|http://www.foo.com|welcome|6577|24|2|3 diff --git a/data/csv/unquoted_escape/human_eval.tsv b/data/csv/unquoted_escape/human_eval.tsv new file mode 100644 index 0000000..574d08a --- /dev/null +++ b/data/csv/unquoted_escape/human_eval.tsv @@ -0,0 +1,339 @@ +HumanEval/0 from typing import List\ +\ +\ +def has_close_elements(numbers: List[float], threshold: float) -> bool:\ +\ """ Check if in given list of numbers, are any two numbers closer to each other than\ +\ given threshold.\ +\ >>> has_close_elements([1.0, 2.0, 3.0], 0.5)\ +\ False\ +\ >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)\ +\ True\ +\ """\ + has_close_elements \ for idx, elem in enumerate(numbers):\ +\ \ for idx2, elem2 in enumerate(numbers):\ +\ \ \ if idx != idx2:\ +\ \ \ \ distance = abs(elem - elem2)\ +\ \ \ \ if distance < threshold:\ +\ \ \ \ \ return True\ +\ +\ return False\ + \ +\ +METADATA = {\ +\ 'author': 'jt',\ +\ 'dataset': 'test'\ +}\ +\ +\ +def check(candidate):\ +\ assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.3) == True\ +\ assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.05) == False\ +\ assert candidate([1.0, 2.0, 5.9, 4.0, 5.0], 0.95) == True\ +\ assert candidate([1.0, 2.0, 5.9, 4.0, 5.0], 0.8) == False\ +\ assert candidate([1.0, 2.0, 3.0, 4.0, 5.0, 2.0], 0.1) == True\ +\ assert candidate([1.1, 2.2, 3.1, 4.1, 5.1], 1.0) == True\ +\ assert candidate([1.1, 2.2, 3.1, 4.1, 5.1], 0.5) == False\ +\ + +HumanEval/1 from typing import List\ +\ +\ +def separate_paren_groups(paren_string: str) -> List[str]:\ +\ """ Input to this function is a string containing multiple groups of nested parentheses. 
Your goal is to\ +\ separate those group into separate strings and return the list of those.\ +\ Separate groups are balanced (each open brace is properly closed) and not nested within each other\ +\ Ignore any spaces in the input string.\ +\ >>> separate_paren_groups('( ) (( )) (( )( ))')\ +\ ['()', '(())', '(()())']\ +\ """\ + separate_paren_groups \ result = []\ +\ current_string = []\ +\ current_depth = 0\ +\ +\ for c in paren_string:\ +\ \ if c == '(':\ +\ \ \ current_depth += 1\ +\ \ \ current_string.append(c)\ +\ \ elif c == ')':\ +\ \ \ current_depth -= 1\ +\ \ \ current_string.append(c)\ +\ +\ \ \ if current_depth == 0:\ +\ \ \ \ result.append(''.join(current_string))\ +\ \ \ \ current_string.clear()\ +\ +\ return result\ + \ +\ +METADATA = {\ +\ 'author': 'jt',\ +\ 'dataset': 'test'\ +}\ +\ +\ +def check(candidate):\ +\ assert candidate('(()()) ((())) () ((())()())') == [\ +\ \ '(()())', '((()))', '()', '((())()())'\ +\ ]\ +\ assert candidate('() (()) ((())) (((())))') == [\ +\ \ '()', '(())', '((()))', '(((())))'\ +\ ]\ +\ assert candidate('(()(())((())))') == [\ +\ \ '(()(())((())))'\ +\ ]\ +\ assert candidate('( ) (( )) (( )( ))') == ['()', '(())', '(()())']\ + +HumanEval/2 \ +\ +def truncate_number(number: float) -> float:\ +\ """ Given a positive floating point number, it can be decomposed into\ +\ and integer part (largest integer smaller than given number) and decimals\ +\ (leftover part always smaller than 1).\ +\ +\ Return the decimal part of the number.\ +\ >>> truncate_number(3.5)\ +\ 0.5\ +\ """\ + truncate_number \ return number % 1.0\ + \ +\ +METADATA = {\ +\ 'author': 'jt',\ +\ 'dataset': 'test'\ +}\ +\ +\ +def check(candidate):\ +\ assert candidate(3.5) == 0.5\ +\ assert abs(candidate(1.33) - 0.33) < 1e-6\ +\ assert abs(candidate(123.456) - 0.456) < 1e-6\ + +HumanEval/3 from typing import List\ +\ +\ +def below_zero(operations: List[int]) -> bool:\ +\ """ You're given a list of deposit and withdrawal operations on a bank account that starts with\ +\ zero balance. Your task is to detect if at any point the balance of account fallls below zero, and\ +\ at that point function should return True. 
Otherwise it should return False.\ +\ >>> below_zero([1, 2, 3])\ +\ False\ +\ >>> below_zero([1, 2, -4, 5])\ +\ True\ +\ """\ + below_zero \ balance = 0\ +\ +\ for op in operations:\ +\ \ balance += op\ +\ \ if balance < 0:\ +\ \ \ return True\ +\ +\ return False\ + \ +\ +METADATA = {\ +\ 'author': 'jt',\ +\ 'dataset': 'test'\ +}\ +\ +\ +def check(candidate):\ +\ assert candidate([]) == False\ +\ assert candidate([1, 2, -3, 1, 2, -3]) == False\ +\ assert candidate([1, 2, -4, 5, 6]) == True\ +\ assert candidate([1, -1, 2, -2, 5, -5, 4, -4]) == False\ +\ assert candidate([1, -1, 2, -2, 5, -5, 4, -5]) == True\ +\ assert candidate([1, -2, 2, -2, 5, -5, 4, -4]) == True\ + +HumanEval/4 from typing import List\ +\ +\ +def mean_absolute_deviation(numbers: List[float]) -> float:\ +\ """ For a given list of input numbers, calculate Mean Absolute Deviation\ +\ around the mean of this dataset.\ +\ Mean Absolute Deviation is the average absolute difference between each\ +\ element and a centerpoint (mean in this case):\ +\ MAD = average | x - x_mean |\ +\ >>> mean_absolute_deviation([1.0, 2.0, 3.0, 4.0])\ +\ 1.0\ +\ """\ + mean_absolute_deviation \ mean = sum(numbers) / len(numbers)\ +\ return sum(abs(x - mean) for x in numbers) / len(numbers)\ + \ +\ +METADATA = {\ +\ 'author': 'jt',\ +\ 'dataset': 'test'\ +}\ +\ +\ +def check(candidate):\ +\ assert abs(candidate([1.0, 2.0, 3.0]) - 2.0/3.0) < 1e-6\ +\ assert abs(candidate([1.0, 2.0, 3.0, 4.0]) - 1.0) < 1e-6\ +\ assert abs(candidate([1.0, 2.0, 3.0, 4.0, 5.0]) - 6.0/5.0) < 1e-6\ +\ + +HumanEval/5 from typing import List\ +\ +\ +def intersperse(numbers: List[int], delimeter: int) -> List[int]:\ +\ """ Insert a number 'delimeter' between every two consecutive elements of input list `numbers'\ +\ >>> intersperse([], 4)\ +\ []\ +\ >>> intersperse([1, 2, 3], 4)\ +\ [1, 4, 2, 4, 3]\ +\ """\ + intersperse \ if not numbers:\ +\ \ return []\ +\ +\ result = []\ +\ +\ for n in numbers[:-1]:\ +\ \ result.append(n)\ +\ \ result.append(delimeter)\ +\ +\ result.append(numbers[-1])\ +\ +\ return result\ + \ +\ +METADATA = {\ +\ 'author': 'jt',\ +\ 'dataset': 'test'\ +}\ +\ +\ +def check(candidate):\ +\ assert candidate([], 7) == []\ +\ assert candidate([5, 6, 3, 2], 8) == [5, 8, 6, 8, 3, 8, 2]\ +\ assert candidate([2, 2, 2], 2) == [2, 2, 2, 2, 2]\ + +HumanEval/6 from typing import List\ +\ +\ +def parse_nested_parens(paren_string: str) -> List[int]:\ +\ """ Input to this function is a string represented multiple groups for nested parentheses separated by spaces.\ +\ For each of the group, output the deepest level of nesting of parentheses.\ +\ E.g. 
(()()) has maximum two levels of nesting while ((())) has three.\ +\ +\ >>> parse_nested_parens('(()()) ((())) () ((())()())')\ +\ [2, 3, 1, 3]\ +\ """\ + parse_nested_parens \ def parse_paren_group(s):\ +\ \ depth = 0\ +\ \ max_depth = 0\ +\ \ for c in s:\ +\ \ \ if c == '(':\ +\ \ \ \ depth += 1\ +\ \ \ \ max_depth = max(depth, max_depth)\ +\ \ \ else:\ +\ \ \ \ depth -= 1\ +\ +\ \ return max_depth\ +\ +\ return [parse_paren_group(x) for x in paren_string.split(' ') if x]\ + \ +\ +METADATA = {\ +\ 'author': 'jt',\ +\ 'dataset': 'test'\ +}\ +\ +\ +def check(candidate):\ +\ assert candidate('(()()) ((())) () ((())()())') == [2, 3, 1, 3]\ +\ assert candidate('() (()) ((())) (((())))') == [1, 2, 3, 4]\ +\ assert candidate('(()(())((())))') == [4]\ + +HumanEval/7 from typing import List\ +\ +\ +def filter_by_substring(strings: List[str], substring: str) -> List[str]:\ +\ """ Filter an input list of strings only for ones that contain given substring\ +\ >>> filter_by_substring([], 'a')\ +\ []\ +\ >>> filter_by_substring(['abc', 'bacd', 'cde', 'array'], 'a')\ +\ ['abc', 'bacd', 'array']\ +\ """\ + filter_by_substring \ return [x for x in strings if substring in x]\ + \ +\ +METADATA = {\ +\ 'author': 'jt',\ +\ 'dataset': 'test'\ +}\ +\ +\ +def check(candidate):\ +\ assert candidate([], 'john') == []\ +\ assert candidate(['xxx', 'asd', 'xxy', 'john doe', 'xxxAAA', 'xxx'], 'xxx') == ['xxx', 'xxxAAA', 'xxx']\ +\ assert candidate(['xxx', 'asd', 'aaaxxy', 'john doe', 'xxxAAA', 'xxx'], 'xx') == ['xxx', 'aaaxxy', 'xxxAAA', 'xxx']\ +\ assert candidate(['grunt', 'trumpet', 'prune', 'gruesome'], 'run') == ['grunt', 'prune']\ + +HumanEval/8 from typing import List, Tuple\ +\ +\ +def sum_product(numbers: List[int]) -> Tuple[int, int]:\ +\ """ For a given list of integers, return a tuple consisting of a sum and a product of all the integers in a list.\ +\ Empty sum should be equal to 0 and empty product should be equal to 1.\ +\ >>> sum_product([])\ +\ (0, 1)\ +\ >>> sum_product([1, 2, 3, 4])\ +\ (10, 24)\ +\ """\ + sum_product \ sum_value = 0\ +\ prod_value = 1\ +\ +\ for n in numbers:\ +\ \ sum_value += n\ +\ \ prod_value *= n\ +\ return sum_value, prod_value\ + \ +\ +METADATA = {\ +\ 'author': 'jt',\ +\ 'dataset': 'test'\ +}\ +\ +\ +def check(candidate):\ +\ assert candidate([]) == (0, 1)\ +\ assert candidate([1, 1, 1]) == (3, 1)\ +\ assert candidate([100, 0]) == (100, 0)\ +\ assert candidate([3, 5, 7]) == (3 + 5 + 7, 3 * 5 * 7)\ +\ assert candidate([10]) == (10, 10)\ + +HumanEval/9 from typing import List, Tuple\ +\ +\ +def rolling_max(numbers: List[int]) -> List[int]:\ +\ """ From a given list of integers, generate a list of rolling maximum element found until given moment\ +\ in the sequence.\ +\ >>> rolling_max([1, 2, 3, 2, 3, 4, 2])\ +\ [1, 2, 3, 3, 3, 4, 4]\ +\ """\ + rolling_max \ running_max = None\ +\ result = []\ +\ +\ for n in numbers:\ +\ \ if running_max is None:\ +\ \ \ running_max = n\ +\ \ else:\ +\ \ \ running_max = max(running_max, n)\ +\ +\ \ result.append(running_max)\ +\ +\ return result\ + \ +\ +METADATA = {\ +\ 'author': 'jt',\ +\ 'dataset': 'test'\ +}\ +\ +\ +def check(candidate):\ +\ assert candidate([]) == []\ +\ assert candidate([1, 2, 3, 4]) == [1, 2, 3, 4]\ +\ assert candidate([4, 3, 2, 1]) == [4, 4, 4, 4]\ +\ assert candidate([3, 2, 3, 100, 3]) == [3, 3, 3, 100, 100]\ + From 20faecfa0015c8c122c52b96f3db04ab58a4ab4b Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 12:18:39 +0200 Subject: [PATCH 05/32] Add scripts --- data/csv/lineitem1k.tbl.gz | Bin 38116 -> 0 
bytes
 data/csv/real/web_page.csv              |  60 -----
 data/csv/unquoted_escape/human_eval.csv | 339 ------------------------
 data/csv/unquoted_escape/human_eval.tsv | 339 ------------------------
 scripts/generate_presigned_url.sh       |  32 +++
 scripts/install_s3_test_server.sh       |  13 +
 scripts/minio_s3.yml                    |  80 ++++++
 scripts/run_s3_test_server.sh           |  31 +++
 scripts/set_s3_test_server_variables.sh |  13 +
 9 files changed, 169 insertions(+), 738 deletions(-)
 delete mode 100644 data/csv/lineitem1k.tbl.gz
 delete mode 100644 data/csv/real/web_page.csv
 delete mode 100644 data/csv/unquoted_escape/human_eval.csv
 delete mode 100644 data/csv/unquoted_escape/human_eval.tsv
 create mode 100755 scripts/generate_presigned_url.sh
 create mode 100755 scripts/install_s3_test_server.sh
 create mode 100644 scripts/minio_s3.yml
 create mode 100755 scripts/run_s3_test_server.sh
 create mode 100644 scripts/set_s3_test_server_variables.sh

diff --git a/data/csv/lineitem1k.tbl.gz b/data/csv/lineitem1k.tbl.gz
deleted file mode 100644
index df6e512bcd152e00b591cd678fc87c30b03543e7..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 38116
[... 38116 bytes of base85-encoded GIT binary patch data omitted ...]

diff --git a/data/csv/real/web_page.csv b/data/csv/real/web_page.csv
deleted file mode 100644
index 0798017..0000000
--- a/data/csv/real/web_page.csv
+++ /dev/null
@@ -1,60 +0,0 @@
-1|AAAAAAAABAAAAAAA|1997-09-03||2450810|2452620|Y|98539|http://www.foo.com|welcome|2531|8|3|4
-2|AAAAAAAACAAAAAAA|1997-09-03|2000-09-02|2450814|2452580|N||http://www.foo.com|protected|1564|4|3|1
-3|AAAAAAAACAAAAAAA|2000-09-03||2450814|2452611|N||http://www.foo.com|feedback|1564|4|3|4
-4|AAAAAAAAEAAAAAAA|1997-09-03|1999-09-03|2450812|2452579|N||http://www.foo.com|general|3732|18|7|1
-5|AAAAAAAAEAAAAAAA|1999-09-04|2001-09-02|2450812|2452597|N||http://www.foo.com|welcome|3732|18|3|1
-6|AAAAAAAAEAAAAAAA|2001-09-03||2450814|2452597|N||http://www.foo.com|ad|3732|18|7|4
-7|AAAAAAAAHAAAAAAA|1997-09-03||2450815|2452574|N||http://www.foo.com|feedback|3034|18|7|4
-8|AAAAAAAAIAAAAAAA|1997-09-03|2000-09-02|2450815|2452646|Y|1898|http://www.foo.com|protected|3128|12|2|4
-9|AAAAAAAAIAAAAAAA|2000-09-03||2450807|2452579|Y|84146|http://www.foo.com|welcome|3128|13|5|3
-10|AAAAAAAAKAAAAAAA|1997-09-03|1999-09-03||2452623|N||http://www.foo.com|||||
-11|AAAAAAAAKAAAAAAA|1999-09-04|2001-09-02|2450814|2452611|N||http://www.foo.com|welcome|7046|23|4|4
-12|AAAAAAAAKAAAAAAA|2001-09-03||2450815|2452611|N||http://www.foo.com|protected|7046|17|4|4
-13|AAAAAAAANAAAAAAA|1997-09-03||2450807|2452629|N||http://www.foo.com|protected|2281|6|4|1
-14|AAAAAAAAOAAAAAAA|1997-09-03|2000-09-02|2450810|2452639|N||http://www.foo.com|dynamic|5676|19|6|0
-15|AAAAAAAAOAAAAAAA|2000-09-03||2450810|2452639|N||http://www.foo.com|dynamic|2469|10|5|2
-16|AAAAAAAAABAAAAAA|1997-09-03|1999-09-03|2450814|2452601|Y|33463|http://www.foo.com|feedback|701|2|1|4
-17|AAAAAAAAABAAAAAA|1999-09-04|2001-09-02|2450812|2452645|N||http://www.foo.com|general|701|11|1|3 -18|AAAAAAAAABAAAAAA|2001-09-03||2450812|2452608|N||http://www.foo.com|ad|4080|11|6|3 -19|AAAAAAAADBAAAAAA|1997-09-03||2450808|2452648|Y|57610|http://www.foo.com|general|2347|9|7|4 -20|AAAAAAAAEBAAAAAA|1997-09-03|2000-09-02|2450809|2452555|Y|46487|http://www.foo.com|ad|1147|3|6|0 -21|AAAAAAAAEBAAAAAA|2000-09-03||2450809|2452555|Y|10897|http://www.foo.com|general|1147|3|6|4 -22|AAAAAAAAGBAAAAAA|1997-09-03|1999-09-03|2450812|2452565|Y|20213|http://www.foo.com|general|5663|25|3|4 -23|AAAAAAAAGBAAAAAA|1999-09-04|2001-09-02|2450812|2452623|Y|20213|http://www.foo.com|order|4729|23|6|4 -24|AAAAAAAAGBAAAAAA|2001-09-03||2450812|2452646|Y|20213|http://www.foo.com|dynamic|5918|23|6|1 -25|AAAAAAAAJBAAAAAA|1997-09-03||2450811|2452620|N||http://www.foo.com|feedback|1526|9|4|2 -26|AAAAAAAAKBAAAAAA|1997-09-03|2000-09-02|2450812|2452636|Y|98376|http://www.foo.com|ad|1826|9|3|1 -27|AAAAAAAAKBAAAAAA|2000-09-03||2450812|2452607|Y|98376|http://www.foo.com|protected|1553|9|1|1 -28|AAAAAAAAMBAAAAAA|1997-09-03|1999-09-03|2450807|2452572|N||http://www.foo.com|protected|1308|4|1|2 -29|AAAAAAAAMBAAAAAA|1999-09-04|2001-09-02|2450808|2452611|N||http://www.foo.com|order|1308|4|1|2 -30|AAAAAAAAMBAAAAAA|2001-09-03||2450808|2452611|N||http://www.foo.com|general|3872|18|1|4 -31|AAAAAAAAPBAAAAAA|1997-09-03||2450810|2452596|N||http://www.foo.com|general|1732|3|6|0 -32|AAAAAAAAACAAAAAA|1997-09-03|2000-09-02|2450808|2452585|N||http://www.foo.com|welcome|5104|20|7|4 -33|AAAAAAAAACAAAAAA|2000-09-03||2450808|2452585|N||http://www.foo.com|protected|2129|7|1|0 -34|AAAAAAAACCAAAAAA|1997-09-03|1999-09-03|2450808|2452616|N||http://www.foo.com|welcome|2726|12|5|2 -35|AAAAAAAACCAAAAAA|1999-09-04|2001-09-02|2450808|2452591|N||http://www.foo.com|protected|2726|12|1|2 -36|AAAAAAAACCAAAAAA|2001-09-03||2450812|2452613|N||http://www.foo.com|dynamic|2726|3|1|2 -37|AAAAAAAAFCAAAAAA|1997-09-03||2450809|2452556|N||http://www.foo.com|ad|3076|15|3|0 -38|AAAAAAAAGCAAAAAA|1997-09-03|2000-09-02|2450811|2452583|Y|37285|http://www.foo.com|general|3096|18|3|0 -39|AAAAAAAAGCAAAAAA|2000-09-03||2450815|2452583|N||http://www.foo.com|general|3096|18|3|0 -40|AAAAAAAAICAAAAAA|1997-09-03|1999-09-03|2450813|2452576|N||http://www.foo.com|general|4402|18|4|2 -41|AAAAAAAAICAAAAAA|1999-09-04|2001-09-02|2450813|2452579|Y|16769|http://www.foo.com|welcome|784|3|4|4 -42|AAAAAAAAICAAAAAA|2001-09-03||2450813|2452579|Y|60150|http://www.foo.com|dynamic|1451|3|4|4 -43|AAAAAAAALCAAAAAA|1997-09-03||2450814|2452580|Y|64793|http://www.foo.com|ad|3760|12|3|2 -44|AAAAAAAAMCAAAAAA|1997-09-03|2000-09-02|2450811|2452602|Y|92078|http://www.foo.com|ad|4179|19|7|1 -45|AAAAAAAAMCAAAAAA|2000-09-03||2450811|2452575|Y|98633|http://www.foo.com|feedback|4584|19|7|4 -46|AAAAAAAAOCAAAAAA|1997-09-03|1999-09-03|2450809|2452574|N||http://www.foo.com|protected|1711|4|5|1 -47|AAAAAAAAOCAAAAAA|1999-09-04|2001-09-02|2450815|2452574|N||http://www.foo.com|welcome|1711|4|5|1 -48|AAAAAAAAOCAAAAAA|2001-09-03||2450815|2452622|N||http://www.foo.com|ad|1732|9|5|1 -49|AAAAAAAABDAAAAAA|1997-09-03||2450809|2452618|N||http://www.foo.com|order|4894|20|3|2 -50|AAAAAAAACDAAAAAA|1997-09-03|2000-09-02|2450808|2452615|N||http://www.foo.com|welcome|5262|16|5|2 -51|AAAAAAAACDAAAAAA|2000-09-03||2450811|2452564|N||http://www.foo.com|general|3423|19|7|1 -52|AAAAAAAAEDAAAAAA|1997-09-03|1999-09-03|2450815|2452606|N||http://www.foo.com|welcome|3306|21|7|1 
-53|AAAAAAAAEDAAAAAA|1999-09-04|2001-09-02|2450808|2452636|N||http://www.foo.com|dynamic|3306|21|7|1 -54|AAAAAAAAEDAAAAAA|2001-09-03||2450808|2452629|N||http://www.foo.com|protected|1931|7|2|2 -55|AAAAAAAAHDAAAAAA|1997-09-03||2450811|2452549|N||http://www.foo.com|order|3788|19|1|0 -56|AAAAAAAAIDAAAAAA|1997-09-03|2000-09-02|2450815|2452554|N||http://www.foo.com|protected|5733|24|2|2 -57|AAAAAAAAIDAAAAAA|2000-09-03||2450811|2452568|N||http://www.foo.com|ad|5733|16|2|2 -58|AAAAAAAAKDAAAAAA|1997-09-03|1999-09-03|2450813|2452619|Y|7625|http://www.foo.com|ad|6577|24|4|3 -59|AAAAAAAAKDAAAAAA|1999-09-04|2001-09-02|2450813|2452624|Y|80555|http://www.foo.com|general|6577|24|2|3 -60|AAAAAAAAKDAAAAAA|2001-09-03||2450813|2452566|Y|80555|http://www.foo.com|welcome|6577|24|2|3 diff --git a/data/csv/unquoted_escape/human_eval.csv b/data/csv/unquoted_escape/human_eval.csv deleted file mode 100644 index 2880643..0000000 --- a/data/csv/unquoted_escape/human_eval.csv +++ /dev/null @@ -1,339 +0,0 @@ -HumanEval/0,from typing import List\ -\ -\ -def has_close_elements(numbers: List[float]\, threshold: float) -> bool:\ - """ Check if in given list of numbers\, are any two numbers closer to each other than\ - given threshold.\ - >>> has_close_elements([1.0\, 2.0\, 3.0]\, 0.5)\ - False\ - >>> has_close_elements([1.0\, 2.8\, 3.0\, 4.0\, 5.0\, 2.0]\, 0.3)\ - True\ - """\ -,has_close_elements, for idx\, elem in enumerate(numbers):\ - for idx2\, elem2 in enumerate(numbers):\ - if idx != idx2:\ - distance = abs(elem - elem2)\ - if distance < threshold:\ - return True\ -\ - return False\ -,\ -\ -METADATA = {\ - 'author': 'jt'\,\ - 'dataset': 'test'\ -}\ -\ -\ -def check(candidate):\ - assert candidate([1.0\, 2.0\, 3.9\, 4.0\, 5.0\, 2.2]\, 0.3) == True\ - assert candidate([1.0\, 2.0\, 3.9\, 4.0\, 5.0\, 2.2]\, 0.05) == False\ - assert candidate([1.0\, 2.0\, 5.9\, 4.0\, 5.0]\, 0.95) == True\ - assert candidate([1.0\, 2.0\, 5.9\, 4.0\, 5.0]\, 0.8) == False\ - assert candidate([1.0\, 2.0\, 3.0\, 4.0\, 5.0\, 2.0]\, 0.1) == True\ - assert candidate([1.1\, 2.2\, 3.1\, 4.1\, 5.1]\, 1.0) == True\ - assert candidate([1.1\, 2.2\, 3.1\, 4.1\, 5.1]\, 0.5) == False\ -\ - -HumanEval/1,from typing import List\ -\ -\ -def separate_paren_groups(paren_string: str) -> List[str]:\ - """ Input to this function is a string containing multiple groups of nested parentheses. 
Your goal is to\ - separate those group into separate strings and return the list of those.\ - Separate groups are balanced (each open brace is properly closed) and not nested within each other\ - Ignore any spaces in the input string.\ - >>> separate_paren_groups('( ) (( )) (( )( ))')\ - ['()'\, '(())'\, '(()())']\ - """\ -,separate_paren_groups, result = []\ - current_string = []\ - current_depth = 0\ -\ - for c in paren_string:\ - if c == '(':\ - current_depth += 1\ - current_string.append(c)\ - elif c == ')':\ - current_depth -= 1\ - current_string.append(c)\ -\ - if current_depth == 0:\ - result.append(''.join(current_string))\ - current_string.clear()\ -\ - return result\ -,\ -\ -METADATA = {\ - 'author': 'jt'\,\ - 'dataset': 'test'\ -}\ -\ -\ -def check(candidate):\ - assert candidate('(()()) ((())) () ((())()())') == [\ - '(()())'\, '((()))'\, '()'\, '((())()())'\ - ]\ - assert candidate('() (()) ((())) (((())))') == [\ - '()'\, '(())'\, '((()))'\, '(((())))'\ - ]\ - assert candidate('(()(())((())))') == [\ - '(()(())((())))'\ - ]\ - assert candidate('( ) (( )) (( )( ))') == ['()'\, '(())'\, '(()())']\ - -HumanEval/2,\ -\ -def truncate_number(number: float) -> float:\ - """ Given a positive floating point number\, it can be decomposed into\ - and integer part (largest integer smaller than given number) and decimals\ - (leftover part always smaller than 1).\ -\ - Return the decimal part of the number.\ - >>> truncate_number(3.5)\ - 0.5\ - """\ -,truncate_number, return number % 1.0\ -,\ -\ -METADATA = {\ - 'author': 'jt'\,\ - 'dataset': 'test'\ -}\ -\ -\ -def check(candidate):\ - assert candidate(3.5) == 0.5\ - assert abs(candidate(1.33) - 0.33) < 1e-6\ - assert abs(candidate(123.456) - 0.456) < 1e-6\ - -HumanEval/3,from typing import List\ -\ -\ -def below_zero(operations: List[int]) -> bool:\ - """ You're given a list of deposit and withdrawal operations on a bank account that starts with\ - zero balance. Your task is to detect if at any point the balance of account fallls below zero\, and\ - at that point function should return True. 
Otherwise it should return False.\ - >>> below_zero([1\, 2\, 3])\ - False\ - >>> below_zero([1\, 2\, -4\, 5])\ - True\ - """\ -,below_zero, balance = 0\ -\ - for op in operations:\ - balance += op\ - if balance < 0:\ - return True\ -\ - return False\ -,\ -\ -METADATA = {\ - 'author': 'jt'\,\ - 'dataset': 'test'\ -}\ -\ -\ -def check(candidate):\ - assert candidate([]) == False\ - assert candidate([1\, 2\, -3\, 1\, 2\, -3]) == False\ - assert candidate([1\, 2\, -4\, 5\, 6]) == True\ - assert candidate([1\, -1\, 2\, -2\, 5\, -5\, 4\, -4]) == False\ - assert candidate([1\, -1\, 2\, -2\, 5\, -5\, 4\, -5]) == True\ - assert candidate([1\, -2\, 2\, -2\, 5\, -5\, 4\, -4]) == True\ - -HumanEval/4,from typing import List\ -\ -\ -def mean_absolute_deviation(numbers: List[float]) -> float:\ - """ For a given list of input numbers\, calculate Mean Absolute Deviation\ - around the mean of this dataset.\ - Mean Absolute Deviation is the average absolute difference between each\ - element and a centerpoint (mean in this case):\ - MAD = average | x - x_mean |\ - >>> mean_absolute_deviation([1.0\, 2.0\, 3.0\, 4.0])\ - 1.0\ - """\ -,mean_absolute_deviation, mean = sum(numbers) / len(numbers)\ - return sum(abs(x - mean) for x in numbers) / len(numbers)\ -,\ -\ -METADATA = {\ - 'author': 'jt'\,\ - 'dataset': 'test'\ -}\ -\ -\ -def check(candidate):\ - assert abs(candidate([1.0\, 2.0\, 3.0]) - 2.0/3.0) < 1e-6\ - assert abs(candidate([1.0\, 2.0\, 3.0\, 4.0]) - 1.0) < 1e-6\ - assert abs(candidate([1.0\, 2.0\, 3.0\, 4.0\, 5.0]) - 6.0/5.0) < 1e-6\ -\ - -HumanEval/5,from typing import List\ -\ -\ -def intersperse(numbers: List[int]\, delimeter: int) -> List[int]:\ - """ Insert a number 'delimeter' between every two consecutive elements of input list `numbers'\ - >>> intersperse([]\, 4)\ - []\ - >>> intersperse([1\, 2\, 3]\, 4)\ - [1\, 4\, 2\, 4\, 3]\ - """\ -,intersperse, if not numbers:\ - return []\ -\ - result = []\ -\ - for n in numbers[:-1]:\ - result.append(n)\ - result.append(delimeter)\ -\ - result.append(numbers[-1])\ -\ - return result\ -,\ -\ -METADATA = {\ - 'author': 'jt'\,\ - 'dataset': 'test'\ -}\ -\ -\ -def check(candidate):\ - assert candidate([]\, 7) == []\ - assert candidate([5\, 6\, 3\, 2]\, 8) == [5\, 8\, 6\, 8\, 3\, 8\, 2]\ - assert candidate([2\, 2\, 2]\, 2) == [2\, 2\, 2\, 2\, 2]\ - -HumanEval/6,from typing import List\ -\ -\ -def parse_nested_parens(paren_string: str) -> List[int]:\ - """ Input to this function is a string represented multiple groups for nested parentheses separated by spaces.\ - For each of the group\, output the deepest level of nesting of parentheses.\ - E.g. 
(()()) has maximum two levels of nesting while ((())) has three.\ -\ - >>> parse_nested_parens('(()()) ((())) () ((())()())')\ - [2\, 3\, 1\, 3]\ - """\ -,parse_nested_parens, def parse_paren_group(s):\ - depth = 0\ - max_depth = 0\ - for c in s:\ - if c == '(':\ - depth += 1\ - max_depth = max(depth\, max_depth)\ - else:\ - depth -= 1\ -\ - return max_depth\ -\ - return [parse_paren_group(x) for x in paren_string.split(' ') if x]\ -,\ -\ -METADATA = {\ - 'author': 'jt'\,\ - 'dataset': 'test'\ -}\ -\ -\ -def check(candidate):\ - assert candidate('(()()) ((())) () ((())()())') == [2\, 3\, 1\, 3]\ - assert candidate('() (()) ((())) (((())))') == [1\, 2\, 3\, 4]\ - assert candidate('(()(())((())))') == [4]\ - -HumanEval/7,from typing import List\ -\ -\ -def filter_by_substring(strings: List[str]\, substring: str) -> List[str]:\ - """ Filter an input list of strings only for ones that contain given substring\ - >>> filter_by_substring([]\, 'a')\ - []\ - >>> filter_by_substring(['abc'\, 'bacd'\, 'cde'\, 'array']\, 'a')\ - ['abc'\, 'bacd'\, 'array']\ - """\ -,filter_by_substring, return [x for x in strings if substring in x]\ -,\ -\ -METADATA = {\ - 'author': 'jt'\,\ - 'dataset': 'test'\ -}\ -\ -\ -def check(candidate):\ - assert candidate([]\, 'john') == []\ - assert candidate(['xxx'\, 'asd'\, 'xxy'\, 'john doe'\, 'xxxAAA'\, 'xxx']\, 'xxx') == ['xxx'\, 'xxxAAA'\, 'xxx']\ - assert candidate(['xxx'\, 'asd'\, 'aaaxxy'\, 'john doe'\, 'xxxAAA'\, 'xxx']\, 'xx') == ['xxx'\, 'aaaxxy'\, 'xxxAAA'\, 'xxx']\ - assert candidate(['grunt'\, 'trumpet'\, 'prune'\, 'gruesome']\, 'run') == ['grunt'\, 'prune']\ - -HumanEval/8,from typing import List\, Tuple\ -\ -\ -def sum_product(numbers: List[int]) -> Tuple[int\, int]:\ - """ For a given list of integers\, return a tuple consisting of a sum and a product of all the integers in a list.\ - Empty sum should be equal to 0 and empty product should be equal to 1.\ - >>> sum_product([])\ - (0\, 1)\ - >>> sum_product([1\, 2\, 3\, 4])\ - (10\, 24)\ - """\ -,sum_product, sum_value = 0\ - prod_value = 1\ -\ - for n in numbers:\ - sum_value += n\ - prod_value *= n\ - return sum_value\, prod_value\ -,\ -\ -METADATA = {\ - 'author': 'jt'\,\ - 'dataset': 'test'\ -}\ -\ -\ -def check(candidate):\ - assert candidate([]) == (0\, 1)\ - assert candidate([1\, 1\, 1]) == (3\, 1)\ - assert candidate([100\, 0]) == (100\, 0)\ - assert candidate([3\, 5\, 7]) == (3 + 5 + 7\, 3 * 5 * 7)\ - assert candidate([10]) == (10\, 10)\ - -HumanEval/9,from typing import List\, Tuple\ -\ -\ -def rolling_max(numbers: List[int]) -> List[int]:\ - """ From a given list of integers\, generate a list of rolling maximum element found until given moment\ - in the sequence.\ - >>> rolling_max([1\, 2\, 3\, 2\, 3\, 4\, 2])\ - [1\, 2\, 3\, 3\, 3\, 4\, 4]\ - """\ -,rolling_max, running_max = None\ - result = []\ -\ - for n in numbers:\ - if running_max is None:\ - running_max = n\ - else:\ - running_max = max(running_max\, n)\ -\ - result.append(running_max)\ -\ - return result\ -,\ -\ -METADATA = {\ - 'author': 'jt'\,\ - 'dataset': 'test'\ -}\ -\ -\ -def check(candidate):\ - assert candidate([]) == []\ - assert candidate([1\, 2\, 3\, 4]) == [1\, 2\, 3\, 4]\ - assert candidate([4\, 3\, 2\, 1]) == [4\, 4\, 4\, 4]\ - assert candidate([3\, 2\, 3\, 100\, 3]) == [3\, 3\, 3\, 100\, 100]\ - diff --git a/data/csv/unquoted_escape/human_eval.tsv b/data/csv/unquoted_escape/human_eval.tsv deleted file mode 100644 index 574d08a..0000000 --- a/data/csv/unquoted_escape/human_eval.tsv +++ /dev/null @@ -1,339 +0,0 @@ -HumanEval/0 
from typing import List\ -\ -\ -def has_close_elements(numbers: List[float], threshold: float) -> bool:\ -\ """ Check if in given list of numbers, are any two numbers closer to each other than\ -\ given threshold.\ -\ >>> has_close_elements([1.0, 2.0, 3.0], 0.5)\ -\ False\ -\ >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)\ -\ True\ -\ """\ - has_close_elements \ for idx, elem in enumerate(numbers):\ -\ \ for idx2, elem2 in enumerate(numbers):\ -\ \ \ if idx != idx2:\ -\ \ \ \ distance = abs(elem - elem2)\ -\ \ \ \ if distance < threshold:\ -\ \ \ \ \ return True\ -\ -\ return False\ - \ -\ -METADATA = {\ -\ 'author': 'jt',\ -\ 'dataset': 'test'\ -}\ -\ -\ -def check(candidate):\ -\ assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.3) == True\ -\ assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.05) == False\ -\ assert candidate([1.0, 2.0, 5.9, 4.0, 5.0], 0.95) == True\ -\ assert candidate([1.0, 2.0, 5.9, 4.0, 5.0], 0.8) == False\ -\ assert candidate([1.0, 2.0, 3.0, 4.0, 5.0, 2.0], 0.1) == True\ -\ assert candidate([1.1, 2.2, 3.1, 4.1, 5.1], 1.0) == True\ -\ assert candidate([1.1, 2.2, 3.1, 4.1, 5.1], 0.5) == False\ -\ - -HumanEval/1 from typing import List\ -\ -\ -def separate_paren_groups(paren_string: str) -> List[str]:\ -\ """ Input to this function is a string containing multiple groups of nested parentheses. Your goal is to\ -\ separate those group into separate strings and return the list of those.\ -\ Separate groups are balanced (each open brace is properly closed) and not nested within each other\ -\ Ignore any spaces in the input string.\ -\ >>> separate_paren_groups('( ) (( )) (( )( ))')\ -\ ['()', '(())', '(()())']\ -\ """\ - separate_paren_groups \ result = []\ -\ current_string = []\ -\ current_depth = 0\ -\ -\ for c in paren_string:\ -\ \ if c == '(':\ -\ \ \ current_depth += 1\ -\ \ \ current_string.append(c)\ -\ \ elif c == ')':\ -\ \ \ current_depth -= 1\ -\ \ \ current_string.append(c)\ -\ -\ \ \ if current_depth == 0:\ -\ \ \ \ result.append(''.join(current_string))\ -\ \ \ \ current_string.clear()\ -\ -\ return result\ - \ -\ -METADATA = {\ -\ 'author': 'jt',\ -\ 'dataset': 'test'\ -}\ -\ -\ -def check(candidate):\ -\ assert candidate('(()()) ((())) () ((())()())') == [\ -\ \ '(()())', '((()))', '()', '((())()())'\ -\ ]\ -\ assert candidate('() (()) ((())) (((())))') == [\ -\ \ '()', '(())', '((()))', '(((())))'\ -\ ]\ -\ assert candidate('(()(())((())))') == [\ -\ \ '(()(())((())))'\ -\ ]\ -\ assert candidate('( ) (( )) (( )( ))') == ['()', '(())', '(()())']\ - -HumanEval/2 \ -\ -def truncate_number(number: float) -> float:\ -\ """ Given a positive floating point number, it can be decomposed into\ -\ and integer part (largest integer smaller than given number) and decimals\ -\ (leftover part always smaller than 1).\ -\ -\ Return the decimal part of the number.\ -\ >>> truncate_number(3.5)\ -\ 0.5\ -\ """\ - truncate_number \ return number % 1.0\ - \ -\ -METADATA = {\ -\ 'author': 'jt',\ -\ 'dataset': 'test'\ -}\ -\ -\ -def check(candidate):\ -\ assert candidate(3.5) == 0.5\ -\ assert abs(candidate(1.33) - 0.33) < 1e-6\ -\ assert abs(candidate(123.456) - 0.456) < 1e-6\ - -HumanEval/3 from typing import List\ -\ -\ -def below_zero(operations: List[int]) -> bool:\ -\ """ You're given a list of deposit and withdrawal operations on a bank account that starts with\ -\ zero balance. Your task is to detect if at any point the balance of account fallls below zero, and\ -\ at that point function should return True. 
Otherwise it should return False.\ -\ >>> below_zero([1, 2, 3])\ -\ False\ -\ >>> below_zero([1, 2, -4, 5])\ -\ True\ -\ """\ - below_zero \ balance = 0\ -\ -\ for op in operations:\ -\ \ balance += op\ -\ \ if balance < 0:\ -\ \ \ return True\ -\ -\ return False\ - \ -\ -METADATA = {\ -\ 'author': 'jt',\ -\ 'dataset': 'test'\ -}\ -\ -\ -def check(candidate):\ -\ assert candidate([]) == False\ -\ assert candidate([1, 2, -3, 1, 2, -3]) == False\ -\ assert candidate([1, 2, -4, 5, 6]) == True\ -\ assert candidate([1, -1, 2, -2, 5, -5, 4, -4]) == False\ -\ assert candidate([1, -1, 2, -2, 5, -5, 4, -5]) == True\ -\ assert candidate([1, -2, 2, -2, 5, -5, 4, -4]) == True\ - -HumanEval/4 from typing import List\ -\ -\ -def mean_absolute_deviation(numbers: List[float]) -> float:\ -\ """ For a given list of input numbers, calculate Mean Absolute Deviation\ -\ around the mean of this dataset.\ -\ Mean Absolute Deviation is the average absolute difference between each\ -\ element and a centerpoint (mean in this case):\ -\ MAD = average | x - x_mean |\ -\ >>> mean_absolute_deviation([1.0, 2.0, 3.0, 4.0])\ -\ 1.0\ -\ """\ - mean_absolute_deviation \ mean = sum(numbers) / len(numbers)\ -\ return sum(abs(x - mean) for x in numbers) / len(numbers)\ - \ -\ -METADATA = {\ -\ 'author': 'jt',\ -\ 'dataset': 'test'\ -}\ -\ -\ -def check(candidate):\ -\ assert abs(candidate([1.0, 2.0, 3.0]) - 2.0/3.0) < 1e-6\ -\ assert abs(candidate([1.0, 2.0, 3.0, 4.0]) - 1.0) < 1e-6\ -\ assert abs(candidate([1.0, 2.0, 3.0, 4.0, 5.0]) - 6.0/5.0) < 1e-6\ -\ - -HumanEval/5 from typing import List\ -\ -\ -def intersperse(numbers: List[int], delimeter: int) -> List[int]:\ -\ """ Insert a number 'delimeter' between every two consecutive elements of input list `numbers'\ -\ >>> intersperse([], 4)\ -\ []\ -\ >>> intersperse([1, 2, 3], 4)\ -\ [1, 4, 2, 4, 3]\ -\ """\ - intersperse \ if not numbers:\ -\ \ return []\ -\ -\ result = []\ -\ -\ for n in numbers[:-1]:\ -\ \ result.append(n)\ -\ \ result.append(delimeter)\ -\ -\ result.append(numbers[-1])\ -\ -\ return result\ - \ -\ -METADATA = {\ -\ 'author': 'jt',\ -\ 'dataset': 'test'\ -}\ -\ -\ -def check(candidate):\ -\ assert candidate([], 7) == []\ -\ assert candidate([5, 6, 3, 2], 8) == [5, 8, 6, 8, 3, 8, 2]\ -\ assert candidate([2, 2, 2], 2) == [2, 2, 2, 2, 2]\ - -HumanEval/6 from typing import List\ -\ -\ -def parse_nested_parens(paren_string: str) -> List[int]:\ -\ """ Input to this function is a string represented multiple groups for nested parentheses separated by spaces.\ -\ For each of the group, output the deepest level of nesting of parentheses.\ -\ E.g. 
(()()) has maximum two levels of nesting while ((())) has three.\ -\ -\ >>> parse_nested_parens('(()()) ((())) () ((())()())')\ -\ [2, 3, 1, 3]\ -\ """\ - parse_nested_parens \ def parse_paren_group(s):\ -\ \ depth = 0\ -\ \ max_depth = 0\ -\ \ for c in s:\ -\ \ \ if c == '(':\ -\ \ \ \ depth += 1\ -\ \ \ \ max_depth = max(depth, max_depth)\ -\ \ \ else:\ -\ \ \ \ depth -= 1\ -\ -\ \ return max_depth\ -\ -\ return [parse_paren_group(x) for x in paren_string.split(' ') if x]\ - \ -\ -METADATA = {\ -\ 'author': 'jt',\ -\ 'dataset': 'test'\ -}\ -\ -\ -def check(candidate):\ -\ assert candidate('(()()) ((())) () ((())()())') == [2, 3, 1, 3]\ -\ assert candidate('() (()) ((())) (((())))') == [1, 2, 3, 4]\ -\ assert candidate('(()(())((())))') == [4]\ - -HumanEval/7 from typing import List\ -\ -\ -def filter_by_substring(strings: List[str], substring: str) -> List[str]:\ -\ """ Filter an input list of strings only for ones that contain given substring\ -\ >>> filter_by_substring([], 'a')\ -\ []\ -\ >>> filter_by_substring(['abc', 'bacd', 'cde', 'array'], 'a')\ -\ ['abc', 'bacd', 'array']\ -\ """\ - filter_by_substring \ return [x for x in strings if substring in x]\ - \ -\ -METADATA = {\ -\ 'author': 'jt',\ -\ 'dataset': 'test'\ -}\ -\ -\ -def check(candidate):\ -\ assert candidate([], 'john') == []\ -\ assert candidate(['xxx', 'asd', 'xxy', 'john doe', 'xxxAAA', 'xxx'], 'xxx') == ['xxx', 'xxxAAA', 'xxx']\ -\ assert candidate(['xxx', 'asd', 'aaaxxy', 'john doe', 'xxxAAA', 'xxx'], 'xx') == ['xxx', 'aaaxxy', 'xxxAAA', 'xxx']\ -\ assert candidate(['grunt', 'trumpet', 'prune', 'gruesome'], 'run') == ['grunt', 'prune']\ - -HumanEval/8 from typing import List, Tuple\ -\ -\ -def sum_product(numbers: List[int]) -> Tuple[int, int]:\ -\ """ For a given list of integers, return a tuple consisting of a sum and a product of all the integers in a list.\ -\ Empty sum should be equal to 0 and empty product should be equal to 1.\ -\ >>> sum_product([])\ -\ (0, 1)\ -\ >>> sum_product([1, 2, 3, 4])\ -\ (10, 24)\ -\ """\ - sum_product \ sum_value = 0\ -\ prod_value = 1\ -\ -\ for n in numbers:\ -\ \ sum_value += n\ -\ \ prod_value *= n\ -\ return sum_value, prod_value\ - \ -\ -METADATA = {\ -\ 'author': 'jt',\ -\ 'dataset': 'test'\ -}\ -\ -\ -def check(candidate):\ -\ assert candidate([]) == (0, 1)\ -\ assert candidate([1, 1, 1]) == (3, 1)\ -\ assert candidate([100, 0]) == (100, 0)\ -\ assert candidate([3, 5, 7]) == (3 + 5 + 7, 3 * 5 * 7)\ -\ assert candidate([10]) == (10, 10)\ - -HumanEval/9 from typing import List, Tuple\ -\ -\ -def rolling_max(numbers: List[int]) -> List[int]:\ -\ """ From a given list of integers, generate a list of rolling maximum element found until given moment\ -\ in the sequence.\ -\ >>> rolling_max([1, 2, 3, 2, 3, 4, 2])\ -\ [1, 2, 3, 3, 3, 4, 4]\ -\ """\ - rolling_max \ running_max = None\ -\ result = []\ -\ -\ for n in numbers:\ -\ \ if running_max is None:\ -\ \ \ running_max = n\ -\ \ else:\ -\ \ \ running_max = max(running_max, n)\ -\ -\ \ result.append(running_max)\ -\ -\ return result\ - \ -\ -METADATA = {\ -\ 'author': 'jt',\ -\ 'dataset': 'test'\ -}\ -\ -\ -def check(candidate):\ -\ assert candidate([]) == []\ -\ assert candidate([1, 2, 3, 4]) == [1, 2, 3, 4]\ -\ assert candidate([4, 3, 2, 1]) == [4, 4, 4, 4]\ -\ assert candidate([3, 2, 3, 100, 3]) == [3, 3, 3, 100, 100]\ - diff --git a/scripts/generate_presigned_url.sh b/scripts/generate_presigned_url.sh new file mode 100755 index 0000000..359cb31 --- /dev/null +++ b/scripts/generate_presigned_url.sh @@ -0,0 +1,32 @@ 
+#!/usr/bin/env bash +#Note: DONT run as root + +DUCKDB_PATH=duckdb +if command -v duckdb; then + DUCKDB_PATH=duckdb +elif test -f build/release/duckdb; then + DUCKDB_PATH=build/release/duckdb +elif test -f build/reldebug/duckdb; then + DUCKDB_PATH=build/reldebug/duckdb +elif test -f build/debug/duckdb; then + DUCKDB_PATH=build/debug/duckdb +fi + +mkdir -p data/parquet-testing/presigned + +generate_large_parquet_query=$(cat <> /etc/hosts +echo '127.0.0.1 test-bucket.duckdb-minio.com' >> /etc/hosts +echo '127.0.0.1 test-bucket-2.duckdb-minio.com' >> /etc/hosts +echo '127.0.0.1 test-bucket-public.duckdb-minio.com' >> /etc/hosts \ No newline at end of file diff --git a/scripts/minio_s3.yml b/scripts/minio_s3.yml new file mode 100644 index 0000000..b9547a6 --- /dev/null +++ b/scripts/minio_s3.yml @@ -0,0 +1,80 @@ +services: + minio: + image: minio/minio:RELEASE.2021-11-03T03-36-36Z + hostname: duckdb-minio.com + ports: + - "9000:9000" + - "9001:9001" + volumes: + - /tmp/minio_test_data:/data + - /tmp/minio_root_data:/root/.minio + environment: + - MINIO_ROOT_USER=duckdb_minio_admin + - MINIO_ROOT_PASSWORD=duckdb_minio_admin_password + - MINIO_REGION_NAME=eu-west-1 + - MINIO_DOMAIN=duckdb-minio.com + - MINIO_ACCESS_KEY=duckdb_minio_admin + - MINIO_SECRET_KEY=duckdb_minio_admin_password + command: server /data --console-address ":9001" + + minio_setup: + image: minio/mc:RELEASE.2021-11-05T10-05-06Z + depends_on: + - minio + links: + - minio + volumes: + - ${PWD}/duckdb/data:/duckdb/data + + entrypoint: > + /bin/sh -c " + until ( + /usr/bin/mc config host add myminio http://duckdb-minio.com:9000 duckdb_minio_admin duckdb_minio_admin_password + ) do + echo '...waiting...' && sleep 1; + done; + + /usr/bin/mc admin user add myminio minio_duckdb_user minio_duckdb_user_password + /usr/bin/mc admin user list myminio + /usr/bin/mc admin user info myminio minio_duckdb_user + /usr/bin/mc admin policy set myminio readwrite user=minio_duckdb_user + + /usr/bin/mc admin user add myminio minio_duckdb_user_2 minio_duckdb_user_2_password + /usr/bin/mc admin user list myminio + /usr/bin/mc admin user info myminio minio_duckdb_user_2 + /usr/bin/mc admin policy set myminio readwrite user=minio_duckdb_user_2 + + /usr/bin/mc rb --force myminio/test-bucket + /usr/bin/mc mb myminio/test-bucket + /usr/bin/mc policy get myminio/test-bucket + + /usr/bin/mc rb --force myminio/test-bucket-2 + /usr/bin/mc mb myminio/test-bucket-2 + /usr/bin/mc policy get myminio/test-bucket-2 + + /usr/bin/mc rb --force myminio/test-bucket-public + /usr/bin/mc mb myminio/test-bucket-public + /usr/bin/mc policy set download myminio/test-bucket-public + /usr/bin/mc policy get myminio/test-bucket-public + + # This is for the test of presigned URLs + # !!! When missing, be sure that you have ran 'scripts/generate_presigned_url.sh' !!! 
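+      # (Hedged sketch of the flow, for reference, not a definitive description: scripts/generate_presigned_url.sh
+      #  is expected to have produced the large lineitem parquet and the attach database(s) locally before this
+      #  container starts; the 'mc cp' calls below copy those files into the test-bucket/presigned/ prefix, and the
+      #  'mc share download' calls print presigned URLs. run_s3_test_server.sh then pulls those URLs back out of the
+      #  container logs, e.g.
+      #    docker logs $container_name | grep -m 1 'Share:.*phonenumbers\.csv' | grep -o 'http[s]\?://[^ ]\+'
+      #  and exports them as S3_*_PRESIGNED_URL environment variables for the presigned-URL tests.)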
+ + # small file upload + /usr/bin/mc cp /duckdb/data/csv/phonenumbers.csv myminio/test-bucket/presigned/phonenumbers.csv + /usr/bin/mc cp /duckdb/data/parquet-testing/glob/t1.parquet myminio/test-bucket/presigned/t1.parquet + + # large file upload + /usr/bin/mc cp /duckdb/data/parquet-testing/presigned/presigned-url-lineitem.parquet myminio/test-bucket/presigned/lineitem_large.parquet + + # Upload the db for the attach + /usr/bin/mc cp /duckdb/data/attach_test/attach.db myminio/test-bucket/presigned/attach.db + /usr/bin/mc cp /duckdb/data/attach_test/lineitem_sf1.db myminio/test-bucket/presigned/lineitem_sf1.db + + /usr/bin/mc share download myminio/test-bucket/presigned/phonenumbers.csv + /usr/bin/mc share download myminio/test-bucket/presigned/t1.parquet + /usr/bin/mc share download myminio/test-bucket/presigned/lineitem_large.parquet + /usr/bin/mc share download myminio/test-bucket/presigned/attach.db + + exit 0; + " \ No newline at end of file diff --git a/scripts/run_s3_test_server.sh b/scripts/run_s3_test_server.sh new file mode 100755 index 0000000..2267162 --- /dev/null +++ b/scripts/run_s3_test_server.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +#Note: DONT run as root + +if [ ! -f data/attach_test/attach.db ]; then + echo "File data/attach_test/attach.db not found, run ./scripts/generate_presigned_url.sh to generate" +else + rm -rf /tmp/minio_test_data + rm -rf /tmp/minio_root_data + mkdir -p /tmp/minio_test_data + mkdir -p /tmp/minio_root_data + docker compose -f scripts/minio_s3.yml -p duckdb-minio up -d + + # for testing presigned url + sleep 10 + container_name=$(docker ps -a --format '{{.Names}}' | grep -m 1 "duckdb-minio") + echo $container_name + + export S3_SMALL_CSV_PRESIGNED_URL=$(docker logs $container_name 2>/dev/null | grep -m 1 'Share:.*phonenumbers\.csv' | grep -o 'http[s]\?://[^ ]\+') + echo $S3_SMALL_CSV_PRESIGNED_URL + + export S3_SMALL_PARQUET_PRESIGNED_URL=$(docker logs $container_name 2>/dev/null | grep -m 1 'Share:.*t1\.parquet' | grep -o 'http[s]\?://[^ ]\+') + echo $S3_SMALL_PARQUET_PRESIGNED_URL + + export S3_LARGE_PARQUET_PRESIGNED_URL=$(docker logs $container_name 2>/dev/null | grep -m 1 'Share:.*lineitem_large\.parquet' | grep -o 'http[s]\?://[^ ]\+') + echo $S3_LARGE_PARQUET_PRESIGNED_URL + + export S3_ATTACH_DB_PRESIGNED_URL=$(docker logs $container_name 2>/dev/null | grep -m 1 'Share:.*attach\.db' | grep -o 'http[s]\?://[^ ]\+') + echo $S3_ATTACH_DB_PRESIGNED_URL + + export S3_ATTACH_DB="s3://test-bucket/presigned/attach.db" +fi \ No newline at end of file diff --git a/scripts/set_s3_test_server_variables.sh b/scripts/set_s3_test_server_variables.sh new file mode 100644 index 0000000..860e58f --- /dev/null +++ b/scripts/set_s3_test_server_variables.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +# Run this script with 'source' or the shorthand: '.': +# i.e: source scripts/set_s3_test_server_variables.sh + +# Enable the S3 tests to run +export S3_TEST_SERVER_AVAILABLE=1 + +export AWS_DEFAULT_REGION=eu-west-1 +export AWS_ACCESS_KEY_ID=minio_duckdb_user +export AWS_SECRET_ACCESS_KEY=minio_duckdb_user_password +export DUCKDB_S3_ENDPOINT=duckdb-minio.com:9000 +export DUCKDB_S3_USE_SSL=false From b940311326740c44295c64ca6c3be454ce356d0d Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 12:29:50 +0200 Subject: [PATCH 06/32] More test fixes --- .github/workflows/Linux.yml | 11 +++++++++++ .gitignore | 1 + scripts/generate_presigned_url.sh | 13 +++++-------- scripts/minio_s3.yml | 7 ++++--- scripts/run_s3_test_server.sh | 4 ++-- 
test/sql/copy/csv/glob/copy_csv_glob_s3.test | 8 ++++---- test/sql/copy/parquet/parquet_glob_s3.test | 10 +++++----- test/sql/copy/s3/metadata_cache.test | 4 ++-- test/sql/copy/s3/upload_small_file.test | 2 +- 9 files changed, 35 insertions(+), 25 deletions(-) diff --git a/.github/workflows/Linux.yml b/.github/workflows/Linux.yml index 1c6e87c..73e5fdb 100644 --- a/.github/workflows/Linux.yml +++ b/.github/workflows/Linux.yml @@ -38,6 +38,17 @@ jobs: - name: Install required ubuntu packages run: sudo apt-get update -y -qq && sudo apt-get install -y -qq ninja-build libcurl4-openssl-dev + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + submodules: 'true' + + - name: Checkout DuckDB to version + if: ${{ matrix.duckdb_version != ''}} + run: | + cd duckdb + git checkout ${{ matrix.duckdb_version }} + - name: Fix permissions of test secrets shell: bash run: chmod -R 700 data/secrets diff --git a/.gitignore b/.gitignore index 140ce78..a550d02 100644 --- a/.gitignore +++ b/.gitignore @@ -355,3 +355,4 @@ extension_external test/sql/pragma/output.json tools/pythonpkg/duckdb_build/ +test/test_data diff --git a/scripts/generate_presigned_url.sh b/scripts/generate_presigned_url.sh index 359cb31..1aa3381 100755 --- a/scripts/generate_presigned_url.sh +++ b/scripts/generate_presigned_url.sh @@ -12,21 +12,18 @@ elif test -f build/debug/duckdb; then DUCKDB_PATH=build/debug/duckdb fi -mkdir -p data/parquet-testing/presigned +rm -rf test/test_data +mkdir -p test/test_data generate_large_parquet_query=$(cat < /bin/sh -c " @@ -65,11 +66,11 @@ services: /usr/bin/mc cp /duckdb/data/parquet-testing/glob/t1.parquet myminio/test-bucket/presigned/t1.parquet # large file upload - /usr/bin/mc cp /duckdb/data/parquet-testing/presigned/presigned-url-lineitem.parquet myminio/test-bucket/presigned/lineitem_large.parquet + /usr/bin/mc cp /duckdb/test_data/presigned-url-lineitem.parquet myminio/test-bucket/presigned/lineitem_large.parquet # Upload the db for the attach - /usr/bin/mc cp /duckdb/data/attach_test/attach.db myminio/test-bucket/presigned/attach.db - /usr/bin/mc cp /duckdb/data/attach_test/lineitem_sf1.db myminio/test-bucket/presigned/lineitem_sf1.db + /usr/bin/mc cp /duckdb/test_data/attach.db myminio/test-bucket/presigned/attach.db + /usr/bin/mc cp /duckdb/test_data/lineitem_sf1.db myminio/test-bucket/presigned/lineitem_sf1.db /usr/bin/mc share download myminio/test-bucket/presigned/phonenumbers.csv /usr/bin/mc share download myminio/test-bucket/presigned/t1.parquet diff --git a/scripts/run_s3_test_server.sh b/scripts/run_s3_test_server.sh index 2267162..d202258 100755 --- a/scripts/run_s3_test_server.sh +++ b/scripts/run_s3_test_server.sh @@ -1,8 +1,8 @@ #!/usr/bin/env bash #Note: DONT run as root -if [ ! -f data/attach_test/attach.db ]; then - echo "File data/attach_test/attach.db not found, run ./scripts/generate_presigned_url.sh to generate" +if [ ! 
-f test/test_data/attach.db ]; then + echo "File test/test_data/attach.db not found, run ./scripts/generate_presigned_url.sh to generate" else rm -rf /tmp/minio_test_data rm -rf /tmp/minio_root_data diff --git a/test/sql/copy/csv/glob/copy_csv_glob_s3.test b/test/sql/copy/csv/glob/copy_csv_glob_s3.test index 7d0aae8..17d292f 100644 --- a/test/sql/copy/csv/glob/copy_csv_glob_s3.test +++ b/test/sql/copy/csv/glob/copy_csv_glob_s3.test @@ -26,19 +26,19 @@ set ignore_error_messages # copy files to S3 before beginning tests statement ok -COPY (select * from 'data/csv/glob/a1/a1.csv') to 's3://test-bucket/copy_csv_glob_s3/copy/a1/a1.csv'; +COPY (select * from 'duckdb/data/csv/glob/a1/a1.csv') to 's3://test-bucket/copy_csv_glob_s3/copy/a1/a1.csv'; statement ok -COPY (select * from 'data/csv/glob/a2/a2.csv') to 's3://test-bucket/copy_csv_glob_s3/copy/a2/a2.csv'; +COPY (select * from 'duckdb/data/csv/glob/a2/a2.csv') to 's3://test-bucket/copy_csv_glob_s3/copy/a2/a2.csv'; statement ok -COPY (select * from 'data/csv/glob/a3/b1.csv') to 's3://test-bucket/copy_csv_glob_s3/copy/a3/b1.csv'; +COPY (select * from 'duckdb/data/csv/glob/a3/b1.csv') to 's3://test-bucket/copy_csv_glob_s3/copy/a3/b1.csv'; statement ok COPY (select null) to 's3://test-bucket/glob/copy/empty/empty.csv'; statement ok -COPY (select * from 'data/csv/glob/i1/integer.csv') to 's3://test-bucket/copy_csv_glob_s3/copy/empty/integer.csv'; +COPY (select * from 'duckdb/data/csv/glob/i1/integer.csv') to 's3://test-bucket/copy_csv_glob_s3/copy/empty/integer.csv'; statement ok CREATE TABLE dates(d DATE); diff --git a/test/sql/copy/parquet/parquet_glob_s3.test b/test/sql/copy/parquet/parquet_glob_s3.test index ea5df0c..c5e151e 100644 --- a/test/sql/copy/parquet/parquet_glob_s3.test +++ b/test/sql/copy/parquet/parquet_glob_s3.test @@ -31,11 +31,11 @@ set enable_external_file_cache=false; # Copy files to S3 before beginning tests statement ok -COPY (select * from 'data/parquet-testing/glob/t1.parquet') to 's3://test-bucket/parquet_glob_s3/glob/t1.parquet'; -COPY (select * from 'data/parquet-testing/glob/t2.parquet') to 's3://test-bucket/parquet_glob_s3/glob/t2.parquet'; -COPY (select * from 'data/parquet-testing/glob2/t1.parquet') to 's3://test-bucket/parquet_glob_s3/glob2/t1.parquet'; -COPY (select * from 'data/parquet-testing/glob/t1.parquet') to 's3://test-bucket/parquet_glob_s3/with+plus/t1.parquet'; -COPY (select * from 'data/parquet-testing/glob/t1.parquet') to 's3://test-bucket/parquet_glob_s3/with space/t1.parquet'; +COPY (select * from 'duckdb/data/parquet-testing/glob/t1.parquet') to 's3://test-bucket/parquet_glob_s3/glob/t1.parquet'; +COPY (select * from 'duckdb/data/parquet-testing/glob/t2.parquet') to 's3://test-bucket/parquet_glob_s3/glob/t2.parquet'; +COPY (select * from 'duckdb/data/parquet-testing/glob2/t1.parquet') to 's3://test-bucket/parquet_glob_s3/glob2/t1.parquet'; +COPY (select * from 'duckdb/data/parquet-testing/glob/t1.parquet') to 's3://test-bucket/parquet_glob_s3/with+plus/t1.parquet'; +COPY (select * from 'duckdb/data/parquet-testing/glob/t1.parquet') to 's3://test-bucket/parquet_glob_s3/with space/t1.parquet'; # parquet glob with COPY FROM statement ok diff --git a/test/sql/copy/s3/metadata_cache.test b/test/sql/copy/s3/metadata_cache.test index c3e7e8d..84bee9b 100644 --- a/test/sql/copy/s3/metadata_cache.test +++ b/test/sql/copy/s3/metadata_cache.test @@ -38,12 +38,12 @@ CREATE TABLE test1 as SELECT * FROM range(10,20) tbl(i); query II EXPLAIN ANALYZE COPY test TO 
's3://test-bucket-public/root-dir/metadata_cache/test.parquet'; ---- -analyzed_plan :.*HTTP Stats.*\#HEAD\: 0.*GET\: 0.*PUT\: 1.*\#POST\: 2.* +analyzed_plan :.*HTTP Stats.*\#HEAD\: 0.*GET\: 0.*PUT\: 1.*\#POST\: 0.* query II EXPLAIN ANALYZE COPY test TO 's3://test-bucket-public/root-dir/metadata_cache/test1.parquet'; ---- -analyzed_plan :.*HTTP Stats.*\#HEAD\: 0.*GET\: 0.*PUT\: 1.*\#POST\: 2.* +analyzed_plan :.*HTTP Stats.*\#HEAD\: 0.*GET\: 0.*PUT\: 1.*\#POST\: 0.* # Now we query the file metadata without the global metadata cache: There should be 1 HEAD request for the file size, # then a GET for the pointer to the parquet metadata, then a GET for the metadata. diff --git a/test/sql/copy/s3/upload_small_file.test b/test/sql/copy/s3/upload_small_file.test index b2e8cb3..9f64fd1 100644 --- a/test/sql/copy/s3/upload_small_file.test +++ b/test/sql/copy/s3/upload_small_file.test @@ -24,7 +24,7 @@ require-env DUCKDB_S3_USE_SSL set ignore_error_messages statement ok -CREATE TABLE web_page as (SELECT * FROM "data/csv/real/web_page.csv"); +CREATE TABLE web_page as (SELECT * FROM "duckdb/data/csv/real/web_page.csv"); query IIIIIIIIIIIIII SELECT * FROM web_page LIMIT 10; From fc2fb4f94a6d6809ec5f58c7c0aef7e21baf47df Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 12:31:25 +0200 Subject: [PATCH 07/32] Yet more test fixes --- test/sql/copy/csv/glob/read_csv_glob_s3.test | 12 ++++++------ test/sql/copy/csv/test_csv_remote.test | 4 ++-- test/sql/copy/csv/unquoted_escape/human_eval.test | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/test/sql/copy/csv/glob/read_csv_glob_s3.test b/test/sql/copy/csv/glob/read_csv_glob_s3.test index f84d13a..2736d14 100644 --- a/test/sql/copy/csv/glob/read_csv_glob_s3.test +++ b/test/sql/copy/csv/glob/read_csv_glob_s3.test @@ -26,25 +26,25 @@ set ignore_error_messages # Copy files to S3 before beginning tests statement ok -COPY (select * from read_csv_auto('data/csv/glob/a1/a1.csv')) to 's3://test-bucket/read_csv_glob_s3/glob/a1/a1.csv' ( HEADER ); +COPY (select * from read_csv_auto('duckdb/data/csv/glob/a1/a1.csv')) to 's3://test-bucket/read_csv_glob_s3/glob/a1/a1.csv' ( HEADER ); statement ok -COPY (select * from read_csv_auto('data/csv/glob/a2/a2.csv')) to 's3://test-bucket/read_csv_glob_s3/glob/a2/a2.csv' ( HEADER ); +COPY (select * from read_csv_auto('duckdb/data/csv/glob/a2/a2.csv')) to 's3://test-bucket/read_csv_glob_s3/glob/a2/a2.csv' ( HEADER ); statement ok -COPY (select * from read_csv_auto('data/csv/glob/a3/b1.csv')) to 's3://test-bucket/read_csv_glob_s3/glob/a3/b1.csv' ( HEADER ); +COPY (select * from read_csv_auto('duckdb/data/csv/glob/a3/b1.csv')) to 's3://test-bucket/read_csv_glob_s3/glob/a3/b1.csv' ( HEADER ); statement ok COPY (select null) to 's3://test-bucket/read_csv_glob_s3/glob/empty/empty.csv' (HEADER 0); statement ok -COPY (select * from read_csv_auto('data/csv/glob/i1/integer.csv')) to 's3://test-bucket/read_csv_glob_s3/glob/i1/integer.csv' ( HEADER ); +COPY (select * from read_csv_auto('duckdb/data/csv/glob/i1/integer.csv')) to 's3://test-bucket/read_csv_glob_s3/glob/i1/integer.csv' ( HEADER ); statement ok -COPY (select * from read_csv_auto('data/csv/glob/a1/a1.csv')) to 's3://test-bucket/read_csv_glob_s3/glob/f1/f*.csv' ( HEADER ); +COPY (select * from read_csv_auto('duckdb/data/csv/glob/a1/a1.csv')) to 's3://test-bucket/read_csv_glob_s3/glob/f1/f*.csv' ( HEADER ); statement ok -COPY (select * from read_csv_auto('data/csv/glob/a2/a2.csv')) to 's3://test-bucket/read_csv_glob_s3/glob/f2/f[a].csv' ( HEADER ); 
+COPY (select * from read_csv_auto('duckdb/data/csv/glob/a2/a2.csv')) to 's3://test-bucket/read_csv_glob_s3/glob/f2/f[a].csv' ( HEADER ); foreach urlstyle path vhost diff --git a/test/sql/copy/csv/test_csv_remote.test b/test/sql/copy/csv/test_csv_remote.test index 0555b6e..4144082 100644 --- a/test/sql/copy/csv/test_csv_remote.test +++ b/test/sql/copy/csv/test_csv_remote.test @@ -10,12 +10,12 @@ PRAGMA enable_verification # regular csv file query ITTTIITITTIIII nosort webpagecsv -SELECT * FROM read_csv_auto('data/csv/real/web_page.csv') ORDER BY 1; +SELECT * FROM read_csv_auto('duckdb/data/csv/real/web_page.csv') ORDER BY 1; ---- # file with gzip query IIIIIIIIIIIIIII nosort lineitemcsv -SELECT * FROM read_csv_auto('data/csv/lineitem1k.tbl.gz') ORDER BY ALL; +SELECT * FROM read_csv_auto('duckdb/data/csv/lineitem1k.tbl.gz') ORDER BY ALL; ---- query ITTTIITITTIIII nosort webpagecsv diff --git a/test/sql/copy/csv/unquoted_escape/human_eval.test b/test/sql/copy/csv/unquoted_escape/human_eval.test index 099dd3a..baf728e 100644 --- a/test/sql/copy/csv/unquoted_escape/human_eval.test +++ b/test/sql/copy/csv/unquoted_escape/human_eval.test @@ -62,12 +62,12 @@ TRUNCATE human_eval_tsv; statement ok INSERT INTO human_eval_csv SELECT replace(COLUMNS(*), E'\r\n', E'\n') -FROM read_csv('data/csv/unquoted_escape/human_eval.csv', quote = '', escape = '\', sep = ',', header = false, strict_mode = false); +FROM read_csv('duckdb/data/csv/unquoted_escape/human_eval.csv', quote = '', escape = '\', sep = ',', header = false, strict_mode = false); statement ok INSERT INTO human_eval_tsv SELECT replace(COLUMNS(*), E'\r\n', E'\n') -FROM read_csv('data/csv/unquoted_escape/human_eval.tsv', quote = '', escape = '\', sep = '\t', header = false, strict_mode = false); +FROM read_csv('duckdb/data/csv/unquoted_escape/human_eval.tsv', quote = '', escape = '\', sep = '\t', header = false, strict_mode = false); # Verify that the three copies are the same query II From 1831e8f87c343f8e3af828dc3cf9830c8c7e9935 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 12:34:11 +0200 Subject: [PATCH 08/32] More scripts + CI fix --- .github/workflows/Linux.yml | 4 +- scripts/run_squid.sh | 84 +++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 1 deletion(-) create mode 100755 scripts/run_squid.sh diff --git a/.github/workflows/Linux.yml b/.github/workflows/Linux.yml index 73e5fdb..07d9f95 100644 --- a/.github/workflows/Linux.yml +++ b/.github/workflows/Linux.yml @@ -36,7 +36,9 @@ jobs: steps: - name: Install required ubuntu packages - run: sudo apt-get update -y -qq && sudo apt-get install -y -qq ninja-build libcurl4-openssl-dev + run: | + sudo apt-get update -y -qq + sudo apt-get install -y -qq build-essential cmake ninja-build ccache python3 libcurl4-openssl-dev - uses: actions/checkout@v3 with: diff --git a/scripts/run_squid.sh b/scripts/run_squid.sh new file mode 100755 index 0000000..7b05e6a --- /dev/null +++ b/scripts/run_squid.sh @@ -0,0 +1,84 @@ +#!/bin/bash + +help() { + echo "Usage: ${0} [port] [auth]" + echo " port Port number for squid to lisen to (by default 3128)" + echo " auth Optional string ('auth') to force user basic authentification (autherwise no authentification is required)" + exit 0 +} + +port='3128' +auth='false' +log_dir="squid_logs" +conf_file="squid.conf" +pid_file='${service_name}.pid' + +while [[ $# -gt 0 ]]; do + case "${1}" in + -h|--help) + help + ;; + -p|--port) + port="${2}" + shift # past argument + shift # past value + ;; + --auth) + auth='true' + 
conf_file="squid_auth.conf" + pid_file='${service_name}_auth.pid' + shift # past argument + ;; + --log_dir) + log_dir="${2}" + shift # past argument + shift # past value + ;; + *) + echo "Unknown option ${1}" + exit 1 + ;; + esac +done + +mkdir "${log_dir}" +touch "${log_dir}/daemon.log" +chmod -R 777 "${log_dir}" + +echo "http_port 127.0.0.1:${port}" >"${conf_file}" +echo "pid_filename ${pid_file}" >>"${conf_file}" + +echo 'logfile_rotate 0' >>"${conf_file}" +echo "logfile_daemon ${log_dir}/daemon.log" >>"${conf_file}" +echo "access_log ${log_dir}/access.log" >>"${conf_file}" +echo "cache_log ${log_dir}/cache.log" >>"${conf_file}" +echo "cache_store_log ${log_dir}/cache_store.log" >>"${conf_file}" + + +if [[ "${auth}" == "true" ]]; then + # User 'john' with password 'doe' + echo 'john:$apr1$dalj9e7s$AhqY28Hvl3EcNblNJMiXa0' >squid_users + + squid_version="$(squid -v | head -n1 | grep -o 'Version [^ ]*' | cut -d ' ' -f 2)" + if [[ "$(uname)" == "Darwin" ]]; then + auth_basic_program="/opt/homebrew/Cellar/squid/${squid_version}/libexec/basic_ncsa_auth" + else + if [[ -e '/usr/lib64/squid/basic_ncsa_auth' ]]; then + auth_basic_program="/usr/lib64/squid/basic_ncsa_auth" + else + auth_basic_program="/usr/lib/squid/basic_ncsa_auth" + fi + fi + + echo '# Add authentification options' >>"${conf_file}" + echo "auth_param basic program ${auth_basic_program} squid_users" >>"${conf_file}" + echo 'auth_param basic children 3' >>"${conf_file}" + echo 'auth_param basic realm Squid BA' >>"${conf_file}" + echo 'acl auth_users proxy_auth REQUIRED' >>"${conf_file}" + echo 'http_access allow auth_users' >>"${conf_file}" + echo 'http_access deny all' >>"${conf_file}" +else + echo 'http_access allow localhost' >>"${conf_file}" +fi + +exec squid -N -f "${conf_file}" From afde74cbcd297e010d6ab9778fcbccf98e1f793c Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 12:35:25 +0200 Subject: [PATCH 09/32] Add Python test server --- .github/workflows/Linux.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/Linux.yml b/.github/workflows/Linux.yml index 07d9f95..de2f057 100644 --- a/.github/workflows/Linux.yml +++ b/.github/workflows/Linux.yml @@ -32,6 +32,8 @@ jobs: DUCKDB_S3_USE_SSL: false HTTP_PROXY_PUBLIC: localhost:3128 TEST_PERSISTENT_SECRETS_AVAILABLE: true + PYTHON_HTTP_SERVER_URL: http://localhost:8008 + PYTHON_HTTP_SERVER_DIR: /tmp/python_test_server GEN: ninja steps: @@ -62,6 +64,13 @@ jobs: sudo apt-get install squid ./scripts/run_squid.sh --port 3128 --log_dir squid_logs & + - name: Run & Populate Python test server + shell: bash + run: | + mkdir -p $PYTHON_HTTP_SERVER_DIR + cd $PYTHON_HTTP_SERVER_DIR + python3 -m http.server 8008 & + - name: Build shell: bash run: make reldebug From 3c7c58672c4649e8dcf83dbbc9543f9aa9bb518e Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 12:36:42 +0200 Subject: [PATCH 10/32] Remove minio tests --- .github/workflows/MinioTests.yml | 79 -------------------------------- 1 file changed, 79 deletions(-) delete mode 100644 .github/workflows/MinioTests.yml diff --git a/.github/workflows/MinioTests.yml b/.github/workflows/MinioTests.yml deleted file mode 100644 index e457f0c..0000000 --- a/.github/workflows/MinioTests.yml +++ /dev/null @@ -1,79 +0,0 @@ -name: Minio Tests -on: [push, pull_request,repository_dispatch] -concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }} - cancel-in-progress: true -defaults: - 
run: - shell: bash - -jobs: - minio-tests: - name: Minio Tests - runs-on: ubuntu-24.04 - env: - S3_TEST_SERVER_AVAILABLE: 1 - AWS_DEFAULT_REGION: eu-west-1 - AWS_ACCESS_KEY_ID: minio_duckdb_user - AWS_SECRET_ACCESS_KEY: minio_duckdb_user_password - DUCKDB_S3_ENDPOINT: duckdb-minio.com:9000 - DUCKDB_S3_USE_SSL: false - CORE_EXTENSIONS: 'parquet;json' - GEN: ninja - VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake - VCPKG_TARGET_TRIPLET: x64-linux - PYTHON_HTTP_SERVER_URL: http://localhost:8008 - PYTHON_HTTP_SERVER_DIR: /tmp/python_test_server - - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - submodules: 'true' - - - uses: actions/setup-python@v4 - with: - python-version: '3.10' - - - name: Install Ninja - shell: bash - run: sudo apt-get update -y -qq && sudo apt-get install -y -qq ninja-build - - - name: Setup Ccache - uses: hendrikmuhs/ccache-action@main - with: - key: ${{ github.job }} - save: ${{ github.ref == 'refs/heads/main' || github.repository != 'duckdb/duckdb' }} - - - name: Setup vcpkg - uses: lukka/run-vcpkg@v11.1 - with: - vcpkgGitCommitId: 5e5d0e1cd7785623065e77eff011afdeec1a3574 - - - name: Build - shell: bash - run: | - echo -e "\nduckdb_extension_load(tpch)\n" >> extension_config.cmake - make - - - name: Start S3/HTTP test server - shell: bash - run: | - cd duckdb - mkdir data/attach_test - touch data/attach_test/attach.db - sudo ./scripts/install_s3_test_server.sh - source ./scripts/run_s3_test_server.sh - sleep 30 - - - name: Run & Populate test server - shell: bash - run: | - mkdir -p $PYTHON_HTTP_SERVER_DIR - cd $PYTHON_HTTP_SERVER_DIR - python3 -m http.server 8008 & - - - name: Test - shell: bash - run: | - make test \ No newline at end of file From 0b8667eae603d82ba2e1a4946934ce30702fa644 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 12:37:02 +0200 Subject: [PATCH 11/32] Rename to integration tests --- .github/workflows/Linux.yml => IntegrationTests.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/Linux.yml => IntegrationTests.yml (100%) diff --git a/.github/workflows/Linux.yml b/IntegrationTests.yml similarity index 100% rename from .github/workflows/Linux.yml rename to IntegrationTests.yml From ca8ab730807c8b1b9b1e89cfdac952b07e5b09cf Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 12:37:32 +0200 Subject: [PATCH 12/32] Move to correct directory --- IntegrationTests.yml => .github/workflows/IntegrationTests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename IntegrationTests.yml => .github/workflows/IntegrationTests.yml (99%) diff --git a/IntegrationTests.yml b/.github/workflows/IntegrationTests.yml similarity index 99% rename from IntegrationTests.yml rename to .github/workflows/IntegrationTests.yml index de2f057..4398055 100644 --- a/IntegrationTests.yml +++ b/.github/workflows/IntegrationTests.yml @@ -8,7 +8,7 @@ defaults: shell: bash jobs: - linux-tests-postgres: + linux-tests-httpfs: name: Run tests on Linux runs-on: ubuntu-latest strategy: From 985d4bae268fa1ff25ba8a80c3dfd3a1418b7163 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 12:42:03 +0200 Subject: [PATCH 13/32] Remove test configs --- Makefile | 14 -------------- test/configs/duckdb-tests.json | 22 ---------------------- 2 files changed, 36 deletions(-) delete mode 100644 test/configs/duckdb-tests.json diff --git a/Makefile b/Makefile index 48de9ee..c7db037 100644 --- a/Makefile +++ b/Makefile @@ -8,17 +8,3 @@ CORE_EXTENSIONS='' # Include the Makefile from 
extension-ci-tools include extension-ci-tools/makefiles/duckdb_extension.Makefile - - -## Add some more extra tests -test_release_internal: - ./build/release/$(TEST_PATH) "$(PROJ_DIR)test/*" - ./build/release/$(TEST_PATH) --test-dir duckdb --test-config test/configs/duckdb-tests.json - -test_debug_internal: - ./build/debug/$(TEST_PATH) "$(PROJ_DIR)test/*" - ./build/debug/$(TEST_PATH) --test-dir duckdb --test-config test/configs/duckdb-tests.json - -test_reldebug_internal: - ./build/reldebug/$(TEST_PATH) "$(PROJ_DIR)test/*" - ./build/reldebug/$(TEST_PATH) --test-dir duckdb --test-config test/configs/duckdb-tests.json diff --git a/test/configs/duckdb-tests.json b/test/configs/duckdb-tests.json deleted file mode 100644 index aa4975b..0000000 --- a/test/configs/duckdb-tests.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "description": "Run tests with httpfs loaded", - "statically_loaded_extensions": ["core_functions","parquet","httpfs"], - "skip_compiled": "true", - "skip_tests": [ - { - "reason": "Secrets & zstd are supported with httpfs loaded", - "paths": [ - "test/sql/secrets/create_secret_hffs_autoload.test", - "test/sql/secrets/secret_types_function.test", - "test/sql/secrets/secret_autoloading_errors.test", - "test/sql/copy/csv/zstd_crash.test" - ] - }, - { - "reason": "Improved from 1 PUT + 2 POST to 1 PUT", - "paths": [ - "test/sql/copy/s3/metadata_cache.test" - ] - } - ] -} From 8b5503dcf1e31498149c70b706314d7174849371 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 12:42:56 +0200 Subject: [PATCH 14/32] Add core extensions --- .github/workflows/IntegrationTests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/IntegrationTests.yml b/.github/workflows/IntegrationTests.yml index 4398055..92ceb83 100644 --- a/.github/workflows/IntegrationTests.yml +++ b/.github/workflows/IntegrationTests.yml @@ -34,6 +34,7 @@ jobs: TEST_PERSISTENT_SECRETS_AVAILABLE: true PYTHON_HTTP_SERVER_URL: http://localhost:8008 PYTHON_HTTP_SERVER_DIR: /tmp/python_test_server + CORE_EXTENSIONS: "parquet;json" GEN: ninja steps: From d0575ff9592254b784899ea4814c5fecab7e68b6 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 12:44:25 +0200 Subject: [PATCH 15/32] Add ccache --- .github/workflows/IntegrationTests.yml | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/.github/workflows/IntegrationTests.yml b/.github/workflows/IntegrationTests.yml index 92ceb83..aabe463 100644 --- a/.github/workflows/IntegrationTests.yml +++ b/.github/workflows/IntegrationTests.yml @@ -48,6 +48,12 @@ jobs: fetch-depth: 0 submodules: 'true' + - name: Setup Ccache + uses: hendrikmuhs/ccache-action@main + with: + key: ${{ github.job }} + save: ${{ github.ref == 'refs/heads/main' || github.repository != 'duckdb/duckdb' }} + - name: Checkout DuckDB to version if: ${{ matrix.duckdb_version != ''}} run: | @@ -74,9 +80,9 @@ jobs: - name: Build shell: bash - run: make reldebug + run: make - - name: Start test server & run tests + - name: Start test server shell: bash run: | sudo ./scripts/install_s3_test_server.sh @@ -85,4 +91,7 @@ jobs: source ./scripts/set_s3_test_server_variables.sh sleep 60 - ./build/reldebug/test/unittest '*' + - name: Run tests + shell: bash + run: | + make test From 8e95da4085ce90ef9a1bd886630edd50a8382656 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 12:46:32 +0200 Subject: [PATCH 16/32] Remove matrix --- .github/workflows/IntegrationTests.yml | 116 ++++++++++++------------- 1 file changed, 54 insertions(+), 62 deletions(-) diff 
--git a/.github/workflows/IntegrationTests.yml b/.github/workflows/IntegrationTests.yml index aabe463..4d55c56 100644 --- a/.github/workflows/IntegrationTests.yml +++ b/.github/workflows/IntegrationTests.yml @@ -11,19 +11,7 @@ jobs: linux-tests-httpfs: name: Run tests on Linux runs-on: ubuntu-latest - strategy: - matrix: - # Add commits/tags to build against other DuckDB versions - duckdb_version: [ '' ] - arch: ['linux_amd64'] - vcpkg_version: [ '2023.04.15' ] - include: - - arch: 'linux_amd64' - vcpkg_triplet: 'x64-linux' - env: - VCPKG_TARGET_TRIPLET: ${{ matrix.vcpkg_triplet }} - VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake S3_TEST_SERVER_AVAILABLE: 1 AWS_DEFAULT_REGION: eu-west-1 AWS_ACCESS_KEY_ID: minio_duckdb_user @@ -32,66 +20,70 @@ jobs: DUCKDB_S3_USE_SSL: false HTTP_PROXY_PUBLIC: localhost:3128 TEST_PERSISTENT_SECRETS_AVAILABLE: true - PYTHON_HTTP_SERVER_URL: http://localhost:8008 - PYTHON_HTTP_SERVER_DIR: /tmp/python_test_server CORE_EXTENSIONS: "parquet;json" GEN: ninja + VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake + VCPKG_TARGET_TRIPLET: x64-linux + PYTHON_HTTP_SERVER_URL: http://localhost:8008 + PYTHON_HTTP_SERVER_DIR: /tmp/python_test_server steps: - - name: Install required ubuntu packages - run: | - sudo apt-get update -y -qq - sudo apt-get install -y -qq build-essential cmake ninja-build ccache python3 libcurl4-openssl-dev + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: 'true' + + - uses: actions/setup-python@v4 + with: + python-version: '3.10' - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - submodules: 'true' + - name: Install Ninja + shell: bash + run: sudo apt-get update -y -qq && sudo apt-get install -y -qq ninja-build - - name: Setup Ccache - uses: hendrikmuhs/ccache-action@main - with: - key: ${{ github.job }} - save: ${{ github.ref == 'refs/heads/main' || github.repository != 'duckdb/duckdb' }} + - name: Setup Ccache + uses: hendrikmuhs/ccache-action@main + with: + key: ${{ github.job }} + save: ${{ github.ref == 'refs/heads/main' || github.repository != 'duckdb/duckdb' }} - - name: Checkout DuckDB to version - if: ${{ matrix.duckdb_version != ''}} - run: | - cd duckdb - git checkout ${{ matrix.duckdb_version }} + - name: Setup vcpkg + uses: lukka/run-vcpkg@v11.1 + with: + vcpkgGitCommitId: 5e5d0e1cd7785623065e77eff011afdeec1a3574 - - name: Fix permissions of test secrets - shell: bash - run: chmod -R 700 data/secrets + - name: Fix permissions of test secrets + shell: bash + run: chmod -R 700 data/secrets - # TODO: fix the authenticated proxy here - - name: Install and run http proxy squid - shell: bash - run: | - sudo apt-get install squid - ./scripts/run_squid.sh --port 3128 --log_dir squid_logs & + # TODO: fix the authenticated proxy here + - name: Install and run http proxy squid + shell: bash + run: | + sudo apt-get install squid + ./scripts/run_squid.sh --port 3128 --log_dir squid_logs & - - name: Run & Populate Python test server - shell: bash - run: | - mkdir -p $PYTHON_HTTP_SERVER_DIR - cd $PYTHON_HTTP_SERVER_DIR - python3 -m http.server 8008 & + - name: Run & Populate Python test server + shell: bash + run: | + mkdir -p $PYTHON_HTTP_SERVER_DIR + cd $PYTHON_HTTP_SERVER_DIR + python3 -m http.server 8008 & - - name: Build - shell: bash - run: make + - name: Build + shell: bash + run: make - - name: Start test server - shell: bash - run: | - sudo ./scripts/install_s3_test_server.sh - ./scripts/generate_presigned_url.sh - source 
./scripts/run_s3_test_server.sh - source ./scripts/set_s3_test_server_variables.sh - sleep 60 + - name: Start test server + shell: bash + run: | + sudo ./scripts/install_s3_test_server.sh + ./scripts/generate_presigned_url.sh + source ./scripts/run_s3_test_server.sh + source ./scripts/set_s3_test_server_variables.sh + sleep 60 - - name: Run tests - shell: bash - run: | - make test + - name: Run tests + shell: bash + run: | + make test From 1869e5be72ff0196b58b6c308329be703a561ea7 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 13:02:04 +0200 Subject: [PATCH 17/32] Skip sleep if we build after setting up server --- .github/workflows/IntegrationTests.yml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/IntegrationTests.yml b/.github/workflows/IntegrationTests.yml index 4d55c56..a66d92c 100644 --- a/.github/workflows/IntegrationTests.yml +++ b/.github/workflows/IntegrationTests.yml @@ -45,7 +45,7 @@ jobs: uses: hendrikmuhs/ccache-action@main with: key: ${{ github.job }} - save: ${{ github.ref == 'refs/heads/main' || github.repository != 'duckdb/duckdb' }} + save: ${{ github.ref == 'refs/heads/main' || github.repository != 'duckdb/duckdb-httpfs' }} - name: Setup vcpkg uses: lukka/run-vcpkg@v11.1 @@ -70,10 +70,6 @@ jobs: cd $PYTHON_HTTP_SERVER_DIR python3 -m http.server 8008 & - - name: Build - shell: bash - run: make - - name: Start test server shell: bash run: | @@ -81,7 +77,10 @@ jobs: ./scripts/generate_presigned_url.sh source ./scripts/run_s3_test_server.sh source ./scripts/set_s3_test_server_variables.sh - sleep 60 + + - name: Build + shell: bash + run: make - name: Run tests shell: bash From a344369a8201f6a5b3a862130286198747e6bd8e Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 13:05:32 +0200 Subject: [PATCH 18/32] Add TPC-H --- .github/workflows/IntegrationTests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/IntegrationTests.yml b/.github/workflows/IntegrationTests.yml index a66d92c..a3274e7 100644 --- a/.github/workflows/IntegrationTests.yml +++ b/.github/workflows/IntegrationTests.yml @@ -9,7 +9,7 @@ defaults: jobs: linux-tests-httpfs: - name: Run tests on Linux + name: MinIO Tests runs-on: ubuntu-latest env: S3_TEST_SERVER_AVAILABLE: 1 @@ -20,7 +20,7 @@ jobs: DUCKDB_S3_USE_SSL: false HTTP_PROXY_PUBLIC: localhost:3128 TEST_PERSISTENT_SECRETS_AVAILABLE: true - CORE_EXTENSIONS: "parquet;json" + CORE_EXTENSIONS: "parquet;json;tpch" GEN: ninja VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake VCPKG_TARGET_TRIPLET: x64-linux From 2f9b22bb4bbb518d25af6dd28d335f6ceb3d16dc Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 13:06:00 +0200 Subject: [PATCH 19/32] We need DuckDB --- .github/workflows/IntegrationTests.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/IntegrationTests.yml b/.github/workflows/IntegrationTests.yml index a3274e7..e0cf555 100644 --- a/.github/workflows/IntegrationTests.yml +++ b/.github/workflows/IntegrationTests.yml @@ -70,6 +70,10 @@ jobs: cd $PYTHON_HTTP_SERVER_DIR python3 -m http.server 8008 & + - name: Build + shell: bash + run: make + - name: Start test server shell: bash run: | @@ -77,10 +81,7 @@ jobs: ./scripts/generate_presigned_url.sh source ./scripts/run_s3_test_server.sh source ./scripts/set_s3_test_server_variables.sh - - - name: Build - shell: bash - run: make + sleep 30 - name: Run tests shell: bash From 
cc3ac74cd2312ce6ad4edf78381c48a1faa3959f Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 13:16:17 +0200 Subject: [PATCH 20/32] Remove easter egg --- Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/Makefile b/Makefile index c7db037..ed12193 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,5 @@ PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) EXT_NAME=httpfs EXT_CONFIG=${PROJ_DIR}extension_config.cmake -CORE_EXTENSIONS='' - # Include the Makefile from extension-ci-tools include extension-ci-tools/makefiles/duckdb_extension.Makefile From cd008abcfed31f53784551be4e89575606205adb Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 13:34:18 +0200 Subject: [PATCH 21/32] Fix TPC-H tests --- test/sql/attach/attach_s3_tpch.test_slow | 4 ++-- .../copy/parquet/parquet_encrypted_tpch_httpfs.test_slow | 8 ++++---- test/sql/copy/s3/hive_partitioned_write_s3.test_slow | 4 ++-- test/sql/copy/s3/parquet_s3_tpch.test_slow | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/test/sql/attach/attach_s3_tpch.test_slow b/test/sql/attach/attach_s3_tpch.test_slow index 721be60..20df6e6 100644 --- a/test/sql/attach/attach_s3_tpch.test_slow +++ b/test/sql/attach/attach_s3_tpch.test_slow @@ -41,7 +41,7 @@ loop i 1 9 query I PRAGMA tpch(${i}) ---- -:extension/tpch/dbgen/answers/sf1/q0${i}.csv +:duckdb/extension/tpch/dbgen/answers/sf1/q0${i}.csv endloop @@ -50,7 +50,7 @@ loop i 10 23 query I PRAGMA tpch(${i}) ---- -:extension/tpch/dbgen/answers/sf1/q${i}.csv +:duckdb/extension/tpch/dbgen/answers/sf1/q${i}.csv endloop diff --git a/test/sql/copy/parquet/parquet_encrypted_tpch_httpfs.test_slow b/test/sql/copy/parquet/parquet_encrypted_tpch_httpfs.test_slow index d4697a4..b03427f 100644 --- a/test/sql/copy/parquet/parquet_encrypted_tpch_httpfs.test_slow +++ b/test/sql/copy/parquet/parquet_encrypted_tpch_httpfs.test_slow @@ -31,7 +31,7 @@ loop i 1 9 query I PRAGMA tpch(${i}) ---- -:extension/tpch/dbgen/answers/sf1/q0${i}.csv +:duckdb/extension/tpch/dbgen/answers/sf1/q0${i}.csv endloop @@ -40,7 +40,7 @@ loop i 10 23 query I PRAGMA tpch(${i}) ---- -:extension/tpch/dbgen/answers/sf1/q${i}.csv +:duckdb/extension/tpch/dbgen/answers/sf1/q${i}.csv endloop @@ -80,7 +80,7 @@ loop i 1 9 query I PRAGMA tpch(${i}) ---- -:extension/tpch/dbgen/answers/sf1/q0${i}.csv +:duckdb/extension/tpch/dbgen/answers/sf1/q0${i}.csv endloop @@ -89,6 +89,6 @@ loop i 10 23 query I PRAGMA tpch(${i}) ---- -:extension/tpch/dbgen/answers/sf1/q${i}.csv +:duckdb/extension/tpch/dbgen/answers/sf1/q${i}.csv endloop \ No newline at end of file diff --git a/test/sql/copy/s3/hive_partitioned_write_s3.test_slow b/test/sql/copy/s3/hive_partitioned_write_s3.test_slow index 05c6d14..fd96685 100644 --- a/test/sql/copy/s3/hive_partitioned_write_s3.test_slow +++ b/test/sql/copy/s3/hive_partitioned_write_s3.test_slow @@ -58,7 +58,7 @@ loop i 1 9 query I PRAGMA tpch(${i}) ---- -:extension/tpch/dbgen/answers/sf1/q0${i}.csv +:duckdb/extension/tpch/dbgen/answers/sf1/q0${i}.csv endloop @@ -67,6 +67,6 @@ loop i 10 23 query I PRAGMA tpch(${i}) ---- -:extension/tpch/dbgen/answers/sf1/q${i}.csv +:duckdb/extension/tpch/dbgen/answers/sf1/q${i}.csv endloop diff --git a/test/sql/copy/s3/parquet_s3_tpch.test_slow b/test/sql/copy/s3/parquet_s3_tpch.test_slow index 425e245..2e3d79d 100644 --- a/test/sql/copy/s3/parquet_s3_tpch.test_slow +++ b/test/sql/copy/s3/parquet_s3_tpch.test_slow @@ -78,7 +78,7 @@ loop i 1 9 query I PRAGMA tpch(${i}) ---- -:extension/tpch/dbgen/answers/sf0.01/q0${i}.csv 
+:duckdb/extension/tpch/dbgen/answers/sf0.01/q0${i}.csv endloop @@ -87,6 +87,6 @@ loop i 10 23 query I PRAGMA tpch(${i}) ---- -:extension/tpch/dbgen/answers/sf0.01/q${i}.csv +:duckdb/extension/tpch/dbgen/answers/sf0.01/q${i}.csv endloop From ffe3b8dbbab3a29f8b72f509d94a307f77f58b62 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 13:35:09 +0200 Subject: [PATCH 22/32] Set variables in test correctly --- .github/workflows/IntegrationTests.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/IntegrationTests.yml b/.github/workflows/IntegrationTests.yml index e0cf555..c2133bd 100644 --- a/.github/workflows/IntegrationTests.yml +++ b/.github/workflows/IntegrationTests.yml @@ -74,16 +74,16 @@ jobs: shell: bash run: make - - name: Start test server + - name: Install test server shell: bash run: | sudo ./scripts/install_s3_test_server.sh ./scripts/generate_presigned_url.sh - source ./scripts/run_s3_test_server.sh - source ./scripts/set_s3_test_server_variables.sh - sleep 30 - - name: Run tests + - name: Start test server & run tests shell: bash run: | + source ./scripts/run_s3_test_server.sh + source ./scripts/set_s3_test_server_variables.sh + sleep 30 make test From 9db159c489a81f05b3be11dce793f01b17493e09 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 13:50:36 +0200 Subject: [PATCH 23/32] Hopefully remove unnecessary sleeps and instead wait for processes to finish --- .github/workflows/IntegrationTests.yml | 1 - scripts/minio_s3.yml | 1 + scripts/run_s3_test_server.sh | 11 ++++++++++- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/.github/workflows/IntegrationTests.yml b/.github/workflows/IntegrationTests.yml index c2133bd..edf1e5f 100644 --- a/.github/workflows/IntegrationTests.yml +++ b/.github/workflows/IntegrationTests.yml @@ -85,5 +85,4 @@ jobs: run: | source ./scripts/run_s3_test_server.sh source ./scripts/set_s3_test_server_variables.sh - sleep 30 make test diff --git a/scripts/minio_s3.yml b/scripts/minio_s3.yml index 6dbbae0..148d657 100644 --- a/scripts/minio_s3.yml +++ b/scripts/minio_s3.yml @@ -77,5 +77,6 @@ services: /usr/bin/mc share download myminio/test-bucket/presigned/lineitem_large.parquet /usr/bin/mc share download myminio/test-bucket/presigned/attach.db + echo 'FINISHED SETTING UP MINIO' exit 0; " \ No newline at end of file diff --git a/scripts/run_s3_test_server.sh b/scripts/run_s3_test_server.sh index d202258..d89dad8 100755 --- a/scripts/run_s3_test_server.sh +++ b/scripts/run_s3_test_server.sh @@ -11,10 +11,19 @@ else docker compose -f scripts/minio_s3.yml -p duckdb-minio up -d # for testing presigned url - sleep 10 container_name=$(docker ps -a --format '{{.Names}}' | grep -m 1 "duckdb-minio") echo $container_name + for i in $(seq 1 360); + do + logs=$(docker logs $container_name 2>/dev/null | grep 'FINISHED SETTING UP MINIO') + if [ ! 
-z "${logs}" ]; then + break + fi + sleep 1 + done + + export S3_SMALL_CSV_PRESIGNED_URL=$(docker logs $container_name 2>/dev/null | grep -m 1 'Share:.*phonenumbers\.csv' | grep -o 'http[s]\?://[^ ]\+') echo $S3_SMALL_CSV_PRESIGNED_URL From 7c32be1d3f7175d0cfd2b8c783d1d9c2e0fbbed5 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 13:54:23 +0200 Subject: [PATCH 24/32] Bump extension-ci-tools --- extension-ci-tools | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extension-ci-tools b/extension-ci-tools index 90757de..ba18d4f 160000 --- a/extension-ci-tools +++ b/extension-ci-tools @@ -1 +1 @@ -Subproject commit 90757de3f06c6802cd49732849b9e46eef75761f +Subproject commit ba18d4f106a6cc1d5597f442bac06a1d7db098ef From 3ac3ba879c04821e0c30876fa7cf1c6dc9c16d85 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 13:54:48 +0200 Subject: [PATCH 25/32] Remove legacy LOAD_TESTS --- extension_config.cmake | 8 -------- 1 file changed, 8 deletions(-) diff --git a/extension_config.cmake b/extension_config.cmake index 5881043..1b0138c 100644 --- a/extension_config.cmake +++ b/extension_config.cmake @@ -1,18 +1,10 @@ # This file is included by DuckDB's build system. It specifies which extension to load ################# HTTPFS -# Windows MinGW tests for httpfs currently not working -if (NOT MINGW) - set(LOAD_HTTPFS_TESTS "LOAD_TESTS") -else () - set(LOAD_HTTPFS_TESTS "") -endif() - duckdb_extension_load(json) duckdb_extension_load(parquet) duckdb_extension_load(httpfs SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR} INCLUDE_DIR ${CMAKE_CURRENT_LIST_DIR}/extension/httpfs/include - ${LOAD_HTTPFS_TESTS} ) From 9a14aa438407d472d9dd80e7354fd8b0731ad569 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 14:03:18 +0200 Subject: [PATCH 26/32] m 1 --- scripts/run_s3_test_server.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/run_s3_test_server.sh b/scripts/run_s3_test_server.sh index d89dad8..8195902 100755 --- a/scripts/run_s3_test_server.sh +++ b/scripts/run_s3_test_server.sh @@ -16,7 +16,7 @@ else for i in $(seq 1 360); do - logs=$(docker logs $container_name 2>/dev/null | grep 'FINISHED SETTING UP MINIO') + logs=$(docker logs $container_name 2>/dev/null | grep -m 1 'FINISHED SETTING UP MINIO') if [ ! -z "${logs}" ]; then break fi From 295078867ac638a1873882ff6a60a10ae68b884b Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 14:10:25 +0200 Subject: [PATCH 27/32] Maybe it's the name --- scripts/run_s3_test_server.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/run_s3_test_server.sh b/scripts/run_s3_test_server.sh index 8195902..2abed50 100755 --- a/scripts/run_s3_test_server.sh +++ b/scripts/run_s3_test_server.sh @@ -16,8 +16,8 @@ else for i in $(seq 1 360); do - logs=$(docker logs $container_name 2>/dev/null | grep -m 1 'FINISHED SETTING UP MINIO') - if [ ! -z "${logs}" ]; then + docker_finish_logs=$(docker logs $container_name 2>/dev/null | grep -m 1 'FINISHED SETTING UP MINIO') + if [ ! 
-z "${docker_finish_logs}" ]; then break fi sleep 1 From 9c789d7379291c477453da7696d7a784eaa90ee3 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 14:16:56 +0200 Subject: [PATCH 28/32] Debug code yay --- scripts/run_s3_test_server.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/run_s3_test_server.sh b/scripts/run_s3_test_server.sh index 2abed50..ac89491 100755 --- a/scripts/run_s3_test_server.sh +++ b/scripts/run_s3_test_server.sh @@ -14,12 +14,17 @@ else container_name=$(docker ps -a --format '{{.Names}}' | grep -m 1 "duckdb-minio") echo $container_name + echo "Wait for Docker to start" for i in $(seq 1 360); do + echo $i docker_finish_logs=$(docker logs $container_name 2>/dev/null | grep -m 1 'FINISHED SETTING UP MINIO') + echo $docker_finish_logs if [ ! -z "${docker_finish_logs}" ]; then + echo "Started! Break" break fi + echo "Sleep 1" sleep 1 done From 5d43db7562639c1878e3caa410ecb6612d32d654 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 14:27:41 +0200 Subject: [PATCH 29/32] No 2>/dev/null --- scripts/run_s3_test_server.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/run_s3_test_server.sh b/scripts/run_s3_test_server.sh index ac89491..109ed57 100755 --- a/scripts/run_s3_test_server.sh +++ b/scripts/run_s3_test_server.sh @@ -18,7 +18,7 @@ else for i in $(seq 1 360); do echo $i - docker_finish_logs=$(docker logs $container_name 2>/dev/null | grep -m 1 'FINISHED SETTING UP MINIO') + docker_finish_logs=$(docker logs $container_name | grep -m 1 'FINISHED SETTING UP MINIO') echo $docker_finish_logs if [ ! -z "${docker_finish_logs}" ]; then echo "Started! Break" From 6a20399b67d7bd325f3781cf0a125ca5557045ff Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 14:29:03 +0200 Subject: [PATCH 30/32] Empty string --- scripts/run_s3_test_server.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/run_s3_test_server.sh b/scripts/run_s3_test_server.sh index 109ed57..71f4902 100755 --- a/scripts/run_s3_test_server.sh +++ b/scripts/run_s3_test_server.sh @@ -18,7 +18,7 @@ else for i in $(seq 1 360); do echo $i - docker_finish_logs=$(docker logs $container_name | grep -m 1 'FINISHED SETTING UP MINIO') + docker_finish_logs=$(docker logs $container_name 2>/dev/null | grep -m 1 'FINISHED SETTING UP MINIO' || '') echo $docker_finish_logs if [ ! -z "${docker_finish_logs}" ]; then echo "Started! Break" From 7a020f375ef54037003293b5a706e06c0a609c6b Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 14:38:06 +0200 Subject: [PATCH 31/32] echo --- scripts/run_s3_test_server.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/run_s3_test_server.sh b/scripts/run_s3_test_server.sh index 71f4902..a3e728f 100755 --- a/scripts/run_s3_test_server.sh +++ b/scripts/run_s3_test_server.sh @@ -18,7 +18,7 @@ else for i in $(seq 1 360); do echo $i - docker_finish_logs=$(docker logs $container_name 2>/dev/null | grep -m 1 'FINISHED SETTING UP MINIO' || '') + docker_finish_logs=$(docker logs $container_name 2>/dev/null | grep -m 1 'FINISHED SETTING UP MINIO' || echo '') echo $docker_finish_logs if [ ! -z "${docker_finish_logs}" ]; then echo "Started! 
Break" From 98848a6196f802f765ac3282183f568fa59d0d49 Mon Sep 17 00:00:00 2001 From: Mytherin Date: Thu, 25 Sep 2025 14:46:25 +0200 Subject: [PATCH 32/32] Remove extra echos --- scripts/run_s3_test_server.sh | 5 ----- 1 file changed, 5 deletions(-) diff --git a/scripts/run_s3_test_server.sh b/scripts/run_s3_test_server.sh index a3e728f..5ac2d52 100755 --- a/scripts/run_s3_test_server.sh +++ b/scripts/run_s3_test_server.sh @@ -14,17 +14,12 @@ else container_name=$(docker ps -a --format '{{.Names}}' | grep -m 1 "duckdb-minio") echo $container_name - echo "Wait for Docker to start" for i in $(seq 1 360); do - echo $i docker_finish_logs=$(docker logs $container_name 2>/dev/null | grep -m 1 'FINISHED SETTING UP MINIO' || echo '') - echo $docker_finish_logs if [ ! -z "${docker_finish_logs}" ]; then - echo "Started! Break" break fi - echo "Sleep 1" sleep 1 done