From 8170be56cc2c35d74185c41315983720d746169b Mon Sep 17 00:00:00 2001 From: Eitan Lees Date: Sat, 20 Jun 2020 15:10:16 -0400 Subject: [PATCH 1/5] ENH: Change to CDN for base url --- vega_datasets/__init__.py | 8 ++++---- vega_datasets/core.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/vega_datasets/__init__.py b/vega_datasets/__init__.py index b614f10..33a5bb1 100644 --- a/vega_datasets/__init__.py +++ b/vega_datasets/__init__.py @@ -1,9 +1,9 @@ +# This is the tag in http://github.com/vega/vega-datasets from +# which the datasets in this repository are sourced. +SOURCE_TAG = "v1.29.0" + from vega_datasets.core import DataLoader, LocalDataLoader data = DataLoader() local_data = LocalDataLoader() __version__ = "0.9.0dev0" - -# This is the tag in http://github.com/vega/vega-datasets from -# which the datasets in this repository are sourced. -SOURCE_TAG = "v1.29.0" diff --git a/vega_datasets/core.py b/vega_datasets/core.py index 3c798fe..7d39f04 100644 --- a/vega_datasets/core.py +++ b/vega_datasets/core.py @@ -5,7 +5,7 @@ import textwrap from typing import Any, Dict, Iterable, List from urllib.request import urlopen - +from vega_datasets import SOURCE_TAG import pandas as pd @@ -91,7 +91,7 @@ class Dataset(object): _reference_info = """ For information on this dataset, see https://github.com/vega/vega-datasets/ """ - base_url = "https://vega.github.io/vega-datasets/data/" + base_url = "https://cdn.jsdelivr.net/npm/vega-datasets@" + SOURCE_TAG + "/data/" _dataset_info = _load_dataset_info() _pd_read_kwds = {} # type: Dict[str, Any] _return_type = pd.DataFrame From c02dee5a8bb76ab4ac0131e2b510f0ea6dcc2fd4 Mon Sep 17 00:00:00 2001 From: Eitan Lees Date: Sat, 20 Jun 2020 15:15:41 -0400 Subject: [PATCH 2/5] MAINT: Update readme with new url example --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ae48e71..a6c898c 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ If you're curious about the source data, you can access the URL for any of the a ```python >>> data.iris.url -'https://vega.github.io/vega-datasets/data/iris.json' +'https://cdn.jsdelivr.net/npm/vega-datasets@v1.29.0/data/iris.json' ``` For datasets bundled with the package, you can also find their location on disk: From e06573e26c97fde0362645151936453130be7c04 Mon Sep 17 00:00:00 2001 From: Eitan Lees Date: Sat, 20 Jun 2020 15:25:52 -0400 Subject: [PATCH 3/5] FIX: Move SOURCE_TAG to core.py --- tools/generate_datasets_json.py | 8 ++++---- vega_datasets/__init__.py | 4 ---- vega_datasets/core.py | 5 ++++- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/tools/generate_datasets_json.py b/tools/generate_datasets_json.py index d4e19f0..9ee79b1 100644 --- a/tools/generate_datasets_json.py +++ b/tools/generate_datasets_json.py @@ -28,7 +28,7 @@ def main(tag): datasets_file = os.path.abspath( os.path.join(cwd, "..", "vega_datasets", "datasets.json") ) - init_file = os.path.abspath(os.path.join(cwd, "..", "vega_datasets", "__init__.py")) + core_file = os.path.abspath(os.path.join(cwd, "..", "vega_datasets", "__init__.py")) print(f"Extracting datasets from {data_dir}") datasets = {} @@ -40,11 +40,11 @@ def main(tag): with open(datasets_file, "w") as f: json.dump(datasets, f, indent=2, sort_keys=True) - print("Updating init file") + print("Updating SOURCE_TAG in core file") subprocess.check_call( - ["sed", "-i", ".bak", f"s/SOURCE_TAG.*/SOURCE_TAG = {tag!r}/g", init_file] + ["sed", "-i", ".bak", f"s/SOURCE_TAG.*/SOURCE_TAG = {tag!r}/g", core_file] ) - subprocess.check_call(["rm", f"{init_file}.bak"]) + subprocess.check_call(["rm", f"{core_file}.bak"]) if __name__ == "__main__": diff --git a/vega_datasets/__init__.py b/vega_datasets/__init__.py index 33a5bb1..191c6a6 100644 --- a/vega_datasets/__init__.py +++ b/vega_datasets/__init__.py @@ -1,7 +1,3 @@ -# This is the tag in http://github.com/vega/vega-datasets from -# which the datasets in this repository are sourced. -SOURCE_TAG = "v1.29.0" - from vega_datasets.core import DataLoader, LocalDataLoader data = DataLoader() diff --git a/vega_datasets/core.py b/vega_datasets/core.py index 7d39f04..4c9cc8b 100644 --- a/vega_datasets/core.py +++ b/vega_datasets/core.py @@ -5,9 +5,12 @@ import textwrap from typing import Any, Dict, Iterable, List from urllib.request import urlopen -from vega_datasets import SOURCE_TAG import pandas as pd +# This is the tag in http://github.com/vega/vega-datasets from +# which the datasets in this repository are sourced. +SOURCE_TAG = "v1.29.0" + def _load_dataset_info() -> Dict[str, Dict[str, Any]]: """This loads dataset info from three package files: From c4573a38f4d108853154d2bbed0f70d819716cfe Mon Sep 17 00:00:00 2001 From: Eitan Lees Date: Sat, 20 Jun 2020 15:29:08 -0400 Subject: [PATCH 4/5] FIX: update core.py instead of __init__.py for SOURCE_TAG --- tools/generate_datasets_json.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/generate_datasets_json.py b/tools/generate_datasets_json.py index 9ee79b1..e7eff99 100644 --- a/tools/generate_datasets_json.py +++ b/tools/generate_datasets_json.py @@ -28,7 +28,7 @@ def main(tag): datasets_file = os.path.abspath( os.path.join(cwd, "..", "vega_datasets", "datasets.json") ) - core_file = os.path.abspath(os.path.join(cwd, "..", "vega_datasets", "__init__.py")) + core_file = os.path.abspath(os.path.join(cwd, "..", "vega_datasets", "core.py")) print(f"Extracting datasets from {data_dir}") datasets = {} From cc53249ac1c7d4ae2f7712dfececf41b4929eaf0 Mon Sep 17 00:00:00 2001 From: Eitan Lees Date: Sat, 20 Jun 2020 15:43:37 -0400 Subject: [PATCH 5/5] MAINT: update change log with url info --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index 656e8c3..e7bd06e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,6 +3,7 @@ Change Log Release v0.9 (unreleased) ------------------------- +- Change urls to use jsDelivr (a fast CDN) with a fixed version number, instead of GitHub. Release v0.8 (Dec 14, 2019) ---------------------------