Merge pull request #42 from eitanlees/use-CDN-for-base-url

ENH: Change to CDN for base url
altair-viz · Jun 22, 2020 · dcd18ed · dcd18ed
2 parents 70d6829 + cc53249
commit dcd18ed
Show file tree

Hide file tree

Showing 5 changed files with 11 additions and 11 deletions.
diff --git a/CHANGES.md b/CHANGES.md
@@ -3,6 +3,7 @@ Change Log
 
 Release v0.9 (unreleased)
 -------------------------
+- Change URLs to use jsDelivr (a fast CDN) with a fixed version number, instead of GitHub.
 
 Release v0.8 (Dec 14, 2019)
 ---------------------------

diff --git a/README.md b/README.md
@@ -47,7 +47,7 @@ If you're curious about the source data, you can access the URL for any of the a
 
 ```python
 >>> data.iris.url
-'https://vega.github.io/vega-datasets/data/iris.json'
+'https://cdn.jsdelivr.net/npm/vega-datasets@v1.29.0/data/iris.json'
 ```
 
 For datasets bundled with the package, you can also find their location on disk:

diff --git a/tools/generate_datasets_json.py b/tools/generate_datasets_json.py
@@ -28,7 +28,7 @@ def main(tag):
     datasets_file = os.path.abspath(
         os.path.join(cwd, "..", "vega_datasets", "datasets.json")
     )
-    init_file = os.path.abspath(os.path.join(cwd, "..", "vega_datasets", "__init__.py"))
+    core_file = os.path.abspath(os.path.join(cwd, "..", "vega_datasets", "core.py"))
 
     print(f"Extracting datasets from {data_dir}")
     datasets = {}
@@ -40,11 +40,11 @@ def main(tag):
     with open(datasets_file, "w") as f:
         json.dump(datasets, f, indent=2, sort_keys=True)
 
-    print("Updating init file")
+    print("Updating SOURCE_TAG in core file")
     subprocess.check_call(
-        ["sed", "-i", ".bak", f"s/SOURCE_TAG.*/SOURCE_TAG = {tag!r}/g", init_file]
+        ["sed", "-i", ".bak", f"s/SOURCE_TAG.*/SOURCE_TAG = {tag!r}/g", core_file]
     )
-    subprocess.check_call(["rm", f"{init_file}.bak"])
+    subprocess.check_call(["rm", f"{core_file}.bak"])
 
 
 if __name__ == "__main__":

diff --git a/vega_datasets/__init__.py b/vega_datasets/__init__.py
@@ -3,7 +3,3 @@
 data = DataLoader()
 local_data = LocalDataLoader()
 __version__ = "0.9.0dev0"
-
-# This is the tag in http://github.com/vega/vega-datasets from
-# which the datasets in this repository are sourced.
-SOURCE_TAG = "v1.29.0"
diff --git a/vega_datasets/core.py b/vega_datasets/core.py
@@ -5,9 +5,12 @@
 import textwrap
 from typing import Any, Dict, Iterable, List
 from urllib.request import urlopen
-
 import pandas as pd
 
+# This is the tag in http://github.com/vega/vega-datasets from
+# which the datasets in this repository are sourced.
+SOURCE_TAG = "v1.29.0"
+
 
 def _load_dataset_info() -> Dict[str, Dict[str, Any]]:
     """This loads dataset info from three package files:
@@ -91,7 +94,7 @@ class Dataset(object):
     _reference_info = """
     For information on this dataset, see https://github.com/vega/vega-datasets/
     """
-    base_url = "https://vega.github.io/vega-datasets/data/"
+    base_url = "https://cdn.jsdelivr.net/npm/vega-datasets@" + SOURCE_TAG + "/data/"
     _dataset_info = _load_dataset_info()
     _pd_read_kwds = {}  # type: Dict[str, Any]
     _return_type = pd.DataFrame
-Original file line number
+Diff line change
@@ Expand Up / @@ -3,6 +3,7 @@ Change Log @@
     Release v0.9 (unreleased)
     -------------------------
+    - Change URLs to use jsDelivr (a fast CDN) with a fixed version number, instead of GitHub.
     Release v0.8 (Dec 14, 2019)
     ---------------------------
@@ Expand Down @@