Skip to content

Commit

Permalink
Merge pull request #42 from eitanlees/use-CDN-for-base-url
Browse files Browse the repository at this point in the history
ENH: Change to CDN for base url
  • Loading branch information
jakevdp authored Jun 22, 2020
2 parents 70d6829 + cc53249 commit dcd18ed
Show file tree
Hide file tree
Showing 5 changed files with 11 additions and 11 deletions.
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ Change Log

Release v0.9 (unreleased)
-------------------------
- Change dataset URLs to use jsDelivr (a fast CDN) pinned to a fixed version number, instead of GitHub Pages.

Release v0.8 (Dec 14, 2019)
---------------------------
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ If you're curious about the source data, you can access the URL for any of the a

```python
>>> data.iris.url
'https://vega.github.io/vega-datasets/data/iris.json'
'https://cdn.jsdelivr.net/npm/vega-datasets@v1.29.0/data/iris.json'
```

For datasets bundled with the package, you can also find their location on disk:
Expand Down
8 changes: 4 additions & 4 deletions tools/generate_datasets_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def main(tag):
datasets_file = os.path.abspath(
os.path.join(cwd, "..", "vega_datasets", "datasets.json")
)
init_file = os.path.abspath(os.path.join(cwd, "..", "vega_datasets", "__init__.py"))
core_file = os.path.abspath(os.path.join(cwd, "..", "vega_datasets", "core.py"))

print(f"Extracting datasets from {data_dir}")
datasets = {}
Expand All @@ -40,11 +40,11 @@ def main(tag):
with open(datasets_file, "w") as f:
json.dump(datasets, f, indent=2, sort_keys=True)

print("Updating init file")
print("Updating SOURCE_TAG in core file")
subprocess.check_call(
["sed", "-i", ".bak", f"s/SOURCE_TAG.*/SOURCE_TAG = {tag!r}/g", init_file]
["sed", "-i", ".bak", f"s/SOURCE_TAG.*/SOURCE_TAG = {tag!r}/g", core_file]
)
subprocess.check_call(["rm", f"{init_file}.bak"])
subprocess.check_call(["rm", f"{core_file}.bak"])


if __name__ == "__main__":
Expand Down
4 changes: 0 additions & 4 deletions vega_datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,3 @@
data = DataLoader()
local_data = LocalDataLoader()
__version__ = "0.9.0dev0"

# This is the tag in http://github.com/vega/vega-datasets from
# which the datasets in this repository are sourced.
SOURCE_TAG = "v1.29.0"
7 changes: 5 additions & 2 deletions vega_datasets/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@
import textwrap
from typing import Any, Dict, Iterable, List
from urllib.request import urlopen

import pandas as pd

# This is the tag in http://github.com/vega/vega-datasets from
# which the datasets in this repository are sourced.
SOURCE_TAG = "v1.29.0"


def _load_dataset_info() -> Dict[str, Dict[str, Any]]:
"""This loads dataset info from three package files:
Expand Down Expand Up @@ -91,7 +94,7 @@ class Dataset(object):
_reference_info = """
For information on this dataset, see https://github.com/vega/vega-datasets/
"""
base_url = "https://vega.github.io/vega-datasets/data/"
base_url = "https://cdn.jsdelivr.net/npm/vega-datasets@" + SOURCE_TAG + "/data/"
_dataset_info = _load_dataset_info()
_pd_read_kwds = {} # type: Dict[str, Any]
_return_type = pd.DataFrame
Expand Down

0 comments on commit dcd18ed

Please sign in to comment.