Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add simple cache mechanism to GoogleWTS class #1533

Merged
merged 10 commits into from
Nov 18, 2020
12 changes: 12 additions & 0 deletions lib/cartopy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# licensing details.

from ._version import version as __version__ # noqa: F401
import tempfile

__document_these__ = ['config']

Expand All @@ -17,9 +18,11 @@
_writable_dir = os.path.join(os.path.expanduser('~'), '.local', 'share')
_data_dir = os.path.join(os.environ.get("XDG_DATA_HOME", _writable_dir),
'cartopy')
_cache_dir = os.path.join(tempfile.gettempdir(), 'cartopy_cache_dir')

config = {'pre_existing_data_dir': '',
'data_dir': _data_dir,
'cache_dir': _cache_dir,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add an entry into the docstring below describing what this new entry is for.

'repo_data_dir': os.path.join(os.path.dirname(__file__), 'data'),
'downloaders': {},
}
Expand Down Expand Up @@ -53,6 +56,14 @@
cartopy will download the appropriate file(s) to a subdirectory of this
directory, therefore ``data_dir`` should be writable by the user.

``cache_dir``
The absolute path to a directory where tile data is cached when a
GoogleWTS sub-class is initialized with ``cache=True``. If it is not found
cartopy will create it, therefore ``cache_dir`` should be writable by the
user. Note that the default cache dir might be accessible by all users,
depending on your OS and local configuration. If a private cache is
required, set ``cache_dir`` to a private location.

``repo_data_dir``
The absolute path to the directory where the data delivered with the
cartopy repository is stored. Typically this will only be set by OS
Expand All @@ -67,6 +78,7 @@

del _data_dir
del _writable_dir
del _cache_dir


# Try importing a siteconfig file which exposes an update_config function,
Expand Down
116 changes: 91 additions & 25 deletions lib/cartopy/io/img_tiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from abc import ABCMeta, abstractmethod
import concurrent.futures
import io
import os
import warnings

from PIL import Image
Expand All @@ -36,11 +37,17 @@ class GoogleWTS(metaclass=ABCMeta):

A "tile" in this class refers to the coordinates (x, y, z).

The tiles can be saved to a cache directory using the cache parameter, so
they are downloaded only once. If it is set to True, the default path
stored in the cartopy.config dictionary is used. If it is set to a custom
path, this path is used instead of the default one. If it is set to False
(the default behavior), the tiles are downloaded each time.

"""
_MAX_THREADS = 24

def __init__(self, desired_tile_form='RGB',
user_agent='CartoPy/' + cartopy.__version__):
user_agent='CartoPy/' + cartopy.__version__, cache=False):
self.imgs = []
self.crs = ccrs.Mercator.GOOGLE
self.desired_tile_form = desired_tile_form
Expand All @@ -49,6 +56,18 @@ def __init__(self, desired_tile_form='RGB',
# osm may reject requests if there are too many of them, in which case
# a change of user_agent may fix the issue.

# Enable a cache mechanism when cache is equal to True or to a path.
self._default_cache = False
if cache is True:
self._default_cache = True
self.cache_path = cartopy.config["cache_dir"]
elif cache is False:
self.cache_path = None
else:
self.cache_path = cache
self.cache = set({})
self._load_cache()

def image_for_domain(self, target_domain, target_z):
tiles = []

Expand Down Expand Up @@ -79,6 +98,26 @@ def fetch_tile(tile):
img, extent, origin = _merge_tiles(tiles)
return img, extent, origin

@property
def _cache_dir(self):
    """Return the path of the cache directory used by this instance.

    The path is ``self.cache_path`` joined with the name of the
    instance's class.
    """
    class_name = self.__class__.__name__
    return os.path.join(self.cache_path, class_name)

def _load_cache(self):
    """Create the cache directory if needed and index its contents.

    Does nothing when ``self.cache_path`` is None. Otherwise the
    directory ``self._cache_dir`` is created if it does not exist yet, and
    the names of all entries found in it are merged into ``self.cache``.

    A warning naming the directory is emitted only when this call created
    it and ``self._default_cache`` is true.
    """
    if self.cache_path is None:
        return

    cache_dir = self._cache_dir
    try:
        # Attempt the creation directly instead of checking for existence
        # first: another thread or process may create the directory between
        # an ``os.path.exists`` check and ``os.makedirs``, which would make
        # the latter raise.
        os.makedirs(cache_dir)
    except FileExistsError:
        # Already present; nothing was created, so no warning is due.
        pass
    else:
        if self._default_cache:
            warnings.warn(
                'Cartopy created the following directory to cache '
                'GoogleWTS tiles: {}'.format(cache_dir))

    self.cache = self.cache.union(os.listdir(cache_dir))

def _find_images(self, target_domain, target_z, start_tile=(0, 0, 0)):
"""Target domain is a shapely polygon in native coordinates."""

Expand Down Expand Up @@ -168,27 +207,45 @@ def _image_url(self, tile):
def get_image(self, tile):
from urllib.request import urlopen, Request, HTTPError, URLError

url = self._image_url(tile)
try:
request = Request(url, headers={"User-Agent": self.user_agent})
fh = urlopen(request)
im_data = io.BytesIO(fh.read())
fh.close()
img = Image.open(im_data)
if self.cache_path is not None:
filename = "_".join([str(i) for i in tile]) + ".npy"
cached_file = os.path.join(
self._cache_dir,
filename
)
else:
filename = None
cached_file = None

except (HTTPError, URLError) as err:
print(err)
img = Image.fromarray(np.full((256, 256, 3), (250, 250, 250),
dtype=np.uint8))
if filename in self.cache:
img = np.load(cached_file, allow_pickle=False)
else:
url = self._image_url(tile)
try:
request = Request(url, headers={"User-Agent": self.user_agent})
fh = urlopen(request)
im_data = io.BytesIO(fh.read())
fh.close()
img = Image.open(im_data)

except (HTTPError, URLError) as err:
print(err)
img = Image.fromarray(np.full((256, 256, 3), (250, 250, 250),
dtype=np.uint8))

img = img.convert(self.desired_tile_form)
if self.cache_path is not None:
np.save(cached_file, img, allow_pickle=False)
self.cache.add(filename)

img = img.convert(self.desired_tile_form)
return img, self.tileextent(tile), 'lower'


class GoogleTiles(GoogleWTS):
def __init__(self, desired_tile_form='RGB', style="street",
url=('https://mts0.google.com/vt/lyrs={style}'
'@177000000&hl=en&src=api&x={x}&y={y}&z={z}&s=G')):
'@177000000&hl=en&src=api&x={x}&y={y}&z={z}&s=G'),
cache=False):
"""
Parameters
----------
Expand Down Expand Up @@ -219,7 +276,8 @@ def __init__(self, desired_tile_form='RGB', style="street",
not Image.core.jpeg_decoder:
msg = "The '%s' style requires pillow with jpeg decoding support."
raise ValueError(msg % self.style)
return super().__init__(desired_tile_form=desired_tile_form)
return super().__init__(desired_tile_form=desired_tile_form,
cache=cache)

def _image_url(self, tile):
style_dict = {
Expand Down Expand Up @@ -293,8 +351,10 @@ class Stamen(GoogleWTS):
attribute this imagery.

"""
def __init__(self, style='toner', desired_tile_form='RGB'):
super().__init__(desired_tile_form=desired_tile_form)
def __init__(self, style='toner',
desired_tile_form='RGB', cache=False):
super().__init__(desired_tile_form=desired_tile_form,
cache=cache)
self.style = style

def _image_url(self, tile):
Expand Down Expand Up @@ -327,7 +387,7 @@ class StamenTerrain(Stamen):


"""
def __init__(self):
def __init__(self, cache=False):
warnings.warn(
"The StamenTerrain class was deprecated in v0.17. "
"Please use Stamen('terrain-background') instead.",
Expand All @@ -337,7 +397,8 @@ def __init__(self):
# NOTE: This subclass of Stamen exists for legacy reasons.
# No further Stamen subclasses will be accepted as
# they can easily be created in user code with Stamen(style_name).
return super().__init__(style='terrain-background')
return super().__init__(style='terrain-background',
cache=cache)


class MapboxTiles(GoogleWTS):
Expand All @@ -347,7 +408,7 @@ class MapboxTiles(GoogleWTS):
For terms of service, see https://www.mapbox.com/tos/.

"""
def __init__(self, access_token, map_id):
def __init__(self, access_token, map_id, cache=False):
"""
Set up a new Mapbox tiles instance.

Expand All @@ -365,7 +426,7 @@ def __init__(self, access_token, map_id):
"""
self.access_token = access_token
self.map_id = map_id
super().__init__()
super().__init__(cache=cache)

def _image_url(self, tile):
x, y, z = tile
Expand All @@ -385,7 +446,7 @@ class MapboxStyleTiles(GoogleWTS):
For terms of service, see https://www.mapbox.com/tos/.

"""
def __init__(self, access_token, username, map_id):
def __init__(self, access_token, username, map_id, cache=False):
"""
Set up a new instance to retrieve tiles from a Mapbox style.

Expand All @@ -408,7 +469,7 @@ def __init__(self, access_token, username, map_id):
self.access_token = access_token
self.username = username
self.map_id = map_id
super().__init__()
super().__init__(cache=cache)

def _image_url(self, tile):
x, y, z = tile
Expand Down Expand Up @@ -523,7 +584,11 @@ class OrdnanceSurvey(GoogleWTS):
https://developer.ordnancesurvey.co.uk/os-api-framework-agreement.
"""
# API Documentation: https://apidocs.os.uk/docs/os-maps-wmts
def __init__(self, apikey, layer='Road', desired_tile_form='RGB'):
def __init__(self,
apikey,
layer='Road',
desired_tile_form='RGB',
cache=False):
"""
Parameters
----------
Expand All @@ -539,7 +604,8 @@ def __init__(self, apikey, layer='Road', desired_tile_form='RGB'):
desired_tile_form: optional
Defaults to 'RGB'.
"""
super().__init__(desired_tile_form=desired_tile_form)
super().__init__(desired_tile_form=desired_tile_form,
cache=cache)
self.apikey = apikey

if layer not in ['Outdoor', 'Road', 'Light', 'Night', 'Leisure']:
Expand Down
91 changes: 91 additions & 0 deletions lib/cartopy/tests/test_img_tiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,17 @@
# See COPYING and COPYING.LESSER in the root of the repository for full
# licensing details.

import hashlib
import os
import types
import warnings

import numpy as np
from numpy.testing import assert_array_almost_equal as assert_arr_almost
import pytest
import shapely.geometry as sgeom

from cartopy import config
import cartopy.crs as ccrs
import cartopy.io.img_tiles as cimgt

Expand Down Expand Up @@ -285,3 +288,91 @@ def test_ordnance_survey_get_image():

# The extent is the same though
assert extent1 == extent2


@pytest.mark.network
@pytest.mark.parametrize('cache_dir', ["tmpdir", True, False])
def test_cache(cache_dir, tmpdir):
if cache_dir == "tmpdir":
tmpdir_str = tmpdir.strpath
else:
tmpdir_str = cache_dir

if cache_dir is True:
config["cache_dir"] = tmpdir.strpath

# Fetch tiles and save them in the cache
with warnings.catch_warnings(record=True) as w:
gt = cimgt.GoogleTiles(cache=tmpdir_str)
gt._image_url = types.MethodType(GOOGLE_IMAGE_URL_REPLACEMENT, gt)

ll_target_domain = sgeom.box(-10, 50, 10, 60)
multi_poly = gt.crs.project_geometry(ll_target_domain, ccrs.PlateCarree())
target_domain = multi_poly.geoms[0]

img_init, _, _ = gt.image_for_domain(target_domain, 6)

# Do not check the result if the cache is disabled
if cache_dir is False:
assert gt.cache_path is None
return

# Check that the warning is properly raised (only when cache is True)
if cache_dir is True:
assert len(w) == 1
else:
assert len(w) == 0

# Define expected results
x_y_z_f_h = [
(30, 18, 6, '30_18_6.npy', '545db25f1aa348ad85e1f437fd0db0d9'),
dopplershift marked this conversation as resolved.
Show resolved Hide resolved
(30, 19, 6, '30_19_6.npy', '10355add0674bfa33f673ea27a6d1206'),
(30, 20, 6, '30_20_6.npy', 'ab3e7f2ed8d71977ac176094973695ae'),
(30, 21, 6, '30_21_6.npy', '3e8947b93a6ffa07f22cfea4042a4740'),
(31, 18, 6, '31_18_6.npy', 'd0fa58b9146aa99b273eb75256b328cc'),
(31, 19, 6, '31_19_6.npy', '9255bd0cd22736bd2c25a9087bd47b20'),
(31, 20, 6, '31_20_6.npy', 'ac0f7e32bdf8edb50d1dccf3ec0ef446'),
(31, 21, 6, '31_21_6.npy', 'f36b8cc1825bf267b2daead837facae9'),
(32, 18, 6, '32_18_6.npy', '9f4ddd90cd1ae76ef2bbc8f0252ead91'),
(32, 19, 6, '32_19_6.npy', 'a995803578bb94ecfca8563754717196'),
(32, 20, 6, '32_20_6.npy', 'def9e71d77fd6007c77c2a14dfae858f'),
(32, 21, 6, '32_21_6.npy', 'a3d7935037019ec58ae78f60e6fb924e'),
(33, 18, 6, '33_18_6.npy', '4e51e32da73fb99229817dcd7b7e1f4f'),
(33, 19, 6, '33_19_6.npy', 'b9b5057fa012c5788cbbe1e18c9bb512'),
(33, 20, 6, '33_20_6.npy', 'b55a7c0a8d86167df496732f85bddcf9'),
(33, 21, 6, '33_21_6.npy', '4208ba897c460e9bb0d2469552e127ff')
]

# Check the results
cache_dir_res = os.path.join(gt.cache_path, "GoogleTiles")
files = [i for i in os.listdir(cache_dir_res)]
hashes = {
f:
hashlib.md5(
np.load(os.path.join(cache_dir_res, f), allow_pickle=True).data
).hexdigest()
for f in files
}

assert sorted(files) == [f for x, y, z, f, h in x_y_z_f_h]
assert set(files) == gt.cache

assert sorted(hashes.values()) == sorted([
h for x, y, z, f, h in x_y_z_f_h
])

# Update images in cache (all white)
for f in files:
filename = os.path.join(cache_dir_res, f)
img = np.load(filename, allow_pickle=True)
img.fill(255)
np.save(filename, img, allow_pickle=True)

gt_cache = cimgt.GoogleTiles(cache=tmpdir_str)
gt_cache._image_url = types.MethodType(
GOOGLE_IMAGE_URL_REPLACEMENT, gt_cache)
img_cache, _, _ = gt_cache.image_for_domain(target_domain, 6)

# Check that the new image_for_domain() call used cached images
assert gt_cache.cache == gt.cache
assert (img_cache == 255).all()