From 55f3abd9b800d05fb710a71b9fe810643b000e68 Mon Sep 17 00:00:00 2001 From: Pablo Iturrieta <69042881+pabloitu@users.noreply.github.com> Date: Mon, 14 Nov 2022 19:59:01 +0100 Subject: [PATCH] feat: Added query_bsi (#201) * feat: Added query_bsi, based on comcat query functions. Can be called from csep.__init__, wrapping the readers._query_bsi. This is based in csep.utils.comcat.search functions. mods: (i) Exception catchs for Hostname mismatch in SSL verification (the bsi does not go through https). (ii) in comcat and bsi, ids are set different. SummaryEvent now handles this with try/except. (iii) bsi returns datetimes as utc strings, instead of timestamp as comcat. * docs: added query_bsi to api reference and mentioned it in tutorial/catalog_filtering styl: modified query_comcat docstring * fix: added maxdepth to query_comcat and query_bsi wrappers at top and bottom level. default=1000. Maybe these functions should have the arguments to completely define an experiment catalog (perhaps just max_mag and its ready) --- csep/__init__.py | 64 +++++++++++++++++++++++-- csep/utils/comcat.py | 48 +++++++++++++++++-- csep/utils/readers.py | 28 +++++++++-- docs/reference/api_reference.rst | 1 + examples/tutorials/catalog_filtering.py | 5 +- tests/artifacts/BSI/vcr_search.yaml | 46 ++++++++++++++++++ tests/artifacts/BSI/vcr_summary.yaml | 46 ++++++++++++++++++ tests/test_bsi.py | 42 ++++++++++++++++ 8 files changed, 268 insertions(+), 12 deletions(-) create mode 100644 tests/artifacts/BSI/vcr_search.yaml create mode 100644 tests/artifacts/BSI/vcr_summary.yaml create mode 100644 tests/test_bsi.py diff --git a/csep/__init__.py b/csep/__init__.py index 4149d43c..de1a03f8 100644 --- a/csep/__init__.py +++ b/csep/__init__.py @@ -188,7 +188,9 @@ def load_catalog(filename, type='csep-csv', format='native', loader=None, apply_ def query_comcat(start_time, end_time, min_magnitude=2.50, min_latitude=31.50, max_latitude=43.00, - min_longitude=-125.40, max_longitude=-113.10, verbose=True, + min_longitude=-125.40, max_longitude=-113.10, + max_depth=1000, + verbose=True, apply_filters=False, **kwargs): """ Access Comcat catalog through web service @@ -201,11 +203,11 @@ def query_comcat(start_time, end_time, min_magnitude=2.50, max_latitude: max latitude of bounding box min_longitude: min latitude of bounding box max_longitude: max longitude of bounding box - region: :class:`csep.core.regions.CartesianGrid2D + max_depth: maximum depth of the bounding box verbose (bool): print catalog summary statistics Returns: - :class:`csep.core.catalogs.ComcatCatalog + :class:`csep.core.catalogs.CSEPCatalog """ # Timezone should be in UTC @@ -213,7 +215,8 @@ def query_comcat(start_time, end_time, min_magnitude=2.50, eventlist = readers._query_comcat(start_time=start_time, end_time=end_time, min_magnitude=min_magnitude, min_latitude=min_latitude, max_latitude=max_latitude, - min_longitude=min_longitude, max_longitude=max_longitude) + min_longitude=min_longitude, max_longitude=max_longitude, + max_depth=max_depth) t1 = time.time() comcat = catalogs.CSEPCatalog(data=eventlist, date_accessed=utc_now_datetime(), **kwargs) print("Fetched ComCat catalog in {} seconds.\n".format(t1 - t0)) @@ -234,6 +237,59 @@ def query_comcat(start_time, end_time, min_magnitude=2.50, return comcat + +def query_bsi(start_time, end_time, min_magnitude=2.50, + min_latitude=32.0, max_latitude=50.0, + min_longitude=2.0, max_longitude=21.0, + max_depth=1000, + verbose=True, + apply_filters=False, **kwargs): + """ + Access BSI catalog through web service + + Args: + start_time: datetime object of start of catalog + end_time: datetime object for end of catalog + min_magnitude: minimum magnitude to query + min_latitude: maximum magnitude to query + max_latitude: max latitude of bounding box + min_longitude: min latitude of bounding box + max_longitude: max longitude of bounding box + max_depth: maximum depth of the bounding box + verbose (bool): print catalog summary statistics + + Returns: + :class:`csep.core.catalogs.CSEPCatalog + """ + + # Timezone should be in UTC + t0 = time.time() + eventlist = readers._query_bsi(start_time=start_time, end_time=end_time, + min_magnitude=min_magnitude, + min_latitude=min_latitude, max_latitude=max_latitude, + min_longitude=min_longitude, max_longitude=max_longitude, + max_depth=max_depth) + t1 = time.time() + bsi = catalogs.CSEPCatalog(data=eventlist, date_accessed=utc_now_datetime(), **kwargs) + print("Fetched BSI catalog in {} seconds.\n".format(t1 - t0)) + + if apply_filters: + try: + bsi = bsi.filter().filter_spatial() + except CSEPCatalogException: + bsi = bsi.filter() + + if verbose: + print("Downloaded catalog from Bollettino Sismico Italiano (BSI) with following parameters") + print("Start Date: {}\nEnd Date: {}".format(str(bsi.start_time), str(bsi.end_time))) + print("Min Latitude: {} and Max Latitude: {}".format(bsi.min_latitude, bsi.max_latitude)) + print("Min Longitude: {} and Max Longitude: {}".format(bsi.min_longitude, bsi.max_longitude)) + print("Min Magnitude: {}".format(bsi.min_magnitude)) + print(f"Found {bsi.event_count} events in the BSI catalog.") + + return bsi + + def load_evaluation_result(fname): """ Load evaluation result stored as json file diff --git a/csep/utils/comcat.py b/csep/utils/comcat.py index da0d2558..ecaea5f9 100644 --- a/csep/utils/comcat.py +++ b/csep/utils/comcat.py @@ -1,8 +1,9 @@ # python imports -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from urllib import request -from urllib.error import HTTPError +from urllib.error import HTTPError, URLError from urllib.parse import urlparse, urlencode +import ssl import json import time from collections import OrderedDict @@ -293,6 +294,39 @@ def _search(**newargs): except Exception as msg: raise Exception( 'Error downloading data from url %s. "%s".' % (url, msg)) + + except ssl.SSLCertVerificationError as SSLe: + # Fails to verify SSL certificate, when there is a hostname mismatch + if SSLe.verify_code == 62: + try: + context = ssl._create_unverified_context() + fh = request.urlopen(url, timeout=TIMEOUT, context=context) + data = fh.read().decode('utf8') + fh.close() + jdict = json.loads(data) + events = [] + for feature in jdict['features']: + events.append(SummaryEvent(feature)) + except Exception as msg: + raise Exception( + 'Error downloading data from url %s. "%s".' % (url, msg)) + + except URLError as URLe: + # Fails to verify SSL certificate, when there is a hostname mismatch + if isinstance(URLe.reason, ssl.SSLCertVerificationError) and URLe.reason.verify_code == 62: + try: + context = ssl._create_unverified_context() + fh = request.urlopen(url, timeout=TIMEOUT, context=context) + data = fh.read().decode('utf8') + fh.close() + jdict = json.loads(data) + events = [] + for feature in jdict['features']: + events.append(SummaryEvent(feature)) + except Exception as msg: + raise Exception( + 'Error downloading data from url %s. "%s".' % (url, msg)) + except Exception as msg: raise Exception( 'Error downloading data from url %s. "%s".' % (url, msg)) @@ -358,7 +392,11 @@ def id(self): Returns: str: Authoritative origin ID. """ - return self._jdict['id'] + ## comcat has an id key in each feature, whereas bsi has eventId within the properties dict + try: + return self._jdict['id'] + except: + return self._jdict['properties']['eventId'] @property def time(self): @@ -367,6 +405,10 @@ def time(self): datetime: Authoritative origin time. """ time_in_msec = self._jdict['properties']['time'] + # Comcat gives the event time in a ms timestamp, whereas bsi in datetime isoformat + if isinstance(time_in_msec, str): + event_dtime = datetime.fromisoformat(time_in_msec).replace(tzinfo=timezone.utc) + time_in_msec = event_dtime.timestamp() * 1000 time_in_sec = time_in_msec // 1000 msec = time_in_msec - (time_in_sec * 1000) dtime = datetime.utcfromtimestamp(time_in_sec) diff --git a/csep/utils/readers.py b/csep/utils/readers.py index b5352279..4569634b 100644 --- a/csep/utils/readers.py +++ b/csep/utils/readers.py @@ -655,8 +655,9 @@ def jma_csv(fname): return events def _query_comcat(start_time, end_time, min_magnitude=2.50, - min_latitude=31.50, max_latitude=43.00, - min_longitude=-125.40, max_longitude=-113.10, extra_comcat_params=None): + min_latitude=31.50, max_latitude=43.00, + min_longitude=-125.40, max_longitude=-113.10, + max_depth=1000, extra_comcat_params=None): """ Return eventlist from ComCat web service. @@ -668,6 +669,7 @@ def _query_comcat(start_time, end_time, min_magnitude=2.50, max_latitude (float): maximum latitude of query min_longitude (float): minimum longitude of query max_longitude (float): maximum longitude of query + max_depth (float): maximum depth of query extra_comcat_params (dict): additional parameters to pass to comcat search function Returns: @@ -679,10 +681,30 @@ def _query_comcat(start_time, end_time, min_magnitude=2.50, eventlist = search(minmagnitude=min_magnitude, minlatitude=min_latitude, maxlatitude=max_latitude, minlongitude=min_longitude, maxlongitude=max_longitude, - starttime=start_time, endtime=end_time, **extra_comcat_params) + starttime=start_time, endtime=end_time, + maxdepth=max_depth, **extra_comcat_params) return eventlist +def _query_bsi(start_time, end_time, min_magnitude=2.50, + min_latitude=32.0, max_latitude=50.0, + min_longitude=2.0, max_longitude=21.0, + max_depth=1000, extra_bsi_params=None): + """ + Queries INGV Bulletino Sismico Italiano, revised version. + :return: csep.core.Catalog object + """ + extra_bsi_params = extra_bsi_params or {} + bsi_host = 'webservices.rm.ingv.it' + extra_bsi_params.update({'host': bsi_host, 'limit': 15000, 'offset': 0}) + # get eventlist from Comcat + eventlist = search(minmagnitude=min_magnitude, + minlatitude=min_latitude, maxlatitude=max_latitude, + minlongitude=min_longitude, maxlongitude=max_longitude, + maxdepth=max_depth, + starttime=start_time, endtime=end_time, **extra_bsi_params) + + return eventlist def _parse_datetime_to_zmap(date, time): """ Helping function to return datetime in zmap format. diff --git a/docs/reference/api_reference.rst b/docs/reference/api_reference.rst index 9f8a6b36..6e3ce176 100644 --- a/docs/reference/api_reference.rst +++ b/docs/reference/api_reference.rst @@ -16,6 +16,7 @@ Loading catalogs and forecasts load_stochastic_event_sets load_catalog query_comcat + query_bsi load_gridded_forecast load_catalog_forecast diff --git a/examples/tutorials/catalog_filtering.py b/examples/tutorials/catalog_filtering.py index 406ee565..a21937ff 100644 --- a/examples/tutorials/catalog_filtering.py +++ b/examples/tutorials/catalog_filtering.py @@ -30,8 +30,9 @@ # Load catalog # ------------ # -# PyCSEP provides access to the ComCat web API using :func:`csep.query_comcat`. This function requires a -# :class:`datetime.datetime` to specify the start and end dates. +# PyCSEP provides access to the ComCat web API using :func:`csep.query_comcat` and to the Bollettino Sismico Italiano +# API using :func:`csep.query_bsi`. These functions require a :class:`datetime.datetime` to specify the start and end +# dates. start_time = csep.utils.time_utils.strptime_to_utc_datetime('2019-01-01 00:00:00.0') end_time = csep.utils.time_utils.utc_now_datetime() diff --git a/tests/artifacts/BSI/vcr_search.yaml b/tests/artifacts/BSI/vcr_search.yaml new file mode 100644 index 00000000..76053c90 --- /dev/null +++ b/tests/artifacts/BSI/vcr_search.yaml @@ -0,0 +1,46 @@ +interactions: +- request: + body: null + headers: + Connection: + - close + Host: + - webservices.rm.ingv.it + User-Agent: + - Python-urllib/3.10 + method: GET + uri: https://webservices.rm.ingv.it/fdsnws/event/1/query?format=geojson&starttime=2009-04-06T00%3A00%3A00&endtime=2009-04-07T00%3A00%3A00&limit=15000&maxdepth=1000&maxmagnitude=10.0&mindepth=-100&minmagnitude=5.5&offset=0&orderby=time-asc&eventtype=earthquake + response: + body: + string: '{"type":"FeatureCollection","features":[{"type":"Feature","properties":{"eventId":1895389,"originId":755599,"time":"2009-04-06T01:32:40.400000","author":"BULLETIN-SISPICK","magType":"Mw","mag":6.1,"magAuthor":"--","type":"earthquake","place":"2 + km SW L''Aquila (AQ)","version":1000,"geojson_creationTime":"2022-09-21T12:18:03"},"geometry":{"type":"Point","coordinates":[13.38,42.342,8.3]}}]}' + headers: + Access-Control-Allow-Origin: + - '*' + Cache-Control: + - public, max-age=60 + Connection: + - close + Content-Type: + - application/json + Date: + - Wed, 21 Sep 2022 12:18:03 GMT + Server: + - nginx + Transfer-Encoding: + - chunked + Vary: + - Accept-Encoding + - Accept-Encoding + X-Cache-Status: + - EXPIRED + X-RateLimit-Limit: + - '10' + X-RateLimit-Reset: + - '1' + X-UA-Compatible: + - IE=Edge + status: + code: 200 + message: OK +version: 1 diff --git a/tests/artifacts/BSI/vcr_summary.yaml b/tests/artifacts/BSI/vcr_summary.yaml new file mode 100644 index 00000000..9349aa3d --- /dev/null +++ b/tests/artifacts/BSI/vcr_summary.yaml @@ -0,0 +1,46 @@ +interactions: +- request: + body: null + headers: + Connection: + - close + Host: + - webservices.rm.ingv.it + User-Agent: + - Python-urllib/3.10 + method: GET + uri: https://webservices.rm.ingv.it/fdsnws/event/1/query?format=geojson&starttime=2009-04-06T00%3A00%3A00&endtime=2009-04-07T00%3A00%3A00&limit=15000&maxdepth=1000&maxmagnitude=10.0&mindepth=-100&minmagnitude=5.5&offset=0&orderby=time-asc&eventtype=earthquake + response: + body: + string: '{"type":"FeatureCollection","features":[{"type":"Feature","properties":{"eventId":1895389,"originId":755599,"time":"2009-04-06T01:32:40.400000","author":"BULLETIN-SISPICK","magType":"Mw","mag":6.1,"magAuthor":"--","type":"earthquake","place":"2 + km SW L''Aquila (AQ)","version":1000,"geojson_creationTime":"2022-09-21T12:14:55"},"geometry":{"type":"Point","coordinates":[13.38,42.342,8.3]}}]}' + headers: + Access-Control-Allow-Origin: + - '*' + Cache-Control: + - public, max-age=60 + Connection: + - close + Content-Type: + - application/json + Date: + - Wed, 21 Sep 2022 12:14:55 GMT + Server: + - nginx + Transfer-Encoding: + - chunked + Vary: + - Accept-Encoding + - Accept-Encoding + X-Cache-Status: + - MISS + X-RateLimit-Limit: + - '10' + X-RateLimit-Reset: + - '1' + X-UA-Compatible: + - IE=Edge + status: + code: 200 + message: OK +version: 1 diff --git a/tests/test_bsi.py b/tests/test_bsi.py new file mode 100644 index 00000000..4673bfc8 --- /dev/null +++ b/tests/test_bsi.py @@ -0,0 +1,42 @@ +from datetime import datetime +import os.path +import vcr +from csep.utils.comcat import search + +HOST = 'webservices.rm.ingv.it' + + +def get_datadir(): + root_dir = os.path.dirname(os.path.abspath(__file__)) + data_dir = os.path.join(root_dir, 'artifacts', 'BSI') + return data_dir + + +def test_search(): + datadir = get_datadir() + tape_file = os.path.join(datadir, 'vcr_search.yaml') + with vcr.use_cassette(tape_file): + # L'Aquila + eventlist = search(starttime=datetime(2009, 4, 6, 0, 0, 0), + endtime=datetime(2009, 4, 7, 0, 0, 0), + minmagnitude=5.5, host=HOST, limit=15000, offset=0) + event = eventlist[0] + assert event.id == 1895389 + + +def test_summary(): + datadir = get_datadir() + tape_file = os.path.join(datadir, 'vcr_summary.yaml') + with vcr.use_cassette(tape_file): + eventlist = search(starttime=datetime(2009, 4, 6, 0, 0, 0), + endtime=datetime(2009, 4, 7, 0, 0, 0), + minmagnitude=5.5, host=HOST, limit=15000, offset=0) + event = eventlist[0] + cmp = '1895389 2009-04-06 01:32:40.400000 (42.342,13.380) 8.3 km M6.1' + assert str(event) == cmp + assert event.id == 1895389 + assert event.time == datetime(2009, 4, 6, 1, 32, 40, 400000) + assert event.latitude == 42.342 + assert event.longitude == 13.380 + assert event.depth == 8.3 + assert event.magnitude == 6.1