Skip to content

Commit

Permalink
PB-35 Improve logging of URL validation
Browse files Browse the repository at this point in the history
Switch the log level to "WARN". Also make sure every validation logs its
error, including the collection, so we know who the user might be.
  • Loading branch information
schtibe committed Jul 30, 2024
1 parent df66727 commit c9c4d38
Showing 1 changed file with 23 additions and 12 deletions.
35 changes: 23 additions & 12 deletions app/stac_api/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -566,22 +566,30 @@ def _validate_href_scheme(url, collection):
"""Validate if the url scheme is disallowed"""
_url = urlparse(url)
if _url.scheme in settings.DISALLOWED_EXTERNAL_ASSET_URL_SCHEMES:
logger.info(
logger.warn(
"Attempted external asset upload with disallowed URL scheme",
extra={
'url': url,
'collection': collection,
'collection': collection, # to have the means to know who this might have been
'disallowed_schemes': settings.DISALLOWED_EXTERNAL_ASSET_URL_SCHEMES
}
)
raise ValidationError(_(f'{_url.scheme} is not a allowed url scheme'))


def _validate_href_general_pattern(url):
def _validate_href_general_pattern(url, collection):
try:
validator = URLValidator()
validator(url)
except exceptions.ValidationError as exc:
logger.warn(
f"Attempted external asset upload with invalid URL {url}",
extra={
# to have the means to know who this might have been
'collection': collection,
}
)

error = _('Invalid URL provided')
raise ValidationError(error) from exc

Expand All @@ -594,10 +602,13 @@ def _validate_href_configured_pattern(url, collection):
if url.startswith(entry):
return True

logger.info(
logger.warn(
"Attempted external asset upload didn't match the whitelist",
extra={
'url': url, 'whitelist': whitelist, 'collection': collection
# log collection to have the means to know who this might have been
'url': url,
'whitelist': whitelist,
'collection': collection
}
)

Expand All @@ -612,33 +623,33 @@ def _validate_href_reachability(url, collection):
response = requests.head(url, timeout=settings.EXTERNAL_URL_REACHABLE_TIMEOUT)

if response.status_code > 400:
logger.info(
logger.warn(
"Attempted external asset upload failed the reachability check",
extra={
'url': url,
'collection': collection,
'collection': collection, # to have the means to know who this might have been
'response': response,
}
)
raise ValidationError(unreachable_error)
except requests.Timeout as exc:
logger.info(
logger.warn(
"Attempted external asset upload resulted in a timeout",
extra={
'url': url,
'collection': collection,
'collection': collection, # to have the means to know who this might have been
'exception': exc,
'timeout': settings.EXTERNAL_URL_REACHABLE_TIMEOUT
}
)
error = _('Checking href URL resulted in timeout')
raise ValidationError(error) from exc
except requests.ConnectionError as exc:
logger.info(
logger.warn(
"Attempted external asset upload resulted in connection error",
extra={
'url': url,
'collection': collection,
'collection': collection, # to have the means to know who this might have been
'exception': exc,
}
)
Expand All @@ -649,6 +660,6 @@ def validate_href_url(url, collection):
"""Validate the href URL """

_validate_href_scheme(url, collection)
_validate_href_general_pattern(url)
_validate_href_general_pattern(url, collection)
_validate_href_configured_pattern(url, collection)
_validate_href_reachability(url, collection)

0 comments on commit c9c4d38

Please sign in to comment.