Skip to content

Commit

Permalink
Fix parse_url (#2161)
Browse files Browse the repository at this point in the history
Fix URL parsing.

---------

Co-authored-by: Anton Pirker <anton.pirker@sentry.io>
  • Loading branch information
sentrivana and antonpirker authored Jun 7, 2023
1 parent 8a6c19c commit dd6bbe0
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 17 deletions.
40 changes: 23 additions & 17 deletions sentry_sdk/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1353,8 +1353,8 @@ def from_base64(base64_string):
Components = namedtuple("Components", ["scheme", "netloc", "path", "query", "fragment"])


def sanitize_url(url, remove_authority=True, remove_query_values=True):
# type: (str, bool, bool) -> str
def sanitize_url(url, remove_authority=True, remove_query_values=True, split=False):
# type: (str, bool, bool, bool) -> Union[str, Components]
"""
Removes the authority and query parameter values from a given URL.
"""
Expand Down Expand Up @@ -1383,17 +1383,18 @@ def sanitize_url(url, remove_authority=True, remove_query_values=True):
else:
query_string = parsed_url.query

safe_url = urlunsplit(
Components(
scheme=parsed_url.scheme,
netloc=netloc,
query=query_string,
path=parsed_url.path,
fragment=parsed_url.fragment,
)
components = Components(
scheme=parsed_url.scheme,
netloc=netloc,
query=query_string,
path=parsed_url.path,
fragment=parsed_url.fragment,
)

return safe_url
if split:
return components
else:
return urlunsplit(components)


ParsedUrl = namedtuple("ParsedUrl", ["url", "query", "fragment"])
Expand All @@ -1406,20 +1407,25 @@ def parse_url(url, sanitize=True):
parameters will be sanitized to remove sensitive data. The authority (username and password)
in the URL will always be removed.
"""
url = sanitize_url(url, remove_authority=True, remove_query_values=sanitize)
parsed_url = sanitize_url(
url, remove_authority=True, remove_query_values=sanitize, split=True
)

parsed_url = urlsplit(url)
base_url = urlunsplit(
Components(
scheme=parsed_url.scheme,
netloc=parsed_url.netloc,
scheme=parsed_url.scheme, # type: ignore
netloc=parsed_url.netloc, # type: ignore
query="",
path=parsed_url.path,
path=parsed_url.path, # type: ignore
fragment="",
)
)

return ParsedUrl(url=base_url, query=parsed_url.query, fragment=parsed_url.fragment)
return ParsedUrl(
url=base_url,
query=parsed_url.query, # type: ignore
fragment=parsed_url.fragment, # type: ignore
)


def is_valid_sample_rate(rate, source):
Expand Down
18 changes: 18 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,24 @@ def test_sanitize_url(url, expected_result):
assert parts == expected_parts


def test_sanitize_url_and_split():
    """Check that sanitize_url(..., split=True) yields sanitized URL components."""
    url = "https://username:password@example.com?token=abc&sessionid=123&save=true"
    components = sanitize_url(url, split=True)

    # Compare the query as sorted key=value pairs so the check does not
    # depend on the order in which the parameters are emitted.
    wanted_pairs = "token=[Filtered]&sessionid=[Filtered]&save=[Filtered]".split("&")
    wanted_pairs.sort()
    actual_pairs = components.query.split("&")
    actual_pairs.sort()

    assert components.scheme == "https"
    assert components.netloc == "[Filtered]:[Filtered]@example.com"
    assert actual_pairs == wanted_pairs
    assert components.path == ""
    assert components.fragment == ""


@pytest.mark.parametrize(
("url", "sanitize", "expected_url", "expected_query", "expected_fragment"),
[
Expand Down

0 comments on commit dd6bbe0

Please sign in to comment.