Skip to content

Commit

Permalink
feat: support brotli encoding (#403)
Browse files Browse the repository at this point in the history
Fixes #204 🦕
  • Loading branch information
andrewsg authored Oct 27, 2023
1 parent 975b947 commit 295e40a
Show file tree
Hide file tree
Showing 7 changed files with 179 additions and 11 deletions.
60 changes: 53 additions & 7 deletions google/resumable_media/requests/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,7 @@ def _add_decoder(response_raw, checksum):
This is so that we can intercept the compressed bytes before they are
decoded.
Only patches if the content encoding is ``gzip``.
Only patches if the content encoding is ``gzip`` or ``br``.
Args:
response_raw (urllib3.response.HTTPResponse): The raw response for
Expand All @@ -598,12 +598,16 @@ def _add_decoder(response_raw, checksum):
caller will no longer need to hash the decoded bytes.
"""
encoding = response_raw.headers.get("content-encoding", "").lower()
if encoding != "gzip":
if encoding == "gzip":
response_raw._decoder = _GzipDecoder(checksum)
return _helpers._DoNothingHash()
# Only activate if brotli is installed
elif encoding == "br" and _BrotliDecoder: # type: ignore
response_raw._decoder = _BrotliDecoder(checksum)
return _helpers._DoNothingHash()
else:
return checksum

response_raw._decoder = _GzipDecoder(checksum)
return _helpers._DoNothingHash()


class _GzipDecoder(urllib3.response.GzipDecoder):
"""Custom subclass of ``urllib3`` decoder for ``gzip``-ed bytes.
Expand All @@ -617,7 +621,7 @@ class _GzipDecoder(urllib3.response.GzipDecoder):
"""

def __init__(self, checksum):
super(_GzipDecoder, self).__init__()
super().__init__()
self._checksum = checksum

def decompress(self, data):
Expand All @@ -630,4 +634,46 @@ def decompress(self, data):
bytes: The decompressed bytes from ``data``.
"""
self._checksum.update(data)
return super(_GzipDecoder, self).decompress(data)
return super().decompress(data)


# urllib3.response.BrotliDecoder might not exist depending on whether brotli is
# installed, so the proxy class below is only defined when it is available.
if hasattr(urllib3.response, "BrotliDecoder"):

    class _BrotliDecoder:
        """Handler for ``brotli`` encoded bytes.

        Allows a checksum function to see the compressed bytes before they are
        decoded. This way the checksum of the compressed value can be computed.

        Because BrotliDecoder's decompress method is dynamically created in
        urllib3, a subclass is not practical. Instead, this class creates a
        captive ``urllib3.response.BrotliDecoder`` instance and acts as a
        proxy.

        Args:
            checksum (object):
                A checksum which will be updated with compressed bytes.
        """

        def __init__(self, checksum):
            self._decoder = urllib3.response.BrotliDecoder()
            self._checksum = checksum

        def decompress(self, data, max_length=-1):
            """Decompress the bytes.

            Args:
                data (bytes): The compressed bytes to be decompressed.
                max_length (int): Optional limit forwarded to the wrapped
                    decoder. The default of ``-1`` means "not supplied", in
                    which case the wrapped decoder is called with ``data``
                    only, exactly as before.

            Returns:
                bytes: The decompressed bytes from ``data``.
            """
            # The checksum must see the compressed bytes, so update it before
            # handing the data to the real decoder.
            self._checksum.update(data)
            # NOTE(review): newer urllib3 releases appear to pass
            # ``max_length`` to decoders; confirm against the urllib3
            # changelog. Only forward it when the caller actually supplied a
            # value, so decoders that accept ``data`` alone keep working.
            if max_length == -1:
                return self._decoder.decompress(data)
            return self._decoder.decompress(data, max_length=max_length)

        @property
        def has_unconsumed_tail(self):
            """bool: Proxy for the wrapped decoder's ``has_unconsumed_tail``.

            Reports ``False`` when the wrapped decoder has no such attribute.
            """
            return getattr(self._decoder, "has_unconsumed_tail", False)

        def flush(self):
            """Flush the wrapped decoder and return whatever it returns."""
            return self._decoder.flush()

else:  # pragma: NO COVER
    _BrotliDecoder = None  # type: ignore # pragma: NO COVER
4 changes: 2 additions & 2 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def unit(session):
)

# Install all test dependencies, then install this package in-place.
session.install("mock", "pytest", "pytest-cov", "pytest-asyncio<=0.14.0")
session.install("mock", "pytest", "pytest-cov", "pytest-asyncio<=0.14.0", "brotli")
session.install("-e", ".[requests,aiohttp]", "-c", constraints_path)

# Run py.test against the unit tests.
Expand Down Expand Up @@ -220,7 +220,7 @@ def system(session):

# Install all test dependencies, then install this package into the
# virtualenv's dist-packages.
session.install("mock", "pytest", "google-cloud-testutils")
session.install("mock", "pytest", "google-cloud-testutils", "brotli")
session.install("-e", ".[requests,aiohttp]", "-c", constraints_path)

# Run py.test against the async system tests.
Expand Down
2 changes: 1 addition & 1 deletion testing/constraints-3.7.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
crcmod==1.7
google-crc32c==1.0
aiohttp==3.6.2
requests==2.18.0
requests==2.23.0
64 changes: 64 additions & 0 deletions tests/data/brotli.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
abcdefghijklmnopqrstuvwxyz0123456789
Binary file added tests/data/brotli.txt.br
Binary file not shown.
32 changes: 31 additions & 1 deletion tests/system/requests/test_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,15 @@ def get_path(filename):
"slices": (),
"metadata": {"contentEncoding": "gzip"},
},
{
"path": get_path("brotli.txt.br"),
"uncompressed": get_path("brotli.txt"),
"content_type": PLAIN_TEXT,
"md5": "MffJw7pTSX/7CVWFFPgwQA==",
"crc32c": "GGK0OQ==",
"slices": (),
"metadata": {"contentEncoding": "br"},
},
)


Expand Down Expand Up @@ -298,7 +307,7 @@ def test_download_gzip_w_stored_content_headers(
self, add_files, authorized_transport
):
# Retrieve the gzip compressed file
info = ALL_FILES[-1]
info = ALL_FILES[-2]
actual_contents = self._get_contents(info)
blob_name = get_blob_name(info)

Expand All @@ -314,6 +323,27 @@ def test_download_gzip_w_stored_content_headers(
assert stream.getvalue() == actual_contents
check_tombstoned(download, authorized_transport)

@pytest.mark.parametrize("checksum", ["md5", "crc32c"])
def test_download_brotli_w_stored_content_headers(
    self, add_files, authorized_transport, checksum
):
    """Download the ``br``-encoded fixture and check its stored headers."""
    # Retrieve the br compressed file. Select it by its stored content
    # encoding rather than by position, so that adding entries to
    # ``ALL_FILES`` cannot silently point this test at a different file.
    info = next(
        entry
        for entry in ALL_FILES
        if (entry.get("metadata") or {}).get("contentEncoding") == "br"
    )
    actual_contents = self._get_contents(info)
    blob_name = get_blob_name(info)

    # Create the actual download object.
    media_url = utils.DOWNLOAD_URL_TEMPLATE.format(blob_name=blob_name)
    stream = io.BytesIO()
    download = self._make_one(media_url, stream=stream, checksum=checksum)
    # Consume the resource.
    response = download.consume(authorized_transport)
    assert response.status_code == http.client.OK
    assert response.headers.get(_helpers._STORED_CONTENT_ENCODING_HEADER) == "br"
    assert response.headers.get("X-Goog-Stored-Content-Length") is not None
    assert stream.getvalue() == actual_contents
    check_tombstoned(download, authorized_transport)

def test_extra_headers(self, authorized_transport, secret_file):
blob_name, data, headers = secret_file
# Create the actual download object.
Expand Down
28 changes: 28 additions & 0 deletions tests/unit/requests/test_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -1110,6 +1110,18 @@ def test_gzipped(self):
assert isinstance(response_raw._decoder, download_mod._GzipDecoder)
assert response_raw._decoder._checksum is mock.sentinel.md5_hash

def test_brotli(self):
    """``br`` responses get a ``_BrotliDecoder`` and a do-nothing hash."""
    raw = mock.Mock(
        headers={"content-encoding": "br"}, spec=["headers", "_decoder"]
    )
    checksum = mock.sentinel.md5_hash

    returned_hash = download_mod._add_decoder(raw, checksum)

    # The caller receives a replacement hash, not the original checksum.
    assert returned_hash is not checksum
    assert isinstance(returned_hash, _helpers._DoNothingHash)

    # The original checksum is now held by the patched-in decoder.
    patched = raw._decoder
    assert isinstance(patched, download_mod._BrotliDecoder)
    assert patched._checksum is checksum

    # Exercise the flush method as well, purely for completeness.
    patched.flush()


class Test_GzipDecoder(object):
def test_constructor(self):
Expand All @@ -1127,6 +1139,22 @@ def test_decompress(self):
md5_hash.update.assert_called_once_with(data)


class Test_BrotliDecoder(object):
    """Unit tests for ``download_mod._BrotliDecoder``."""

    def test_constructor(self):
        """The checksum handed to the constructor is stored on the decoder."""
        checksum = mock.sentinel.md5_hash
        assert download_mod._BrotliDecoder(checksum)._checksum is checksum

    def test_decompress(self):
        """``decompress`` passes its input to the checksum exactly once."""
        payload = b"\xc1\xf8I\xc0/\x83\xf3\xfa"
        checksum = mock.Mock(spec=["update"])

        output = download_mod._BrotliDecoder(checksum).decompress(payload)

        assert output == b""
        checksum.update.assert_called_once_with(payload)


def _mock_response(status_code=http.client.OK, chunks=(), headers=None):
if headers is None:
headers = {}
Expand Down

0 comments on commit 295e40a

Please sign in to comment.