diff --git a/docs/latest/.buildinfo b/docs/latest/.buildinfo index 44e4d9da..38761a16 100644 --- a/docs/latest/.buildinfo +++ b/docs/latest/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: 68666af324b279e5e7d7ada9d27ddd71 +config: b911398855f7668d2aa125e82024376d tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/google/resumable_media/_download.py b/google/resumable_media/_download.py index 5d2d10d1..6260d742 100644 --- a/google/resumable_media/_download.py +++ b/google/resumable_media/_download.py @@ -354,36 +354,24 @@ def _process_response(self, response): self._get_status_code, callback=self._make_invalid, ) - headers = self._get_headers(response) - response_body = self._get_body(response) - - start_byte, end_byte, total_bytes = get_range_info( + content_length = _helpers.header_required( + response, u"content-length", self._get_headers, callback=self._make_invalid + ) + num_bytes = int(content_length) + _, end_byte, total_bytes = get_range_info( response, self._get_headers, callback=self._make_invalid ) - - transfer_encoding = headers.get(u"transfer-encoding") - - if transfer_encoding is None: - content_length = _helpers.header_required( + response_body = self._get_body(response) + if len(response_body) != num_bytes: + self._make_invalid() + raise common.InvalidResponse( response, - u"content-length", - self._get_headers, - callback=self._make_invalid, + u"Response is different size than content-length", + u"Expected", + num_bytes, + u"Received", + len(response_body), ) - num_bytes = int(content_length) - if len(response_body) != num_bytes: - self._make_invalid() - raise common.InvalidResponse( - response, - u"Response is different size than content-length", - u"Expected", - num_bytes, - u"Received", - len(response_body), - ) - else: - # 'content-length' header not allowed with chunked encoding. - num_bytes = end_byte - start_byte + 1 # First update ``bytes_downloaded``. self._bytes_downloaded += num_bytes diff --git a/google/resumable_media/requests/_helpers.py b/google/resumable_media/requests/_helpers.py index 80cf4542..b7343ce5 100644 --- a/google/resumable_media/requests/_helpers.py +++ b/google/resumable_media/requests/_helpers.py @@ -25,7 +25,6 @@ _DEFAULT_RETRY_STRATEGY = common.RetryStrategy() -_SINGLE_GET_CHUNK_SIZE = 8192 # The number of seconds to wait to establish a connection # (connect() call on socket). Avoid setting this to a multiple of 3 to not # Align with TCP Retransmission timing. (typically 2.5-3s) @@ -76,12 +75,7 @@ def _get_body(response): Returns: bytes: The body of the ``response``. 
""" - if response._content is False: - response._content = b"".join( - response.raw.stream(_SINGLE_GET_CHUNK_SIZE, decode_content=False) - ) - response._content_consumed = True - return response._content + return response.content def http_request( diff --git a/google/resumable_media/requests/download.py b/google/resumable_media/requests/download.py index a41ff623..1cb1313b 100644 --- a/google/resumable_media/requests/download.py +++ b/google/resumable_media/requests/download.py @@ -18,12 +18,15 @@ import hashlib import logging +import urllib3.response + from google.resumable_media import _download from google.resumable_media import common from google.resumable_media.requests import _helpers _LOGGER = logging.getLogger(__name__) +_SINGLE_GET_CHUNK_SIZE = 8192 _HASH_HEADER = u"x-goog-hash" _MISSING_MD5 = u"""\ No MD5 checksum was returned from the service while downloading {} @@ -113,13 +116,13 @@ def _write_to_stream(self, response): with response: # NOTE: This might "donate" ``md5_hash`` to the decoder and replace # it with a ``_DoNothingHash``. - body_iter = response.raw.stream( - _helpers._SINGLE_GET_CHUNK_SIZE, decode_content=False + local_hash = _add_decoder(response.raw, md5_hash) + body_iter = response.iter_content( + chunk_size=_SINGLE_GET_CHUNK_SIZE, decode_unicode=False ) for chunk in body_iter: self._stream.write(chunk) - md5_hash.update(chunk) - response._content_consumed = True + local_hash.update(chunk) if expected_md5_hash is None: return @@ -155,22 +158,22 @@ def consume(self, transport): """ method, url, payload, headers = self._prepare_request() # NOTE: We assume "payload is None" but pass it along anyway. - response = _helpers.http_request( - transport, - method, - url, - data=payload, - headers=headers, - retry_strategy=self._retry_strategy, - stream=True, - ) + request_kwargs = { + u"data": payload, + u"headers": headers, + u"retry_strategy": self._retry_strategy, + } + if self._stream is not None: + request_kwargs[u"stream"] = True - self._process_response(response) + result = _helpers.http_request(transport, method, url, **request_kwargs) + + self._process_response(result) if self._stream is not None: - self._write_to_stream(response) + self._write_to_stream(result) - return response + return result class ChunkedDownload(_helpers.RequestsMixin, _download.ChunkedDownload): @@ -216,17 +219,16 @@ def consume_next_chunk(self, transport): """ method, url, payload, headers = self._prepare_request() # NOTE: We assume "payload is None" but pass it along anyway. - response = _helpers.http_request( + result = _helpers.http_request( transport, method, url, data=payload, headers=headers, retry_strategy=self._retry_strategy, - stream=True, ) - self._process_response(response) - return response + self._process_response(result) + return result def _parse_md5_header(header_value, response): @@ -294,3 +296,58 @@ def update(self, unused_chunk): Args: unused_chunk (bytes): A chunk of data. """ + + +def _add_decoder(response_raw, md5_hash): + """Patch the ``_decoder`` on a ``urllib3`` response. + + This is so that we can intercept the compressed bytes before they are + decoded. + + Only patches if the content encoding is ``gzip``. + + Args: + response_raw (urllib3.response.HTTPResponse): The raw response for + an HTTP request. + md5_hash (Union[_DoNothingHash, hashlib.md5]): A hash function which + will get updated when it encounters compressed bytes. + + Returns: + Union[_DoNothingHash, hashlib.md5]: Either the original ``md5_hash`` + if ``_decoder`` is not patched. 
Otherwise, returns a ``_DoNothingHash`` + since the caller will no longer need to hash to decoded bytes. + """ + encoding = response_raw.headers.get(u"content-encoding", u"").lower() + if encoding != u"gzip": + return md5_hash + + response_raw._decoder = _GzipDecoder(md5_hash) + return _DoNothingHash() + + +class _GzipDecoder(urllib3.response.GzipDecoder): + """Custom subclass of ``urllib3`` decoder for ``gzip``-ed bytes. + + Allows an MD5 hash function to see the compressed bytes before they are + decoded. This way the hash of the compressed value can be computed. + + Args: + md5_hash (Union[_DoNothingHash, hashlib.md5]): A hash function which + will get updated when it encounters compressed bytes. + """ + + def __init__(self, md5_hash): + super(_GzipDecoder, self).__init__() + self._md5_hash = md5_hash + + def decompress(self, data): + """Decompress the bytes. + + Args: + data (bytes): The compressed bytes to be decompressed. + + Returns: + bytes: The decompressed bytes from ``data``. + """ + self._md5_hash.update(data) + return super(_GzipDecoder, self).decompress(data) diff --git a/tests/system/requests/test_download.py b/tests/system/requests/test_download.py index 9314dcb6..df5d40f1 100644 --- a/tests/system/requests/test_download.py +++ b/tests/system/requests/test_download.py @@ -25,9 +25,8 @@ from six.moves import http_client from google import resumable_media -from google.resumable_media import requests as resumable_requests -from google.resumable_media.requests import download as download_mod -from google.resumable_media.requests import _helpers +import google.resumable_media.requests as resumable_requests +import google.resumable_media.requests.download as download_mod from tests.system import utils @@ -61,11 +60,12 @@ { u"path": os.path.realpath(os.path.join(DATA_DIR, u"file.txt")), u"content_type": PLAIN_TEXT, - u"checksum": u"XHSHAr/SpIeZtZbjgQ4nGw==", + u"checksum": u"KHRs/+ZSrc/FuuR4qz/PZQ==", u"slices": (), }, { u"path": os.path.realpath(os.path.join(DATA_DIR, u"gzipped.txt.gz")), + u"uncompressed": os.path.realpath(os.path.join(DATA_DIR, u"gzipped.txt")), u"content_type": PLAIN_TEXT, u"checksum": u"KHRs/+ZSrc/FuuR4qz/PZQ==", u"slices": (), @@ -126,13 +126,13 @@ def _get_contents_for_upload(info): def _get_contents(info): - full_path = info[u"path"] + full_path = info.get(u"uncompressed", info[u"path"]) with open(full_path, u"rb") as file_obj: return file_obj.read() def _get_blob_name(info): - full_path = info[u"path"] + full_path = info.get(u"uncompressed", info[u"path"]) return os.path.basename(full_path) @@ -179,12 +179,6 @@ def check_tombstoned(download, transport): assert exc_info.match(u"Download has finished.") -def read_raw_content(response): - return b"".join( - response.raw.stream(_helpers._SINGLE_GET_CHUNK_SIZE, decode_content=False) - ) - - def test_download_full(add_files, authorized_transport): for info in ALL_FILES: actual_contents = _get_contents(info) @@ -196,7 +190,7 @@ def test_download_full(add_files, authorized_transport): # Consume the resource. 
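The decoder patch above exists because, for an object stored with ``Content-Encoding: gzip``, the checksum reported in ``x-goog-hash`` covers the stored (compressed) bytes, while urllib3 decodes the body before the caller ever sees it; that is consistent with the system-test fixtures above reading expected contents from the uncompressed file while keeping the gzipped file's checksum. A small standard-library sketch of the mismatch (illustrative only, not code from this change set):

import base64
import gzip
import hashlib

# Stand-in for a gzip-encoded object as the service would store it.
stored = gzip.compress(b"hello world\n")
stored_md5 = base64.b64encode(hashlib.md5(stored).digest())

# What a decoding HTTP client hands back to callers.
decoded = gzip.decompress(stored)
decoded_md5 = base64.b64encode(hashlib.md5(decoded).digest())

# The digests differ, so hashing decoded chunks cannot match the service's
# checksum; _GzipDecoder therefore hashes each chunk before decoding it.
assert stored_md5 != decoded_md5

Hashing inside the decoder also means the compressed body never has to be buffered a second time just to compute the checksum.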
response = download.consume(authorized_transport) assert response.status_code == http_client.OK - assert read_raw_content(response) == actual_contents + assert response.content == actual_contents check_tombstoned(download, authorized_transport) @@ -221,6 +215,7 @@ def test_download_to_stream(add_files, authorized_transport): check_tombstoned(download, authorized_transport) +@pytest.mark.xfail # See: #76 def test_corrupt_download(add_files, corrupting_transport): for info in ALL_FILES: blob_name = _get_blob_name(info) @@ -396,7 +391,8 @@ def consume_chunks(download, authorized_transport, total_bytes, actual_contents) return num_responses, response -def test_chunked_download_full(add_files, authorized_transport): +@pytest.mark.xfail # See issue #56 +def test_chunked_download(add_files, authorized_transport): for info in ALL_FILES: actual_contents = _get_contents(info) blob_name = _get_blob_name(info) diff --git a/tests/unit/requests/test__helpers.py b/tests/unit/requests/test__helpers.py index 81d6a50e..e5ec9746 100644 --- a/tests/unit/requests/test__helpers.py +++ b/tests/unit/requests/test__helpers.py @@ -28,22 +28,12 @@ def test__get_status_code(self): def test__get_headers(self): headers = {u"fruit": u"apple"} - response = mock.Mock(headers=headers, spec=["headers"]) + response = mock.Mock(headers=headers, spec=[u"headers"]) assert headers == _helpers.RequestsMixin._get_headers(response) - def test__get_body_wo_content_consumed(self): + def test__get_body(self): body = b"This is the payload." - raw = mock.Mock(spec=["stream"]) - raw.stream.return_value = iter([body]) - response = mock.Mock(raw=raw, _content=False, spec=["raw", "_content"]) - assert body == _helpers.RequestsMixin._get_body(response) - raw.stream.assert_called_once_with( - _helpers._SINGLE_GET_CHUNK_SIZE, decode_content=False - ) - - def test__get_body_w_content_consumed(self): - body = b"This is the payload." - response = mock.Mock(_content=body, spec=["_content"]) + response = mock.Mock(content=body, spec=[u"content"]) assert body == _helpers.RequestsMixin._get_body(response) diff --git a/tests/unit/requests/test_download.py b/tests/unit/requests/test_download.py index 54a14c28..c86f60ad 100644 --- a/tests/unit/requests/test_download.py +++ b/tests/unit/requests/test_download.py @@ -19,8 +19,7 @@ from six.moves import http_client from google.resumable_media import common -from google.resumable_media.requests import download as download_mod -from google.resumable_media.requests import _helpers +import google.resumable_media.requests.download as download_mod EXAMPLE_URL = ( @@ -31,7 +30,7 @@ class TestDownload(object): - @mock.patch("google.resumable_media.requests.download._LOGGER") + @mock.patch(u"google.resumable_media.requests.download._LOGGER") def test__get_expected_md5_present(self, _LOGGER): download = download_mod.Download(EXAMPLE_URL) @@ -44,7 +43,7 @@ def test__get_expected_md5_present(self, _LOGGER): assert expected_md5_hash == checksum _LOGGER.info.assert_not_called() - @mock.patch("google.resumable_media.requests.download._LOGGER") + @mock.patch(u"google.resumable_media.requests.download._LOGGER") def test__get_expected_md5_missing(self, _LOGGER): download = download_mod.Download(EXAMPLE_URL) @@ -72,8 +71,8 @@ def test__write_to_stream_no_hash_check(self): # Check mocks. 
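The ``iter_content`` assertions that follow pin down the new streaming path: instead of reaching into ``response.raw``, ``_write_to_stream`` iterates ``response.iter_content(chunk_size=..., decode_unicode=False)`` and feeds every chunk to both the target stream and the running hash. Roughly the same pattern as a stand-alone sketch built on plain ``requests`` (the helper name and chunk size are illustrative):

import hashlib

import requests

def stream_and_hash(url, stream, chunk_size=8192):
    # Write the body to ``stream`` in fixed-size chunks while keeping a running MD5.
    md5_hash = hashlib.md5()
    with requests.get(url, stream=True) as response:
        response.raise_for_status()
        for chunk in response.iter_content(chunk_size=chunk_size, decode_unicode=False):
            stream.write(chunk)
            md5_hash.update(chunk)
    return md5_hash

For a gzip-encoded response ``iter_content`` yields decoded bytes, which is exactly why ``_write_to_stream`` swaps in ``_DoNothingHash`` here and lets ``_GzipDecoder`` do the hashing instead.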
response.__enter__.assert_called_once_with() response.__exit__.assert_called_once_with(None, None, None) - response.raw.stream.assert_called_once_with( - _helpers._SINGLE_GET_CHUNK_SIZE, decode_content=False + response.iter_content.assert_called_once_with( + chunk_size=download_mod._SINGLE_GET_CHUNK_SIZE, decode_unicode=False ) def test__write_to_stream_with_hash_check_success(self): @@ -95,8 +94,8 @@ def test__write_to_stream_with_hash_check_success(self): # Check mocks. response.__enter__.assert_called_once_with() response.__exit__.assert_called_once_with(None, None, None) - response.raw.stream.assert_called_once_with( - _helpers._SINGLE_GET_CHUNK_SIZE, decode_content=False + response.iter_content.assert_called_once_with( + chunk_size=download_mod._SINGLE_GET_CHUNK_SIZE, decode_unicode=False ) def test__write_to_stream_with_hash_check_fail(self): @@ -128,8 +127,8 @@ def test__write_to_stream_with_hash_check_fail(self): # Check mocks. response.__enter__.assert_called_once_with() response.__exit__.assert_called_once_with(None, None, None) - response.raw.stream.assert_called_once_with( - _helpers._SINGLE_GET_CHUNK_SIZE, decode_content=False + response.iter_content.assert_called_once_with( + chunk_size=download_mod._SINGLE_GET_CHUNK_SIZE, decode_unicode=False ) def _consume_helper( @@ -138,7 +137,7 @@ def _consume_helper( download = download_mod.Download( EXAMPLE_URL, stream=stream, end=end, headers=headers ) - transport = mock.Mock(spec=["request"]) + transport = mock.Mock(spec=[u"request"]) transport.request.return_value = _mock_response( chunks=chunks, headers=response_headers ) @@ -147,16 +146,12 @@ def _consume_helper( ret_val = download.consume(transport) assert ret_val is transport.request.return_value + called_kwargs = {u"data": None, u"headers": download._headers} if chunks: assert stream is not None - + called_kwargs[u"stream"] = True transport.request.assert_called_once_with( - u"GET", - EXAMPLE_URL, - data=None, - headers=download._headers, - stream=True, - timeout=EXPECTED_TIMEOUT, + u"GET", EXAMPLE_URL, timeout=EXPECTED_TIMEOUT, **called_kwargs ) range_bytes = u"bytes={:d}-{:d}".format(0, end) @@ -179,8 +174,8 @@ def test_consume_with_stream(self): response = transport.request.return_value response.__enter__.assert_called_once_with() response.__exit__.assert_called_once_with(None, None, None) - response.raw.stream.assert_called_once_with( - _helpers._SINGLE_GET_CHUNK_SIZE, decode_content=False + response.iter_content.assert_called_once_with( + chunk_size=download_mod._SINGLE_GET_CHUNK_SIZE, decode_unicode=False ) def test_consume_with_stream_hash_check_success(self): @@ -198,8 +193,8 @@ def test_consume_with_stream_hash_check_success(self): response = transport.request.return_value response.__enter__.assert_called_once_with() response.__exit__.assert_called_once_with(None, None, None) - response.raw.stream.assert_called_once_with( - _helpers._SINGLE_GET_CHUNK_SIZE, decode_content=False + response.iter_content.assert_called_once_with( + chunk_size=download_mod._SINGLE_GET_CHUNK_SIZE, decode_unicode=False ) def test_consume_with_stream_hash_check_fail(self): @@ -210,7 +205,7 @@ def test_consume_with_stream_hash_check_fail(self): bad_checksum = u"anVzdCBub3QgdGhpcyAxLA==" header_value = u"crc32c=V0FUPw==,md5={}".format(bad_checksum) headers = {download_mod._HASH_HEADER: header_value} - transport = mock.Mock(spec=["request"]) + transport = mock.Mock(spec=[u"request"]) transport.request.return_value = _mock_response(chunks=chunks, headers=headers) assert not download.finished @@ 
-267,10 +262,10 @@ def _mock_response( ): response_headers = self._response_headers(start_byte, end_byte, total_bytes) return mock.Mock( - _content=content, + content=content, headers=response_headers, status_code=status_code, - spec=[u"_content", u"headers", u"status_code"], + spec=[u"content", u"headers", u"status_code"], ) def test_consume_next_chunk_already_finished(self): @@ -280,7 +275,7 @@ def test_consume_next_chunk_already_finished(self): download.consume_next_chunk(None) def _mock_transport(self, start, chunk_size, total_bytes, content=b""): - transport = mock.Mock(spec=["request"]) + transport = mock.Mock(spec=[u"request"]) assert len(content) == chunk_size transport.request.return_value = self._mock_response( start, @@ -317,7 +312,6 @@ def test_consume_next_chunk(self): EXAMPLE_URL, data=None, headers=download_headers, - stream=True, timeout=EXPECTED_TIMEOUT, ) assert stream.getvalue() == data @@ -379,26 +373,67 @@ def test__DoNothingHash(): assert return_value is None +class Test__add_decoder(object): + def test_non_gzipped(self): + response_raw = mock.Mock(headers={}, spec=[u"headers"]) + md5_hash = download_mod._add_decoder(response_raw, mock.sentinel.md5_hash) + + assert md5_hash is mock.sentinel.md5_hash + + def test_gzipped(self): + headers = {u"content-encoding": u"gzip"} + response_raw = mock.Mock(headers=headers, spec=[u"headers", u"_decoder"]) + md5_hash = download_mod._add_decoder(response_raw, mock.sentinel.md5_hash) + + assert md5_hash is not mock.sentinel.md5_hash + assert isinstance(md5_hash, download_mod._DoNothingHash) + assert isinstance(response_raw._decoder, download_mod._GzipDecoder) + assert response_raw._decoder._md5_hash is mock.sentinel.md5_hash + + +class Test_GzipDecoder(object): + def test_constructor(self): + decoder = download_mod._GzipDecoder(mock.sentinel.md5_hash) + assert decoder._md5_hash is mock.sentinel.md5_hash + + def test_decompress(self): + md5_hash = mock.Mock(spec=["update"]) + decoder = download_mod._GzipDecoder(md5_hash) + + data = b"\x1f\x8b\x08\x08" + result = decoder.decompress(data) + + assert result == b"" + md5_hash.update.assert_called_once_with(data) + + def _mock_response(status_code=http_client.OK, chunks=(), headers=None): if headers is None: headers = {} if chunks: - mock_raw = mock.Mock(headers=headers, spec=["headers", "stream"]) - mock_raw.stream.return_value = iter(chunks) + mock_raw = mock.Mock(headers=headers, spec=[u"headers"]) response = mock.MagicMock( headers=headers, status_code=int(status_code), raw=mock_raw, - spec=["__enter__", "__exit__", "raw", "status_code", "headers", "raw"], + spec=[ + u"__enter__", + u"__exit__", + u"iter_content", + u"status_code", + u"headers", + u"raw", + ], ) # i.e. context manager returns ``self``. response.__enter__.return_value = response response.__exit__.return_value = None + response.iter_content.return_value = iter(chunks) return response else: return mock.Mock( headers=headers, status_code=int(status_code), - spec=["status_code", "headers"], + spec=[u"status_code", u"headers"], ) diff --git a/tests/unit/test__download.py b/tests/unit/test__download.py index bfad49c1..a984b876 100644 --- a/tests/unit/test__download.py +++ b/tests/unit/test__download.py @@ -128,7 +128,7 @@ def test__process_response(self): # Make sure **not finished** before. 
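A note on the ``b"\x1f\x8b\x08\x08"`` payload used by ``Test_GzipDecoder.test_decompress`` above: those four bytes are only the beginning of a gzip member header (magic number, deflate method, FNAME flag), so the decoder emits nothing while the raw bytes still reach the hash. The underlying zlib behaviour can be confirmed directly; a stand-alone check, not part of the test suite:

import zlib

# 16 + MAX_WBITS asks zlib for gzip framing, mirroring urllib3's GzipDecoder.
decoder = zlib.decompressobj(16 + zlib.MAX_WBITS)
partial_header = b"\x1f\x8b\x08\x08"
# An incomplete header is simply buffered until more bytes arrive.
assert decoder.decompress(partial_header) == b""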
assert not download.finished - response = mock.Mock(status_code=int(http_client.OK), spec=["status_code"]) + response = mock.Mock(status_code=int(http_client.OK), spec=[u"status_code"]) ret_val = download._process_response(response) assert ret_val is None # Make sure **finished** after. @@ -141,7 +141,7 @@ def test__process_response_bad_status(self): # Make sure **not finished** before. assert not download.finished response = mock.Mock( - status_code=int(http_client.NOT_FOUND), spec=["status_code"] + status_code=int(http_client.NOT_FOUND), spec=[u"status_code"] ) with pytest.raises(common.InvalidResponse) as exc_info: download._process_response(response) @@ -263,7 +263,7 @@ def _mock_response( content=content, headers=response_headers, status_code=status_code, - spec=["content", "headers", "status_code"], + spec=[u"content", u"headers", u"status_code"], ) def test__prepare_request_already_finished(self): @@ -322,8 +322,7 @@ def test__make_invalid(self): assert download.invalid def test__process_response(self): - data = b"1234xyztL" * 37 - chunk_size = len(data) + chunk_size = 333 stream = io.BytesIO() download = _download.ChunkedDownload(EXAMPLE_URL, chunk_size, stream) _fix_up_virtual(download) @@ -337,6 +336,7 @@ def test__process_response(self): assert download.bytes_downloaded == already assert download.total_bytes is None # Actually call the method to update. + data = b"1234xyztL" * 37 # 9 * 37 == 33 response = self._mock_response( already, already + chunk_size - 1, @@ -351,42 +351,9 @@ def test__process_response(self): assert download.total_bytes == total_bytes assert stream.getvalue() == data - def test__process_response_transfer_encoding(self): - data = b"1234xyztL" * 37 - chunk_size = len(data) - stream = io.BytesIO() - download = _download.ChunkedDownload(EXAMPLE_URL, chunk_size, stream) - _fix_up_virtual(download) - - already = 22 - download._bytes_downloaded = already - total_bytes = 4444 - - # Check internal state before. - assert not download.finished - assert download.bytes_downloaded == already - assert download.total_bytes is None - assert not download.invalid - # Actually call the method to update. - response = self._mock_response( - already, - already + chunk_size - 1, - total_bytes, - content=data, - status_code=int(http_client.PARTIAL_CONTENT), - ) - response.headers[u"transfer-encoding"] = "chunked" - del response.headers[u"content-length"] - download._process_response(response) - # Check internal state after. - assert not download.finished - assert download.bytes_downloaded == already + chunk_size - assert download.total_bytes == total_bytes - assert stream.getvalue() == data - def test__process_response_bad_status(self): chunk_size = 384 - stream = mock.Mock(spec=["write"]) + stream = mock.Mock(spec=[u"write"]) download = _download.ChunkedDownload(EXAMPLE_URL, chunk_size, stream) _fix_up_virtual(download) @@ -426,12 +393,10 @@ def test__process_response_missing_content_length(self): assert download.total_bytes is None assert not download.invalid # Actually call the method to update. 
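The reworked assertions above follow from the tightened ``ChunkedDownload._process_response`` earlier in this diff: ``content-length`` is now required on every chunk response (the ``transfer-encoding: chunked`` branch is gone) and the body must be exactly that many bytes. Below is a simplified, hypothetical version of just that size check; ``_check_body_length`` is not a helper that exists in this library, and the real code raises ``common.InvalidResponse`` and marks the download invalid rather than raising ``ValueError``:

def _check_body_length(headers, body):
    # Compare the advertised size against the bytes actually received.
    num_bytes = int(headers[u"content-length"])
    if len(body) != num_bytes:
        raise ValueError(
            u"Response is different size than content-length",
            u"Expected", num_bytes, u"Received", len(body),
        )
    return num_bytes

assert _check_body_length({u"content-length": u"333"}, b"1234xyztL" * 37) == 333  # 9 * 37 == 333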
- headers = {u"content-range": u"bytes 0-99/99"} response = mock.Mock( - headers=headers, + headers={}, status_code=int(http_client.PARTIAL_CONTENT), - content=b"DEADBEEF", - spec=["headers", "status_code", "content"], + spec=[u"headers", u"status_code"], ) with pytest.raises(common.InvalidResponse) as exc_info: download._process_response(response) @@ -465,7 +430,7 @@ def test__process_response_bad_content_range(self): content=data, headers=headers, status_code=int(http_client.PARTIAL_CONTENT), - spec=["content", "headers", "status_code"], + spec=[u"content", u"headers", u"status_code"], ) with pytest.raises(common.InvalidResponse) as exc_info: download._process_response(response) @@ -482,7 +447,7 @@ def test__process_response_bad_content_range(self): def test__process_response_body_wrong_length(self): chunk_size = 10 - stream = mock.Mock(spec=["write"]) + stream = mock.Mock(spec=[u"write"]) download = _download.ChunkedDownload(EXAMPLE_URL, chunk_size, stream) _fix_up_virtual(download) @@ -639,7 +604,7 @@ class Test_get_range_info(object): @staticmethod def _make_response(content_range): headers = {u"content-range": content_range} - return mock.Mock(headers=headers, spec=["headers"]) + return mock.Mock(headers=headers, spec=[u"headers"]) def _success_helper(self, **kwargs): content_range = u"Bytes 7-11/42" @@ -679,7 +644,7 @@ def test_failure_with_callback(self): callback.assert_called_once_with() def _missing_header_helper(self, **kwargs): - response = mock.Mock(headers={}, spec=["headers"]) + response = mock.Mock(headers={}, spec=[u"headers"]) with pytest.raises(common.InvalidResponse) as exc_info: _download.get_range_info(response, _get_headers, **kwargs)
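``Test_get_range_info`` above exercises ``get_range_info``, whose input, for example ``u"Bytes 7-11/42"``, follows the ``bytes {start}-{end}/{total}`` shape parsed case-insensitively from the ``content-range`` header. For illustration, a stand-alone approximation of that parsing (the regex and function below are assumptions, not the library's exact code):

import re

_CONTENT_RANGE = re.compile(
    r"bytes (?P<start>\d+)-(?P<end>\d+)/(?P<total>\d+)", flags=re.IGNORECASE
)

def parse_content_range(value):
    # Return (start_byte, end_byte, total_bytes) or raise on malformed input.
    match = _CONTENT_RANGE.match(value)
    if match is None:
        raise ValueError(u"Unexpected content-range header", value)
    return tuple(int(match.group(name)) for name in (u"start", u"end", u"total"))

assert parse_content_range(u"Bytes 7-11/42") == (7, 11, 42)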