From 0d1ccfebf96e3c573b3cf771a72f5cba13989219 Mon Sep 17 00:00:00 2001
From: Jacques Raphanel
Date: Sat, 24 Feb 2024 18:39:52 +0100
Subject: [PATCH 1/2] fix: invalid compressed file for huge file

---
 pdbstore/store/entry.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/pdbstore/store/entry.py b/pdbstore/store/entry.py
index a18af3b..bb93fe6 100644
--- a/pdbstore/store/entry.py
+++ b/pdbstore/store/entry.py
@@ -102,6 +102,16 @@ def commit(self, force: Optional[bool] = False) -> bool:
         if not dest_dir.is_dir():
             dest_dir.mkdir(parents=True)
 
+        if self.compressed:
+            # Sanity check to limit compression for file having size with less than 2GB
+            # 2GB is the limit of cab files as Microsoft documentation
+            max_cab_file_size = 2147483648
+            if max_cab_file_size < self.source_file.stat().st_size:
+                self.compressed = False
+                PDBStoreOutput().warning(
+                    f"Disable compression for {self.source_file} since file size is more than 2GB"
+                )
+
         if self.compressed:
             PDBStoreOutput().debug(
                 f"Compressing {self.source_file} to {str(dest_dir / (self.file_name[:-1] + '_'))}"

From 1e6b324ca4315f4d50d766550611e697a9d22707 Mon Sep 17 00:00:00 2001
From: Jacques Raphanel
Date: Fri, 1 Mar 2024 12:21:11 +0100
Subject: [PATCH 2/2] feat: add test for large file

---
 pdbstore/io/file.py                  | 15 +++++++++++++++
 pdbstore/store/entry.py              |  8 +++++---
 tests/unit/test_file.py              | 18 +++++++++++++++++-
 tests/unit/test_transaction_entry.py | 16 ++++++++++++++++
 4 files changed, 53 insertions(+), 4 deletions(-)

diff --git a/pdbstore/io/file.py b/pdbstore/io/file.py
index d49b23a..2de4457 100644
--- a/pdbstore/io/file.py
+++ b/pdbstore/io/file.py
@@ -287,3 +287,18 @@ def _explore_dirs(rootdir: str, recursive: bool = False) -> List[Path]:
                 files_list.append(Path(file))
 
     return files_list
+
+
+def get_file_size(path: PathLike) -> int:
+    """Get file size
+
+    :param path: The file path
+    :return: The file size
+    """
+    if not path:
+        return 0
+    file_path: Path = util.str_to_path(path)
+    if not file_path or not file_path.exists():
+        return 0
+
+    return file_path.stat().st_size
diff --git a/pdbstore/store/entry.py b/pdbstore/store/entry.py
index bb93fe6..081e88b 100644
--- a/pdbstore/store/entry.py
+++ b/pdbstore/store/entry.py
@@ -12,6 +12,9 @@
 class TransactionEntry:
     """A SymbolStore transaction entry representation"""
 
+    # File size limit to disable compression
+    MAX_COMPRESSED_FILE_SIZE: int = 2147482624
+
     def __init__(
         self,
         store: "Store",  # type: ignore[name-defined] # noqa: F821
@@ -104,9 +107,8 @@ def commit(self, force: Optional[bool] = False) -> bool:
 
         if self.compressed:
             # Sanity check to limit compression for file having size with less than 2GB
-            # 2GB is the limit of cab files as Microsoft documentation
-            max_cab_file_size = 2147483648
-            if max_cab_file_size < self.source_file.stat().st_size:
+            # 2GB is the limit of cab files as per Microsoft documentation
+            if self.MAX_COMPRESSED_FILE_SIZE < io.file.get_file_size(self.source_file):
                 self.compressed = False
                 PDBStoreOutput().warning(
                     f"Disable compression for {self.source_file} since file size is more than 2GB"
diff --git a/tests/unit/test_file.py b/tests/unit/test_file.py
index 7b1e7a9..3185451 100644
--- a/tests/unit/test_file.py
+++ b/tests/unit/test_file.py
@@ -5,6 +5,7 @@
 
 from pdbstore.exceptions import ReadFileError
 from pdbstore.io import file
+from pdbstore.typing import Generator
 
 NEWLINES_FILE_CONTENT = "first line\nsecond line"
 
@@ -13,7 +14,7 @@
 
 
 @pytest.fixture(name="file_access")
-def fixture_file_access(tmp_path, request) -> Path:
+def fixture_file_access(tmp_path, request) -> Generator[Path, None, None]:
     """Generate temporary history file"""
     dest = tmp_path / f"file-{time.time()}.bin"
     with open(dest, "wb") as hfp:
@@ -84,3 +85,18 @@ def test_text_file_with_split_windows(file_access):
     content = file.read_text_file(file_access, True)
 
     assert content == NEWLINES_FILE_CONTENT.split("\n")
+
+
+@pytest.mark.parametrize("file_access", [[TEXT_FILE_WITH_CRLF]], indirect=True)
+def test_valid_file_size(file_access):
+    """test valid file size behavior"""
+    assert file.get_file_size(file_access) > 0
+
+
+@pytest.mark.parametrize(
+    "file_path",
+    [None, "", "/invalid/path"],
+)
+def test_invalid_file_size(file_path):
+    """test invalid file size behavior"""
+    assert file.get_file_size(file_path) == 0
diff --git a/tests/unit/test_transaction_entry.py b/tests/unit/test_transaction_entry.py
index 2d44510..310d5f7 100644
--- a/tests/unit/test_transaction_entry.py
+++ b/tests/unit/test_transaction_entry.py
@@ -185,3 +185,19 @@ def test_extract_failure(tmp_path, tmp_store, test_data_native_dir, fake_process
     entry.compressed = False
     with pytest.raises(exceptions.CopyFileError):
         entry.extract(tmp_path)
+
+
+def test_large_compressed_file(tmp_store, test_data_native_dir):
+    """test no compress for very large file"""
+    with mock.patch("pdbstore.io.file.get_file_size") as _get_file_size:
+        _get_file_size.return_value = TransactionEntry.MAX_COMPRESSED_FILE_SIZE + 10
+        entry = TransactionEntry(
+            tmp_store,
+            "dummylib.pdb",
+            "1972BE39B97341928816018A8ECD08D91",
+            test_data_native_dir / "dummylib.pdb",
+            True,
+        )
+        assert entry.commit() is True
+        assert entry.is_compressed() is False
+        assert entry.stored_path.exists()