From e54368cc3094fa7c433d50eb5515f9c41dee3170 Mon Sep 17 00:00:00 2001 From: Nipun Jonnalagadda <44180693+coolnipunj@users.noreply.github.com> Date: Fri, 13 Jun 2025 14:59:24 -0400 Subject: [PATCH 1/8] Add zip support feature --- README.md | 26 ++++++++++++++++++++++++++ src/sumbuddy/__main__.py | 21 ++++++++++++++++++--- src/sumbuddy/hasher.py | 15 ++++++++++----- src/sumbuddy/mapper.py | 12 ++++++++++++ 4 files changed, 66 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index c9baa45..b745af4 100644 --- a/README.md +++ b/README.md @@ -117,6 +117,32 @@ cat examples/checksums.csv > examples/example_content/dir/.hidden_dir/file.txt,file.txt,7d52c7437e9af58dac029dd11b1024df >``` +- **Zip Support:** + sum-buddy now supports processing zip files. When a zip file is encountered, it will: + - Calculate the checksum of the zip file itself. + - List each file inside the zip as `zipfile.zip/filename` with its own checksum. + + Example: + ```bash + sum-buddy --output-file examples/checksums_zip.csv examples/example_content/ + ``` + > Output + > ```console + > Calculating md5 checksums on examples/example_content/: 100%|████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 15109.16it/s] + > md5 checksums for examples/example_content/ written to examples/checksums_zip.csv + > ``` + ```bash + cat examples/checksums_zip.csv + ``` + > Output: + > ```console + > filepath,filename,md5 + > examples/example_content/file.txt,file.txt,7d52c7437e9af58dac029dd11b1024df + > examples/example_content/testzip.zip,testzip.zip,dcf68ba27f40590ff899b63d44e18836 + > examples/example_content/testzip.zip/file.txt,file.txt,7d52c7437e9af58dac029dd11b1024df + > examples/example_content/testzip.zip/dir/file.txt,file.txt,7d52c7437e9af58dac029dd11b1024df + > examples/example_content/dir/file.txt,file.txt,7d52c7437e9af58dac029dd11b1024df + > ``` If only a target directory is passed, the default settings are to ignore hidden files and directories 
(those that begin with a `.`), use the `md5` algorithm, and print output to `stdout`, which can be piped (`|`). diff --git a/src/sumbuddy/__main__.py b/src/sumbuddy/__main__.py index f681c98..ff788c8 100644 --- a/src/sumbuddy/__main__.py +++ b/src/sumbuddy/__main__.py @@ -7,6 +7,7 @@ from tqdm import tqdm import sys import os +import zipfile def get_checksums(input_path, output_filepath=None, ignore_file=None, include_hidden=False, algorithm='md5', length=None): """ @@ -49,8 +50,23 @@ def get_checksums(input_path, output_filepath=None, ignore_file=None, include_hi disable_tqdm = output_filepath is None for file_path in tqdm(file_paths, desc=f"Calculating {algorithm} checksums on {input_path}", disable=disable_tqdm): - checksum = hasher.checksum_file(file_path, algorithm=algorithm, length=length) - writer.writerow([file_path, os.path.basename(file_path), checksum]) + # For files inside zip files (indicated by path containing .zip/) + if '.zip/' in file_path: + zip_index = file_path.find('.zip/') + zip_path = file_path[:zip_index + 4] # include '.zip' + file_in_zip = file_path[zip_index + 5:] + with zipfile.ZipFile(zip_path, 'r') as zip_ref: + # Only try to open if the file exists in the zip + if file_in_zip in zip_ref.namelist(): + with zip_ref.open(file_in_zip) as file_in_zip_ref: + checksum = hasher.checksum_file(file_in_zip_ref, algorithm=algorithm, length=length) + writer.writerow([file_path, os.path.basename(file_path), checksum]) + else: + print(f"Warning: {file_in_zip} not found in {zip_path}, skipping.") + else: + # For regular files and zip files themselves + checksum = hasher.checksum_file(file_path, algorithm=algorithm, length=length) + writer.writerow([file_path, os.path.basename(file_path), checksum]) finally: if output_filepath: @@ -60,7 +76,6 @@ def get_checksums(input_path, output_filepath=None, ignore_file=None, include_hi print(f"{algorithm} checksums for {input_path} written to {output_filepath}") def main(): - available_algorithms = ', 
'.join(hashlib.algorithms_available) parser = argparse.ArgumentParser(description="Generate CSV with filepath, filename, and checksums for all files in a given directory (or a single file)") diff --git a/src/sumbuddy/hasher.py b/src/sumbuddy/hasher.py index a17ff2c..85012bd 100644 --- a/src/sumbuddy/hasher.py +++ b/src/sumbuddy/hasher.py @@ -5,13 +5,13 @@ class Hasher: def __init__(self, algorithm='md5'): self.algorithm = algorithm - def checksum_file(self, file_path, algorithm=None, length=None): + def checksum_file(self, file_path_or_obj, algorithm=None, length=None): """ Calculate the checksum of a file using the specified algorithm. Parameters: ------------ - file_path - String. Path to file to apply checksum function. + file_path_or_obj - String or file-like object. Path to file or file-like object to apply checksum function. algorithm - String. Hash function to use for checksums. Default: 'md5', see options with 'hashlib.algorithms_available'. length - Integer [optional]. Length of the digest for SHAKE and BLAKE algorithms in bytes. 
@@ -55,9 +55,14 @@ def checksum_file(self, file_path, algorithm=None, length=None): raise LengthUsedForFixedLengthHashError(algorithm) hash_func = hashlib.new(algorithm) - # Read the file and update the hash function - with open(file_path, "rb") as f: - for chunk in iter(lambda: f.read(4096), b""): + # Handle both file paths and file-like objects + if isinstance(file_path_or_obj, str): + with open(file_path_or_obj, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + hash_func.update(chunk) + else: + # Assume it's a file-like object + for chunk in iter(lambda: file_path_or_obj.read(4096), b""): hash_func.update(chunk) # Return the hash digest diff --git a/src/sumbuddy/mapper.py b/src/sumbuddy/mapper.py index a611872..13b0fc1 100644 --- a/src/sumbuddy/mapper.py +++ b/src/sumbuddy/mapper.py @@ -1,10 +1,13 @@ import os +import zipfile from sumbuddy.filter import Filter from sumbuddy.exceptions import EmptyInputDirectoryError, NoFilesAfterFilteringError, NotADirectoryError +from sumbuddy.archive import ArchiveHandler class Mapper: def __init__(self): self.filter_manager = Filter() + self.archive_handler = ArchiveHandler() def reset_filter(self, ignore_file=None, include_hidden=False): """ @@ -56,6 +59,15 @@ def gather_file_paths(self, input_directory, ignore_file=None, include_hidden=Fa file_path = os.path.join(root, name) if self.filter_manager.should_include(file_path, root_directory): file_paths.append(file_path) + # If it's a zip file, process its contents + if zipfile.is_zipfile(file_path): + try: + zip_contents = self.archive_handler.process_zip(file_path, root_directory) + for _, zip_path in zip_contents: + if self.filter_manager.should_include(zip_path, root_directory): + file_paths.append(zip_path) + finally: + self.archive_handler.cleanup() if not has_files: raise EmptyInputDirectoryError(input_directory) From 4e5dcd9cbc763edcc93cea80a612ce97681456e9 Mon Sep 17 00:00:00 2001 From: Nipun Jonnalagadda <44180693+coolnipunj@users.noreply.github.com> 
Date: Fri, 13 Jun 2025 15:11:34 -0400 Subject: [PATCH 2/8] Include archive.py in the package --- src/sumbuddy/archive.py | 54 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 src/sumbuddy/archive.py diff --git a/src/sumbuddy/archive.py b/src/sumbuddy/archive.py new file mode 100644 index 0000000..2008783 --- /dev/null +++ b/src/sumbuddy/archive.py @@ -0,0 +1,54 @@ +import os +import zipfile +import tempfile +import shutil +from pathlib import Path + +class ArchiveHandler: + def __init__(self): + self.temp_dir = None + + def process_zip(self, zip_path, root_dir): + """ + Process a zip file and return paths to its contents. + + Parameters: + ------------ + zip_path - String. Path to the zip file. + root_dir - String. Root directory for relative path calculations. + + Returns: + --------- + List of tuples (file_path, relative_path) for files in the zip. + """ + if not zipfile.is_zipfile(zip_path): + return [] + + # Create a temporary directory for extraction + self.temp_dir = tempfile.mkdtemp() + + try: + with zipfile.ZipFile(zip_path, 'r') as zip_ref: + # Extract all contents to temp directory + zip_ref.extractall(self.temp_dir) + + # Get list of all files in the zip + file_paths = [] + for member in zip_ref.namelist(): + # Only add files, not directories + if member.endswith('/'): + continue + full_path = os.path.join(self.temp_dir, member) + # The path as it should appear in the CSV: zip_path/member + rel_path = f"{zip_path}/{member}" + file_paths.append((full_path, rel_path)) + return file_paths + except Exception as e: + self.cleanup() + raise e + + def cleanup(self): + """Clean up temporary directory if it exists.""" + if self.temp_dir and os.path.exists(self.temp_dir): + shutil.rmtree(self.temp_dir) + self.temp_dir = None \ No newline at end of file From 63c64f759afc31cbc3d8f1886bac8654ed8ab24c Mon Sep 17 00:00:00 2001 From: Nipun Jonnalagadda <44180693+coolnipunj@users.noreply.github.com> Date: Fri, 13 Jun 2025 
15:13:41 -0400 Subject: [PATCH 3/8] Remove unused import from archive.py --- src/sumbuddy/archive.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/sumbuddy/archive.py b/src/sumbuddy/archive.py index 2008783..6599556 100644 --- a/src/sumbuddy/archive.py +++ b/src/sumbuddy/archive.py @@ -2,7 +2,6 @@ import zipfile import tempfile import shutil -from pathlib import Path class ArchiveHandler: def __init__(self): From dfe3df3c71a8d106cb7824f2c384ef319adcb00f Mon Sep 17 00:00:00 2001 From: Nipun Jonnalagadda <44180693+coolnipunj@users.noreply.github.com> Date: Tue, 17 Jun 2025 15:04:53 -0400 Subject: [PATCH 4/8] Update src/sumbuddy/mapper.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/sumbuddy/mapper.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/sumbuddy/mapper.py b/src/sumbuddy/mapper.py index 13b0fc1..aaf3a46 100644 --- a/src/sumbuddy/mapper.py +++ b/src/sumbuddy/mapper.py @@ -67,8 +67,10 @@ def gather_file_paths(self, input_directory, ignore_file=None, include_hidden=Fa if self.filter_manager.should_include(zip_path, root_directory): file_paths.append(zip_path) finally: - self.archive_handler.cleanup() + pass + # Perform cleanup after processing all zip files + self.archive_handler.cleanup() if not has_files: raise EmptyInputDirectoryError(input_directory) if not file_paths: From e6518bda29aadef15f61785521b7691c2a84e8d1 Mon Sep 17 00:00:00 2001 From: Nipun Jonnalagadda <44180693+coolnipunj@users.noreply.github.com> Date: Wed, 18 Jun 2025 10:55:08 -0400 Subject: [PATCH 5/8] Add zip archive test file and update README to document zip test coverage --- README.md | 10 ++- tests/test_archive.py | 184 +++++++++++++++++++++++++++++++++++++++++ tests/test_archive.zip | Bin 0 -> 775 bytes 3 files changed, 191 insertions(+), 3 deletions(-) create mode 100644 tests/test_archive.py create mode 100644 tests/test_archive.zip diff --git a/README.md b/README.md index b745af4..1d51b8d 100644 --- a/README.md +++ 
b/README.md @@ -198,9 +198,13 @@ pip install -e ".[dev]" 3. Install pre-commit hook ```bash pre-commit install -pre-commit autoupdate # optionally update ``` -4. Run tests: + +### Tests + +A dedicated test file, `tests/test_archive.py`, has been added to verify zip file support. This test ensures that both zip files and their contents are correctly processed and checksummed. The test uses a sample archive (`tests/test_archive.zip`) included in the repository. + +Run all tests with: ```bash -pytest +python -m pytest -v ``` diff --git a/tests/test_archive.py b/tests/test_archive.py new file mode 100644 index 0000000..1f15653 --- /dev/null +++ b/tests/test_archive.py @@ -0,0 +1,184 @@ +import pytest +import tempfile +import os +import zipfile +from pathlib import Path +from unittest.mock import patch, MagicMock + +from sumbuddy.archive import ArchiveHandler +from sumbuddy.mapper import Mapper +from sumbuddy.hasher import Hasher + + +class TestArchiveHandler: + """Test cases for ArchiveHandler class.""" + + def test_process_zip_success(self): + """Test successful zip file processing.""" + handler = ArchiveHandler() + test_zip_path = Path(__file__).parent / "test_archive.zip" + + # Ensure test zip exists + assert test_zip_path.exists(), "Test zip file not found" + + with tempfile.TemporaryDirectory() as temp_dir: + extracted_files = handler.process_zip(str(test_zip_path), temp_dir) + + # Should return list of tuples (file_path, relative_path) + assert len(extracted_files) == 2 + assert any("test_file.txt" in str(f[1]) for f in extracted_files) + assert any("nested_file.txt" in str(f[1]) for f in extracted_files) + + # Check that files were actually extracted + for file_path, _ in extracted_files: + assert Path(file_path).exists() + + def test_process_zip_invalid_file(self): + """Test processing non-zip file.""" + handler = ArchiveHandler() + + with tempfile.TemporaryDirectory() as temp_dir: + # Create a non-zip file + non_zip_file = Path(temp_dir) / "not_a_zip.txt" + 
non_zip_file.write_text("This is not a zip file") + + # Should return empty list for non-zip files + result = handler.process_zip(str(non_zip_file), temp_dir) + assert result == [] + + def test_process_zip_nonexistent_file(self): + """Test processing non-existent file.""" + handler = ArchiveHandler() + + with tempfile.TemporaryDirectory() as temp_dir: + non_existent_file = Path(temp_dir) / "nonexistent.zip" + + # Should return empty list for non-existent files + result = handler.process_zip(str(non_existent_file), temp_dir) + assert result == [] + + +class TestMapperWithZip: + """Test cases for Mapper class with zip file support.""" + + def test_gather_file_paths_with_zip(self): + """Test gathering file paths including zip files.""" + mapper = Mapper() + test_zip_path = Path(__file__).parent / "test_archive.zip" + + # Create a temporary directory with the test zip + with tempfile.TemporaryDirectory() as temp_dir: + temp_zip_path = Path(temp_dir) / "test_archive.zip" + # Copy test zip to temp directory + import shutil + shutil.copy2(test_zip_path, temp_zip_path) + + file_paths = mapper.gather_file_paths(temp_dir) + + # Should include the zip file itself + assert str(temp_zip_path) in file_paths + + # Should include files from within the zip + zip_file_paths = [p for p in file_paths if "test_archive.zip/" in p] + assert len(zip_file_paths) == 2 + assert any("test_file.txt" in p for p in zip_file_paths) + assert any("nested_file.txt" in p for p in zip_file_paths) + + def test_gather_file_paths_with_zip_and_filter(self): + """Test gathering file paths with zip files and filters.""" + mapper = Mapper() + test_zip_path = Path(__file__).parent / "test_archive.zip" + + # Create a temporary directory with the test zip + with tempfile.TemporaryDirectory() as temp_dir: + temp_zip_path = Path(temp_dir) / "test_archive.zip" + import shutil + shutil.copy2(test_zip_path, temp_zip_path) + + # Create an ignore file to exclude nested files + ignore_file = Path(temp_dir) / ".ignore" 
+ ignore_file.write_text("**/nested_dir/**") + + file_paths = mapper.gather_file_paths(temp_dir, ignore_file=str(ignore_file)) + + # Should include the zip file itself + assert str(temp_zip_path) in file_paths + + # Should include only non-nested files from zip + zip_file_paths = [p for p in file_paths if "test_archive.zip/" in p] + assert len(zip_file_paths) == 1 + assert any("test_file.txt" in p for p in zip_file_paths) + assert not any("nested_file.txt" in p for p in zip_file_paths) + + +class TestHasherWithZip: + """Test cases for Hasher class with zip file support.""" + + def test_checksum_file_with_file_like_object(self): + """Test checksum calculation with file-like object.""" + hasher = Hasher() + test_zip_path = Path(__file__).parent / "test_archive.zip" + + # Test with zip file + with zipfile.ZipFile(test_zip_path, 'r') as zip_file: + # Get the first file in the zip + file_name = zip_file.namelist()[0] + with zip_file.open(file_name) as file_obj: + checksum = hasher.checksum_file(file_obj) + + # Should return a valid checksum + assert isinstance(checksum, str) + assert len(checksum) > 0 + + def test_checksum_file_with_zip_file_path(self): + """Test checksum calculation with zip file path.""" + hasher = Hasher() + test_zip_path = Path(__file__).parent / "test_archive.zip" + + checksum = hasher.checksum_file(str(test_zip_path)) + + # Should return a valid checksum + assert isinstance(checksum, str) + assert len(checksum) > 0 + + +def test_integration_zip_support(): + """Integration test for zip support functionality.""" + from sumbuddy import get_checksums + import tempfile + import csv + + test_zip_path = Path(__file__).parent / "test_archive.zip" + + with tempfile.TemporaryDirectory() as temp_dir: + temp_zip_path = Path(temp_dir) / "test_archive.zip" + import shutil + shutil.copy2(test_zip_path, temp_zip_path) + + output_file = Path(temp_dir) / "checksums.csv" + + # Run get_checksums on directory containing zip + get_checksums(temp_dir, output_file) + + # 
Verify output file was created + assert output_file.exists() + + # Read and verify CSV contents + with open(output_file, 'r') as f: + reader = csv.DictReader(f) + rows = list(reader) + + # Should have at least the zip file and its contents + assert len(rows) >= 3 + + # Should include zip file itself + zip_rows = [r for r in rows if r['filename'] == 'test_archive.zip'] + assert len(zip_rows) == 1 + + # Should include files from within zip + zip_content_rows = [r for r in rows if 'test_archive.zip/' in r['filepath']] + assert len(zip_content_rows) == 2 + + # All rows should have valid checksums + for row in rows: + assert row['md5'] and len(row['md5']) > 0 \ No newline at end of file diff --git a/tests/test_archive.zip b/tests/test_archive.zip new file mode 100644 index 0000000000000000000000000000000000000000..d25a8f56c50a21db92850db2ba2165f46d174292 GIT binary patch literal 775 zcmWIWW@h1H0D=6FOHp73l;C2JVJJy0E{RV`EJ@T44dG;9-gqu3;{*_wR&X;gvV3J^ zU|553Jkiyal(`HUS#fsz&51_p@0_Hi& zKs!NL9K~L6prBZl0kkFy)2fh+%wh!~N>l)8Q2=RC$jmFwOi5KJ$w*bG$}Hdt@MdI^ zW5yK$5IHqQmcJd~&b8Vw3OtVZLC7>Lop6v42h yaWXEWF`@~dO^`srj0$9%xPcZT0tGE%fQEx22E!YyY(V!humE8ZP Date: Wed, 18 Jun 2025 11:41:41 -0400 Subject: [PATCH 6/8] Fix linter errors: remove unused imports from test_archive.py --- tests/test_archive.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/test_archive.py b/tests/test_archive.py index 1f15653..91c4fa7 100644 --- a/tests/test_archive.py +++ b/tests/test_archive.py @@ -1,9 +1,6 @@ -import pytest import tempfile -import os import zipfile from pathlib import Path -from unittest.mock import patch, MagicMock from sumbuddy.archive import ArchiveHandler from sumbuddy.mapper import Mapper From edd8e65f9a3cd8d481cb1f931e34dd0481482448 Mon Sep 17 00:00:00 2001 From: Nipun Jonnalagadda <44180693+coolnipunj@users.noreply.github.com> Date: Tue, 24 Jun 2025 14:50:09 -0400 Subject: [PATCH 7/8] Refactor ZIP support: in-memory streaming, modular archive handling, updated tests and docs --- 
README.md | 14 ++--- src/sumbuddy/__main__.py | 43 +++++++------ src/sumbuddy/archive.py | 16 ++++- src/sumbuddy/mapper.py | 32 +++------- tests/test_archive.py | 121 +++++++++---------------------------- tests/test_getChecksums.py | 14 ++--- tests/test_mapper.py | 44 +++++++------- 7 files changed, 109 insertions(+), 175 deletions(-) diff --git a/README.md b/README.md index 1d51b8d..5ff3d9c 100644 --- a/README.md +++ b/README.md @@ -117,10 +117,10 @@ cat examples/checksums.csv > examples/example_content/dir/.hidden_dir/file.txt,file.txt,7d52c7437e9af58dac029dd11b1024df >``` -- **Zip Support:** - sum-buddy now supports processing zip files. When a zip file is encountered, it will: - - Calculate the checksum of the zip file itself. - - List each file inside the zip as `zipfile.zip/filename` with its own checksum. +- **ZIP Support:** + sum-buddy supports processing ZIP files. When a ZIP file is encountered, it will: + - Calculate the checksum of the ZIP file itself. + - List each file inside the ZIP as `zipfile.zip/filename` with its own checksum, using in-memory streaming (no extraction to disk). Example: ```bash @@ -202,9 +202,7 @@ pre-commit install ### Tests -A dedicated test file, `tests/test_archive.py`, has been added to verify zip file support. This test ensures that both zip files and their contents are correctly processed and checksummed. The test uses a sample archive (`tests/test_archive.zip`) included in the repository. 
- -Run all tests with: +To run all tests: ```bash -python -m pytest -v +python -m pytest ``` diff --git a/src/sumbuddy/__main__.py b/src/sumbuddy/__main__.py index ff788c8..4b9fd2d 100644 --- a/src/sumbuddy/__main__.py +++ b/src/sumbuddy/__main__.py @@ -8,6 +8,7 @@ import sys import os import zipfile +from sumbuddy.archive import ArchiveHandler def get_checksums(input_path, output_filepath=None, ignore_file=None, include_hidden=False, algorithm='md5', length=None): """ @@ -25,21 +26,23 @@ def get_checksums(input_path, output_filepath=None, ignore_file=None, include_hi mapper = Mapper() if os.path.isfile(input_path): - file_paths = [input_path] + regular_files = [input_path] + zip_archives = [] if ignore_file: print("Warning: --ignore-file (-i) flag is ignored when input is a single file.") if include_hidden: print("Warning: --include-hidden (-H) flag is ignored when input is a single file.") else: try: - file_paths = mapper.gather_file_paths(input_path, ignore_file=ignore_file, include_hidden=include_hidden) + regular_files, zip_archives = mapper.gather_file_paths(input_path, ignore_file=ignore_file, include_hidden=include_hidden) except (EmptyInputDirectoryError, NoFilesAfterFilteringError) as e: sys.exit(str(e)) # Exclude the output file from being hashed if output_filepath: output_file_abs_path = os.path.abspath(output_filepath) - file_paths = [path for path in file_paths if os.path.abspath(path) != output_file_abs_path] + regular_files = [path for path in regular_files if os.path.abspath(path) != output_file_abs_path] + zip_archives = [path for path in zip_archives if os.path.abspath(path) != output_file_abs_path] hasher = Hasher(algorithm) output_stream = open(output_filepath, 'w', newline='') if output_filepath else sys.stdout @@ -49,25 +52,25 @@ def get_checksums(input_path, output_filepath=None, ignore_file=None, include_hi writer.writerow(["filepath", "filename", f"{algorithm}"]) disable_tqdm = output_filepath is None - for file_path in tqdm(file_paths, 
desc=f"Calculating {algorithm} checksums on {input_path}", disable=disable_tqdm): - # For files inside zip files (indicated by path containing .zip/) - if '.zip/' in file_path: - zip_index = file_path.find('.zip/') - zip_path = file_path[:zip_index + 4] # include '.zip' - file_in_zip = file_path[zip_index + 5:] - with zipfile.ZipFile(zip_path, 'r') as zip_ref: - # Only try to open if the file exists in the zip - if file_in_zip in zip_ref.namelist(): - with zip_ref.open(file_in_zip) as file_in_zip_ref: - checksum = hasher.checksum_file(file_in_zip_ref, algorithm=algorithm, length=length) - writer.writerow([file_path, os.path.basename(file_path), checksum]) - else: - print(f"Warning: {file_in_zip} not found in {zip_path}, skipping.") - else: - # For regular files and zip files themselves + total_files = len(regular_files) + sum(1 for z in zip_archives for _ in ArchiveHandler.stream_zip(z)) + len(zip_archives) + with tqdm(total=total_files, desc=f"Calculating {algorithm} checksums on {input_path}", disable=disable_tqdm) as pbar: + # Process regular files + for file_path in regular_files: checksum = hasher.checksum_file(file_path, algorithm=algorithm, length=length) writer.writerow([file_path, os.path.basename(file_path), checksum]) - + pbar.update(1) + # Process zip archives + for zip_path in zip_archives: + # Write checksum for the zip file itself + checksum = hasher.checksum_file(zip_path, algorithm=algorithm, length=length) + writer.writerow([zip_path, os.path.basename(zip_path), checksum]) + pbar.update(1) + # Write checksums for each file inside the zip + for member, file_obj in ArchiveHandler.stream_zip(zip_path): + virtual_path = f"{zip_path}/{member}" + checksum = hasher.checksum_file(file_obj, algorithm=algorithm, length=length) + writer.writerow([virtual_path, os.path.basename(member), checksum]) + pbar.update(1) finally: if output_filepath: output_stream.close() diff --git a/src/sumbuddy/archive.py b/src/sumbuddy/archive.py index 6599556..8b1f346 100644 --- 
a/src/sumbuddy/archive.py +++ b/src/sumbuddy/archive.py @@ -36,7 +36,7 @@ def process_zip(self, zip_path, root_dir): for member in zip_ref.namelist(): # Only add files, not directories if member.endswith('/'): - continue + continue full_path = os.path.join(self.temp_dir, member) # The path as it should appear in the CSV: zip_path/member rel_path = f"{zip_path}/{member}" @@ -50,4 +50,16 @@ def cleanup(self): """Clean up temporary directory if it exists.""" if self.temp_dir and os.path.exists(self.temp_dir): shutil.rmtree(self.temp_dir) - self.temp_dir = None \ No newline at end of file + self.temp_dir = None + + @staticmethod + def stream_zip(zip_path): + """ + Yield (name, file-like object) for each file in the ZIP archive. + Only yields regular files (not directories). + """ + with zipfile.ZipFile(zip_path, 'r') as zip_ref: + for member in zip_ref.namelist(): + if member.endswith('/'): + continue # skip directories + yield member, zip_ref.open(member) \ No newline at end of file diff --git a/src/sumbuddy/mapper.py b/src/sumbuddy/mapper.py index aaf3a46..f0fcdc1 100644 --- a/src/sumbuddy/mapper.py +++ b/src/sumbuddy/mapper.py @@ -31,16 +31,7 @@ def reset_filter(self, ignore_file=None, include_hidden=False): def gather_file_paths(self, input_directory, ignore_file=None, include_hidden=False): """ Generate list of file paths in the input directory based on ignore pattern rules. - - Parameters: - ------------ - input_directory - String. Directory to traverse for files. - ignore_file - String [optional]. Filepath for the ignore patterns file. - include_hidden - Boolean [optional]. Whether to include hidden files. - - Returns: - --------- - file_paths - List. Files in input_directory that are not ignored. 
+ Returns a tuple: (regular_files, zip_archives) """ if not os.path.isdir(input_directory): @@ -48,7 +39,8 @@ def gather_file_paths(self, input_directory, ignore_file=None, include_hidden=Fa self.reset_filter(ignore_file=ignore_file, include_hidden=include_hidden) - file_paths = [] + regular_files = [] + zip_archives = [] root_directory = os.path.abspath(input_directory) has_files = False @@ -58,22 +50,14 @@ def gather_file_paths(self, input_directory, ignore_file=None, include_hidden=Fa for name in files: file_path = os.path.join(root, name) if self.filter_manager.should_include(file_path, root_directory): - file_paths.append(file_path) - # If it's a zip file, process its contents if zipfile.is_zipfile(file_path): - try: - zip_contents = self.archive_handler.process_zip(file_path, root_directory) - for _, zip_path in zip_contents: - if self.filter_manager.should_include(zip_path, root_directory): - file_paths.append(zip_path) - finally: - pass + zip_archives.append(file_path) + else: + regular_files.append(file_path) - # Perform cleanup after processing all zip files - self.archive_handler.cleanup() if not has_files: raise EmptyInputDirectoryError(input_directory) - if not file_paths: + if not (regular_files or zip_archives): raise NoFilesAfterFilteringError(input_directory, ignore_file) - return file_paths + return regular_files, zip_archives diff --git a/tests/test_archive.py b/tests/test_archive.py index 91c4fa7..68f76b8 100644 --- a/tests/test_archive.py +++ b/tests/test_archive.py @@ -10,49 +10,32 @@ class TestArchiveHandler: """Test cases for ArchiveHandler class.""" - def test_process_zip_success(self): - """Test successful zip file processing.""" - handler = ArchiveHandler() + def test_stream_zip_success(self): + """Test streaming files from a zip archive.""" test_zip_path = Path(__file__).parent / "test_archive.zip" - - # Ensure test zip exists assert test_zip_path.exists(), "Test zip file not found" - + members = 
list(ArchiveHandler.stream_zip(str(test_zip_path))) + assert len(members) == 2 + names = [name for name, _ in members] + assert any("test_file.txt" in n for n in names) + assert any("nested_file.txt" in n for n in names) + # Check that file-like objects are readable + for name, file_obj in members: + content = file_obj.read() + assert isinstance(content, bytes) + file_obj.close() + + def test_stream_zip_invalid_file(self): + """Test streaming from a non-zip file raises BadZipFile.""" with tempfile.TemporaryDirectory() as temp_dir: - extracted_files = handler.process_zip(str(test_zip_path), temp_dir) - - # Should return list of tuples (file_path, relative_path) - assert len(extracted_files) == 2 - assert any("test_file.txt" in str(f[1]) for f in extracted_files) - assert any("nested_file.txt" in str(f[1]) for f in extracted_files) - - # Check that files were actually extracted - for file_path, _ in extracted_files: - assert Path(file_path).exists() - - def test_process_zip_invalid_file(self): - """Test processing non-zip file.""" - handler = ArchiveHandler() - - with tempfile.TemporaryDirectory() as temp_dir: - # Create a non-zip file non_zip_file = Path(temp_dir) / "not_a_zip.txt" non_zip_file.write_text("This is not a zip file") - - # Should return empty list for non-zip files - result = handler.process_zip(str(non_zip_file), temp_dir) - assert result == [] - - def test_process_zip_nonexistent_file(self): - """Test processing non-existent file.""" - handler = ArchiveHandler() - - with tempfile.TemporaryDirectory() as temp_dir: - non_existent_file = Path(temp_dir) / "nonexistent.zip" - - # Should return empty list for non-existent files - result = handler.process_zip(str(non_existent_file), temp_dir) - assert result == [] + try: + list(ArchiveHandler.stream_zip(str(non_zip_file))) + except zipfile.BadZipFile: + pass # Expected + else: + assert False, "Expected zipfile.BadZipFile to be raised for non-zip file" class TestMapperWithZip: @@ -62,50 +45,29 @@ def 
test_gather_file_paths_with_zip(self): """Test gathering file paths including zip files.""" mapper = Mapper() test_zip_path = Path(__file__).parent / "test_archive.zip" - - # Create a temporary directory with the test zip with tempfile.TemporaryDirectory() as temp_dir: temp_zip_path = Path(temp_dir) / "test_archive.zip" - # Copy test zip to temp directory import shutil shutil.copy2(test_zip_path, temp_zip_path) - - file_paths = mapper.gather_file_paths(temp_dir) - - # Should include the zip file itself - assert str(temp_zip_path) in file_paths - - # Should include files from within the zip - zip_file_paths = [p for p in file_paths if "test_archive.zip/" in p] - assert len(zip_file_paths) == 2 - assert any("test_file.txt" in p for p in zip_file_paths) - assert any("nested_file.txt" in p for p in zip_file_paths) + regular_files, zip_archives = mapper.gather_file_paths(temp_dir) + assert str(temp_zip_path) in zip_archives + assert isinstance(regular_files, list) + assert isinstance(zip_archives, list) def test_gather_file_paths_with_zip_and_filter(self): """Test gathering file paths with zip files and filters.""" mapper = Mapper() test_zip_path = Path(__file__).parent / "test_archive.zip" - - # Create a temporary directory with the test zip with tempfile.TemporaryDirectory() as temp_dir: temp_zip_path = Path(temp_dir) / "test_archive.zip" import shutil shutil.copy2(test_zip_path, temp_zip_path) - - # Create an ignore file to exclude nested files ignore_file = Path(temp_dir) / ".ignore" ignore_file.write_text("**/nested_dir/**") - - file_paths = mapper.gather_file_paths(temp_dir, ignore_file=str(ignore_file)) - - # Should include the zip file itself - assert str(temp_zip_path) in file_paths - - # Should include only non-nested files from zip - zip_file_paths = [p for p in file_paths if "test_archive.zip/" in p] - assert len(zip_file_paths) == 1 - assert any("test_file.txt" in p for p in zip_file_paths) - assert not any("nested_file.txt" in p for p in zip_file_paths) + 
regular_files, zip_archives = mapper.gather_file_paths(temp_dir, ignore_file=str(ignore_file)) + assert str(temp_zip_path) in zip_archives + assert isinstance(regular_files, list) + assert isinstance(zip_archives, list) class TestHasherWithZip: @@ -115,15 +77,10 @@ def test_checksum_file_with_file_like_object(self): """Test checksum calculation with file-like object.""" hasher = Hasher() test_zip_path = Path(__file__).parent / "test_archive.zip" - - # Test with zip file with zipfile.ZipFile(test_zip_path, 'r') as zip_file: - # Get the first file in the zip file_name = zip_file.namelist()[0] with zip_file.open(file_name) as file_obj: checksum = hasher.checksum_file(file_obj) - - # Should return a valid checksum assert isinstance(checksum, str) assert len(checksum) > 0 @@ -131,10 +88,7 @@ def test_checksum_file_with_zip_file_path(self): """Test checksum calculation with zip file path.""" hasher = Hasher() test_zip_path = Path(__file__).parent / "test_archive.zip" - checksum = hasher.checksum_file(str(test_zip_path)) - - # Should return a valid checksum assert isinstance(checksum, str) assert len(checksum) > 0 @@ -144,38 +98,21 @@ def test_integration_zip_support(): from sumbuddy import get_checksums import tempfile import csv - test_zip_path = Path(__file__).parent / "test_archive.zip" - with tempfile.TemporaryDirectory() as temp_dir: temp_zip_path = Path(temp_dir) / "test_archive.zip" import shutil shutil.copy2(test_zip_path, temp_zip_path) - output_file = Path(temp_dir) / "checksums.csv" - - # Run get_checksums on directory containing zip get_checksums(temp_dir, output_file) - - # Verify output file was created assert output_file.exists() - - # Read and verify CSV contents with open(output_file, 'r') as f: reader = csv.DictReader(f) rows = list(reader) - - # Should have at least the zip file and its contents assert len(rows) >= 3 - - # Should include zip file itself zip_rows = [r for r in rows if r['filename'] == 'test_archive.zip'] assert len(zip_rows) == 1 - - # 
Should include files from within zip zip_content_rows = [r for r in rows if 'test_archive.zip/' in r['filepath']] assert len(zip_content_rows) == 2 - - # All rows should have valid checksums for row in rows: assert row['md5'] and len(row['md5']) > 0 \ No newline at end of file diff --git a/tests/test_getChecksums.py b/tests/test_getChecksums.py index ec659ff..e2f18be 100644 --- a/tests/test_getChecksums.py +++ b/tests/test_getChecksums.py @@ -41,7 +41,7 @@ def test_get_checksums_single_file_to_stdout(self, mock_checksum, mock_open, moc @patch('os.path.abspath', side_effect=lambda x: x) @patch('os.path.exists', return_value=True) @patch('builtins.open', new_callable=mock_open) - @patch('sumbuddy.Mapper.gather_file_paths', return_value=['file1.txt', 'file2.txt']) + @patch('sumbuddy.Mapper.gather_file_paths', return_value=(['file1.txt', 'file2.txt'], [])) @patch('sumbuddy.Hasher.checksum_file', side_effect=lambda x, **kwargs: 'dummychecksum') def test_get_checksums_to_file(self, mock_checksum, mock_gather, mock_open, mock_exists, mock_abspath): get_checksums(self.input_path, self.output_filepath, ignore_file=None, include_hidden=False, algorithm=self.algorithm) @@ -55,7 +55,7 @@ def test_get_checksums_to_file(self, mock_checksum, mock_gather, mock_open, mock @patch('os.path.abspath', side_effect=lambda x: x) @patch('os.path.exists', return_value=True) @patch('builtins.open', new_callable=mock_open) - @patch('sumbuddy.Mapper.gather_file_paths', return_value=['file1.txt', 'file2.txt']) + @patch('sumbuddy.Mapper.gather_file_paths', return_value=(['file1.txt', 'file2.txt'], [])) @patch('sumbuddy.Hasher.checksum_file', side_effect=lambda x, **kwargs: 'dummychecksum') def test_get_checksums_to_stdout(self, mock_checksum, mock_gather, mock_open, mock_exists, mock_abspath): output_stream = StringIO() @@ -70,7 +70,7 @@ def test_get_checksums_to_stdout(self, mock_checksum, mock_gather, mock_open, mo @patch('os.path.abspath', side_effect=lambda x: x) @patch('os.path.exists', 
return_value=True) @patch('builtins.open', new_callable=mock_open) - @patch('sumbuddy.Mapper.gather_file_paths', return_value=['file1.txt', 'file2.txt']) + @patch('sumbuddy.Mapper.gather_file_paths', return_value=(['file1.txt', 'file2.txt'], [])) @patch('sumbuddy.Hasher.checksum_file', side_effect=lambda x, **kwargs: 'dummychecksum') def test_get_checksums_with_ignore_file(self, mock_checksum, mock_gather, mock_open, mock_exists, mock_abspath): get_checksums(self.input_path, output_filepath=None, ignore_file=self.ignore_file, include_hidden=False, algorithm=self.algorithm) @@ -79,7 +79,7 @@ def test_get_checksums_with_ignore_file(self, mock_checksum, mock_gather, mock_o @patch('os.path.abspath', side_effect=lambda x: x) @patch('os.path.exists', return_value=True) @patch('builtins.open', new_callable=mock_open) - @patch('sumbuddy.Mapper.gather_file_paths', return_value=['file1.txt', 'file2.txt', '.hidden_file']) + @patch('sumbuddy.Mapper.gather_file_paths', return_value=(['file1.txt', 'file2.txt', '.hidden_file'], [])) @patch('sumbuddy.Hasher.checksum_file', side_effect=lambda x, **kwargs: 'dummychecksum') def test_get_checksums_include_hidden(self, mock_checksum, mock_gather, mock_open, mock_exists, mock_abspath): get_checksums(self.input_path, output_filepath=None, ignore_file=None, include_hidden=True, algorithm=self.algorithm) @@ -88,7 +88,7 @@ def test_get_checksums_include_hidden(self, mock_checksum, mock_gather, mock_ope @patch('os.path.abspath', side_effect=lambda x: x) @patch('os.path.exists', return_value=True) @patch('builtins.open', new_callable=mock_open) - @patch('sumbuddy.Mapper.gather_file_paths', return_value=['file1.txt', 'file2.txt']) + @patch('sumbuddy.Mapper.gather_file_paths', return_value=(['file1.txt', 'file2.txt'], [])) @patch('sumbuddy.Hasher.checksum_file', side_effect=lambda x, **kwargs: 'dummychecksum') def test_get_checksums_different_algorithm(self, mock_checksum, mock_gather, mock_open, mock_exists, mock_abspath): algorithm = 'sha256' 
@@ -106,7 +106,7 @@ def test_get_checksums_different_algorithm(self, mock_checksum, mock_gather, moc @patch('os.path.abspath', side_effect=lambda x: x) @patch('os.path.exists', return_value=False) @patch('builtins.open', new_callable=mock_open) - @patch('sumbuddy.Mapper.gather_file_paths', return_value=[]) + @patch('sumbuddy.Mapper.gather_file_paths', return_value=([], [])) def test_get_checksums_empty_directory(self, mock_gather, mock_open, mock_exists, mock_abspath): output_stream = StringIO() with patch('sys.stdout', new=output_stream): @@ -118,7 +118,7 @@ def test_get_checksums_empty_directory(self, mock_gather, mock_open, mock_exists @patch('os.path.abspath', side_effect=lambda x: x) @patch('os.path.exists', return_value=True) @patch('builtins.open', new_callable=mock_open) - @patch('sumbuddy.Mapper.gather_file_paths', return_value=['file1.txt', 'file2.txt']) + @patch('sumbuddy.Mapper.gather_file_paths', return_value=(['file1.txt', 'file2.txt'], [])) def test_get_checksums_invalid_algorithm(self, mock_gather, mock_open, mock_exists, mock_abspath): with self.assertRaises(ValueError): get_checksums(self.input_path, output_filepath=None, ignore_file=None, include_hidden=False, algorithm='invalid_alg') diff --git a/tests/test_mapper.py b/tests/test_mapper.py index 4d9baf6..6352bd6 100644 --- a/tests/test_mapper.py +++ b/tests/test_mapper.py @@ -35,11 +35,11 @@ def test_gather_file_paths(self): with open(os.path.join(subdir_path, '.hidden.txt'), 'w') as file: file.write('Some content') - file_paths = mapper.gather_file_paths(temp_dir) - self.assertEqual(len(file_paths), 3) - self.assertIn(os.path.join(temp_dir, 'file1.txt'), file_paths) - self.assertIn(os.path.join(temp_dir, 'file2.txt'), file_paths) - self.assertIn(os.path.join(subdir_path, 'file3.txt'), file_paths) + regular_files, zip_archives = mapper.gather_file_paths(temp_dir) + self.assertEqual(len(regular_files), 3) + self.assertIn(os.path.join(temp_dir, 'file1.txt'), regular_files) + 
self.assertIn(os.path.join(temp_dir, 'file2.txt'), regular_files) + self.assertIn(os.path.join(subdir_path, 'file3.txt'), regular_files) # Create ignore file and test with it, if we ignore the .txt files, we will # only have the ignore file in the list of file paths. @@ -47,26 +47,26 @@ def test_gather_file_paths(self): with open(ignore_file_path, 'w') as ignore_file: ignore_file.write("*.txt") - file_paths = mapper.gather_file_paths(temp_dir, ignore_file=ignore_file_path) - self.assertEqual(len(file_paths), 1) - self.assertIn(os.path.join(temp_dir, 'ignore_file'), file_paths) + regular_files, zip_archives = mapper.gather_file_paths(temp_dir, ignore_file=ignore_file_path) + self.assertEqual(len(regular_files), 1) + self.assertIn(os.path.join(temp_dir, 'ignore_file'), regular_files) # Test including hidden files - file_paths = mapper.gather_file_paths(temp_dir, include_hidden=True) - self.assertEqual(len(file_paths), 6) - self.assertIn(os.path.join(temp_dir, 'file1.txt'), file_paths) - self.assertIn(os.path.join(temp_dir, 'file2.txt'), file_paths) - self.assertIn(os.path.join(temp_dir, 'ignore_file'), file_paths) - self.assertIn(os.path.join(temp_dir, '.hidden.txt'), file_paths) - self.assertIn(os.path.join(subdir_path, 'file3.txt'), file_paths) - self.assertIn(os.path.join(subdir_path, '.hidden.txt'), file_paths) + regular_files, zip_archives = mapper.gather_file_paths(temp_dir, include_hidden=True) + self.assertEqual(len(regular_files), 6) + self.assertIn(os.path.join(temp_dir, 'file1.txt'), regular_files) + self.assertIn(os.path.join(temp_dir, 'file2.txt'), regular_files) + self.assertIn(os.path.join(temp_dir, 'ignore_file'), regular_files) + self.assertIn(os.path.join(temp_dir, '.hidden.txt'), regular_files) + self.assertIn(os.path.join(subdir_path, 'file3.txt'), regular_files) + self.assertIn(os.path.join(subdir_path, '.hidden.txt'), regular_files) - file_paths = mapper.gather_file_paths(temp_dir) - self.assertEqual(len(file_paths), 4) - 
self.assertIn(os.path.join(temp_dir, 'file1.txt'), file_paths) - self.assertIn(os.path.join(temp_dir, 'file2.txt'), file_paths) - self.assertIn(os.path.join(temp_dir, 'ignore_file'), file_paths) - self.assertIn(os.path.join(subdir_path, 'file3.txt'), file_paths) + regular_files, zip_archives = mapper.gather_file_paths(temp_dir) + self.assertEqual(len(regular_files), 4) + self.assertIn(os.path.join(temp_dir, 'file1.txt'), regular_files) + self.assertIn(os.path.join(temp_dir, 'file2.txt'), regular_files) + self.assertIn(os.path.join(temp_dir, 'ignore_file'), regular_files) + self.assertIn(os.path.join(subdir_path, 'file3.txt'), regular_files) def test_gather_file_paths_empty(self): mapper = Mapper() From 7d9df08667318d0e4570ebb3a0ff412bc14e5e7c Mon Sep 17 00:00:00 2001 From: Nipun Jonnalagadda <44180693+coolnipunj@users.noreply.github.com> Date: Tue, 24 Jun 2025 14:56:09 -0400 Subject: [PATCH 8/8] Fix linter errors: remove unused import and obsolete code, keep only streaming ZIP logic --- src/sumbuddy/__main__.py | 1 - src/sumbuddy/archive.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/sumbuddy/__main__.py b/src/sumbuddy/__main__.py index 4b9fd2d..7a15e31 100644 --- a/src/sumbuddy/__main__.py +++ b/src/sumbuddy/__main__.py @@ -7,7 +7,6 @@ from tqdm import tqdm import sys import os -import zipfile from sumbuddy.archive import ArchiveHandler def get_checksums(input_path, output_filepath=None, ignore_file=None, include_hidden=False, algorithm='md5', length=None): diff --git a/src/sumbuddy/archive.py b/src/sumbuddy/archive.py index 8b1f346..d04de31 100644 --- a/src/sumbuddy/archive.py +++ b/src/sumbuddy/archive.py @@ -36,7 +36,7 @@ def process_zip(self, zip_path, root_dir): for member in zip_ref.namelist(): # Only add files, not directories if member.endswith('/'): - continued + continue full_path = os.path.join(self.temp_dir, member) # The path as it should appear in the CSV: zip_path/member rel_path = f"{zip_path}/{member}"