Skip to content

Commit

Permalink
Change code formatter to ruff
Browse files Browse the repository at this point in the history
  • Loading branch information
titusz committed Jun 27, 2024
1 parent b53f223 commit 74ec2eb
Show file tree
Hide file tree
Showing 8 changed files with 81 additions and 148 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ The explanation below is copied from
codebase is little more than a translation of that implementation:
> The following optimizations and variations on FastCDC are involved in the chunking algorithm:
> * 31 bit integers to avoid 64 bit integers for the sake of the Javascript reference implementation.
> * 31 bit integers to avoid 64-bit integers for the sake of the Javascript reference implementation.
> * A right shift instead of a left shift to remove the need for an additional modulus operator, which would otherwise have been necessary to prevent overflow.
> * Masks are no longer zero-padded since a right shift is used instead of a left shift.
> * A more adaptive threshold based on a combination of average and minimum chunk size (rather than just average chunk size) to decide the pivot point at which to switch masks. A larger minimum chunk size now switches from the strict mask to the eager mask earlier.
Expand Down
1 change: 1 addition & 0 deletions build.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
The shared library can also be built manually using the command:
$ cythonize -X language_level=3 -a -i ./fastcdc/fastcdc_cy.pyx
"""

from distutils.command.build_ext import build_ext


Expand Down
8 changes: 4 additions & 4 deletions fastcdc/fastcdc_py.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ def chunk_generator(stream, min_size, avg_size, max_size, fat, hf):
read_size = max(1024 * 64, max_size)
offset = 0
while offset < len(stream):
blob = stream[offset:offset + read_size]
blob = stream[offset : offset + read_size]
cp = cdc_offset(blob, min_size, avg_size, max_size, cs, mask_s, mask_l)
raw = bytes(blob[:cp]) if fat else b''
h = hf(blob[:cp]).hexdigest() if hf else ''
raw = bytes(blob[:cp]) if fat else b""
h = hf(blob[:cp]).hexdigest() if hf else ""
yield Chunk(offset, cp, raw, h)
offset += cp

Expand Down Expand Up @@ -87,7 +87,7 @@ def center_size(average, minimum, source_size):


def mask(bits):
return 2 ** bits - 1
return 2**bits - 1


########################################################################################
Expand Down
2 changes: 1 addition & 1 deletion fastcdc/original.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"""
True to the original port of https://github.com/nlfiedler/fastcdc-rs
"""

import os
from dataclasses import dataclass
from mmap import mmap, ACCESS_READ
Expand All @@ -26,7 +27,6 @@ class Chunk:

@dataclass
class FastCDC:

source: Union[ByteString, BinaryIO, Text]
bytes_processed: int
bytes_remaining: int
Expand Down
9 changes: 7 additions & 2 deletions fastcdc/scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,15 @@

@click.command(cls=DefaultHelp)
@click.argument(
"paths", type=click.Path(exists=True, file_okay=False, resolve_path=True), nargs=-1,
"paths",
type=click.Path(exists=True, file_okay=False, resolve_path=True),
nargs=-1,
)
@click.option(
"-r", "--recursive", help="Scan directory tree recursively.", is_flag=True,
"-r",
"--recursive",
help="Scan directory tree recursively.",
is_flag=True,
)
@click.option(
"-s",
Expand Down
6 changes: 3 additions & 3 deletions fastcdc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def logarithm2(value: int) -> int:
def mask(bits: int) -> int:
assert bits >= 1
assert bits <= 31
return 2 ** bits - 1
return 2**bits - 1


class DefaultHelp(click.Command):
Expand Down Expand Up @@ -87,12 +87,12 @@ def iter_files(path, recursive=False):
def get_memoryview(data):
# Handle file path string and Path object
if isinstance(data, (str, Path)):
with open(data, 'rb') as f:
with open(data, "rb") as f:
mm = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
return memoryview(mm)

# Handle file object opened in 'rb' mode
if hasattr(data, 'fileno'):
if hasattr(data, "fileno"):
mm = mmap.mmap(data.fileno(), 0, access=mmap.ACCESS_READ)
return memoryview(mm)

Expand Down
183 changes: 49 additions & 134 deletions poetry.lock

Large diffs are not rendered by default.

18 changes: 15 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,23 @@ blake3 = { version = "^0.3", optional = true }
[tool.poetry.extras]
hashes = ["xxhash", "blake3"]

[tool.poetry.dev-dependencies]
pytest = "*"
black = "*"
[tool.poetry.group.dev.dependencies]
cython = "*"
pytest = "*"
pytest-benchmark = "*"
poethepoet = "*"
ruff = "*"

[tool.ruff]
line-length = 88

[tool.ruff.format]
line-ending = "lf"

[tool.poe.tasks]
format-code = { cmd = "poetry run ruff format", help = "Code style formatting with ruff" }
test = { cmd = "poetry run pytest", help = "Run tests" }
all = ["format-code", "test"]

[tool.poetry.build]
generate-setup-file = true
Expand Down

0 comments on commit 74ec2eb

Please sign in to comment.