Skip to content

Commit

Permalink
Change code formatter to ruff
Browse files Browse the repository at this point in the history
  • Loading branch information
titusz committed Jun 27, 2024
1 parent b53f223 commit 74ec2eb
Show file tree
Hide file tree
Showing 8 changed files with 81 additions and 148 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ The explanation below is copied from
codebase is little more than a translation of that implementation:
> The following optimizations and variations on FastCDC are involved in the chunking algorithm:
> * 31 bit integers to avoid 64 bit integers for the sake of the Javascript reference implementation.
> * 31 bit integers to avoid 64-bit integers for the sake of the Javascript reference implementation.
> * A right shift instead of a left shift to remove the need for an additional modulus operator, which would otherwise have been necessary to prevent overflow.
> * Masks are no longer zero-padded since a right shift is used instead of a left shift.
> * A more adaptive threshold based on a combination of average and minimum chunk size (rather than just average chunk size) to decide the pivot point at which to switch masks. A larger minimum chunk size now switches from the strict mask to the eager mask earlier.
Expand Down
1 change: 1 addition & 0 deletions build.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
The shared library can also be built manually using the command:
$ cythonize -X language_level=3 -a -i ./fastcdc/fastcdc_cy.pyx
"""

from distutils.command.build_ext import build_ext


Expand Down
8 changes: 4 additions & 4 deletions fastcdc/fastcdc_py.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ def chunk_generator(stream, min_size, avg_size, max_size, fat, hf):
read_size = max(1024 * 64, max_size)
offset = 0
while offset < len(stream):
blob = stream[offset:offset + read_size]
blob = stream[offset : offset + read_size]
cp = cdc_offset(blob, min_size, avg_size, max_size, cs, mask_s, mask_l)
raw = bytes(blob[:cp]) if fat else b''
h = hf(blob[:cp]).hexdigest() if hf else ''
raw = bytes(blob[:cp]) if fat else b""
h = hf(blob[:cp]).hexdigest() if hf else ""
yield Chunk(offset, cp, raw, h)
offset += cp

Expand Down Expand Up @@ -87,7 +87,7 @@ def center_size(average, minimum, source_size):


def mask(bits):
return 2 ** bits - 1
return 2**bits - 1


########################################################################################
Expand Down
2 changes: 1 addition & 1 deletion fastcdc/original.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"""
True to the original port of https://github.com/nlfiedler/fastcdc-rs
"""

import os
from dataclasses import dataclass
from mmap import mmap, ACCESS_READ
Expand All @@ -26,7 +27,6 @@ class Chunk:

@dataclass
class FastCDC:

source: Union[ByteString, BinaryIO, Text]
bytes_processed: int
bytes_remaining: int
Expand Down
9 changes: 7 additions & 2 deletions fastcdc/scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,15 @@

@click.command(cls=DefaultHelp)
@click.argument(
"paths", type=click.Path(exists=True, file_okay=False, resolve_path=True), nargs=-1,
"paths",
type=click.Path(exists=True, file_okay=False, resolve_path=True),
nargs=-1,
)
@click.option(
"-r", "--recursive", help="Scan directory tree recursively.", is_flag=True,
"-r",
"--recursive",
help="Scan directory tree recursively.",
is_flag=True,
)
@click.option(
"-s",
Expand Down
6 changes: 3 additions & 3 deletions fastcdc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def logarithm2(value: int) -> int:
def mask(bits: int) -> int:
assert bits >= 1
assert bits <= 31
return 2 ** bits - 1
return 2**bits - 1


class DefaultHelp(click.Command):
Expand Down Expand Up @@ -87,12 +87,12 @@ def iter_files(path, recursive=False):
def get_memoryview(data):
# Handle file path string and Path object
if isinstance(data, (str, Path)):
with open(data, 'rb') as f:
with open(data, "rb") as f:
mm = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
return memoryview(mm)

# Handle file object opened in 'rb' mode
if hasattr(data, 'fileno'):
if hasattr(data, "fileno"):
mm = mmap.mmap(data.fileno(), 0, access=mmap.ACCESS_READ)
return memoryview(mm)

Expand Down
183 changes: 49 additions & 134 deletions poetry.lock

Large diffs are not rendered by default.

18 changes: 15 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,23 @@ blake3 = { version = "^0.3", optional = true }
[tool.poetry.extras]
hashes = ["xxhash", "blake3"]

[tool.poetry.dev-dependencies]
pytest = "*"
black = "*"
[tool.poetry.group.dev.dependencies]
cython = "*"
pytest = "*"
pytest-benchmark = "*"
poethepoet = "*"
ruff = "*"

[tool.ruff]
line-length = 88

[tool.ruff.format]
line-ending = "lf"

[tool.poe.tasks]
format-code = { cmd = "poetry run ruff format", help = "Code style formatting with ruff" }
test = { cmd = "poetry run pytest", help = "Run tests" }
all = ["format-code", "test"]

[tool.poetry.build]
generate-setup-file = true
Expand Down

0 comments on commit 74ec2eb

Please sign in to comment.