Merge branch 'master' into master

pdfminer · Jun 27, 2024 · 4cdd40c
2 parents f121741 + 16cb34c
commit 4cdd40c
Show file tree

Hide file tree

Showing 6 changed files with 30 additions and 27 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 ### Added
 
 - Support for zipped jpeg's ([#938](https://github.com/pdfminer/pdfminer.six/pull/938))
+- Support for setuptools-git-versioning version 2.0.0 ([#957](https://github.com/pdfminer/pdfminer.six/pull/957))
 
 ### Fixed
 
@@ -16,6 +17,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 - `ValueError` wrong error message when specifying codec for text output  ([#902](https://github.com/pdfminer/pdfminer.six/pull/902))
 - Resolve stream filter parameters ([#906](https://github.com/pdfminer/pdfminer.six/pull/906))
 - Reading cmap's with whitespace in the name ([#935](https://github.com/pdfminer/pdfminer.six/pull/935))
+- Optimize `apply_png_predictor` by using lists ([#912](https://github.com/pdfminer/pdfminer.six/pull/912))
 
 ## [20231228]
 

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -63,19 +63,19 @@ Any contribution is appreciated! You might want to:
 2. Install dev dependencies
 
     ```sh
-    pip install -e .[dev]
+    pip install -e ".[dev]"
     ```
 
-3. Run the tests
+3. Run all formatting, linting and tests
 
     On all Python versions:
 
     ```sh
     nox
    ```
 
-   Or on a single Python version:
+   Or only the tests on a single Python version:
 
    ```sh
-    nox -e py36
+    nox -e tests-3.12
     ```
diff --git a/README.md b/README.md
@@ -41,25 +41,26 @@ How to use
 
 * Install Python 3.8 or newer.
 * Install pdfminer.six.
-
-  `pip install pdfminer.six`
+  ```bash
+  pip install pdfminer.six
 
 * (Optionally) install extra dependencies for extracting images.
 
-  `pip install 'pdfminer.six[image]'`
+  ```bash
+  pip install 'pdfminer.six[image]'
 
 * Use the command-line interface to extract text from pdf.
 
-  `pdf2txt.py example.pdf`
+  ```bash
+  pdf2txt.py example.pdf
 
 * Or use it with Python. 
+  ```python
+  from pdfminer.high_level import extract_text
 
-```python
-from pdfminer.high_level import extract_text
-
-text = extract_text("example.pdf")
-print(text)
-```
+  text = extract_text("example.pdf")
+  print(text)
+  ```
 
 Contributing
 ------------

diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py
@@ -3,9 +3,9 @@
 CMaps provide the mapping between character codes and Unicode
 code-points to character ids (CIDs).
 
-More information is available on the Adobe website:
+More information is available on:
 
-  http://opensource.adobe.com/wiki/display/cmap/CMap+Resources
+  https://github.com/adobe-type-tools/cmap-resources
 
 """
 

diff --git a/pdfminer/utils.py b/pdfminer/utils.py
@@ -138,16 +138,16 @@ def apply_png_predictor(
 
     nbytes = colors * columns * bitspercomponent // 8
     bpp = colors * bitspercomponent // 8  # number of bytes per complete pixel
-    buf = b""
-    line_above = b"\x00" * columns
+    buf = []
+    line_above = list(b"\x00" * columns)
     for scanline_i in range(0, len(data), nbytes + 1):
         filter_type = data[scanline_i]
         line_encoded = data[scanline_i + 1 : scanline_i + 1 + nbytes]
-        raw = b""
+        raw = []
 
         if filter_type == 0:
             # Filter type 0: None
-            raw += line_encoded
+            raw = list(line_encoded)
 
         elif filter_type == 1:
             # Filter type 1: Sub
@@ -162,7 +162,7 @@ def apply_png_predictor(
                 else:
                     raw_x_bpp = int(raw[j - bpp])
                 raw_x = (sub_x + raw_x_bpp) & 255
-                raw += bytes((raw_x,))
+                raw.append(raw_x)
 
         elif filter_type == 2:
             # Filter type 2: Up
@@ -173,7 +173,7 @@ def apply_png_predictor(
             # the prior scanline.
             for (up_x, prior_x) in zip(line_encoded, line_above):
                 raw_x = (up_x + prior_x) & 255
-                raw += bytes((raw_x,))
+                raw.append(raw_x)
 
         elif filter_type == 3:
             # Filter type 3: Average
@@ -191,7 +191,7 @@ def apply_png_predictor(
                     raw_x_bpp = int(raw[j - bpp])
                 prior_x = int(line_above[j])
                 raw_x = (average_x + (raw_x_bpp + prior_x) // 2) & 255
-                raw += bytes((raw_x,))
+                raw.append(raw_x)
 
         elif filter_type == 4:
             # Filter type 4: Paeth
@@ -212,14 +212,14 @@ def apply_png_predictor(
                 prior_x = int(line_above[j])
                 paeth = paeth_predictor(raw_x_bpp, prior_x, prior_x_bpp)
                 raw_x = (paeth_x + paeth) & 255
-                raw += bytes((raw_x,))
+                raw.append(raw_x)
 
         else:
             raise ValueError("Unsupported predictor value: %d" % filter_type)
 
-        buf += raw
+        buf.extend(raw)
         line_above = raw
-    return buf
+    return bytes(buf)
 
 
 Point = Tuple[float, float]

diff --git a/setup.py b/setup.py
@@ -10,7 +10,7 @@
     setuptools_git_versioning={
         "enabled": True,
     },
-    setup_requires=["setuptools-git-versioning<2"],
+    setup_requires=["setuptools-git-versioning<3"],
     packages=["pdfminer"],
     package_data={"pdfminer": ["cmap/*.pickle.gz", "py.typed"]},
     install_requires=[