hongbo-miao · mergify · Jan 7, 2025 · Jan 7, 2025
@@ -171,10 +171,12 @@ exclude = [
 # https://docs.astral.sh/ruff/rules/
 [lint]
 select = [
+  "AIR",  # Airflow
   "E",    # pycodestyle
   "F",    # Pyflakes
   "FAST", # FastAPI
   "I",    # isort
+  "PERF", # Perflint
   "PGH",  # pygrep-hooks
   "PL",   # Pylint
   "TRY",  # tryceratops

@@ -14,24 +14,23 @@ def download_sample_images(data_path: Path) -> None:
     data_path.mkdir(parents=True, exist_ok=True)
 
     # Create a class subdirectory (e.g., "class0")
-    class_dir = data_path / "class0"
-    class_dir.mkdir(parents=True, exist_ok=True)
+    class_dir_path = data_path / Path("class0")
+    class_dir_path.mkdir(parents=True, exist_ok=True)
 
     # Sample image URLs
     image_urls: list[str] = [
         "https://raw.githubusercontent.com/pytorch/hub/master/images/dog.jpg"
     ]
 
     # Download images into the class subdirectory
-    for i, url in enumerate(image_urls):
-        try:
-            filename = f"image_{i}.jpg"
-            filepath = class_dir / filename
+    try:
+        for i, url in enumerate(image_urls):
+            filepath = class_dir_path / f"image_{i}.jpg"
             if not filepath.exists():
                 logger.info(f"Downloading {url} to {filepath}")
                 urllib.request.urlretrieve(url, str(filepath))
-        except Exception:
-            logger.exception(f"Error downloading {url}")
+    except Exception:
+        logger.exception(f"Error downloading {url}")
 
 
 @pipeline_def(batch_size=2, num_threads=2, device_id=None)

@@ -31,8 +31,8 @@ def main() -> None:
 
     converter = DocumentConverter()
 
-    for pdf_path in pdf_paths:
-        try:
+    try:
+        for pdf_path in pdf_paths:
             # Convert PDF to markdown
             res = converter.convert(pdf_path)
             markdown_content = res.document.export_to_markdown()
@@ -41,8 +41,8 @@ def main() -> None:
             markdown_path = pdf_path.with_suffix(".md")
             markdown_path.write_text(markdown_content, encoding="utf-8")
             logger.info(f"Converted {pdf_path.name}")
-        except Exception as e:
-            logger.info(f"Error processing {pdf_path.name}: {e}")
+    except Exception as e:
+        logger.info(f"Error processing PDFs: {e}")
 
 
 if __name__ == "__main__":