Skip to content

Commit

Permalink
Rearrange code to get rid of the Global class
Browse files Browse the repository at this point in the history
  • Loading branch information
benoit74 committed Mar 2, 2024
1 parent ca8bf0b commit 2864b2a
Show file tree
Hide file tree
Showing 16 changed files with 378 additions and 242 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -210,3 +210,4 @@ pyrightconfig.json

# ignore all vscode, this is not standard configuration in this place
.vscode
output
1 change: 0 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ LABEL org.opencontainers.image.source https://github.com/openzim/ifixit
# TODO: do we really need all these packages?
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
# locales required if tool has any i18n support
locales \
locales-all \
libmagic1 \
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,15 @@ Docker container as explained below.

First, build the Docker image (to be ran in the main folder of this repo):
```
docker build -t ghcr.io/openzim/ifixit:local .
docker build -t local-ifixit .
```

Then run the scraper with CLI arguments needed for your test (everything after `ifixit2zim` in the example below).

For instance, if you want to run a scrape of only the `Apple_PDA` category, including its guides,
in French :
```
docker run -it -v $(pwd)/output:/output --rm ghcr.io/openzim/fixit:local ifixit2zim --language fr --output /output --tmp-dir /tmp --category Apple_PDA
docker run -it -v $(pwd)/output:/output --rm local-ifixit ifixit2zim --language fr --output /output --tmp-dir /tmp --category Apple_PDA
```

This will produce a ZIM in the output folder of your current directory.
Expand Down
80 changes: 49 additions & 31 deletions src/ifixit2zim/constants.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import pathlib
import tempfile
import urllib.parse
from dataclasses import dataclass

from zimscraperlib.i18n import get_language_details

Expand Down Expand Up @@ -144,6 +143,24 @@
# https://www.ifixit.com/Guide/MacBook+Air+11-Inch+Late+2010+Battery+Replacement/4384
# https://www.ifixit.com/Teardown/Apple+Watch+Teardown/40655

TITLE = {

Check warning on line 146 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L146

Added line #L146 was not covered by tests
"en": {
"title_en": "iFixit in English",
"title_fr": "iFixit in French",
"title_pt": "iFixit in Portuguese",
"title_de": "iFixit in German",
"title_ko": "iFixit in Korean",
"title_zh": "iFixit in Chinese",
"title_ru": "iFixit in Russian",
"title_nl": "iFixit in Dutch",
"title_ja": "iFixit in Japanese",
"title_tr": "iFixit in Turkish",
"title_es": "iFixit in Spanish",
"title_it": "iFixit in Italian",
},
"fr": {"title_fr": "iFixit en Français"},
}

HOME_LABELS = {
"en": {"top_title": "Repair guides for every thing, written by everyone."},
"fr": {"top_title": "Tutoriels de réparation pour tout, écrits par tous."},
Expand Down Expand Up @@ -800,7 +817,6 @@
UNAVAILABLE_OFFLINE_INFOS = ["toolkits"]


@dataclass
class Configuration:
fpath: pathlib.Path

Check warning on line 821 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L820-L821

Added lines #L820 - L821 were not covered by tests

Expand All @@ -815,14 +831,14 @@ class Configuration:
tag: list[str]

Check warning on line 831 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L824-L831

Added lines #L824 - L831 were not covered by tests

# filesystem
_output_dir: str # TODO: rename output_name
_tmp_dir: str # IDEM
output_dir: pathlib.Path # TODO: rename output_path
tmp_dir: pathlib.Path # IDEM
_output_name: str
_tmp_name: str
output_path: pathlib.Path
tmp_path: pathlib.Path

Check warning on line 837 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L834-L837

Added lines #L834 - L837 were not covered by tests

required = (

Check warning on line 839 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L839

Added line #L839 was not covered by tests
"lang_code",
"output_dir",
"output_path",
)

lang_code: str
Expand Down Expand Up @@ -859,35 +875,21 @@ class Configuration:
stats_filename: str | None
skip_checks: bool

Check warning on line 876 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L868-L876

Added lines #L868 - L876 were not covered by tests

@staticmethod
def get_url(lang_code: str) -> urllib.parse.ParseResult:
return urllib.parse.urlparse(URLS[lang_code])

@property
def domain(self) -> str:
return self.main_url.netloc

@property
def api_url(self) -> str:
return self.main_url.geturl() + API_PREFIX

@property
def s3_url(self) -> str | None:
return self.s3_url_with_credentials

def __post_init__(self):
def __init__(self, **kwargs):

Check warning on line 878 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L878

Added line #L878 was not covered by tests
for key, value in kwargs.items():
self.__setattr__(key, value)
self.main_url = Configuration.get_url(self.lang_code)

Check warning on line 881 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L880-L881

Added lines #L880 - L881 were not covered by tests
self.language = get_language_details(self.lang_code)
self.output_dir = pathlib.Path(self._output_dir).expanduser().resolve()
self.output_dir.mkdir(parents=True, exist_ok=True)
self.output_path = pathlib.Path(self._output_name).expanduser().resolve()
self.output_path.mkdir(parents=True, exist_ok=True)

Check warning on line 884 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L883-L884

Added lines #L883 - L884 were not covered by tests

self.tmp_dir = pathlib.Path(self._tmp_dir).expanduser().resolve()
self.tmp_dir.mkdir(parents=True, exist_ok=True)
self.tmp_path = pathlib.Path(self._tmp_name).expanduser().resolve()
self.tmp_path.mkdir(parents=True, exist_ok=True)

Check warning on line 887 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L886-L887

Added lines #L886 - L887 were not covered by tests
if self.build_dir_is_tmp_dir:
self.build_dir = self.tmp_dir
self.build_path = self.tmp_path

Check warning on line 889 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L889

Added line #L889 was not covered by tests
else:
self.build_dir = pathlib.Path(
tempfile.mkdtemp(prefix=f"ifixit_{self.lang_code}_", dir=self.tmp_dir)
self.build_path = pathlib.Path(

Check warning on line 891 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L891

Added line #L891 was not covered by tests
tempfile.mkdtemp(prefix=f"ifixit_{self.lang_code}_", dir=self.tmp_path)
)

self.stats_path = None

Check warning on line 895 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L895

Added line #L895 was not covered by tests
Expand All @@ -901,3 +903,19 @@ def __post_init__(self):
if ";" in tag:
self.tag += [p.strip() for p in tag.split(";")]
self.tag.remove(tag)

@staticmethod

Check warning on line 907 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L907

Added line #L907 was not covered by tests
def get_url(lang_code: str) -> urllib.parse.ParseResult:
return urllib.parse.urlparse(URLS[lang_code])

Check warning on line 909 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L909

Added line #L909 was not covered by tests

@property

Check warning on line 911 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L911

Added line #L911 was not covered by tests
def domain(self) -> str:
return self.main_url.netloc

Check warning on line 913 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L913

Added line #L913 was not covered by tests

@property

Check warning on line 915 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L915

Added line #L915 was not covered by tests
def api_url(self) -> str:
return self.main_url.geturl() + API_PREFIX

Check warning on line 917 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L917

Added line #L917 was not covered by tests

@property

Check warning on line 919 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L919

Added line #L919 was not covered by tests
def s3_url(self) -> str | None:
return self.s3_url_with_credentials

Check warning on line 921 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L921

Added line #L921 was not covered by tests
21 changes: 21 additions & 0 deletions src/ifixit2zim/context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import threading
from dataclasses import dataclass
from typing import Any

Check warning on line 3 in src/ifixit2zim/context.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/context.py#L1-L3

Added lines #L1 - L3 were not covered by tests

from jinja2 import Environment
from zimscraperlib.zim.creator import Creator

Check warning on line 6 in src/ifixit2zim/context.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/context.py#L5-L6

Added lines #L5 - L6 were not covered by tests

from ifixit2zim.processor import Processor
from ifixit2zim.scraper import Configuration
from ifixit2zim.utils import Utils

Check warning on line 10 in src/ifixit2zim/context.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/context.py#L8-L10

Added lines #L8 - L10 were not covered by tests


@dataclass
class Context:
lock: threading.Lock
configuration: Configuration
creator: Creator
utils: Utils
metadata: dict[str, Any]
env: Environment
processor: Processor

Check warning on line 21 in src/ifixit2zim/context.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/context.py#L14-L21

Added lines #L14 - L21 were not covered by tests
6 changes: 3 additions & 3 deletions src/ifixit2zim/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def main():
"--output",
help="Output folder for ZIM file",
default="/output",
dest="_output_dir",
dest="_output_name",
)

parser.add_argument(
Expand Down Expand Up @@ -103,7 +103,7 @@ def main():
"--tmp-dir",
help="Path to create temp folder in. Used for building ZIM file.",
default=os.getenv("TMPDIR", "."),
dest="_tmp_dir",
dest="_tmp_name",
)

parser.add_argument(
Expand Down Expand Up @@ -274,7 +274,7 @@ def main():
scraper = IFixit2Zim(**dict(args._get_kwargs()))

Check warning on line 274 in src/ifixit2zim/entrypoint.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/entrypoint.py#L274

Added line #L274 was not covered by tests
sys.exit(scraper.run())
except Exception as exc:
logger.error(f"FAILED. An error occurred: {exc}")
logger.error("FAILED. An error occurred", exc_info=exc)

Check warning on line 277 in src/ifixit2zim/entrypoint.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/entrypoint.py#L277

Added line #L277 was not covered by tests
if args.debug:
logger.exception(exc)
raise SystemExit(1) from None

Check warning on line 280 in src/ifixit2zim/entrypoint.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/entrypoint.py#L280

Added line #L280 was not covered by tests
Expand Down
43 changes: 29 additions & 14 deletions src/ifixit2zim/imager.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,42 @@
import io
import pathlib
import re
import threading

Check warning on line 8 in src/ifixit2zim/imager.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/imager.py#L8

Added line #L8 was not covered by tests
import urllib.parse

from kiwixstorage import KiwixStorage, NotFoundError
from PIL import Image
from zimscraperlib.download import stream_file
from zimscraperlib.image.optimization import optimize_webp
from zimscraperlib.zim.creator import Creator

Check warning on line 15 in src/ifixit2zim/imager.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/imager.py#L15

Added line #L15 was not covered by tests

from ifixit2zim.constants import IMAGES_ENCODER_VERSION
from ifixit2zim.scraper import IFixit2Zim
from ifixit2zim.executor import Executor
from ifixit2zim.scraper import Configuration
from ifixit2zim.shared import logger
from ifixit2zim.utils import Utils

Check warning on line 21 in src/ifixit2zim/imager.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/imager.py#L17-L21

Added lines #L17 - L21 were not covered by tests


class Imager:
def __init__(self, scraper: IFixit2Zim):
def __init__(

Check warning on line 25 in src/ifixit2zim/imager.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/imager.py#L25

Added line #L25 was not covered by tests
self,
img_executor: Executor,
lock: threading.Lock,
creator: Creator,
utils: Utils,
configuration: Configuration,
):
self.aborted = False
# list of source URLs that we've processed and added to ZIM
self.handled = set()
self.dedup_items = {}
self.scraper = scraper
self.img_executor = img_executor
self.lock = lock
self.creator = creator
self.utils = utils
self.configuration = configuration

Check warning on line 41 in src/ifixit2zim/imager.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/imager.py#L36-L41

Added lines #L36 - L41 were not covered by tests

self.scraper.img_executor.start()
self.img_executor.start()

Check warning on line 43 in src/ifixit2zim/imager.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/imager.py#L43

Added line #L43 was not covered by tests

def abort(self):
"""request imager to cancel processing of futures"""
Expand Down Expand Up @@ -70,7 +85,7 @@ def defer(self, url: str) -> str | None:

# find actual URL should it be from a provider
try:
parsed_url = urllib.parse.urlparse(self.scraper.utils.to_url(url))
parsed_url = urllib.parse.urlparse(self.utils.to_url(url))

Check warning on line 88 in src/ifixit2zim/imager.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/imager.py#L88

Added line #L88 was not covered by tests
except Exception:
logger.warning(f"Can't parse image URL `{url}`. Skipping")
return
Expand All @@ -89,7 +104,7 @@ def defer(self, url: str) -> str | None:
# record that we are processing this one
self.handled.add(path)

self.scraper.img_executor.submit(
self.img_executor.submit(

Check warning on line 107 in src/ifixit2zim/imager.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/imager.py#L107

Added line #L107 was not covered by tests
self.process_image,
url=parsed_url,
path=path,
Expand All @@ -108,22 +123,22 @@ def check_for_duplicate(self, path, content):

def add_image_to_zim(self, path, content, mimetype):
duplicate_path = self.check_for_duplicate(path, content)
with self.scraper.lock:
with self.lock:
if duplicate_path:
self.scraper.creator.add_redirect(
self.creator.add_redirect(

Check warning on line 128 in src/ifixit2zim/imager.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/imager.py#L128

Added line #L128 was not covered by tests
path=path,
target_path=duplicate_path,
)
else:
self.scraper.creator.add_item_for(
self.creator.add_item_for(

Check warning on line 133 in src/ifixit2zim/imager.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/imager.py#L133

Added line #L133 was not covered by tests
path=path,
content=content,
mimetype=mimetype,
)

def add_missing_image_to_zim(self, path):
with self.scraper.lock:
self.scraper.creator.add_redirect(
with self.lock:
self.creator.add_redirect(

Check warning on line 141 in src/ifixit2zim/imager.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/imager.py#L141

Added line #L141 was not covered by tests
path=path,
target_path="assets/NoImage_300x225.jpg",
)
Expand All @@ -137,7 +152,7 @@ def process_image(
return

# just download, optimize and add to ZIM if not using S3
if not self.scraper.configuration.s3_url:
if not self.configuration.s3_url:
try:
fileobj = self.get_image_data(url.geturl())
except Exception as exc:
Expand All @@ -159,7 +174,7 @@ def process_image(
return path

# we are using S3 cache
ident = self.scraper.utils.get_version_ident_for(url.geturl())
ident = self.utils.get_version_ident_for(url.geturl())

Check warning on line 177 in src/ifixit2zim/imager.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/imager.py#L177

Added line #L177 was not covered by tests
if ident is None:
logger.error(f"Unable to query {url.geturl()}. Skipping")
self.add_missing_image_to_zim(
Expand All @@ -168,7 +183,7 @@ def process_image(
return path

# key = self.get_s3_key_for(url.geturl())
s3_storage = KiwixStorage(self.scraper.configuration.s3_url)
s3_storage = KiwixStorage(self.configuration.s3_url)

Check warning on line 186 in src/ifixit2zim/imager.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/imager.py#L186

Added line #L186 was not covered by tests
meta = {"ident": ident, "encoder_version": str(IMAGES_ENCODER_VERSION)}

download_failed = False # useful to trigger reupload or not
Expand Down
Loading

0 comments on commit 2864b2a

Please sign in to comment.