Skip to content

Commit

Permalink
Rearrange code to get rid of the Global class
Browse files Browse the repository at this point in the history
  • Loading branch information
benoit74 committed Mar 2, 2024
1 parent ca8bf0b commit 915ed1d
Show file tree
Hide file tree
Showing 16 changed files with 374 additions and 239 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -210,3 +210,4 @@ pyrightconfig.json

# ignore all vscode, this is not standard configuration in this place
.vscode
output
1 change: 0 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ LABEL org.opencontainers.image.source https://github.com/openzim/ifixit
# TODO: do we really need all these packages?
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
# locales required if tool has any i18n support
locales \
locales-all \
libmagic1 \
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,15 @@ Docker container as explained below.

First, build the Docker image (to be ran in the main folder of this repo):
```
docker build -t ghcr.io/openzim/ifixit:local .
docker build -t local-ifixit .
```

Then run the scraper with CLI arguments needed for your test (everything after `ifixit2zim` in the example below).

For instance, if you want to run a scrape of only the `Apple_PDA` category, including its guides,
in French :
```
docker run -it -v $(pwd)/output:/output --rm ghcr.io/openzim/fixit:local ifixit2zim --language fr --output /output --tmp-dir /tmp --category Apple_PDA
docker run -it -v $(pwd)/output:/output --rm local-ifixit ifixit2zim --language fr --output /output --tmp-dir /tmp --category Apple_PDA
```

This will produce a ZIM in the output folder of your current directory.
Expand Down
79 changes: 49 additions & 30 deletions src/ifixit2zim/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,24 @@
# https://www.ifixit.com/Guide/MacBook+Air+11-Inch+Late+2010+Battery+Replacement/4384
# https://www.ifixit.com/Teardown/Apple+Watch+Teardown/40655

TITLE = {

Check warning on line 147 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L147

Added line #L147 was not covered by tests
"en": {
"title_en": "iFixit in English",
"title_fr": "iFixit in French",
"title_pt": "iFixit in Portuguese",
"title_de": "iFixit in German",
"title_ko": "iFixit in Korean",
"title_zh": "iFixit in Chinese",
"title_ru": "iFixit in Russian",
"title_nl": "iFixit in Dutch",
"title_ja": "iFixit in Japanese",
"title_tr": "iFixit in Turkish",
"title_es": "iFixit in Spanish",
"title_it": "iFixit in Italian",
},
"fr": {"title_fr": "iFixit en Français"},
}

HOME_LABELS = {
"en": {"top_title": "Repair guides for every thing, written by everyone."},
"fr": {"top_title": "Tutoriels de réparation pour tout, écrits par tous."},
Expand Down Expand Up @@ -800,7 +818,6 @@
UNAVAILABLE_OFFLINE_INFOS = ["toolkits"]


@dataclass
class Configuration:
fpath: pathlib.Path

Check warning on line 822 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L821-L822

Added lines #L821 - L822 were not covered by tests

Expand All @@ -815,14 +832,14 @@ class Configuration:
tag: list[str]

Check warning on line 832 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L825-L832

Added lines #L825 - L832 were not covered by tests

# filesystem
_output_dir: str # TODO: rename output_name
_tmp_dir: str # IDEM
output_dir: pathlib.Path # TODO: rename output_path
tmp_dir: pathlib.Path # IDEM
_output_name: str
_tmp_name: str
output_path: pathlib.Path
tmp_path: pathlib.Path

Check warning on line 838 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L835-L838

Added lines #L835 - L838 were not covered by tests

required = (

Check warning on line 840 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L840

Added line #L840 was not covered by tests
"lang_code",
"output_dir",
"output_path",
)

lang_code: str
Expand Down Expand Up @@ -859,35 +876,21 @@ class Configuration:
stats_filename: str | None
skip_checks: bool

Check warning on line 877 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L869-L877

Added lines #L869 - L877 were not covered by tests

@staticmethod
def get_url(lang_code: str) -> urllib.parse.ParseResult:
return urllib.parse.urlparse(URLS[lang_code])

@property
def domain(self) -> str:
return self.main_url.netloc

@property
def api_url(self) -> str:
return self.main_url.geturl() + API_PREFIX

@property
def s3_url(self) -> str | None:
return self.s3_url_with_credentials

def __post_init__(self):
def __init__(self, **kwargs):

Check warning on line 879 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L879

Added line #L879 was not covered by tests
for key, value in kwargs.items():
self.__setattr__(key, value)
self.main_url = Configuration.get_url(self.lang_code)

Check warning on line 882 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L881-L882

Added lines #L881 - L882 were not covered by tests
self.language = get_language_details(self.lang_code)
self.output_dir = pathlib.Path(self._output_dir).expanduser().resolve()
self.output_dir.mkdir(parents=True, exist_ok=True)
self.output_path = pathlib.Path(self._output_name).expanduser().resolve()
self.output_path.mkdir(parents=True, exist_ok=True)

Check warning on line 885 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L884-L885

Added lines #L884 - L885 were not covered by tests

self.tmp_dir = pathlib.Path(self._tmp_dir).expanduser().resolve()
self.tmp_dir.mkdir(parents=True, exist_ok=True)
self.tmp_path = pathlib.Path(self._tmp_name).expanduser().resolve()
self.tmp_path.mkdir(parents=True, exist_ok=True)

Check warning on line 888 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L887-L888

Added lines #L887 - L888 were not covered by tests
if self.build_dir_is_tmp_dir:
self.build_dir = self.tmp_dir
self.build_path = self.tmp_path

Check warning on line 890 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L890

Added line #L890 was not covered by tests
else:
self.build_dir = pathlib.Path(
tempfile.mkdtemp(prefix=f"ifixit_{self.lang_code}_", dir=self.tmp_dir)
self.build_path = pathlib.Path(

Check warning on line 892 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L892

Added line #L892 was not covered by tests
tempfile.mkdtemp(prefix=f"ifixit_{self.lang_code}_", dir=self.tmp_path)
)

self.stats_path = None

Check warning on line 896 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L896

Added line #L896 was not covered by tests
Expand All @@ -901,3 +904,19 @@ def __post_init__(self):
if ";" in tag:
self.tag += [p.strip() for p in tag.split(";")]
self.tag.remove(tag)

@staticmethod

Check warning on line 908 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L908

Added line #L908 was not covered by tests
def get_url(lang_code: str) -> urllib.parse.ParseResult:
return urllib.parse.urlparse(URLS[lang_code])

Check warning on line 910 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L910

Added line #L910 was not covered by tests

@property

Check warning on line 912 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L912

Added line #L912 was not covered by tests
def domain(self) -> str:
return self.main_url.netloc

Check warning on line 914 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L914

Added line #L914 was not covered by tests

@property

Check warning on line 916 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L916

Added line #L916 was not covered by tests
def api_url(self) -> str:
return self.main_url.geturl() + API_PREFIX

Check warning on line 918 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L918

Added line #L918 was not covered by tests

@property

Check warning on line 920 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L920

Added line #L920 was not covered by tests
def s3_url(self) -> str | None:
return self.s3_url_with_credentials

Check warning on line 922 in src/ifixit2zim/constants.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/constants.py#L922

Added line #L922 was not covered by tests
19 changes: 19 additions & 0 deletions src/ifixit2zim/context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from ifixit2zim.utils import Utils
import threading
from ifixit2zim.scraper import Configuration
from zimscraperlib.zim.creator import Creator
from typing import Any
from jinja2 import Environment
from ifixit2zim.processor import Processor
from dataclasses import dataclass

Check warning on line 8 in src/ifixit2zim/context.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/context.py#L1-L8

Added lines #L1 - L8 were not covered by tests


@dataclass
class Context:
lock: threading.Lock
configuration: Configuration
creator: Creator
utils: Utils
metadata: dict[str, Any]
env: Environment
processor: Processor

Check warning on line 19 in src/ifixit2zim/context.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/context.py#L12-L19

Added lines #L12 - L19 were not covered by tests
6 changes: 3 additions & 3 deletions src/ifixit2zim/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def main():
"--output",
help="Output folder for ZIM file",
default="/output",
dest="_output_dir",
dest="_output_name",
)

parser.add_argument(
Expand Down Expand Up @@ -103,7 +103,7 @@ def main():
"--tmp-dir",
help="Path to create temp folder in. Used for building ZIM file.",
default=os.getenv("TMPDIR", "."),
dest="_tmp_dir",
dest="_tmp_name",
)

parser.add_argument(
Expand Down Expand Up @@ -274,7 +274,7 @@ def main():
scraper = IFixit2Zim(**dict(args._get_kwargs()))

Check warning on line 274 in src/ifixit2zim/entrypoint.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/entrypoint.py#L274

Added line #L274 was not covered by tests
sys.exit(scraper.run())
except Exception as exc:
logger.error(f"FAILED. An error occurred: {exc}")
logger.error(f"FAILED. An error occurred", exc_info=exc)

Check warning on line 277 in src/ifixit2zim/entrypoint.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/entrypoint.py#L277

Added line #L277 was not covered by tests
if args.debug:
logger.exception(exc)
raise SystemExit(1) from None

Check warning on line 280 in src/ifixit2zim/entrypoint.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/entrypoint.py#L280

Added line #L280 was not covered by tests
Expand Down
43 changes: 29 additions & 14 deletions src/ifixit2zim/imager.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,34 @@
from zimscraperlib.image.optimization import optimize_webp

from ifixit2zim.constants import IMAGES_ENCODER_VERSION
from ifixit2zim.scraper import IFixit2Zim
from ifixit2zim.shared import logger
from ifixit2zim.utils import Utils
import threading
from ifixit2zim.executor import Executor
from ifixit2zim.scraper import Configuration
from zimscraperlib.zim.creator import Creator

Check warning on line 21 in src/ifixit2zim/imager.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/imager.py#L15-L21

Added lines #L15 - L21 were not covered by tests


class Imager:
def __init__(self, scraper: IFixit2Zim):
def __init__(

Check warning on line 25 in src/ifixit2zim/imager.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/imager.py#L25

Added line #L25 was not covered by tests
self,
img_executor: Executor,
lock: threading.Lock,
creator: Creator,
utils: Utils,
configuration: Configuration,
):
self.aborted = False
# list of source URLs that we've processed and added to ZIM
self.handled = set()
self.dedup_items = {}
self.scraper = scraper
self.img_executor = img_executor
self.lock = lock
self.creator = creator
self.utils = utils
self.configuration = configuration

Check warning on line 41 in src/ifixit2zim/imager.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/imager.py#L36-L41

Added lines #L36 - L41 were not covered by tests

self.scraper.img_executor.start()
self.img_executor.start()

Check warning on line 43 in src/ifixit2zim/imager.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/imager.py#L43

Added line #L43 was not covered by tests

def abort(self):
"""request imager to cancel processing of futures"""
Expand Down Expand Up @@ -70,7 +85,7 @@ def defer(self, url: str) -> str | None:

# find actual URL should it be from a provider
try:
parsed_url = urllib.parse.urlparse(self.scraper.utils.to_url(url))
parsed_url = urllib.parse.urlparse(self.utils.to_url(url))

Check warning on line 88 in src/ifixit2zim/imager.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/imager.py#L88

Added line #L88 was not covered by tests
except Exception:
logger.warning(f"Can't parse image URL `{url}`. Skipping")
return
Expand All @@ -89,7 +104,7 @@ def defer(self, url: str) -> str | None:
# record that we are processing this one
self.handled.add(path)

self.scraper.img_executor.submit(
self.img_executor.submit(

Check warning on line 107 in src/ifixit2zim/imager.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/imager.py#L107

Added line #L107 was not covered by tests
self.process_image,
url=parsed_url,
path=path,
Expand All @@ -108,22 +123,22 @@ def check_for_duplicate(self, path, content):

def add_image_to_zim(self, path, content, mimetype):
duplicate_path = self.check_for_duplicate(path, content)
with self.scraper.lock:
with self.lock:
if duplicate_path:
self.scraper.creator.add_redirect(
self.creator.add_redirect(

Check warning on line 128 in src/ifixit2zim/imager.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/imager.py#L128

Added line #L128 was not covered by tests
path=path,
target_path=duplicate_path,
)
else:
self.scraper.creator.add_item_for(
self.creator.add_item_for(

Check warning on line 133 in src/ifixit2zim/imager.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/imager.py#L133

Added line #L133 was not covered by tests
path=path,
content=content,
mimetype=mimetype,
)

def add_missing_image_to_zim(self, path):
with self.scraper.lock:
self.scraper.creator.add_redirect(
with self.lock:
self.creator.add_redirect(

Check warning on line 141 in src/ifixit2zim/imager.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/imager.py#L141

Added line #L141 was not covered by tests
path=path,
target_path="assets/NoImage_300x225.jpg",
)
Expand All @@ -137,7 +152,7 @@ def process_image(
return

# just download, optimize and add to ZIM if not using S3
if not self.scraper.configuration.s3_url:
if not self.configuration.s3_url:
try:
fileobj = self.get_image_data(url.geturl())
except Exception as exc:
Expand All @@ -159,7 +174,7 @@ def process_image(
return path

# we are using S3 cache
ident = self.scraper.utils.get_version_ident_for(url.geturl())
ident = self.utils.get_version_ident_for(url.geturl())

Check warning on line 177 in src/ifixit2zim/imager.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/imager.py#L177

Added line #L177 was not covered by tests
if ident is None:
logger.error(f"Unable to query {url.geturl()}. Skipping")
self.add_missing_image_to_zim(
Expand All @@ -168,7 +183,7 @@ def process_image(
return path

# key = self.get_s3_key_for(url.geturl())
s3_storage = KiwixStorage(self.scraper.configuration.s3_url)
s3_storage = KiwixStorage(self.configuration.s3_url)

Check warning on line 186 in src/ifixit2zim/imager.py

View check run for this annotation

Codecov / codecov/patch

src/ifixit2zim/imager.py#L186

Added line #L186 was not covered by tests
meta = {"ident": ident, "encoder_version": str(IMAGES_ENCODER_VERSION)}

download_failed = False # useful to trigger reupload or not
Expand Down
Loading

0 comments on commit 915ed1d

Please sign in to comment.