Skip to content

Commit

Permalink
Aria2 based downloader
Browse files Browse the repository at this point in the history
Introducing a new aria2c-based downloader with:

Additionally:

- Removed --concurrency param
- Cache has a special rule for .meta4 URLs on download.kiwix.org, so a file is stored once for both URLs (with or without metalink)
- Fixed debug calls clearing lines when not using DEBUG
- Downloading base image with aria2 (blocking, no progress)
- New InitDownloader Step to start/shutdown aria2 properly
- All downloads display speed once completed
  • Loading branch information
rgaudin committed Mar 14, 2024
1 parent 72636e7 commit b9e6fd4
Show file tree
Hide file tree
Showing 10 changed files with 990 additions and 298 deletions.
3 changes: 0 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,6 @@ options:
-C, --check Only check inputs, URLs and sizes. Don't download/create image.
-K, --keep [DEBUG] Don't remove output image if creation failed
-X, --overwrite Don't fail on existing output image: remove instead
-T CONCURRENCY, --concurrency CONCURRENCY
Nb. of threads to start for parallel downloads (at most one per file). `0` (default) for auto-selection based on CPUs.
`1` to disable concurrency.
-D, --debug
-V, --version show program's version number and exit
Expand Down
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,12 @@ dependencies = [
"PyYAML==6.0.1",
"cli-ui==0.17.2",
"humanfriendly==10.0",
"progressbar2==4.3.2",
"progressbar2==4.4.2",
"docker-export==1.1.0",
"typeguard==4.1.5",
"offspot-config==1.7.2",
"offspot-config==1.12.2",
"natsort==8.4.0",
"aria2p==0.12.0",
]
dynamic = ["version"]

Expand Down
5 changes: 5 additions & 0 deletions src/image_creator/cache/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ def path_for_file(file: File) -> pathlib.Path:

# add params/query/fragment to basename to ensure uniqueness
fname = path.parts[-1]
# special case for Kiwix load-balancer. Files can be served
# via xxx.yy.meta4 extension but represent the xxx.yy content post-download
# doing this ensures we cache once for both URLs (with or without metalink)
if file.url.netloc == "download.kiwix.org" and file.url.path.endswith(".meta4"):
fname = re.sub(r".meta4$", "", fname)
if file.url.params:
fname += f";{file.url.params}"
if file.url.query:
Expand Down
1 change: 0 additions & 1 deletion src/image_creator/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ class Options:

keep_failed: bool
overwrite: bool
concurrency: int
max_size: int | None = None

config_url: urllib.parse.ParseResult | None = None
Expand Down
13 changes: 2 additions & 11 deletions src/image_creator/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,6 @@ def main():
dest="max_size",
help="Maximum image size allowed. Ex: 512GB",
)
parser.add_argument(
"-T",
"--concurrency",
type=int,
default=0,
dest="concurrency",
help="Nb. of threads to start for parallel downloads (at most one per file). "
"`0` (default) for auto-selection based on CPUs. `1` to disable concurrency.",
)
parser.add_argument("-D", "--debug", action="store_true", dest="debug")
parser.add_argument("-V", "--version", action="version", version=__version__)

Expand All @@ -99,12 +90,12 @@ def main():
if kwargs.get("debug"):
logger.exception(exc)
logger.critical(str(exc))
sys.exit(1)
finally:
try:
app.halt() # pyright: ignore [reportUnboundVariable]
except Exception as exc:
logger.debug(f"Errors cleaning-up: {exc}")
sys.exit(1)
finally:
logger.terminate()


Expand Down
2 changes: 2 additions & 0 deletions src/image_creator/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ def message(self, *tokens, end: str = "\n", timed: bool = False):
ui.CONFIG["timestamp"] = False

def debug(self, text: str):
if self.verbose is None or self.verbose > logging.DEBUG:
return
self.clear()
ui.debug(ui.indent(text, num=self.indent_level))

Expand Down
17 changes: 12 additions & 5 deletions src/image_creator/steps/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

from image_creator.constants import logger
from image_creator.steps import Step
from image_creator.utils.download import download_file


class DownloadImage(Step):
Expand All @@ -32,12 +31,16 @@ def run_uncompressed(self, base_file: File, payload: dict[str, Any]) -> int:
if base_file not in payload["cache"] and not base_file.is_local:
logger.start_task(f"Downloading {base_file.geturl()} into {target}…")
try:
download_file(base_file.geturl(), target)
dl = payload["downloader"].download_to(base_file.geturl(), target)
dl.block()
except Exception as exc:
logger.fail_task(str(exc))
return 1
else:
logger.succeed_task(format_size(get_filesize(target)))
logger.succeed_task(
f"{format_size(get_filesize(target))} "
f"({format_size(dl.overall_speed)}/s)"
)
if payload["cache"].should_cache(base_file):
logger.start_task("Adding Base Image to cache…")
if payload["cache"].introduce(base_file, target):
Expand Down Expand Up @@ -77,12 +80,16 @@ def run_compressed(self, base_file: File, payload: dict[str, Any]) -> int:
if base_file not in payload["cache"] and not base_file.is_local:
logger.start_task(f"Downloading {base_file.geturl()} into {xz_fpath}…")
try:
download_file(base_file.geturl(), xz_fpath)
dl = payload["downloader"].download_to(base_file.geturl(), xz_fpath)
dl.block()
except Exception as exc:
logger.fail_task(str(exc))
return 1
else:
logger.succeed_task(format_size(get_filesize(xz_fpath)))
logger.succeed_task(
f"{format_size(get_filesize(xz_fpath))} "
f"({format_size(dl.overall_speed)}/s)"
)
remove_xz = True
if payload["cache"].should_cache(base_file):
logger.start_task("Adding Base Image to cache…")
Expand Down
Loading

0 comments on commit b9e6fd4

Please sign in to comment.