From d0859cf86bacfd9301b5da5db73898efb4325fbe Mon Sep 17 00:00:00 2001 From: Jakob van Santen Date: Tue, 28 Feb 2023 15:49:26 +0100 Subject: [PATCH] JobCommand: reimplement urlretrieve with requests urllib.request.urlretrieve fails to set the `Host:` header, required for hosts like cdn.discordapp.com that are protected by cloudflare. Who knew? --- ampel/cli/JobCommand.py | 8 ++++++-- poetry.lock | 35 +++++++++++++++++++++++++++++++---- pyproject.toml | 4 ++++ 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/ampel/cli/JobCommand.py b/ampel/cli/JobCommand.py index 5cb732888..2cabcc049 100644 --- a/ampel/cli/JobCommand.py +++ b/ampel/cli/JobCommand.py @@ -9,13 +9,13 @@ import tarfile, tempfile, ujson, yaml, io, os, signal, sys, \ subprocess, platform, shutil, filecmp, psutil, pkg_resources +import requests from time import time, sleep from multiprocessing import Queue, Process from argparse import ArgumentParser from importlib import import_module from typing import Any from collections.abc import Sequence -from urllib.request import urlretrieve from ampel.abstract.AbsEventUnit import AbsEventUnit from ampel.abstract.AbsProcessorTemplate import AbsProcessorTemplate from ampel.model.UnitModel import UnitModel @@ -701,7 +701,11 @@ def _fetch_inputs( ) os.makedirs(resolved_artifact.path.parent, exist_ok=True) with tempfile.NamedTemporaryFile(delete=False) as tf: - urlretrieve(resolved_artifact.http.url, tf.name) + r = requests.get(resolved_artifact.http.url, stream=True) + r.raise_for_status() + for chunk in r.iter_content(chunk_size=1<<13): + tf.write(chunk) + tf.flush() try: with tarfile.open(tf.name) as archive: logger.info(f'{resolved_artifact.name} is a tarball; extracting') diff --git a/poetry.lock b/poetry.lock index 0c8fa2b3e..49183b98e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -130,7 +130,7 @@ name = "charset-normalizer" version = "3.1.0" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." category = "main" -optional = true +optional = false python-versions = ">=3.7.0" files = [ {file = "charset-normalizer-3.1.0.tar.gz", hash = "sha256:34e0a2f9c370eb95597aae63bf85eb5e96826d81e3dcf88b8886012906f509b5"}, @@ -1101,7 +1101,7 @@ name = "requests" version = "2.28.2" description = "Python HTTP for Humans." category = "main" -optional = true +optional = false python-versions = ">=3.7, <4" files = [ {file = "requests-2.28.2-py3-none-any.whl", hash = "sha256:64299f4909223da747622c030b781c0d7811e359c37124b4bd368fb8c6518baa"}, @@ -1456,6 +1456,21 @@ files = [ {file = "types_PyYAML-6.0.12.8-py3-none-any.whl", hash = "sha256:5314a4b2580999b2ea06b2e5f9a7763d860d6e09cdf21c0e9561daa9cbd60178"}, ] +[[package]] +name = "types-requests" +version = "2.28.11.15" +description = "Typing stubs for requests" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "types-requests-2.28.11.15.tar.gz", hash = "sha256:fc8eaa09cc014699c6b63c60c2e3add0c8b09a410c818b5ac6e65f92a26dde09"}, + {file = "types_requests-2.28.11.15-py3-none-any.whl", hash = "sha256:a05e4c7bc967518fba5789c341ea8b0c942776ee474c7873129a61161978e586"}, +] + +[package.dependencies] +types-urllib3 = "<1.27" + [[package]] name = "types-setuptools" version = "65.7.0.4" @@ -1471,6 +1486,18 @@ files = [ [package.dependencies] types-docutils = "*" +[[package]] +name = "types-urllib3" +version = "1.26.25.8" +description = "Typing stubs for urllib3" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "types-urllib3-1.26.25.8.tar.gz", hash = "sha256:ecf43c42d8ee439d732a1110b4901e9017a79a38daca26f08e42c8460069392c"}, + {file = "types_urllib3-1.26.25.8-py3-none-any.whl", hash = "sha256:95ea847fbf0bf675f50c8ae19a665baedcf07e6b4641662c4c3c72e7b2edf1a9"}, +] + [[package]] name = "typing-extensions" version = "4.5.0" @@ -1563,7 +1590,7 @@ name = "urllib3" version = "1.26.14" description = "HTTP library with thread-safe connection pooling, file post, and more." category = "main" -optional = true +optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ {file = "urllib3-1.26.14-py2.py3-none-any.whl", hash = "sha256:75edcdc2f7d85b137124a6c3c9fc3933cdeaa12ecb9a6a959f22797a0feca7e1"}, @@ -1905,4 +1932,4 @@ slack = ["slack-sdk"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.12" -content-hash = "c52794aecc057c0eaa9ac7130154b9e40f610f99e12d2a8ce57955b4159112c6" +content-hash = "af1429b6db8abd750625786a833d0a0f99c68e92f46bac58ed6edeb2269d0e42" diff --git a/pyproject.toml b/pyproject.toml index b34bbed57..88b1fd8fb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,7 @@ prometheus-client = ">=0.16,<0.17" xxhash = "^3.0.0" psutil = "^5.8.0" appdirs = "^1.4.4" +requests = "^2.0" fastapi = {version = ">=0.92,<0.93", optional = true} uvicorn = {version = ">=0.20.0,<0.21.0", optional = true, extras = ["standard"]} Sphinx = {version = ">=6.1.2,<6.2.0", optional = true} @@ -79,6 +80,9 @@ server = ["fastapi", "uvicorn"] docs = ["Sphinx", "sphinx-press-theme", "sphinx-autodoc-typehints", "tomlkit"] slack = ["slack_sdk"] +[tool.poetry.group.dev.dependencies] +types-requests = "^2.28.11.15" + [tool.isort] profile = "black"