Skip to content

Commit

Permalink
JobCommand: reimplement urlretrieve with requests
Browse files Browse the repository at this point in the history
urllib.request.urlretrieve fails to set the `Host:` header, which is required for
hosts like cdn.discordapp.com that are protected by Cloudflare. Who knew?
  • Loading branch information
jvansanten committed Mar 8, 2023
1 parent 162f049 commit e1e6811
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 91 deletions.
8 changes: 6 additions & 2 deletions ampel/cli/JobCommand.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@

import tarfile, tempfile, ujson, yaml, io, os, signal, sys, \
subprocess, platform, shutil, filecmp, psutil, pkg_resources
import requests
from time import time, sleep
from multiprocessing import Queue, Process
from argparse import ArgumentParser
from importlib import import_module
from typing import Any
from collections.abc import Sequence
from urllib.request import urlretrieve
from ampel.abstract.AbsEventUnit import AbsEventUnit
from ampel.abstract.AbsProcessorTemplate import AbsProcessorTemplate
from ampel.model.UnitModel import UnitModel
Expand Down Expand Up @@ -701,7 +701,11 @@ def _fetch_inputs(
)
os.makedirs(resolved_artifact.path.parent, exist_ok=True)
with tempfile.NamedTemporaryFile(delete=False) as tf:
urlretrieve(resolved_artifact.http.url, tf.name)
r = requests.get(resolved_artifact.http.url, stream=True)
r.raise_for_status()
for chunk in r.iter_content(chunk_size=1<<13):
tf.write(chunk)
tf.flush()
try:
with tarfile.open(tf.name) as archive:
logger.info(f'{resolved_artifact.name} is a tarball; extracting')
Expand Down
Loading

0 comments on commit e1e6811

Please sign in to comment.