def stream_remote_file_with_retries(out_filename, url, retries=4):
    """Stream `url` into `out_filename`, retrying truncated downloads.

    After each attempt the size of the written file is compared with the
    response's Content-Length header. On a mismatch the partial file is
    removed and the download is retried with exponential backoff
    (1s, 2s, 4s, ...).

    The size check is skipped when it cannot be meaningful:
      * the server sent no Content-Length header, or
      * the body is content-encoded (e.g. gzip); `iter_content` yields
        decoded bytes, so the on-disk size legitimately differs from the
        on-the-wire Content-Length.

    Args:
        out_filename: Path of the file to (over)write with the response body.
        url: URL to fetch.
        retries: Maximum number of download attempts. At least one attempt
            is always made.

    Returns:
        The response object of the successful attempt (body already consumed).

    Raises:
        Exception: If the server answers with a 301, or if every attempt
            produced a file whose size does not match Content-Length.
        Whatever `raise_for_status()` raises for 4xx/5xx responses.
    """
    def cleanup():
        # Best-effort removal of a partial download; a missing file or a
        # failing unlink must not mask the real download error.
        try:
            os.remove(out_filename)
        except OSError:
            pass

    attempt = 0
    while True:
        attempt += 1
        with open(out_filename, "w+b") as f:
            r = get_requests_retry_session().get(url, stream=True)
            try:
                if r.status_code == 301:
                    raise Exception("got a 301")
                r.raise_for_status()

                for chunk in r.iter_content(chunk_size=4096):
                    f.write(chunk)
                f.flush()
                final_size = os.fstat(f.fileno()).st_size
            except Exception:
                # With stream=True the connection is only released once the
                # body is consumed or the response is closed explicitly.
                r.close()
                raise

        # The file is closed from here on, so it can be removed safely on
        # every platform if the size check fails.
        expected_length = r.headers.get('content-length')
        content_encoding = r.headers.get('content-encoding', 'identity')
        if expected_length is None or content_encoding.lower() != 'identity':
            # Nothing reliable to compare against; accept the download.
            return r
        if final_size == int(expected_length):
            return r

        r.close()
        cleanup()
        if attempt >= retries:
            raise Exception("final file size does not match Content-Length")
        # Backoff depends on the attempt number, not on the `retries` value,
        # so non-default retry counts still sleep 1s, 2s, 4s, ...
        time.sleep(2 ** (attempt - 1))