Skip to content

Commit

Permalink
Merge pull request #45 from EBI-Metagenomics/bug-fix/fetch-script
Browse files Browse the repository at this point in the history
Fixed path to modules
  • Loading branch information
mberacochea authored Jun 9, 2023
2 parents c91e2dd + d3d0baf commit 9717316
Show file tree
Hide file tree
Showing 7 changed files with 228 additions and 4 deletions.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import time
import urllib.request as request
import urllib.error as error

from utils import download_fasta

LINKS = 'https://ftp.ncbi.nlm.nih.gov/genomes/genbank/assembly_summary_genbank.txt'
Expand Down
1 change: 1 addition & 0 deletions bin/retry/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is a copy of https://github.com/invl/retry
10 changes: 10 additions & 0 deletions bin/retry/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Public API of the vendored ``retry`` package (copy of https://github.com/invl/retry).
__all__ = ["retry", "retry_call"]

import logging

from .api import retry, retry_call
from .compat import NullHandler

# Set default logging handler to avoid "No handler found" warnings.
# NullHandler discards records unless the application configures logging itself.
log = logging.getLogger(__name__)
log.addHandler(NullHandler())
141 changes: 141 additions & 0 deletions bin/retry/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import logging
import random
import time
from functools import partial

from .compat import decorator

logging_logger = logging.getLogger(__name__)


def __retry_internal(
    f,
    exceptions=Exception,
    tries=-1,
    delay=0,
    max_delay=None,
    backoff=1,
    jitter=0,
    logger=logging_logger,
):
    """Execute ``f()`` and re-run it whenever it raises a caught exception.

    :param f: zero-argument callable to execute.
    :param exceptions: exception class or tuple of classes to catch. default: Exception.
    :param tries: the maximum number of attempts. default: -1 (infinite).
    :param delay: initial delay between attempts, in seconds. default: 0.
    :param max_delay: the maximum value of delay. default: None (no limit).
    :param backoff: multiplier applied to delay between attempts. default: 1 (no backoff).
    :param jitter: extra seconds added to delay between attempts. default: 0.
                   fixed if a number, random if a range tuple (min, max).
    :param logger: logger.warning(fmt, error, delay) is called on failed attempts.
                   default: retry.logging_logger. if None, logging is disabled.
    :returns: whatever ``f()`` returns on the first successful attempt.
    """
    attempts_left, wait = tries, delay
    while attempts_left:
        try:
            return f()
        except exceptions as exc:
            attempts_left -= 1
            # No attempts remaining: let the last exception propagate.
            if not attempts_left:
                raise

            if logger is not None:
                logger.warning("%s, retrying in %s seconds...", exc, wait)

            time.sleep(wait)

            # Grow the delay, add jitter, then cap it for the next attempt.
            wait *= backoff
            if isinstance(jitter, tuple):
                wait += random.uniform(*jitter)
            else:
                wait += jitter
            if max_delay is not None:
                wait = min(wait, max_delay)


def retry(
    exceptions=Exception,
    tries=-1,
    delay=0,
    max_delay=None,
    backoff=1,
    jitter=0,
    logger=logging_logger,
):
    """Build a decorator that retries the wrapped function on failure.

    :param exceptions: exception class or tuple of classes to catch. default: Exception.
    :param tries: the maximum number of attempts. default: -1 (infinite).
    :param delay: initial delay between attempts, in seconds. default: 0.
    :param max_delay: the maximum value of delay. default: None (no limit).
    :param backoff: multiplier applied to delay between attempts. default: 1 (no backoff).
    :param jitter: extra seconds added to delay between attempts. default: 0.
                   fixed if a number, random if a range tuple (min, max).
    :param logger: logger.warning(fmt, error, delay) is called on failed attempts.
                   default: retry.logging_logger. if None, logging is disabled.
    :returns: a retry decorator.
    """

    @decorator
    def retry_decorator(f, *fargs, **fkwargs):
        # Bind the call arguments up front so the retry loop re-invokes
        # the function with exactly the same arguments every attempt.
        call_args = list(fargs) if fargs else []
        call_kwargs = dict(fkwargs) if fkwargs else {}
        bound = partial(f, *call_args, **call_kwargs)
        return __retry_internal(
            bound,
            exceptions,
            tries,
            delay,
            max_delay,
            backoff,
            jitter,
            logger,
        )

    return retry_decorator


def retry_call(
    f,
    fargs=None,
    fkwargs=None,
    exceptions=Exception,
    tries=-1,
    delay=0,
    max_delay=None,
    backoff=1,
    jitter=0,
    logger=logging_logger,
):
    """Call ``f`` and re-execute it on failure, without using a decorator.

    :param f: the function to execute.
    :param fargs: positional arguments passed to ``f``. default: None (no args).
    :param fkwargs: keyword arguments passed to ``f``. default: None (no kwargs).
    :param exceptions: exception class or tuple of classes to catch. default: Exception.
    :param tries: the maximum number of attempts. default: -1 (infinite).
    :param delay: initial delay between attempts, in seconds. default: 0.
    :param max_delay: the maximum value of delay. default: None (no limit).
    :param backoff: multiplier applied to delay between attempts. default: 1 (no backoff).
    :param jitter: extra seconds added to delay between attempts. default: 0.
                   fixed if a number, random if a range tuple (min, max).
    :param logger: logger.warning(fmt, error, delay) is called on failed attempts.
                   default: retry.logging_logger. if None, logging is disabled.
    :returns: the result of the ``f`` call.
    """
    # Empty containers replace None so the partial binds cleanly.
    bound = partial(f, *(fargs or []), **(fkwargs or {}))
    return __retry_internal(
        bound,
        exceptions,
        tries,
        delay,
        max_delay,
        backoff,
        jitter,
        logger,
    )
31 changes: 31 additions & 0 deletions bin/retry/compat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import functools
import logging

try:
    # Prefer the third-party ``decorator`` package when available: it
    # preserves the wrapped function's signature.
    from decorator import decorator
except ImportError:

    def decorator(caller):
        """Fallback: turn *caller* into a decorator factory.

        Unlike the ``decorator`` module, the wrapped function's signature
        is not preserved (only name/docstring via ``functools.wraps``).
        :param caller: caller(f, *args, **kwargs)
        """

        def _apply(func):
            @functools.wraps(func)
            def _wrapped(*args, **kwargs):
                return caller(func, *args, **kwargs)

            return _wrapped

        return _apply


try:  # Python 2.7+ ships NullHandler in the stdlib logging module.
    from logging import NullHandler
except ImportError:

    class NullHandler(logging.Handler):
        """Stand-in for older Pythons: a handler that discards every record."""

        def emit(self, record):
            # Deliberately do nothing with the record.
            pass
48 changes: 44 additions & 4 deletions bin/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,55 @@
# You should have received a copy of the GNU General Public License
# along with MGnify genome analysis pipeline. If not, see <https://www.gnu.org/licenses/>.

import os
import urllib.request as request
import urllib.parse
import urllib.error as error
import gzip
import time

import urllib.parse

import requests

# TODO: the methods in this file are repeated in "containers/genomes-catalog-update/scripts/fetch_ena.py"
# merge both.
from retry import retry


def download_fasta(url, folder, accession, unzip, checksum):
    """Download a FASTA file from *url* into *folder*, retrying on failure.

    :param url: FTP or HTTP(S) address of the (gzipped) fasta file.
    :param folder: destination directory, created if it does not exist.
    :param accession: accession used to build the output file name.
    :param unzip: if True, gunzip the payload and drop the ``.gz`` suffix.
    :param checksum: unused here  # TODO confirm whether it should be verified
    :returns: the output file name on success, ``None`` if all attempts
        failed or produced an empty file, ``False`` if *url* is not a URL.
    """
    if not url.lower().startswith(('ftp', 'http')):
        print(url, 'is not an URL\n')
        return False

    max_attempts = 5
    attempt = 1
    sleep_time = 15
    outfile = '{}.fa.gz'.format(accession)
    if unzip:
        outfile = outfile[:-3]  # drop the '.gz' suffix
    outpath = os.path.join(folder, outfile)
    # exist_ok avoids the check-then-create race of the previous
    # os.path.exists() + os.makedirs() pair.
    os.makedirs(folder, exist_ok=True)
    while attempt <= max_attempts:
        try:
            # Context manager guarantees the connection is closed even if
            # read() raises (the response was previously never closed).
            with request.urlopen(url) as response:
                content = response.read()
            if unzip:
                with open(outpath, 'w') as out:
                    out.write(gzip.decompress(content).decode('utf-8'))
            else:
                with open(outpath, 'wb') as out:
                    out.write(content)
            break
        except (error.HTTPError, error.URLError) as e:
            print('Could not retrieve URL', url, ' Reason:', e.reason)
            print('Retrying...')
            attempt += 1
            time.sleep(sleep_time)
    # An absent or empty file means every attempt failed.
    if not os.path.exists(outpath) or os.path.getsize(outpath) == 0:
        return None
    return outfile


def qs50(contamination, completeness):
contam_cutoff = 5.0
qs_cutoff = 50.0
Expand All @@ -36,7 +76,7 @@ def qs50(contamination, completeness):
return True


@retry(tries=5, delay=10, backoff=1.5)
def run_request(query, api_endpoint):
    """GET *api_endpoint* with *query* sent as URL-encoded parameters.

    Retried up to 5 times with exponential backoff by the ``retry``
    decorator on any exception.
    :returns: the decoded JSON response body.
    :raises requests.HTTPError: on a non-2xx status code.
    """
    encoded_params = urllib.parse.urlencode(query)
    response = requests.get(api_endpoint, params=encoded_params)
    response.raise_for_status()
    return response.json()
Expand Down

0 comments on commit 9717316

Please sign in to comment.