Skip to content

Commit

Permalink
estimate download time of models and stop early if not likely to fini…
Browse files Browse the repository at this point in the history
…sh in time
  • Loading branch information
daroczig committed Feb 23, 2025
1 parent 5f44c28 commit d242ac4
Showing 1 changed file with 47 additions and 9 deletions.
56 changes: 47 additions & 9 deletions images/benchmark-llm/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from sys import stderr
from time import time
from typing import Optional
from urllib.request import urlretrieve
from urllib.request import Request, urlopen, urlretrieve

from psutil import cpu_count

Expand Down Expand Up @@ -49,6 +49,12 @@
default=1,
help="Scale the benchmark timeout by this factor. The default is 1, using a dynamic timeout for each benchmark secnario based on the model size (estimated time to load into memory/VRAM), the task (text generation is slower than prompt processing), and the number of tokens as requiring higher tokens/sec for larger number of tokens. Setting this to 2 means the timeout will be doubled etc.",
)
cli_parser.add_argument(
"--download-timeout",
type=int,
default=60 * 15,
help="Timeout for downloading models in seconds.",
)
cli_args = cli_parser.parse_args()

# #############################################################################
Expand Down Expand Up @@ -123,6 +129,7 @@ def download_models(
model_urls: list[str],
models_dir: str,
model_events: dict = {},
download_speeds: dict = {},
renice: Optional[int] = None,
):
"""Download gguf models from provided URLs."""
Expand All @@ -141,9 +148,10 @@ def download_models(
rename(temp_path, model_path)
model_size = path.getsize(model_path) / 1024**2
download_time = time() - timer_start
download_speeds[model_name] = model_size / download_time
logger.debug(
f"Downloaded model {model_name} ({model_size:.2f} MB) in "
f"{download_time:.2f} sec ({model_size / download_time:.2f} MB/s)"
f"{download_time:.2f} sec ({download_speeds[model_name]:.2f} MB/s)"
)
model_events[model_name].set()

Expand All @@ -152,21 +160,23 @@ def download_models_background(model_urls: list[str], models_dir: str):
"""Download gguf models from provided URLs in a background process.
Returns:
tuple[Process, dict[str, Event]]: The background process and a dictionary of model download completion events.
tuple[Process, dict[str, Event], dict[str, float]]: The background process, model download completion events, and download speeds.
"""
manager = Manager()
model_events = manager.dict()
download_speeds = manager.dict()

for url in model_urls:
model_name = url.split("/")[-1]
model_events[model_name] = manager.Event()

renice = 19
process = Process(
target=download_models, args=(model_urls, models_dir, model_events, renice)
target=download_models,
args=(model_urls, models_dir, model_events, download_speeds, renice),
)
process.start()
return process, model_events
return process, model_events, download_speeds


def cleanup_partially_downloaded_models(models_dir: str):
Expand Down Expand Up @@ -224,15 +234,43 @@ def max_ngl(model: str):
signal(SIGINT, signal_handler)
signal(SIGTERM, signal_handler)

models_download_process, models_downloaded = download_models_background(
model_urls=cli_args.model_urls, models_dir=cli_args.models_dir
models_download_process, models_downloaded, download_speeds = (
download_models_background(
model_urls=cli_args.model_urls, models_dir=cli_args.models_dir
)
)

for model_url in cli_args.model_urls:
model_name = model_url.split("/")[-1]
logger.info(f"Benchmarking model {model_name} ...")
# wait max 5 minutes: large models are later in the queue, so should be finished already
if not models_downloaded[model_name].wait(timeout=60 * 15):

# models are downloaded in the background, so hopefully the model is already downloaded,
# but if not, we estimate if we can finish downloading it without timing out
if len(download_speeds) and not models_downloaded[model_name].is_set():
try:
downloaded_file_size = (
path.getsize(path.join(cli_args.models_dir, model_name + ".part"))
/ 1024**2
)
except FileNotFoundError:
downloaded_file_size = 0
with urlopen(Request(model_url, method="HEAD")) as response:
target_file_size = int(response.headers.get("Content-Length"), 0) / 1024**2
avg_download_speed = sum(download_speeds.values()) / len(download_speeds)
remaining_file_size = target_file_size - downloaded_file_size
if remaining_file_size > avg_download_speed * cli_args.download_timeout:
logger.error(
f"Downloading {model_name} ({remaining_file_size:.2f} MB remaining out of {target_file_size:.2f} MB) "
f"would take too long with {avg_download_speed:.2f} MB/s, giving up."
)
models_download_process.terminate()
models_download_process.join()
sys_exit(1)
logger.info(
f"Waiting for {model_name} to be downloaded ({remaining_file_size:.2f} MB) ... ETA: {remaining_file_size / avg_download_speed:.2f} sec"
)

if not models_downloaded[model_name].wait(timeout=cli_args.download_timeout):
logger.error(f"{model_name} was not downloaded in time.")
models_download_process.terminate()
models_download_process.join()
Expand Down

0 comments on commit d242ac4

Please sign in to comment.