[Frontend][Misc] Goodput metric support (vllm-project#9338)
Imss27 authored Oct 20, 2024
1 parent 4fa3e33 commit 855e0e6
Showing 1 changed file with 91 additions and 2 deletions.
93 changes: 91 additions & 2 deletions benchmarks/benchmark_serving.py
@@ -53,13 +53,16 @@
except ImportError:
from argparse import ArgumentParser as FlexibleArgumentParser

MILLISECONDS_TO_SECONDS_CONVERSION = 1000


@dataclass
class BenchmarkMetrics:
completed: int
total_input: int
total_output: int
request_throughput: float
request_goodput: float
output_throughput: float
total_token_throughput: float
mean_ttft_ms: float
@@ -316,12 +319,15 @@ def calculate_metrics(
tokenizer: PreTrainedTokenizerBase,
selected_percentile_metrics: List[str],
selected_percentiles: List[float],
    goodput_config_dict: Dict[str, float],
) -> Tuple[BenchmarkMetrics, List[int]]:
actual_output_lens: List[int] = []
total_input = 0
completed = 0
good_completed = 0
itls: List[float] = []
tpots: List[float] = []
all_tpots: List[float] = []
ttfts: List[float] = []
e2els: List[float] = []
for i in range(len(outputs)):
@@ -335,16 +341,42 @@ def calculate_metrics(
add_special_tokens=False).input_ids)
actual_output_lens.append(output_len)
total_input += input_requests[i][1]
tpot = 0
if output_len > 1:
-                tpots.append(
-                    (outputs[i].latency - outputs[i].ttft) / (output_len - 1))
+                tpot = (outputs[i].latency - outputs[i].ttft) / (output_len - 1)
+                tpots.append(tpot)
# Note: if output_len <= 1, we regard tpot as 0 for goodput
all_tpots.append(tpot)
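            # all_tpots stays aligned one-to-one with ttfts/e2els so that
            # zip(*valid_metrics) below pairs up per-request measurements.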
itls += outputs[i].itl
ttfts.append(outputs[i].ttft)
e2els.append(outputs[i].latency)
completed += 1
else:
actual_output_lens.append(0)

    if goodput_config_dict:
        valid_metrics = []
        slo_values = []

        if "ttft" in goodput_config_dict:
            valid_metrics.append(ttfts)
            slo_values.append(goodput_config_dict["ttft"] /
                              MILLISECONDS_TO_SECONDS_CONVERSION)
        if "tpot" in goodput_config_dict:
            valid_metrics.append(all_tpots)
            slo_values.append(goodput_config_dict["tpot"] /
                              MILLISECONDS_TO_SECONDS_CONVERSION)
        if "e2el" in goodput_config_dict:
            valid_metrics.append(e2els)
            slo_values.append(goodput_config_dict["e2el"] /
                              MILLISECONDS_TO_SECONDS_CONVERSION)

        for req_metric in zip(*valid_metrics):
            is_good_req = all(s >= r for s, r in zip(slo_values, req_metric))
            if is_good_req:
                good_completed += 1
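    # Worked example (illustrative numbers, not from this commit): with
    # --goodput ttft:300 tpot:50, slo_values is [0.3, 0.05] in seconds. A
    # request measured at ttft=0.25s and tpot=0.04s meets both SLOs and counts
    # toward good_completed; one at ttft=0.35s misses the TTFT SLO and does not.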

if completed == 0:
warnings.warn(
"All requests failed. This is likely due to a misconfiguration "
@@ -355,6 +387,7 @@ def calculate_metrics(
total_input=total_input,
total_output=sum(actual_output_lens),
request_throughput=completed / dur_s,
request_goodput=good_completed / dur_s,
output_throughput=sum(actual_output_lens) / dur_s,
total_token_throughput=(total_input + sum(actual_output_lens)) / dur_s,
mean_ttft_ms=np.mean(ttfts or 0) *
@@ -398,6 +431,7 @@ async def benchmark(
selected_percentile_metrics: List[str],
selected_percentiles: List[str],
ignore_eos: bool,
    goodput_config_dict: Dict[str, float],
max_concurrency: Optional[int],
):
if backend in ASYNC_REQUEST_FUNCS:
@@ -512,6 +546,7 @@ async def limited_request_func(request_func_input, pbar):
tokenizer=tokenizer,
selected_percentile_metrics=selected_percentile_metrics,
selected_percentiles=selected_percentiles,
        goodput_config_dict=goodput_config_dict,
)

print("{s:{c}^{n}}".format(s=' Serving Benchmark Result ', n=50, c='='))
@@ -523,6 +558,9 @@ async def limited_request_func(request_func_input, pbar):
metrics.total_output))
print("{:<40} {:<10.2f}".format("Request throughput (req/s):",
metrics.request_throughput))
    if goodput_config_dict:
print("{:<40} {:<10.2f}".format("Request goodput (req/s):",
metrics.request_goodput))
print("{:<40} {:<10.2f}".format("Output token throughput (tok/s):",
metrics.output_throughput))
print("{:<40} {:<10.2f}".format("Total Token throughput (tok/s):",
@@ -534,6 +572,8 @@ async def limited_request_func(request_func_input, pbar):
"total_input_tokens": metrics.total_input,
"total_output_tokens": metrics.total_output,
"request_throughput": metrics.request_throughput,
"request_goodput:":
metrics.request_goodput if gootput_config_dict else None,
"output_throughput": metrics.output_throughput,
"total_token_throughput": metrics.total_token_throughput,
"input_lens": [output.prompt_len for output in outputs],
@@ -587,6 +627,41 @@ def process_one_metric(
return result


def check_goodput_args(args):
# Check and parse goodput arguments
    goodput_config_dict = {}
    VALID_NAMES = ["ttft", "tpot", "e2el"]
    if args.goodput:
        goodput_config_dict = parse_goodput(args.goodput)
        for slo_name, slo_val in goodput_config_dict.items():
            if slo_name not in VALID_NAMES:
                raise ValueError(
                    f"Invalid metric name found, {slo_name}: {slo_val}. "
                    "The service level objective name should be one of "
                    f"{str(VALID_NAMES)}. ")
            if slo_val < 0:
                raise ValueError(
                    f"Invalid value found, {slo_name}: {slo_val}. "
                    "The service level objective value should be "
                    "non-negative.")
    return goodput_config_dict


def parse_goodput(slo_pairs):
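    """Parse "KEY:VALUE" SLO pairs into a {metric_name: value_in_ms} dict.

    For example, ["ttft:300", "tpot:50"] parses to
    {"ttft": 300.0, "tpot": 50.0}.
    """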
    goodput_config_dict = {}
    try:
        for slo_pair in slo_pairs:
            slo_name, slo_val = slo_pair.split(":")
            goodput_config_dict[slo_name] = float(slo_val)
    except ValueError as err:
        raise argparse.ArgumentTypeError(
            "Invalid format found for service level objectives. "
            "Specify service level objectives for goodput as \"KEY:VALUE\" "
            "pairs, where the key is a metric name, and the value is a "
            "number in milliseconds.") from err
    return goodput_config_dict


def main(args: argparse.Namespace):
print(args)
random.seed(args.seed)
@@ -681,6 +756,8 @@ def main(args: argparse.Namespace):
else:
raise ValueError(f"Unknown dataset: {args.dataset_name}")

    goodput_config_dict = check_goodput_args(args)

benchmark_result = asyncio.run(
benchmark(
backend=backend,
@@ -699,6 +776,7 @@
float(p) for p in args.metric_percentiles.split(",")
],
ignore_eos=args.ignore_eos,
            goodput_config_dict=goodput_config_dict,
max_concurrency=args.max_concurrency,
))

@@ -915,6 +993,17 @@ def main(args: argparse.Namespace):
"Default value is \"99\". "
"Use \"--percentile-metrics\" to select metrics.",
)
parser.add_argument(
"--goodput",
nargs="+",
required=False,
help="Specify service level objectives for goodput as \"KEY:VALUE\" "
"pairs, where the key is a metric name, and the value is in "
"milliseconds. Multiple \"KEY:VALUE\" pairs can be provided, "
"separated by spaces. Allowed request level metric names are "
"\"ttft\", \"tpot\", \"e2el\". For more context on the definition of "
"goodput, refer to DistServe paper: https://arxiv.org/pdf/2401.09670 "
"and the blog: https://hao-ai-lab.github.io/blogs/distserve")

# group for dataset specific arguments
sonnet_group = parser.add_argument_group("sonnet dataset options")
