[Misc] Add param max-model-len in benchmark_latency.py (vllm-project#5629)
DearPlanet authored and jimpang committed Jul 24, 2024
1 parent cee8edb commit 00ad414
Showing 1 changed file with 7 additions and 0 deletions.
benchmarks/benchmark_latency.py: 7 additions & 0 deletions
@@ -29,6 +29,7 @@ def main(args: argparse.Namespace):
         tensor_parallel_size=args.tensor_parallel_size,
         trust_remote_code=args.trust_remote_code,
         dtype=args.dtype,
+        max_model_len=args.max_model_len,
         enforce_eager=args.enforce_eager,
         kv_cache_dtype=args.kv_cache_dtype,
         quantization_param_path=args.quantization_param_path,
@@ -150,6 +151,12 @@ def run_to_completion(profile_dir: Optional[str] = None):
     parser.add_argument('--trust-remote-code',
                         action='store_true',
                         help='trust remote code from huggingface')
+    parser.add_argument(
+        '--max-model-len',
+        type=int,
+        default=None,
+        help='Maximum length of a sequence (including prompt and output). '
+        'If None, will be derived from the model.')
     parser.add_argument(
         '--dtype',
         type=str,
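
For context, the new flag simply flows through argparse into vLLM's LLM constructor. Below is a minimal sketch (not part of this commit) of the resulting behavior; the model name and length are illustrative placeholders:

# Equivalent invocation of the benchmark script with the new flag:
#   python benchmarks/benchmark_latency.py --model facebook/opt-125m --max-model-len 2048
from vllm import LLM, SamplingParams

# max_model_len caps the total sequence length (prompt + output tokens);
# when left as None, vLLM derives the limit from the model's config.
llm = LLM(
    model="facebook/opt-125m",  # placeholder model for illustration
    max_model_len=2048,         # overrides the derived context length
)

outputs = llm.generate(["Hello, world"], SamplingParams(max_tokens=16))
print(outputs[0].outputs[0].text)

Setting max_model_len below the model's native context window reduces the KV-cache capacity the engine must reserve, which is one plausible motivation for exposing the flag in a latency benchmark.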
