From f62cc89fd3038c73fbfc44b10eacae69ef24b1a7 Mon Sep 17 00:00:00 2001
From: "clementine@huggingface.co"
Date: Thu, 12 Dec 2024 14:38:21 +0000
Subject: [PATCH 1/3] init

---
 docs/source/package_reference/models.mdx |  2 +-
 src/lighteval/main_endpoint.py           | 20 +++++++++++--------
 .../models/endpoints/endpoint_model.py    | 12 +++++++++--
 src/lighteval/models/model_loader.py      |  4 ++--
 4 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/docs/source/package_reference/models.mdx b/docs/source/package_reference/models.mdx
index 096ce7be3..dcf5bc8dc 100644
--- a/docs/source/package_reference/models.mdx
+++ b/docs/source/package_reference/models.mdx
@@ -21,7 +21,7 @@
 ## Endpoints-based Models
 ### InferenceEndpointModel
 [[autodoc]] models.endpoints.endpoint_model.InferenceEndpointModelConfig
-[[autodoc]] models.endpoints.endpoint_model.InferenceModelConfig
+[[autodoc]] models.endpoints.endpoint_model.ServerlessEndpointModelConfig
 [[autodoc]] models.endpoints.endpoint_model.InferenceEndpointModel
 
 ### TGI ModelClient
diff --git a/src/lighteval/main_endpoint.py b/src/lighteval/main_endpoint.py
index 952aae074..3ca75ed7a 100644
--- a/src/lighteval/main_endpoint.py
+++ b/src/lighteval/main_endpoint.py
@@ -146,6 +146,12 @@ def inference_endpoint(
         str, Argument(help="Path to model config yaml file. (examples/model_configs/endpoint_model.yaml)")
     ],
     tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")],
+    free_endpoint: Annotated[
+        str,
+        Argument(
+            help="True if you want to use the serverless free endpoints, False (default) if you want to spin up your own inference endpoint."
+        ),
+    ] = False,
     # === Common parameters ===
     use_chat_template: Annotated[
         bool, Option(help="Use chat template for evaluation.", rich_help_panel=HELP_PANNEL_NAME_4)
@@ -200,9 +206,7 @@ def inference_endpoint(
 
     """
     from lighteval.logging.evaluation_tracker import EvaluationTracker
-    from lighteval.models.endpoints.endpoint_model import (
-        InferenceEndpointModelConfig,
-    )
+    from lighteval.models.endpoints.endpoint_model import InferenceEndpointModelConfig, ServerlessEndpointModelConfig
     from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters
 
     env_config = EnvConfig(token=TOKEN, cache_dir=cache_dir)
@@ -220,10 +224,10 @@ def inference_endpoint(
     parallelism_manager = ParallelismManager.NONE  # since we're using inference endpoints in remote
 
     # Find a way to add this back
-    # if config["base_params"].get("endpoint_name", None):
-    #     return InferenceModelConfig(model=config["base_params"]["endpoint_name"])
-
-    model_config = InferenceEndpointModelConfig.from_path(model_config_path)
+    if free_endpoint:
+        model_config = ServerlessEndpointModelConfig.from_path(model_config_path)
+    else:
+        model_config = InferenceEndpointModelConfig.from_path(model_config_path)
 
     pipeline_params = PipelineParameters(
         launcher_type=parallelism_manager,
@@ -317,7 +321,7 @@ def tgi(
     import yaml
 
     from lighteval.logging.evaluation_tracker import EvaluationTracker
-    from lighteval.models.model_config import TGIModelConfig
+    from lighteval.models.endpoints.tgi_model import TGIModelConfig
     from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters
 
     env_config = EnvConfig(token=TOKEN, cache_dir=cache_dir)
diff --git a/src/lighteval/models/endpoints/endpoint_model.py b/src/lighteval/models/endpoints/endpoint_model.py
index 0bd6cbbc3..636a5c30a 100644
--- a/src/lighteval/models/endpoints/endpoint_model.py
+++ b/src/lighteval/models/endpoints/endpoint_model.py
@@ -75,10 +75,18 @@
 
 
 @dataclass
-class InferenceModelConfig:
+class ServerlessEndpointModelConfig:
     model: str
     add_special_tokens: bool = True
 
+    @classmethod
+    def from_path(cls, path: str) -> "InferenceEndpointModelConfig":
+        import yaml
+
+        with open(path, "r") as f:
+            config = yaml.safe_load(f)["model"]
+        return cls(**config["base_params"])
+
 
 @dataclass
 class InferenceEndpointModelConfig:
@@ -142,7 +150,7 @@ class InferenceEndpointModel(LightevalModel):
     """
 
     def __init__(  # noqa: C901
-        self, config: Union[InferenceEndpointModelConfig, InferenceModelConfig], env_config: EnvConfig
+        self, config: Union[InferenceEndpointModelConfig, ServerlessEndpointModelConfig], env_config: EnvConfig
     ) -> None:
         self.reuse_existing = getattr(config, "reuse_existing", False)
         self._max_length = None
diff --git a/src/lighteval/models/model_loader.py b/src/lighteval/models/model_loader.py
index b0817be4a..66eb99886 100644
--- a/src/lighteval/models/model_loader.py
+++ b/src/lighteval/models/model_loader.py
@@ -27,7 +27,7 @@
 from lighteval.models.endpoints.endpoint_model import (
     InferenceEndpointModel,
     InferenceEndpointModelConfig,
-    InferenceModelConfig,
+    ServerlessEndpointModelConfig,
 )
 from lighteval.models.endpoints.openai_model import OpenAIClient, OpenAIModelConfig
 from lighteval.models.endpoints.tgi_model import ModelClient, TGIModelConfig
@@ -80,7 +80,7 @@ def load_model(  # noqa: C901
     if isinstance(config, TGIModelConfig):
         return load_model_with_tgi(config)
 
-    if isinstance(config, InferenceEndpointModelConfig) or isinstance(config, InferenceModelConfig):
+    if isinstance(config, InferenceEndpointModelConfig) or isinstance(config, ServerlessEndpointModelConfig):
         return load_model_with_inference_endpoints(config, env_config=env_config)
 
     if isinstance(config, BaseModelConfig):

From 858d3d1edb601440416d26df18b215b0fd89155d Mon Sep 17 00:00:00 2001
From: "clementine@huggingface.co"
Date: Thu, 12 Dec 2024 15:12:45 +0000
Subject: [PATCH 2/3] adding serverless endpoints back

---
 ..._model_lite.yaml => serverless_model.yaml} |  0
 src/lighteval/main_endpoint.py                | 15 ++++++++-------
 .../models/endpoints/endpoint_model.py        | 19 ++++++++++++-------
 3 files changed, 20 insertions(+), 14 deletions(-)
 rename examples/model_configs/{endpoint_model_lite.yaml => serverless_model.yaml} (100%)

diff --git a/examples/model_configs/endpoint_model_lite.yaml b/examples/model_configs/serverless_model.yaml
similarity index 100%
rename from examples/model_configs/endpoint_model_lite.yaml
rename to examples/model_configs/serverless_model.yaml
diff --git a/src/lighteval/main_endpoint.py b/src/lighteval/main_endpoint.py
index 3ca75ed7a..5ed71b7c8 100644
--- a/src/lighteval/main_endpoint.py
+++ b/src/lighteval/main_endpoint.py
@@ -33,10 +33,10 @@
 TOKEN = os.getenv("HF_TOKEN")
 CACHE_DIR: str = os.getenv("HF_HOME", "/scratch")
 
-HELP_PANNEL_NAME_1 = "Common Paramaters"
+HELP_PANNEL_NAME_1 = "Common Parameters"
 HELP_PANNEL_NAME_2 = "Logging Parameters"
-HELP_PANNEL_NAME_3 = "Debug Paramaters"
-HELP_PANNEL_NAME_4 = "Modeling Paramaters"
+HELP_PANNEL_NAME_3 = "Debug Parameters"
+HELP_PANNEL_NAME_4 = "Modeling Parameters"
 
 
 @app.command(rich_help_panel="Evaluation Backends")
@@ -93,7 +93,7 @@ def openai(
     Evaluate OPENAI models.
""" from lighteval.logging.evaluation_tracker import EvaluationTracker - from lighteval.models.model_config import OpenAIModelConfig + from lighteval.models.endpoints.openai_model import OpenAIModelConfig from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters env_config = EnvConfig(token=TOKEN, cache_dir=cache_dir) @@ -147,9 +147,10 @@ def inference_endpoint( ], tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")], free_endpoint: Annotated[ - str, - Argument( - help="True if you want to use the serverless free endpoints, False (default) if you want to spin up your own inference endpoint." + bool, + Option( + help="Use serverless free endpoints instead of spinning up your own inference endpoint.", + rich_help_panel=HELP_PANNEL_NAME_4, ), ] = False, # === Common parameters === diff --git a/src/lighteval/models/endpoints/endpoint_model.py b/src/lighteval/models/endpoints/endpoint_model.py index 636a5c30a..0b30dfd07 100644 --- a/src/lighteval/models/endpoints/endpoint_model.py +++ b/src/lighteval/models/endpoints/endpoint_model.py @@ -76,11 +76,11 @@ @dataclass class ServerlessEndpointModelConfig: - model: str + model_name: str add_special_tokens: bool = True @classmethod - def from_path(cls, path: str) -> "InferenceEndpointModelConfig": + def from_path(cls, path: str) -> "ServerlessEndpointModelConfig": import yaml with open(path, "r") as f: @@ -282,10 +282,10 @@ def __init__( # noqa: C901 else: # Free inference client self.endpoint = None self.endpoint_name = None - self.name = config.model + self.name = config.model_name self.revision = "default" - self.async_client = AsyncInferenceClient(model=config.model, token=env_config.token) - self.client = InferenceClient(model=config.model, token=env_config.token) + self.async_client = AsyncInferenceClient(model=config.model_name, token=env_config.token) + self.client = InferenceClient(model=config.model_name, token=env_config.token) self.use_async = True # set to False for debug - async use is faster @@ -295,7 +295,7 @@ def __init__( # noqa: C901 self.model_info = ModelInfo( model_name=self.name, model_sha=self.revision, - model_dtype=config.model_dtype or "default", + model_dtype=getattr(config, "model_dtype", "default"), model_size=-1, ) @@ -547,7 +547,12 @@ def loglikelihood( cont_toks = torch.tensor(cur_request.tokenized_continuation) len_choice = len(cont_toks) - logits = [t.logprob for t in response.details.prefill[-len_choice:] if t.logprob is not None] + if self.endpoint: # inference endpoint + logits = [ + t.logprob for t in response.details.prefill[-len_choice:] if t.logprob is not None + ] # to check + else: # serverless endpoint + logits = [t.logprob for t in response.details.tokens[-len_choice:] if t.logprob is not None] greedy_tokens = torch.tensor(logits).argmax(dim=-1) max_equal = (greedy_tokens == cont_toks).all().squeeze(0) From b67157c705ad8a23ab578a4c0139fb83a0c39e36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine?= Date: Tue, 17 Dec 2024 13:03:45 +0100 Subject: [PATCH 3/3] updated tests --- tests/models/test_endpoint_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/test_endpoint_model.py b/tests/models/test_endpoint_model.py index 29fbb3c48..f4ba15d91 100644 --- a/tests/models/test_endpoint_model.py +++ b/tests/models/test_endpoint_model.py @@ -53,7 +53,7 @@ class TestInferenceEndpointModelConfig: }, ), ( - "examples/model_configs/endpoint_model_lite.yaml", + "examples/model_configs/serverless_model.yaml", { 
"model_name": "meta-llama/Llama-3.1-8B-Instruct", # Defaults: