From f62cc89fd3038c73fbfc44b10eacae69ef24b1a7 Mon Sep 17 00:00:00 2001
From: "clementine@huggingface.co"
Date: Thu, 12 Dec 2024 14:38:21 +0000
Subject: [PATCH 1/3] init

---
 docs/source/package_reference/models.mdx |  2 +-
 src/lighteval/main_endpoint.py           | 20 +++++++++++--------
 .../models/endpoints/endpoint_model.py    | 12 +++++++++--
 src/lighteval/models/model_loader.py      |  4 ++--
 4 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/docs/source/package_reference/models.mdx b/docs/source/package_reference/models.mdx
index 096ce7be3..dcf5bc8dc 100644
--- a/docs/source/package_reference/models.mdx
+++ b/docs/source/package_reference/models.mdx
@@ -21,7 +21,7 @@
 ## Endpoints-based Models
 ### InferenceEndpointModel
 [[autodoc]] models.endpoints.endpoint_model.InferenceEndpointModelConfig
-[[autodoc]] models.endpoints.endpoint_model.InferenceModelConfig
+[[autodoc]] models.endpoints.endpoint_model.ServerlessEndpointModelConfig
 [[autodoc]] models.endpoints.endpoint_model.InferenceEndpointModel
 
 ### TGI ModelClient
diff --git a/src/lighteval/main_endpoint.py b/src/lighteval/main_endpoint.py
index 952aae074..3ca75ed7a 100644
--- a/src/lighteval/main_endpoint.py
+++ b/src/lighteval/main_endpoint.py
@@ -146,6 +146,12 @@ def inference_endpoint(
         str, Argument(help="Path to model config yaml file. (examples/model_configs/endpoint_model.yaml)")
     ],
     tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")],
+    free_endpoint: Annotated[
+        str,
+        Argument(
+            help="True if you want to use the serverless free endpoints, False (default) if you want to spin up your own inference endpoint."
+        ),
+    ] = False,
     # === Common parameters ===
     use_chat_template: Annotated[
         bool, Option(help="Use chat template for evaluation.", rich_help_panel=HELP_PANNEL_NAME_4)
@@ -200,9 +206,7 @@ def inference_endpoint(
 
     """
     from lighteval.logging.evaluation_tracker import EvaluationTracker
-    from lighteval.models.endpoints.endpoint_model import (
-        InferenceEndpointModelConfig,
-    )
+    from lighteval.models.endpoints.endpoint_model import InferenceEndpointModelConfig, ServerlessEndpointModelConfig
     from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters
 
     env_config = EnvConfig(token=TOKEN, cache_dir=cache_dir)
@@ -220,10 +224,10 @@ def inference_endpoint(
     parallelism_manager = ParallelismManager.NONE  # since we're using inference endpoints in remote
 
     # Find a way to add this back
-    # if config["base_params"].get("endpoint_name", None):
-    #     return InferenceModelConfig(model=config["base_params"]["endpoint_name"])
-
-    model_config = InferenceEndpointModelConfig.from_path(model_config_path)
+    if free_endpoint:
+        model_config = ServerlessEndpointModelConfig.from_path(model_config_path)
+    else:
+        model_config = InferenceEndpointModelConfig.from_path(model_config_path)
 
     pipeline_params = PipelineParameters(
         launcher_type=parallelism_manager,
@@ -317,7 +321,7 @@ def tgi(
     import yaml
 
     from lighteval.logging.evaluation_tracker import EvaluationTracker
-    from lighteval.models.model_config import TGIModelConfig
+    from lighteval.models.endpoints.tgi_model import TGIModelConfig
     from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters
 
     env_config = EnvConfig(token=TOKEN, cache_dir=cache_dir)
diff --git a/src/lighteval/models/endpoints/endpoint_model.py b/src/lighteval/models/endpoints/endpoint_model.py
index 0bd6cbbc3..636a5c30a 100644
--- a/src/lighteval/models/endpoints/endpoint_model.py
+++ b/src/lighteval/models/endpoints/endpoint_model.py
@@ -75,10 +75,18 @@
 
 
 @dataclass
-class InferenceModelConfig:
+class ServerlessEndpointModelConfig:
     model: str
     add_special_tokens: bool = True
 
+    @classmethod
+    def from_path(cls, path: str) -> "InferenceEndpointModelConfig":
+        import yaml
+
+        with open(path, "r") as f:
+            config = yaml.safe_load(f)["model"]
+        return cls(**config["base_params"])
+
 
 @dataclass
 class InferenceEndpointModelConfig:
@@ -142,7 +150,7 @@ class InferenceEndpointModel(LightevalModel):
     """
 
     def __init__(  # noqa: C901
-        self, config: Union[InferenceEndpointModelConfig, InferenceModelConfig], env_config: EnvConfig
+        self, config: Union[InferenceEndpointModelConfig, ServerlessEndpointModelConfig], env_config: EnvConfig
     ) -> None:
         self.reuse_existing = getattr(config, "reuse_existing", False)
         self._max_length = None
diff --git a/src/lighteval/models/model_loader.py b/src/lighteval/models/model_loader.py
index b0817be4a..66eb99886 100644
--- a/src/lighteval/models/model_loader.py
+++ b/src/lighteval/models/model_loader.py
@@ -27,7 +27,7 @@
 from lighteval.models.endpoints.endpoint_model import (
     InferenceEndpointModel,
     InferenceEndpointModelConfig,
-    InferenceModelConfig,
+    ServerlessEndpointModelConfig,
 )
 from lighteval.models.endpoints.openai_model import OpenAIClient, OpenAIModelConfig
 from lighteval.models.endpoints.tgi_model import ModelClient, TGIModelConfig
@@ -80,7 +80,7 @@ def load_model(  # noqa: C901
     if isinstance(config, TGIModelConfig):
         return load_model_with_tgi(config)
 
-    if isinstance(config, InferenceEndpointModelConfig) or isinstance(config, InferenceModelConfig):
+    if isinstance(config, InferenceEndpointModelConfig) or isinstance(config, ServerlessEndpointModelConfig):
         return load_model_with_inference_endpoints(config, env_config=env_config)
 
     if isinstance(config, BaseModelConfig):

From 858d3d1edb601440416d26df18b215b0fd89155d Mon Sep 17 00:00:00 2001
From: "clementine@huggingface.co"
Date: Thu, 12 Dec 2024 15:12:45 +0000
Subject: [PATCH 2/3] adding serverless endpoints back

---
 ..._model_lite.yaml => serverless_model.yaml} |  0
 src/lighteval/main_endpoint.py                | 15 ++++++++-------
 .../models/endpoints/endpoint_model.py        | 19 ++++++++++++-------
 3 files changed, 20 insertions(+), 14 deletions(-)
 rename examples/model_configs/{endpoint_model_lite.yaml => serverless_model.yaml} (100%)

diff --git a/examples/model_configs/endpoint_model_lite.yaml b/examples/model_configs/serverless_model.yaml
similarity index 100%
rename from examples/model_configs/endpoint_model_lite.yaml
rename to examples/model_configs/serverless_model.yaml
diff --git a/src/lighteval/main_endpoint.py b/src/lighteval/main_endpoint.py
index 3ca75ed7a..5ed71b7c8 100644
--- a/src/lighteval/main_endpoint.py
+++ b/src/lighteval/main_endpoint.py
@@ -33,10 +33,10 @@
 TOKEN = os.getenv("HF_TOKEN")
 CACHE_DIR: str = os.getenv("HF_HOME", "/scratch")
 
-HELP_PANNEL_NAME_1 = "Common Paramaters"
+HELP_PANNEL_NAME_1 = "Common Parameters"
 HELP_PANNEL_NAME_2 = "Logging Parameters"
-HELP_PANNEL_NAME_3 = "Debug Paramaters"
-HELP_PANNEL_NAME_4 = "Modeling Paramaters"
+HELP_PANNEL_NAME_3 = "Debug Parameters"
+HELP_PANNEL_NAME_4 = "Modeling Parameters"
 
 
 @app.command(rich_help_panel="Evaluation Backends")
@@ -93,7 +93,7 @@ def openai(
     Evaluate OPENAI models.
""" from lighteval.logging.evaluation_tracker import EvaluationTracker - from lighteval.models.model_config import OpenAIModelConfig + from lighteval.models.endpoints.openai_model import OpenAIModelConfig from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters env_config = EnvConfig(token=TOKEN, cache_dir=cache_dir) @@ -147,9 +147,10 @@ def inference_endpoint( ], tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")], free_endpoint: Annotated[ - str, - Argument( - help="True if you want to use the serverless free endpoints, False (default) if you want to spin up your own inference endpoint." + bool, + Option( + help="Use serverless free endpoints instead of spinning up your own inference endpoint.", + rich_help_panel=HELP_PANNEL_NAME_4, ), ] = False, # === Common parameters === diff --git a/src/lighteval/models/endpoints/endpoint_model.py b/src/lighteval/models/endpoints/endpoint_model.py index 636a5c30a..0b30dfd07 100644 --- a/src/lighteval/models/endpoints/endpoint_model.py +++ b/src/lighteval/models/endpoints/endpoint_model.py @@ -76,11 +76,11 @@ @dataclass class ServerlessEndpointModelConfig: - model: str + model_name: str add_special_tokens: bool = True @classmethod - def from_path(cls, path: str) -> "InferenceEndpointModelConfig": + def from_path(cls, path: str) -> "ServerlessEndpointModelConfig": import yaml with open(path, "r") as f: @@ -282,10 +282,10 @@ def __init__( # noqa: C901 else: # Free inference client self.endpoint = None self.endpoint_name = None - self.name = config.model + self.name = config.model_name self.revision = "default" - self.async_client = AsyncInferenceClient(model=config.model, token=env_config.token) - self.client = InferenceClient(model=config.model, token=env_config.token) + self.async_client = AsyncInferenceClient(model=config.model_name, token=env_config.token) + self.client = InferenceClient(model=config.model_name, token=env_config.token) self.use_async = True # set to False for debug - async use is faster @@ -295,7 +295,7 @@ def __init__( # noqa: C901 self.model_info = ModelInfo( model_name=self.name, model_sha=self.revision, - model_dtype=config.model_dtype or "default", + model_dtype=getattr(config, "model_dtype", "default"), model_size=-1, ) @@ -547,7 +547,12 @@ def loglikelihood( cont_toks = torch.tensor(cur_request.tokenized_continuation) len_choice = len(cont_toks) - logits = [t.logprob for t in response.details.prefill[-len_choice:] if t.logprob is not None] + if self.endpoint: # inference endpoint + logits = [ + t.logprob for t in response.details.prefill[-len_choice:] if t.logprob is not None + ] # to check + else: # serverless endpoint + logits = [t.logprob for t in response.details.tokens[-len_choice:] if t.logprob is not None] greedy_tokens = torch.tensor(logits).argmax(dim=-1) max_equal = (greedy_tokens == cont_toks).all().squeeze(0) From b67157c705ad8a23ab578a4c0139fb83a0c39e36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine?= Date: Tue, 17 Dec 2024 13:03:45 +0100 Subject: [PATCH 3/3] updated tests --- tests/models/test_endpoint_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/test_endpoint_model.py b/tests/models/test_endpoint_model.py index 29fbb3c48..f4ba15d91 100644 --- a/tests/models/test_endpoint_model.py +++ b/tests/models/test_endpoint_model.py @@ -53,7 +53,7 @@ class TestInferenceEndpointModelConfig: }, ), ( - "examples/model_configs/endpoint_model_lite.yaml", + "examples/model_configs/serverless_model.yaml", { 
"model_name": "meta-llama/Llama-3.1-8B-Instruct", # Defaults: