diff --git a/python/kserve/kserve/protocol/rest/server.py b/python/kserve/kserve/protocol/rest/server.py
index 98a19045d1f..5896ed5b083 100644
--- a/python/kserve/kserve/protocol/rest/server.py
+++ b/python/kserve/kserve/protocol/rest/server.py
@@ -37,7 +37,8 @@
 from .v1_endpoints import V1Endpoints
 from .v2_datamodels import (InferenceResponse, ModelMetadataResponse,
                             ModelReadyResponse, ServerLiveResponse,
-                            ServerMetadataResponse, ServerReadyResponse)
+                            ServerMetadataResponse, ServerReadyResponse,
+                            ListModelsResponse)
 from .v2_endpoints import V2Endpoints
 
 
@@ -100,6 +101,8 @@ def create_application(self) -> FastAPI:
                              response_model=ServerLiveResponse, tags=["V2"]),
                 FastAPIRoute(r"/v2/health/ready", v2_endpoints.ready,
                              response_model=ServerReadyResponse, tags=["V2"]),
+                FastAPIRoute(r"/v2/models", v2_endpoints.models,
+                             response_model=ListModelsResponse, tags=["V2"]),
                 FastAPIRoute(r"/v2/models/{model_name}", v2_endpoints.model_metadata,
                              response_model=ModelMetadataResponse, tags=["V2"]),
                 FastAPIRoute(r"/v2/models/{model_name}/versions/{model_version}",
diff --git a/python/kserve/kserve/protocol/rest/v2_datamodels.py b/python/kserve/kserve/protocol/rest/v2_datamodels.py
index c5db165ae30..4def1c6fba6 100644
--- a/python/kserve/kserve/protocol/rest/v2_datamodels.py
+++ b/python/kserve/kserve/protocol/rest/v2_datamodels.py
@@ -78,6 +78,17 @@ class MetadataTensor(BaseModel):
     shape: List[int]
 
 
+class ListModelsResponse(BaseModel):
+    """ListModelsResponse
+
+        $models_list_response =
+        {
+          "models" : [ $string, ... ]
+        }
+    """
+    models: List[str]
+
+
 class ModelMetadataResponse(BaseModel):
     """ModelMetadataResponse
 
diff --git a/python/kserve/kserve/protocol/rest/v2_endpoints.py b/python/kserve/kserve/protocol/rest/v2_endpoints.py
index 13303c496d2..c22612de863 100644
--- a/python/kserve/kserve/protocol/rest/v2_endpoints.py
+++ b/python/kserve/kserve/protocol/rest/v2_endpoints.py
@@ -20,7 +20,7 @@
 from ..infer_type import InferInput, InferRequest
 from .v2_datamodels import (
     InferenceRequest, ServerMetadataResponse, ServerLiveResponse, ServerReadyResponse,
-    ModelMetadataResponse, InferenceResponse, ModelReadyResponse
+    ModelMetadataResponse, InferenceResponse, ModelReadyResponse, ListModelsResponse
 )
 from ..dataplane import DataPlane
 from ..model_repository_extension import ModelRepositoryExtension
@@ -61,6 +61,15 @@ async def ready() -> ServerReadyResponse:
         """
         return ServerReadyResponse(ready=True)
 
+    async def models(self) -> ListModelsResponse:
+        """Get a list of models in the model registry.
+
+        Returns:
+            ListModelsResponse: List of models object.
+        """
+        models = list(self.dataplane.model_registry.get_models().keys())
+        return ListModelsResponse.parse_obj({"models": models})
+
     async def model_metadata(self, model_name: str, model_version: Optional[str] = None) -> ModelMetadataResponse:
         """Model metadata handler. It provides information about a model.
 
diff --git a/python/kserve/test/test_server.py b/python/kserve/test/test_server.py
index 123de0bf5c9..480ea6f9058 100644
--- a/python/kserve/test/test_server.py
+++ b/python/kserve/test/test_server.py
@@ -233,6 +233,11 @@ def test_list_models(self, http_server_client):
         assert resp.status_code == 200
         assert resp.json() == {"models": ["TestModel"]}
 
+    def test_list_models_v2(self, http_server_client):
+        resp = http_server_client.get('/v2/models')
+        assert resp.status_code == 200
+        assert resp.json() == {"models": ["TestModel"]}
+
     def test_predict(self, http_server_client):
         resp = http_server_client.post('/v1/models/TestModel:predict',
                                        data=b'{"instances":[[1,2]]}')