From bb38d1b614aea370ca5108a29e4ced6954685727 Mon Sep 17 00:00:00 2001 From: bojiang Date: Thu, 23 Dec 2021 19:37:49 +0800 Subject: [PATCH 1/3] refactor: runner name --- bentoml/_internal/frameworks/catboost.py | 16 ++++++---- bentoml/_internal/frameworks/detectron.py | 16 ++++++---- bentoml/_internal/frameworks/easyocr.py | 14 ++++++--- bentoml/_internal/frameworks/gluon.py | 13 ++++++--- bentoml/_internal/frameworks/h2o.py | 12 ++++++-- bentoml/_internal/frameworks/keras.py | 11 +++++-- bentoml/_internal/frameworks/lightgbm.py | 12 ++++++-- bentoml/_internal/frameworks/mlflow.py | 12 ++++++-- bentoml/_internal/frameworks/onnx.py | 12 ++++++-- bentoml/_internal/frameworks/onnxmlir.py | 12 ++++++-- bentoml/_internal/frameworks/paddle.py | 12 ++++++-- bentoml/_internal/frameworks/pycaret.py | 14 ++++++--- bentoml/_internal/frameworks/pytorch.py | 12 ++++++-- .../_internal/frameworks/pytorch_lightning.py | 7 ++++- bentoml/_internal/frameworks/sklearn.py | 16 ++++++---- bentoml/_internal/frameworks/spacy.py | 14 ++++++--- bentoml/_internal/frameworks/statsmodels.py | 14 ++++++--- bentoml/_internal/frameworks/tensorflow.py | 12 ++++++-- bentoml/_internal/frameworks/transformers.py | 9 ++++-- bentoml/_internal/frameworks/xgboost.py | 12 ++++++-- bentoml/_internal/runner/runner.py | 29 +++++++------------ bentoml/_internal/service/service.py | 21 +++++++++----- bentoml/pyspark.py | 2 +- .../bento_server_general_features/service.py | 26 +++++++++++++---- .../service.py | 26 +++++++++++++---- 25 files changed, 249 insertions(+), 107 deletions(-) diff --git a/bentoml/_internal/frameworks/catboost.py b/bentoml/_internal/frameworks/catboost.py index 6b8b27aeb18..483740af6cf 100644 --- a/bentoml/_internal/frameworks/catboost.py +++ b/bentoml/_internal/frameworks/catboost.py @@ -43,7 +43,7 @@ def _get_model_info( - tag: t.Union[str, Tag], + tag: Tag, model_params: t.Optional[t.Dict[str, t.Union[str, int]]], model_store: "ModelStore", ) -> t.Tuple["Model", str, t.Dict[str, 
t.Any]]: @@ -235,9 +235,10 @@ class _CatBoostRunner(Runner): @inject def __init__( self, - tag: t.Union[str, Tag], + tag: Tag, predict_fn_name: str, model_params: t.Optional[t.Dict[str, t.Union[str, int]]], + name: str, resource_quota: t.Optional[t.Dict[str, t.Any]], batch_options: t.Optional[t.Dict[str, t.Any]], model_store: "ModelStore" = Provide[BentoMLContainer.model_store], @@ -245,7 +246,7 @@ def __init__( model_info, model_file, _model_params = _get_model_info( tag, model_params, model_store ) - super().__init__(model_info.tag.name, resource_quota, batch_options) + super().__init__(name, resource_quota, batch_options) self._model_info = model_info self._model_file = model_file self._predict_fn_name = predict_fn_name @@ -283,9 +284,10 @@ def load_runner( predict_fn_name: str = "predict", *, model_params: t.Union[None, t.Dict[str, t.Union[str, int]]] = None, + model_store: "ModelStore" = Provide[BentoMLContainer.model_store], + name: t.Optional[str] = None, resource_quota: t.Union[None, t.Dict[str, t.Any]] = None, batch_options: t.Union[None, t.Dict[str, t.Any]] = None, - model_store: "ModelStore" = Provide[BentoMLContainer.model_store], ) -> "_CatBoostRunner": """ Runner represents a unit of serving logic that can be scaled horizontally to @@ -320,11 +322,15 @@ def load_runner( runner = bentoml.catboost.load_runner("my_model:latest"") runner.run(cbt.Pool(input_data)) """ # noqa + tag = Tag.from_taglike(tag) + if name is None: + name = tag.name return _CatBoostRunner( tag=tag, predict_fn_name=predict_fn_name, model_params=model_params, + model_store=model_store, + name=name, resource_quota=resource_quota, batch_options=batch_options, - model_store=model_store, ) diff --git a/bentoml/_internal/frameworks/detectron.py b/bentoml/_internal/frameworks/detectron.py index 223841cb0d7..9977881a511 100644 --- a/bentoml/_internal/frameworks/detectron.py +++ b/bentoml/_internal/frameworks/detectron.py @@ -48,7 +48,7 @@ @inject def load( - tag: t.Union[str, Tag], + tag: 
Tag, device: str = "cpu", model_store: "ModelStore" = Provide[BentoMLContainer.model_store], ) -> "torch.nn.Module": @@ -201,19 +201,20 @@ class _DetectronRunner(Runner): # TODO add partial_kwargs @larme def __init__( self, - tag: t.Union[str, Tag], + tag: Tag, predict_fn_name: str, + name: str, resource_quota: t.Optional[t.Dict[str, t.Any]], batch_options: t.Optional[t.Dict[str, t.Any]], model_store: "ModelStore" = Provide[BentoMLContainer.model_store], ): - super().__init__(tag, resource_quota, batch_options) + super().__init__(name, resource_quota, batch_options) self._tag = tag self._predict_fn_name = predict_fn_name self._model_store = model_store @property - def required_models(self) -> t.List[str]: + def required_models(self) -> t.List[Tag]: return [self._tag] @property @@ -263,6 +264,7 @@ def load_runner( tag: t.Union[str, Tag], predict_fn_name: str = "__call__", *, + name: t.Optional[str] = None, resource_quota: t.Union[None, t.Dict[str, t.Any]] = None, batch_options: t.Union[None, t.Dict[str, t.Any]] = None, model_store: "ModelStore" = Provide[BentoMLContainer.model_store], @@ -290,10 +292,14 @@ def load_runner( Examples: TODO """ # noqa + tag = Tag.from_taglike(tag) + if name is None: + name = tag.name return _DetectronRunner( tag=tag, predict_fn_name=predict_fn_name, + model_store=model_store, + name=name, resource_quota=resource_quota, batch_options=batch_options, - model_store=model_store, ) diff --git a/bentoml/_internal/frameworks/easyocr.py b/bentoml/_internal/frameworks/easyocr.py index 6c5d4e1ba3f..b1791046fa7 100644 --- a/bentoml/_internal/frameworks/easyocr.py +++ b/bentoml/_internal/frameworks/easyocr.py @@ -182,15 +182,16 @@ class _EasyOCRRunner(Runner): @inject def __init__( self, - tag: t.Union[str, Tag], + tag: Tag, predict_fn_name: str, + name: str, predict_params: t.Optional[t.Dict[str, t.Any]], resource_quota: t.Optional[t.Dict[str, t.Any]], batch_options: t.Optional[t.Dict[str, t.Any]], model_store: "ModelStore" = 
Provide[BentoMLContainer.model_store], ): - super().__init__(str(tag), resource_quota, batch_options) - self._tag = Tag.from_taglike(tag) + super().__init__(name, resource_quota, batch_options) + self._tag = tag self._predict_fn_name = predict_fn_name self._predict_params = predict_params self._model_store = model_store @@ -227,6 +228,7 @@ def load_runner( tag: t.Union[str, Tag], predict_fn_name: str = "readtext_batched", *, + name: t.Optional[str] = None, predict_params: t.Union[None, t.Dict[str, t.Union[str, t.Any]]] = None, resource_quota: t.Union[None, t.Dict[str, t.Any]] = None, batch_options: t.Union[None, t.Dict[str, t.Any]] = None, @@ -263,11 +265,15 @@ def load_runner( input_data = pd.from_csv("/path/to/csv") runner = bentoml.xgboost.load_runner("my_model:20201012_DE43A2") runner.run(xgb.DMatrix(input_data)) - """ # noqa + """ + tag = Tag.from_taglike(tag) + if name is None: + name = tag.name return _EasyOCRRunner( tag=tag, predict_fn_name=predict_fn_name, predict_params=predict_params, + name=name, resource_quota=resource_quota, batch_options=batch_options, model_store=model_store, diff --git a/bentoml/_internal/frameworks/gluon.py b/bentoml/_internal/frameworks/gluon.py index e32c7976b6e..55016857d2e 100644 --- a/bentoml/_internal/frameworks/gluon.py +++ b/bentoml/_internal/frameworks/gluon.py @@ -38,7 +38,7 @@ @inject def load( - tag: t.Union[str, Tag], + tag: Tag, mxnet_ctx: t.Optional[mxnet.context.Context] = None, model_store: "ModelStore" = Provide[BentoMLContainer.model_store], ) -> gluon.Block: @@ -128,14 +128,15 @@ class _GluonRunner(Runner): @inject def __init__( self, - tag: t.Union[str, Tag], + tag: Tag, predict_fn_name: str, + name: str, resource_quota: t.Optional[t.Dict[str, t.Any]], batch_options: t.Optional[t.Dict[str, t.Any]], model_store: "ModelStore" = Provide[BentoMLContainer.model_store], ): - super().__init__(str(tag), resource_quota, batch_options) - self._tag = Tag.from_taglike(tag) + super().__init__(name, resource_quota, 
batch_options) + self._tag = tag self._predict_fn_name = predict_fn_name self._model_store = model_store self._ctx = None @@ -190,6 +191,7 @@ def load_runner( predict_fn_name: str = "__call__", *, resource_quota: t.Union[None, t.Dict[str, t.Any]] = None, + name: t.Optional[str] = None, batch_options: t.Union[None, t.Dict[str, t.Any]] = None, model_store: "ModelStore" = Provide[BentoMLContainer.model_store], ) -> _GluonRunner: @@ -216,6 +218,9 @@ def load_runner( Examples: TODO """ # noqa + tag = Tag.from_taglike(tag) + if name is None: + name = tag.name return _GluonRunner( tag=tag, predict_fn_name=predict_fn_name, diff --git a/bentoml/_internal/frameworks/h2o.py b/bentoml/_internal/frameworks/h2o.py index eed1338d34e..d4924b03568 100644 --- a/bentoml/_internal/frameworks/h2o.py +++ b/bentoml/_internal/frameworks/h2o.py @@ -39,7 +39,7 @@ @inject def load( - tag: t.Union[str, Tag], + tag: Tag, init_params: t.Optional[t.Dict[str, t.Any]] = None, model_store: "ModelStore" = Provide[BentoMLContainer.model_store], ) -> h2o.model.model_base.ModelBase: @@ -133,14 +133,15 @@ class _H2ORunner(Runner): @inject def __init__( self, - tag: t.Union[str, Tag], + tag: Tag, predict_fn_name: str, init_params: t.Optional[t.Dict[str, t.Union[str, t.Any]]], + name: str, resource_quota: t.Optional[t.Dict[str, t.Any]], batch_options: t.Optional[t.Dict[str, t.Any]], model_store: "ModelStore" = Provide[BentoMLContainer.model_store], ): - super().__init__(str(tag), resource_quota, batch_options) + super().__init__(name, resource_quota, batch_options) self._tag = Tag.from_taglike(tag) self._predict_fn_name = predict_fn_name @@ -189,6 +190,7 @@ def load_runner( predict_fn_name: str = "predict", *, init_params: t.Optional[t.Dict[str, t.Union[str, t.Any]]], + name: t.Optional[str] = None, resource_quota: t.Optional[t.Dict[str, t.Any]] = None, batch_options: t.Optional[t.Dict[str, t.Any]] = None, model_store: "ModelStore" = Provide[BentoMLContainer.model_store], @@ -220,10 +222,14 @@ def 
load_runner( TODO """ # noqa + tag = Tag.from_taglike(tag) + if name is None: + name = tag.name return _H2ORunner( tag=tag, predict_fn_name=predict_fn_name, init_params=init_params, + name=name, resource_quota=resource_quota, batch_options=batch_options, model_store=model_store, diff --git a/bentoml/_internal/frameworks/keras.py b/bentoml/_internal/frameworks/keras.py index ea87c08a006..b022bf12b32 100644 --- a/bentoml/_internal/frameworks/keras.py +++ b/bentoml/_internal/frameworks/keras.py @@ -79,7 +79,7 @@ def get_session() -> "BaseSession": @inject def load( - tag: t.Union[str, Tag], + tag: Tag, model_store: "ModelStore" = Provide[BentoMLContainer.model_store], ) -> "keras.Model": """ @@ -198,10 +198,11 @@ class _KerasRunner(_TensorflowRunner): @inject def __init__( self, - tag: t.Union[str, Tag], + tag: Tag, predict_fn_name: str, device_id: str, predict_kwargs: t.Optional[t.Dict[str, t.Any]], + name: str, resource_quota: t.Optional[t.Dict[str, t.Any]], batch_options: t.Optional[t.Dict[str, t.Any]], model_store: "ModelStore" = Provide[BentoMLContainer.model_store], @@ -210,6 +211,7 @@ def __init__( tag=tag, predict_fn_name=predict_fn_name, device_id=device_id, + name=name, partial_kwargs=predict_kwargs, resource_quota=resource_quota, batch_options=batch_options, @@ -254,6 +256,7 @@ def load_runner( predict_fn_name: str = "predict", device_id: str = "CPU:0", predict_kwargs: t.Optional[t.Dict[str, t.Any]] = None, + name: t.Optional[str] = None, resource_quota: t.Union[None, t.Dict[str, t.Any]] = None, batch_options: t.Union[None, t.Dict[str, t.Any]] = None, model_store: "ModelStore" = Provide[BentoMLContainer.model_store], @@ -284,10 +287,14 @@ def load_runner( Examples:: """ # noqa: LN001 + tag = Tag.from_taglike(tag) + if name is None: + name = tag.name return _KerasRunner( tag=tag, predict_fn_name=predict_fn_name, device_id=device_id, + name=name, predict_kwargs=predict_kwargs, resource_quota=resource_quota, batch_options=batch_options, diff --git 
a/bentoml/_internal/frameworks/lightgbm.py b/bentoml/_internal/frameworks/lightgbm.py index 252ec881b11..77c21f8106d 100644 --- a/bentoml/_internal/frameworks/lightgbm.py +++ b/bentoml/_internal/frameworks/lightgbm.py @@ -73,7 +73,7 @@ def _get_model_info( @inject def load( - tag: t.Union[str, Tag], + tag: Tag, booster_params: t.Optional[t.Dict[str, t.Union[str, int]]] = None, model_store: "ModelStore" = Provide[BentoMLContainer.model_store], ) -> t.Union["lgb.basic.Booster", _LightGBMModelType]: @@ -208,14 +208,15 @@ class _LightGBMRunner(Runner): @inject def __init__( self, - tag: t.Union[str, Tag], + tag: Tag, infer_api_callback: str, booster_params: t.Optional[t.Dict[str, t.Union[str, int]]], + name: str, resource_quota: t.Optional[t.Dict[str, t.Any]], batch_options: t.Optional[t.Dict[str, t.Any]], model_store: "ModelStore" = Provide[BentoMLContainer.model_store], ): - super().__init__(str(tag), resource_quota, batch_options) + super().__init__(name, resource_quota, batch_options) model_info, model_file, booster_params = _get_model_info( tag, booster_params, model_store ) @@ -269,6 +270,7 @@ def load_runner( infer_api_callback: str = "predict", *, booster_params: t.Optional[t.Dict[str, t.Union[str, int]]] = None, + name: t.Optional[str] = None, resource_quota: t.Union[None, t.Dict[str, t.Any]] = None, batch_options: t.Union[None, t.Dict[str, t.Any]] = None, model_store: "ModelStore" = Provide[BentoMLContainer.model_store], @@ -303,9 +305,13 @@ def load_runner( runner = bentoml.lightgbm.load_runner("my_lightgbm_model:latest") runner.run_batch(X_test, num_iteration=gbm.best_iteration) """ # noqa + tag = Tag.from_taglike(tag) + if name is None: + name = tag.name return _LightGBMRunner( tag=tag, infer_api_callback=infer_api_callback, + name=name, booster_params=booster_params, resource_quota=resource_quota, batch_options=batch_options, diff --git a/bentoml/_internal/frameworks/mlflow.py b/bentoml/_internal/frameworks/mlflow.py index fa1d5e2c982..307939d070c 100644 
--- a/bentoml/_internal/frameworks/mlflow.py +++ b/bentoml/_internal/frameworks/mlflow.py @@ -53,7 +53,7 @@ def _validate_file_exists(fname: str, parent: str) -> t.Tuple[bool, str]: @inject def load( - tag: t.Union[str, Tag], + tag: Tag, model_store: "ModelStore" = Provide[BentoMLContainer.model_store], ) -> "PyFuncModel": """ @@ -195,12 +195,13 @@ class _PyFuncRunner(Runner): @inject def __init__( self, - tag: t.Union[str, Tag], + tag: Tag, + name: str, resource_quota: t.Optional[t.Dict[str, t.Any]], batch_options: t.Optional[t.Dict[str, t.Any]], model_store: "ModelStore" = Provide[BentoMLContainer.model_store], ): - super().__init__(str(tag), resource_quota, batch_options) + super().__init__(name, resource_quota, batch_options) self._model_store = model_store self._model_tag = tag @@ -228,6 +229,7 @@ def _run_batch(self, input_data: t.Any) -> t.Any: # type: ignore[override] @inject def load_runner( tag: t.Union[str, Tag], + name: t.Optional[str] = None, resource_quota: t.Optional[t.Dict[str, t.Any]] = None, batch_options: t.Optional[t.Dict[str, t.Any]] = None, model_store: "ModelStore" = Provide[BentoMLContainer.model_store], @@ -254,8 +256,12 @@ def load_runner( Examples:: """ # noqa + tag = Tag.from_taglike(tag) + if name is None: + name = tag.name return _PyFuncRunner( tag, + name=name, resource_quota=resource_quota, batch_options=batch_options, model_store=model_store, diff --git a/bentoml/_internal/frameworks/onnx.py b/bentoml/_internal/frameworks/onnx.py index ca7874e948e..4a0eb32f300 100644 --- a/bentoml/_internal/frameworks/onnx.py +++ b/bentoml/_internal/frameworks/onnx.py @@ -101,7 +101,7 @@ def _get_model_info( @inject def load( - tag: t.Union[str, Tag], + tag: Tag, backend: t.Optional[str] = "onnxruntime", providers: t.Optional[t.Union["_ProviderType", "_GPUProviderType"]] = None, session_options: t.Optional["ort.SessionOptions"] = None, @@ -202,12 +202,13 @@ class _ONNXRunner(Runner): @inject def __init__( self, - tag: t.Union[str, Tag], + tag: Tag, 
backend: str, gpu_device_id: int, disable_copy_in_default_stream: bool, providers: t.Optional["_ProviderType"], session_options: t.Optional["ort.SessionOptions"], + name: str, resource_quota: t.Optional[t.Dict[str, t.Any]], batch_options: t.Optional[t.Dict[str, t.Any]], model_store: "ModelStore", @@ -217,7 +218,7 @@ def __init__( if "gpus" not in resource_quota: resource_quota["gpus"] = gpu_device_id - super().__init__(str(tag), resource_quota, batch_options) + super().__init__(name, resource_quota, batch_options) self._model_info, self._model_file = _get_model_info(tag, model_store) self._model_store = model_store self._backend = backend @@ -371,6 +372,7 @@ def load_runner( disable_copy_in_default_stream: bool = False, providers: t.Optional["_ProviderType"] = None, session_options: t.Optional["ort.SessionOptions"] = None, + name: t.Optional[str] = None, resource_quota: t.Optional[t.Dict[str, t.Any]] = None, batch_options: t.Optional[t.Dict[str, t.Any]] = None, model_store: "ModelStore" = Provide[BentoMLContainer.model_store], @@ -408,6 +410,9 @@ def load_runner( Examples:: """ # noqa + tag = Tag.from_taglike(tag) + if name is None: + name = tag.name return _ONNXRunner( tag=tag, backend=backend, @@ -415,6 +420,7 @@ def load_runner( disable_copy_in_default_stream=disable_copy_in_default_stream, providers=providers, session_options=session_options, + name=name, resource_quota=resource_quota, batch_options=batch_options, model_store=model_store, diff --git a/bentoml/_internal/frameworks/onnxmlir.py b/bentoml/_internal/frameworks/onnxmlir.py index cf9ccbdfdac..769941710d0 100644 --- a/bentoml/_internal/frameworks/onnxmlir.py +++ b/bentoml/_internal/frameworks/onnxmlir.py @@ -38,7 +38,7 @@ @inject def load( - tag: t.Union[str, Tag], + tag: Tag, model_store: "ModelStore" = Provide[BentoMLContainer.model_store], ) -> "ExecutionSession": """ @@ -121,13 +121,14 @@ class _ONNXMLirRunner(Runner): @inject def __init__( self, - tag: t.Union[str, Tag], + tag: Tag, + name: str, 
resource_quota: t.Optional[t.Dict[str, t.Any]], batch_options: t.Optional[t.Dict[str, t.Any]], model_store: "ModelStore" = Provide[BentoMLContainer.model_store], ): in_store_tag = model_store.get(tag).tag - super().__init__(str(in_store_tag), resource_quota, batch_options) + super().__init__(name, resource_quota, batch_options) self._model_store = model_store self._tag = in_store_tag @@ -157,6 +158,7 @@ def _run_batch(self, input_data: np.ndarray) -> np.ndarray: # type: ignore[over def load_runner( tag: t.Union[str, Tag], *, + name: t.Optional[str] = None, resource_quota: t.Optional[t.Dict[str, t.Any]] = None, batch_options: t.Optional[t.Dict[str, t.Any]] = None, model_store: "ModelStore" = Provide[BentoMLContainer.model_store], @@ -181,8 +183,12 @@ def load_runner( Examples:: """ # noqa + tag = Tag.from_taglike(tag) + if name is None: + name = tag.name return _ONNXMLirRunner( tag=tag, + name=name, resource_quota=resource_quota, batch_options=batch_options, model_store=model_store, diff --git a/bentoml/_internal/frameworks/paddle.py b/bentoml/_internal/frameworks/paddle.py index a1bb5fa2d4d..6638e5f7492 100644 --- a/bentoml/_internal/frameworks/paddle.py +++ b/bentoml/_internal/frameworks/paddle.py @@ -116,7 +116,7 @@ def _load_paddle_bentoml_default_config(model: "Model") -> "paddle.inference.Con @inject def load( - tag: t.Union[str, Tag], + tag: Tag, config: t.Optional["paddle.inference.Config"] = None, model_store: "ModelStore" = Provide[BentoMLContainer.model_store], **kwargs: str, @@ -404,13 +404,14 @@ class _PaddlePaddleRunner(Runner): @inject def __init__( self, - tag: t.Union[str, Tag], + tag: Tag, infer_api_callback: str, *, device: str, enable_gpu: bool, gpu_mem_pool_mb: int, config: t.Optional["paddle.inference.Config"], + name: str, resource_quota: t.Optional[t.Dict[str, t.Any]], batch_options: t.Optional[t.Dict[str, t.Any]], model_store: "ModelStore" = Provide[BentoMLContainer.model_store], @@ -424,7 +425,7 @@ def __init__( ) in_store_tag = 
model_store.get(tag).tag - super().__init__(str(in_store_tag), resource_quota, batch_options) + super().__init__(name, resource_quota, batch_options) self._infer_api_callback = infer_api_callback self._model_store = model_store self._enable_gpu = enable_gpu @@ -545,6 +546,7 @@ def load_runner( device: str = "cpu", enable_gpu: bool = False, gpu_mem_pool_mb: int = 0, + name: t.Optional[str] = None, config: t.Optional["paddle.inference.Config"] = None, resource_quota: t.Optional[t.Dict[str, t.Any]] = None, batch_options: t.Optional[t.Dict[str, t.Any]] = None, @@ -585,6 +587,9 @@ def load_runner( Examples:: """ + tag = Tag.from_taglike(tag) + if name is None: + name = tag.name return _PaddlePaddleRunner( tag=tag, infer_api_callback=infer_api_callback, @@ -592,6 +597,7 @@ def load_runner( enable_gpu=enable_gpu, gpu_mem_pool_mb=gpu_mem_pool_mb, config=config, + name=name, resource_quota=resource_quota, batch_options=batch_options, model_store=model_store, diff --git a/bentoml/_internal/frameworks/pycaret.py b/bentoml/_internal/frameworks/pycaret.py index 73124f26799..d1fb6e3d899 100644 --- a/bentoml/_internal/frameworks/pycaret.py +++ b/bentoml/_internal/frameworks/pycaret.py @@ -51,7 +51,7 @@ def _get_model_info( - tag: t.Union[str, Tag], model_store: "ModelStore" + tag: Tag, model_store: "ModelStore" ) -> t.Tuple["Model", PathType, PathType]: model = model_store.get(tag) if model.info.module not in (MODULE_NAME, __name__): @@ -66,7 +66,7 @@ def _get_model_info( @inject def load( - tag: t.Union[str, Tag], + tag: Tag, model_store: "ModelStore" = Provide[BentoMLContainer.model_store], ) -> t.Any: """ @@ -158,12 +158,13 @@ class _PycaretRunner(Runner): @inject def __init__( self, - tag: t.Union[str, Tag], + tag: Tag, + name: str, resource_quota: t.Optional[t.Dict[str, t.Any]], batch_options: t.Optional[t.Dict[str, t.Any]], model_store: "ModelStore" = Provide[BentoMLContainer.model_store], ): - super().__init__(str(tag), resource_quota, batch_options) + 
super().__init__(name, resource_quota, batch_options) model_info, model_file, pycaret_config = _get_model_info(tag, model_store) self._model_info = model_info @@ -201,6 +202,7 @@ def _run_batch(self, input_data: "pd.DataFrame") -> "pd.DataFrame": # type: ign def load_runner( tag: t.Union[str, Tag], *, + name: t.Optional[str] = None, resource_quota: t.Optional[t.Dict[str, t.Any]] = None, batch_options: t.Optional[t.Dict[str, t.Any]] = None, model_store: "ModelStore" = Provide[BentoMLContainer.model_store], @@ -244,8 +246,12 @@ def load_runner( prediction = runner._run_batch(input_data=data_unseen) print(prediction) """ # noqa + tag = Tag.from_taglike(tag) + if name is None: + name = tag.name return _PycaretRunner( tag=tag, + name=name, resource_quota=resource_quota, batch_options=batch_options, model_store=model_store, diff --git a/bentoml/_internal/frameworks/pytorch.py b/bentoml/_internal/frameworks/pytorch.py index 9c7973263de..53b6dbd999c 100644 --- a/bentoml/_internal/frameworks/pytorch.py +++ b/bentoml/_internal/frameworks/pytorch.py @@ -54,7 +54,7 @@ def _is_gpu_available() -> bool: # pragma: no cover @inject def load( - tag: t.Union[str, Tag], + tag: Tag, device_id: t.Optional[str] = "cpu", model_store: "ModelStore" = Provide[BentoMLContainer.model_store], ) -> _ModelType: @@ -178,9 +178,10 @@ class _PyTorchRunner(Runner): @inject def __init__( self, - tag: t.Union[str, Tag], + tag: Tag, predict_fn_name: str, device_id: str, + name: str, partial_kwargs: t.Optional[t.Dict[str, t.Any]], resource_quota: t.Optional[t.Dict[str, t.Any]], batch_options: t.Optional[t.Dict[str, t.Any]], @@ -188,7 +189,7 @@ def __init__( ): in_store_tag = model_store.get(tag).tag - super().__init__(str(in_store_tag), resource_quota, batch_options) + super().__init__(name, resource_quota, batch_options) self._predict_fn_name = predict_fn_name self._model_store = model_store if "cuda" in device_id: @@ -288,6 +289,7 @@ def load_runner( predict_fn_name: str = "__call__", device_id: str = 
"cpu:0", partial_kwargs: t.Optional[t.Dict[str, t.Any]] = None, + name: t.Optional[str] = None, resource_quota: t.Optional[t.Dict[str, t.Any]] = None, batch_options: t.Optional[t.Dict[str, t.Any]] = None, model_store: "ModelStore" = Provide[BentoMLContainer.model_store], @@ -321,11 +323,15 @@ def load_runner( runner = bentoml.pytorch.load_runner("ngrams:20201012_DE43A2") runner.run(pd.DataFrame("/path/to/csv")) """ # noqa + tag = Tag.from_taglike(tag) + if name is None: + name = tag.name return _PyTorchRunner( tag=tag, predict_fn_name=predict_fn_name, device_id=device_id, partial_kwargs=partial_kwargs, + name=name, resource_quota=resource_quota, batch_options=batch_options, model_store=model_store, diff --git a/bentoml/_internal/frameworks/pytorch_lightning.py b/bentoml/_internal/frameworks/pytorch_lightning.py index 2a4445d5c8f..4a701a3e9b3 100644 --- a/bentoml/_internal/frameworks/pytorch_lightning.py +++ b/bentoml/_internal/frameworks/pytorch_lightning.py @@ -40,7 +40,7 @@ @inject def load( - tag: t.Union[str, Tag], + tag: Tag, device_id: t.Optional[str] = "cpu", model_store: "ModelStore" = Provide[BentoMLContainer.model_store], ) -> "pl.LightningModule": @@ -171,6 +171,7 @@ def load_runner( predict_fn_name: str = "__call__", device_id: str = "cpu:0", partial_kwargs: t.Optional[t.Dict[str, t.Any]] = None, + name: t.Optional[str] = None, resource_quota: t.Union[None, t.Dict[str, t.Any]] = None, batch_options: t.Union[None, t.Dict[str, t.Any]] = None, model_store: "ModelStore" = Provide[BentoMLContainer.model_store], @@ -204,9 +205,13 @@ def load_runner( runner = bentoml.pytorch_lightning.load_runner("lit_classifier:20201012_DE43A2") runner.run(pd.DataFrame("/path/to/csv")) """ # noqa + tag = Tag.from_taglike(tag) + if name is None: + name = tag.name return _PyTorchLightningRunner( tag=tag, predict_fn_name=predict_fn_name, + name=name, device_id=device_id, partial_kwargs=partial_kwargs, resource_quota=resource_quota, diff --git 
a/bentoml/_internal/frameworks/sklearn.py b/bentoml/_internal/frameworks/sklearn.py index 67d704c8180..9747fe21052 100644 --- a/bentoml/_internal/frameworks/sklearn.py +++ b/bentoml/_internal/frameworks/sklearn.py @@ -48,9 +48,7 @@ pd = LazyLoader("pd", globals(), "pandas") -def _get_model_info( - tag: t.Union[str, Tag], model_store: "ModelStore" -) -> t.Tuple["Model", PathType]: +def _get_model_info(tag: Tag, model_store: "ModelStore") -> t.Tuple["Model", PathType]: model = model_store.get(tag) if model.info.module not in (MODULE_NAME, __name__): raise BentoMLException( @@ -63,7 +61,7 @@ def _get_model_info( @inject def load( - tag: t.Union[str, Tag], + tag: Tag, model_store: "ModelStore" = Provide[BentoMLContainer.model_store], ) -> t.Union["BaseEstimator", "Pipeline"]: """ @@ -137,13 +135,14 @@ class _SklearnRunner(Runner): @inject def __init__( self, - tag: t.Union[str, Tag], + tag: Tag, function_name: str, + name: str, resource_quota: t.Optional[t.Dict[str, t.Any]], batch_options: t.Optional[t.Dict[str, t.Any]], model_store: "ModelStore" = Provide[BentoMLContainer.model_store], ): - super().__init__(f"{tag}-{function_name}", resource_quota, batch_options) + super().__init__(name, resource_quota, batch_options) model_info, model_file = _get_model_info(tag, model_store) self._model_store = model_store self._model_info = model_info @@ -182,6 +181,7 @@ def load_runner( tag: t.Union[str, Tag], function_name: str = "predict", *, + name: t.Optional[str] = None, resource_quota: t.Union[None, t.Dict[str, t.Any]] = None, batch_options: t.Union[None, t.Dict[str, t.Any]] = None, model_store: "ModelStore" = Provide[BentoMLContainer.model_store], @@ -218,9 +218,13 @@ def load_runner( runner = bentoml.sklearn.load_runner("my_model:20201012_DE43A2") runner.run(input_data) """ # noqa + tag = Tag.from_taglike(tag) + if name is None: + name = tag.name return _SklearnRunner( tag=tag, function_name=function_name, + name=name, resource_quota=resource_quota, 
batch_options=batch_options, model_store=model_store, diff --git a/bentoml/_internal/frameworks/spacy.py b/bentoml/_internal/frameworks/spacy.py index 597441af0ae..85c0235aac7 100644 --- a/bentoml/_internal/frameworks/spacy.py +++ b/bentoml/_internal/frameworks/spacy.py @@ -89,7 +89,7 @@ @inject def load_project( - tag: t.Union[str, Tag], + tag: Tag, model_store: "ModelStore" = Provide[BentoMLContainer.model_store], ) -> str: model = model_store.get(tag) @@ -112,7 +112,7 @@ def load_project( @inject def load( - tag: t.Union[str, Tag], + tag: Tag, model_store: "ModelStore" = Provide[BentoMLContainer.model_store], vocab: t.Union["Vocab", bool] = True, # type: ignore[reportUnknownParameterType] disable: t.Iterable[str] = util.SimpleFrozenList(), # noqa @@ -322,8 +322,9 @@ class _SpacyRunner(Runner): @inject def __init__( self, - tag: t.Union[str, Tag], + tag: Tag, gpu_device_id: t.Optional[int], + name: str, resource_quota: t.Optional[t.Dict[str, t.Any]], batch_options: t.Optional[t.Dict[str, t.Any]], vocab: t.Union["Vocab", bool], # type: ignore[reportUnknownParameterType] @@ -337,7 +338,7 @@ def __init__( model_store: "ModelStore" = Provide[BentoMLContainer.model_store], ): in_store_tag = model_store.get(tag).tag - super().__init__(str(in_store_tag), resource_quota, batch_options) + super().__init__(name, resource_quota, batch_options) self._tag = in_store_tag self._vocab: t.Union["Vocab", bool] = vocab @@ -466,6 +467,7 @@ def load_runner( *, gpu_device_id: t.Optional[int] = None, backend_options: t.Optional[Literal["pytorch", "tensorflow"]] = None, + name: t.Optional[str] = None, resource_quota: t.Optional[t.Dict[str, t.Any]] = None, batch_options: t.Optional[t.Dict[str, t.Any]] = None, vocab: t.Union["Vocab", bool] = True, # type: ignore[reportUnknownParameterType] @@ -477,10 +479,14 @@ def load_runner( component_cfg: t.Optional[t.Dict[str, t.Dict[str, t.Any]]] = None, model_store: "ModelStore" = Provide[BentoMLContainer.model_store], ) -> "_SpacyRunner": + tag = 
Tag.from_taglike(tag) + if name is None: + name = tag.name return _SpacyRunner( tag=tag, gpu_device_id=gpu_device_id, backend_options=backend_options, + name=name, resource_quota=resource_quota, batch_options=batch_options, vocab=vocab, diff --git a/bentoml/_internal/frameworks/statsmodels.py b/bentoml/_internal/frameworks/statsmodels.py index 221c0162e00..2beb6de3a5c 100644 --- a/bentoml/_internal/frameworks/statsmodels.py +++ b/bentoml/_internal/frameworks/statsmodels.py @@ -51,7 +51,7 @@ def _get_model_info( - tag: t.Union[str, Tag], + tag: Tag, model_store: "ModelStore", ) -> t.Tuple["Model", PathType]: model = model_store.get(tag) @@ -66,7 +66,7 @@ def _get_model_info( @inject def load( - tag: t.Union[str, Tag], + tag: Tag, model_store: "ModelStore" = Provide[BentoMLContainer.model_store], ) -> _MT: """ @@ -136,13 +136,14 @@ class _StatsModelsRunner(Runner): @inject def __init__( self, - tag: t.Union[str, Tag], + tag: Tag, predict_fn_name: str, + name: str, resource_quota: t.Optional[t.Dict[str, t.Any]], batch_options: t.Optional[t.Dict[str, t.Any]], model_store: "ModelStore" = Provide[BentoMLContainer.model_store], ): - super().__init__(str(tag), resource_quota, batch_options) + super().__init__(name, resource_quota, batch_options) model_info, model_file = _get_model_info(tag, model_store) self._predict_fn_name = predict_fn_name self._model_info = model_info @@ -184,6 +185,7 @@ def load_runner( tag: t.Union[str, Tag], *, predict_fn_name: str = "predict", + name: t.Optional[str] = None, resource_quota: t.Union[None, t.Dict[str, t.Any]] = None, batch_options: t.Union[None, t.Dict[str, t.Any]] = None, model_store: "ModelStore" = Provide[BentoMLContainer.model_store], @@ -210,9 +212,13 @@ def load_runner( Examples:: """ # noqa + tag = Tag.from_taglike(tag) + if name is None: + name = tag.name return _StatsModelsRunner( tag=tag, predict_fn_name=predict_fn_name, + name=name, resource_quota=resource_quota, batch_options=batch_options, model_store=model_store, diff 
--git a/bentoml/_internal/frameworks/tensorflow.py b/bentoml/_internal/frameworks/tensorflow.py index eadd004b3e2..f2f68c1e640 100644 --- a/bentoml/_internal/frameworks/tensorflow.py +++ b/bentoml/_internal/frameworks/tensorflow.py @@ -190,7 +190,7 @@ def _load_tf_saved_model(path: str) -> t.Union["tracking.AutoTrackable", t.Any]: @inject def load( - tag: t.Union[str, Tag], + tag: Tag, tfhub_tags: t.Optional[t.List[str]] = None, tfhub_options: t.Optional[t.Any] = None, load_as_wrapper: t.Optional[bool] = None, @@ -405,17 +405,18 @@ class _TensorflowRunner(Runner): @inject def __init__( self, - tag: t.Union[str, Tag], + tag: Tag, predict_fn_name: str, device_id: str, partial_kwargs: t.Optional[t.Dict[str, t.Any]], + name: str, resource_quota: t.Optional[t.Dict[str, t.Any]], batch_options: t.Optional[t.Dict[str, t.Any]], model_store: "ModelStore" = Provide[BentoMLContainer.model_store], ): in_store_tag = model_store.get(tag).tag self._tag = in_store_tag - super().__init__(str(in_store_tag), resource_quota, batch_options) + super().__init__(name, resource_quota, batch_options) self._device_id = device_id self._configure(device_id) @@ -517,6 +518,7 @@ def load_runner( predict_fn_name: str = "__call__", device_id: str = "CPU:0", partial_kwargs: t.Optional[t.Dict[str, t.Any]] = None, + name: t.Optional[str] = None, resource_quota: t.Union[None, t.Dict[str, t.Any]] = None, batch_options: t.Union[None, t.Dict[str, t.Any]] = None, model_store: "ModelStore" = Provide[BentoMLContainer.model_store], @@ -544,11 +546,15 @@ def load_runner( Runner instances for `bentoml.tensorflow` model Examples:: """ # noqa + tag = Tag.from_taglike(tag) + if name is None: + name = tag.name return _TensorflowRunner( tag=tag, predict_fn_name=predict_fn_name, device_id=device_id, partial_kwargs=partial_kwargs, + name=name, resource_quota=resource_quota, batch_options=batch_options, model_store=model_store, diff --git a/bentoml/_internal/frameworks/transformers.py 
b/bentoml/_internal/frameworks/transformers.py index 2430c1b0b26..ff90efbf1ff 100644 --- a/bentoml/_internal/frameworks/transformers.py +++ b/bentoml/_internal/frameworks/transformers.py @@ -198,7 +198,7 @@ def _check_flax_supported() -> None: # pragma: no cover @inject def load( - tag: t.Union[str, Tag], + tag: Tag, from_tf: bool = False, from_flax: bool = False, framework: str = "pt", @@ -722,7 +722,7 @@ class _TransformersRunner(Runner): @inject def __init__( self, - tag: t.Union[str, Tag], + tag: Tag, tasks: str, *, framework: str, @@ -819,6 +819,7 @@ def load_runner( framework: str = "pt", lm_head: str = "casual", device: int = -1, + name: t.Optional[str] = None, resource_quota: t.Optional[t.Dict[str, t.Any]] = None, batch_options: t.Optional[t.Dict[str, t.Any]] = None, model_store: "ModelStore" = Provide[BentoMLContainer.model_store], @@ -867,12 +868,16 @@ def load_runner( framework=tf) runner.run_batch(["In today news, ...", "The stocks market seems ..."]) """ + tag = Tag.from_taglike(tag) + if name is None: + name = tag.name return _TransformersRunner( tag=tag, tasks=tasks, framework=framework, lm_head=lm_head, device=device, + name=name, resource_quota=resource_quota, batch_options=batch_options, model_store=model_store, diff --git a/bentoml/_internal/frameworks/xgboost.py b/bentoml/_internal/frameworks/xgboost.py index 86bd3079d5a..3e63c712f1a 100644 --- a/bentoml/_internal/frameworks/xgboost.py +++ b/bentoml/_internal/frameworks/xgboost.py @@ -57,7 +57,7 @@ def _get_model_info( - tag: t.Union[str, Tag], + tag: Tag, booster_params: t.Optional[t.Dict[str, t.Union[str, int]]], model_store: "ModelStore", ) -> t.Tuple["Model", str, t.Dict[str, t.Any]]: @@ -181,14 +181,15 @@ class _XgBoostRunner(Runner): @inject def __init__( self, - tag: t.Union[str, Tag], + tag: Tag, predict_fn_name: str, booster_params: t.Optional[t.Dict[str, t.Union[str, int]]], + name: str, resource_quota: t.Optional[t.Dict[str, t.Any]], batch_options: t.Optional[t.Dict[str, t.Any]], 
model_store: "ModelStore" = Provide[BentoMLContainer.model_store], ): - super().__init__(str(tag), resource_quota, batch_options) + super().__init__(name, resource_quota, batch_options) model_info, model_file, booster_params = _get_model_info( tag, booster_params, model_store ) @@ -258,6 +259,7 @@ def load_runner( predict_fn_name: str = "predict", *, booster_params: t.Optional[t.Dict[str, t.Union[str, int]]] = None, + name: t.Optional[str] = None, resource_quota: t.Optional[t.Dict[str, t.Any]] = None, batch_options: t.Optional[t.Dict[str, t.Any]] = None, model_store: "ModelStore" = Provide[BentoMLContainer.model_store], @@ -296,10 +298,14 @@ def load_runner( runner = bentoml.xgboost.load_runner("my_model:20201012_DE43A2") runner.run(xgb.DMatrix(input_data)) """ # noqa + tag = Tag.from_taglike(tag) + if name is None: + name = tag.name return _XgBoostRunner( tag=tag, predict_fn_name=predict_fn_name, booster_params=booster_params, + name=name, resource_quota=resource_quota, batch_options=batch_options, model_store=model_store, diff --git a/bentoml/_internal/runner/runner.py b/bentoml/_internal/runner/runner.py index 725512caff3..44e6a7b59d7 100644 --- a/bentoml/_internal/runner/runner.py +++ b/bentoml/_internal/runner/runner.py @@ -87,30 +87,21 @@ class BatchOptions: VARNAME_RE = re.compile(r"\W|^(?=\d)") -class _BaseRunner: +class BaseRunner: EXIST_NAMES: t.Set[str] = set() def __init__( self, - display_name: t.Union[str, Tag], + name: t.Union[str, Tag], resource_quota: t.Optional[t.Dict[str, t.Any]] = None, batch_options: t.Optional[t.Dict[str, t.Any]] = None, ): # probe an unique name - if isinstance(display_name, Tag): - display_name = display_name.name - if not display_name.isidentifier(): - display_name = VARNAME_RE.sub("_", display_name) - i = 0 - while True: - name = display_name if i == 0 else f"{display_name}_{i}" - if name not in self.EXIST_NAMES: - self.EXIST_NAMES.add(name) - break - else: - i += 1 + if isinstance(name, Tag): + name = name.name + if not 
name.isidentifier(): + name = VARNAME_RE.sub("_", name) self.name = name - self.resource_quota = ResourceQuota( **(resource_quota if resource_quota else {}) ) @@ -149,7 +140,7 @@ def run_batch(self, *args: t.Any, **kwargs: t.Any) -> t.Any: return self._impl.run_batch(*args, **kwargs) -class Runner(_BaseRunner, ABC): +class Runner(BaseRunner, ABC): """ Runner represents a unit of serving logic that can be scaled horizontally to maximize throughput. This Runner class is an abstract class, used for creating @@ -189,7 +180,7 @@ def _run_batch(self, *args: t.Any, **kwargs: t.Any) -> t.Any: ... -class SimpleRunner(_BaseRunner, ABC): +class SimpleRunner(BaseRunner, ABC): """ SimpleRunner is a special type of Runner that does not support dynamic batching. Instead of `_run_batch` in Runner, a `_run` method is expected to be defined in its @@ -211,7 +202,7 @@ class RunnerState(enum.IntEnum): class RunnerImpl: - def __init__(self, runner: _BaseRunner): + def __init__(self, runner: BaseRunner): self._runner = runner self._state: RunnerState = RunnerState.INIT @@ -239,7 +230,7 @@ class RunnerImplPool: _runner_map: t.Dict[str, RunnerImpl] = {} @classmethod - def get_by_runner(cls, runner: _BaseRunner) -> RunnerImpl: + def get_by_runner(cls, runner: BaseRunner) -> RunnerImpl: if runner.name in cls._runner_map: return cls._runner_map[runner.name] diff --git a/bentoml/_internal/service/service.py b/bentoml/_internal/service/service.py index 1cedbb867eb..3ee921fde0c 100644 --- a/bentoml/_internal/service/service.py +++ b/bentoml/_internal/service/service.py @@ -4,10 +4,10 @@ from typing import TYPE_CHECKING from ..types import Tag -from ..runner import Runner from ...exceptions import BentoMLException from ..bento.bento import _get_default_bento_readme from .inference_api import InferenceAPI +from ..runner.runner import BaseRunner from ..io_descriptors import IODescriptor from ..utils.validation import validate_tag_str @@ -52,7 +52,7 @@ class Service: # For docs property _doc: 
t.Optional[str] = None - def __init__(self, name: str, runners: t.Optional[t.List[Runner]] = None): + def __init__(self, name: str, runners: t.Optional[t.List[BaseRunner]] = None): lower_name = name.lower() if name != lower_name: @@ -63,12 +63,19 @@ def __init__(self, name: str, runners: t.Optional[t.List[Runner]] = None): self.name = lower_name if runners is not None: - assert all( - isinstance(r, Runner) for r in runners - ), "Service runners list must only contain runner instances" - self.runners = {r.name: r for r in runners} + self.runners = {} + for r in runners: + if r.name in self.runners: + raise ValueError( + f"Found duplicate name `{r.name}` in service runners." + ) + if not isinstance(r, BaseRunner): + raise ValueError( + "Service runners list must only contain runner instances" + ) + self.runners[r.name] = r else: - self.runners: t.Dict[str, Runner] = {} + self.runners: t.Dict[str, BaseRunner] = {} self._mount_apps: t.List[t.Tuple[t.Union["ASGIApp", WSGI_APP], str, str]] = [] self._middlewares: t.List[t.Tuple[t.Type["Middleware"], t.Any]] = [] diff --git a/bentoml/pyspark.py b/bentoml/pyspark.py index 2066cc0ce31..6ce76c37b71 100644 --- a/bentoml/pyspark.py +++ b/bentoml/pyspark.py @@ -72,7 +72,7 @@ class _PySparkMLlibRunner(Runner): @inject def __init__( self, - tag: t.Union[str, Tag], + tag: Tag, resource_quota: t.Dict[str, t.Any], batch_options: t.Dict[str, t.Any], model_store: "ModelStore" = Provide[BentoMLContainer.model_store], diff --git a/tests/e2e/bento_server_general_features/service.py b/tests/e2e/bento_server_general_features/service.py index f9ce3459ce0..78286314b44 100644 --- a/tests/e2e/bento_server_general_features/service.py +++ b/tests/e2e/bento_server_general_features/service.py @@ -24,20 +24,34 @@ class _Schema(pydantic.BaseModel): endpoints: t.List[str] -json_echo_runner = bentoml.sklearn.load_runner("sk_model", function_name="echo_json") +json_echo_runner = bentoml.sklearn.load_runner( + "sk_model", + function_name="echo_json", + 
name="json_echo_runner", +) ndarray_pred_runner = bentoml.sklearn.load_runner( - "sk_model", function_name="predict_ndarray" + "sk_model", + function_name="predict_ndarray", + name="ndarray_pred_runner", ) dataframe_pred_runner = bentoml.sklearn.load_runner( - "sk_model", function_name="predict_dataframe" + "sk_model", + function_name="predict_dataframe", + name="dataframe_pred_runner", +) +file_pred_runner = bentoml.sklearn.load_runner( + "sk_model", function_name="predict_file", name="file_pred_runner" ) -file_pred_runner = bentoml.sklearn.load_runner("sk_model", function_name="predict_file") multi_ndarray_pred_runner = bentoml.sklearn.load_runner( - "sk_model", function_name="predict_multi_ndarray" + "sk_model", + function_name="predict_multi_ndarray", + name="multi_ndarray_pred_runner", ) echo_multi_ndarray_pred_runner = bentoml.sklearn.load_runner( - "sk_model", function_name="echo_multi_ndarray" + "sk_model", + function_name="echo_multi_ndarray", + name="echo_multi_ndarray_pred_runner", ) diff --git a/tests/e2e/bento_server_general_features_sync/service.py b/tests/e2e/bento_server_general_features_sync/service.py index 4a9ddec09c3..338c9b7a489 100644 --- a/tests/e2e/bento_server_general_features_sync/service.py +++ b/tests/e2e/bento_server_general_features_sync/service.py @@ -24,20 +24,34 @@ class _Schema(pydantic.BaseModel): endpoints: t.List[str] -json_echo_runner = bentoml.sklearn.load_runner("sk_model", function_name="echo_json") +json_echo_runner = bentoml.sklearn.load_runner( + "sk_model", + function_name="echo_json", + name="json_echo_runner", +) ndarray_pred_runner = bentoml.sklearn.load_runner( - "sk_model", function_name="predict_ndarray" + "sk_model", + function_name="predict_ndarray", + name="ndarray_pred_runner", ) dataframe_pred_runner = bentoml.sklearn.load_runner( - "sk_model", function_name="predict_dataframe" + "sk_model", + function_name="predict_dataframe", + name="dataframe_pred_runner", +) +file_pred_runner = bentoml.sklearn.load_runner( 
+ "sk_model", function_name="predict_file", name="file_pred_runner" ) -file_pred_runner = bentoml.sklearn.load_runner("sk_model", function_name="predict_file") multi_ndarray_pred_runner = bentoml.sklearn.load_runner( - "sk_model", function_name="predict_multi_ndarray" + "sk_model", + function_name="predict_multi_ndarray", + name="multi_ndarray_pred_runner", ) echo_multi_ndarray_pred_runner = bentoml.sklearn.load_runner( - "sk_model", function_name="echo_multi_ndarray" + "sk_model", + function_name="echo_multi_ndarray", + name="echo_multi_ndarray_pred_runner", ) From c9e68f929cdf4308b571b4e6e45da52176f99776 Mon Sep 17 00:00:00 2001 From: bojiang Date: Thu, 23 Dec 2021 20:10:01 +0800 Subject: [PATCH 2/3] fix --- bentoml/_internal/frameworks/tensorflow.py | 2 +- bentoml/_internal/runner/runner.py | 30 +++++++++------------- 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/bentoml/_internal/frameworks/tensorflow.py b/bentoml/_internal/frameworks/tensorflow.py index f2f68c1e640..15f3647e11b 100644 --- a/bentoml/_internal/frameworks/tensorflow.py +++ b/bentoml/_internal/frameworks/tensorflow.py @@ -518,7 +518,7 @@ def load_runner( predict_fn_name: str = "__call__", device_id: str = "CPU:0", partial_kwargs: t.Optional[t.Dict[str, t.Any]] = None, - name: t.Optional[t.String] = None, + name: t.Optional[str] = None, resource_quota: t.Union[None, t.Dict[str, t.Any]] = None, batch_options: t.Union[None, t.Dict[str, t.Any]] = None, model_store: "ModelStore" = Provide[BentoMLContainer.model_store], diff --git a/bentoml/_internal/runner/runner.py b/bentoml/_internal/runner/runner.py index 44e6a7b59d7..eecb11ba8a0 100644 --- a/bentoml/_internal/runner/runner.py +++ b/bentoml/_internal/runner/runner.py @@ -8,6 +8,7 @@ import attr import psutil +from simple_di.providers import SingletonFactory from .utils import cpu_converter from .utils import gpu_converter @@ -106,6 +107,7 @@ def __init__( **(resource_quota if resource_quota else {}) ) self.batch_options = 
BatchOptions(**(batch_options if batch_options else {})) + self._impl_provider = SingletonFactory(create_runner_impl, self) @property def num_concurrency_per_replica(self) -> int: @@ -125,7 +127,7 @@ def _setup(self) -> None: @property def _impl(self) -> "RunnerImpl": - return RunnerImplPool.get_by_runner(self) + return self._impl_provider.get() async def async_run(self, *args: t.Any, **kwargs: t.Any) -> t.Any: return await self._impl.async_run(*args, **kwargs) @@ -226,23 +228,15 @@ def run_batch(self, *args: t.Any, **kwargs: t.Any) -> t.Any: ... -class RunnerImplPool: - _runner_map: t.Dict[str, RunnerImpl] = {} +def create_runner_impl(runner: BaseRunner) -> RunnerImpl: + remote_runner_mapping = BentoServerContainer.remote_runner_mapping.get() + if runner.name in remote_runner_mapping: + from .remote import RemoteRunnerClient - @classmethod - def get_by_runner(cls, runner: BaseRunner) -> RunnerImpl: - if runner.name in cls._runner_map: - return cls._runner_map[runner.name] - - remote_runner_mapping = BentoServerContainer.remote_runner_mapping.get() - if runner.name in remote_runner_mapping: - from .remote import RemoteRunnerClient - - impl = RemoteRunnerClient(runner) - else: - from .local import LocalRunner + impl = RemoteRunnerClient(runner) + else: + from .local import LocalRunner - impl = LocalRunner(runner) + impl = LocalRunner(runner) - cls._runner_map[runner.name] = impl - return impl + return impl From c68595af5fe55536185e803e3ca366322d5655d9 Mon Sep 17 00:00:00 2001 From: bojiang Date: Thu, 23 Dec 2021 20:23:07 +0800 Subject: [PATCH 3/3] fix for transformers --- bentoml/_internal/frameworks/transformers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bentoml/_internal/frameworks/transformers.py b/bentoml/_internal/frameworks/transformers.py index ff90efbf1ff..6b0f81edbfa 100644 --- a/bentoml/_internal/frameworks/transformers.py +++ b/bentoml/_internal/frameworks/transformers.py @@ -728,6 +728,7 @@ def __init__( framework: str, 
lm_head: str, device: int, + name: str, resource_quota: t.Optional[t.Dict[str, t.Any]], batch_options: t.Optional[t.Dict[str, t.Any]], model_store: "ModelStore" = Provide[BentoMLContainer.model_store], @@ -735,7 +736,7 @@ def __init__( ): in_store_tag = model_store.get(tag).tag self._tag = in_store_tag - super().__init__(str(in_store_tag), resource_quota, batch_options) + super().__init__(name, resource_quota, batch_options) try: transformers.pipelines.check_task(tasks)