Skip to content

Commit

Permalink
[Refactor] Merge the sparse and dense models' CI branches.
Browse files Browse the repository at this point in the history
Signed-off-by: conggguan <congguan@amazon.com>
  • Loading branch information
conggguan committed Jul 31, 2024
1 parent 6578981 commit 6546ddb
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 50 deletions.
40 changes: 12 additions & 28 deletions .ci/run-repository.sh
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ elif [[ "$TASK_TYPE" == "doc" ]]; then

docker cp opensearch-py-ml-doc-runner:/code/opensearch-py-ml/docs/build/ ./docs/
docker rm opensearch-py-ml-doc-runner
elif [[ "$TASK_TYPE" == "SentenceTransformerTrace" ]]; then
elif [[ "$TASK_TYPE" == "SentenceTransformerTrace" || "$TASK_TYPE" == "SparseTrace" ]]; then
# Set up OpenSearch cluster & Run model autotracing (Invoked by model_uploader.yml workflow)
echo -e "\033[34;1mINFO:\033[0m MODEL_ID: ${MODEL_ID}\033[0m"
echo -e "\033[34;1mINFO:\033[0m MODEL_VERSION: ${MODEL_VERSION}\033[0m"
Expand All @@ -74,30 +74,16 @@ elif [[ "$TASK_TYPE" == "SentenceTransformerTrace" ]]; then
echo -e "\033[34;1mINFO:\033[0m POOLING_MODE: ${POOLING_MODE:-N/A}\033[0m"
echo -e "\033[34;1mINFO:\033[0m MODEL_DESCRIPTION: ${MODEL_DESCRIPTION:-N/A}\033[0m"

docker run \
--network=${network_name} \
--env "STACK_VERSION=${STACK_VERSION}" \
--env "OPENSEARCH_URL=${opensearch_url}" \
--env "OPENSEARCH_VERSION=${OPENSEARCH_VERSION}" \
--env "TEST_SUITE=${TEST_SUITE}" \
--env "PYTHON_CONNECTION_CLASS=${PYTHON_CONNECTION_CLASS}" \
--env "TEST_TYPE=server" \
--name opensearch-py-ml-trace-runner \
opensearch-project/opensearch-py-ml \
nox -s "trace-${PYTHON_VERSION}" -- ${MODEL_ID} ${MODEL_VERSION} ${TRACING_FORMAT} -ed ${EMBEDDING_DIMENSION} -pm ${POOLING_MODE} -md ${MODEL_DESCRIPTION:+"$MODEL_DESCRIPTION"}

# To upload a model, we need the model artifact, description, and license files in a local path.
# trace_output should include the description and license files.
docker cp opensearch-py-ml-trace-runner:/code/opensearch-py-ml/upload/ ./upload/
docker cp opensearch-py-ml-trace-runner:/code/opensearch-py-ml/trace_output/ ./trace_output/
docker rm opensearch-py-ml-trace-runner

elif [[ "$TASK_TYPE" == "SparseTrace" ]]; then
# Set up OpenSearch cluster & Run model autotracing (Invoked by model_uploader.yml workflow)
echo -e "\033[34;1mINFO:\033[0m MODEL_ID: ${MODEL_ID}\033[0m"
echo -e "\033[34;1mINFO:\033[0m MODEL_VERSION: ${MODEL_VERSION}\033[0m"
echo -e "\033[34;1mINFO:\033[0m TRACING_FORMAT: ${TRACING_FORMAT}\033[0m"
echo -e "\033[34;1mINFO:\033[0m MODEL_DESCRIPTION: ${MODEL_DESCRIPTION:-N/A}\033[0m"
if [[ "$TASK_TYPE" == "SentenceTransformerTrace" ]]; then
NOX_TRACE_TYPE="trace"
EXTRA_ARGS="-ed ${EMBEDDING_DIMENSION} -pm ${POOLING_MODE}"
elif [[ "$TASK_TYPE" == "SparseTrace" ]]; then
NOX_TRACE_TYPE="sparsetrace"
EXTRA_ARGS=""
else
echo "Unknown TASK_TYPE: $TASK_TYPE"
exit 1
fi

docker run \
--network=${network_name} \
Expand All @@ -109,13 +95,11 @@ elif [[ "$TASK_TYPE" == "SparseTrace" ]]; then
--env "TEST_TYPE=server" \
--name opensearch-py-ml-trace-runner \
opensearch-project/opensearch-py-ml \
nox -s "sparsetrace-${PYTHON_VERSION}" -- ${MODEL_ID} ${MODEL_VERSION} ${TRACING_FORMAT} -md ${MODEL_DESCRIPTION:+"$MODEL_DESCRIPTION"}
nox -s "${NOX_TRACE_TYPE}-${PYTHON_VERSION}" -- ${MODEL_ID} ${MODEL_VERSION} ${TRACING_FORMAT} ${EXTRA_ARGS} -md ${MODEL_DESCRIPTION:+"$MODEL_DESCRIPTION"}

# To upload a model, we need the model artifact, description, and license files in a local path.
# trace_output should include the description and license files.
docker cp opensearch-py-ml-trace-runner:/code/opensearch-py-ml/upload/ ./upload/
docker cp opensearch-py-ml-trace-runner:/code/opensearch-py-ml/trace_output/ ./trace_output/

# Delete the docker image
docker rm opensearch-py-ml-trace-runner
fi
2 changes: 1 addition & 1 deletion opensearch_py_ml/ml_commons/ml_common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
MODEL_CHUNK_MAX_SIZE = 10_000_000
MODEL_MAX_SIZE = 4_000_000_000
BUF_SIZE = 65536  # let's read data in 64 KB chunks!
TIMEOUT = 120 # timeout for synchronous method calls in seconds
TIMEOUT = 240 # timeout for synchronous method calls in seconds
META_API_ENDPOINT = "models/meta"
MODEL_NAME_FIELD = "name"
MODEL_VERSION_FIELD = "version"
Expand Down
2 changes: 1 addition & 1 deletion opensearch_py_ml/ml_commons/ml_commons_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -509,7 +509,7 @@ def generate_sparse_encoding(self, model_id: str, sentences: List[str]) -> objec
:return: Returns a JSON object `inference_results` containing the sparse encoding results for the given sentences.
:rtype: object
"""
API_URL = f"{ML_BASE_URI}/_predict/sparse_encoding/{model_id}"
API_URL = f"{ML_BASE_URI}/models/{model_id}/_predict/"
API_BODY = {"text_docs": sentences}
return self._client.transport.perform_request(
method="POST",
Expand Down
8 changes: 6 additions & 2 deletions utils/model_uploader/autotracing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
TORCH_SCRIPT_FORMAT = "TORCH_SCRIPT"
ONNX_FORMAT = "ONNX"

DENSE_MODEL_ALGORITHM = "TEXT_EMBEDDING"
SPARSE_ALGORITHM = "SPARSE_ENCODING"
TEMP_MODEL_PATH = "temp_model_path"
TORCHSCRIPT_FOLDER_PATH = "model-torchscript/"
ONNX_FOLDER_PATH = "model-onnx/"
Expand Down Expand Up @@ -69,7 +71,9 @@ def register_and_deploy_model(
), f"Raised Exception in {model_format} model registration/deployment: {e}"


def check_model_status(ml_client: "MLCommonClient", model_id: str, model_format: str):
def check_model_status(
ml_client: "MLCommonClient", model_id: str, model_format: str, model_algorithm: str
):
"""
Check the status of the model.
Expand All @@ -87,7 +91,7 @@ def check_model_status(ml_client: "MLCommonClient", model_id: str, model_format:
print(ml_model_status)
assert ml_model_status.get("model_state") == "DEPLOYED"
assert ml_model_status.get("model_format") == model_format
assert ml_model_status.get("algorithm") == "SPARSE_ENCODING"
assert ml_model_status.get("algorithm") == model_algorithm
except Exception as e:
assert False, f"Raised Exception in getting {model_format} model info: {e}"

Expand Down
24 changes: 12 additions & 12 deletions utils/model_uploader/model_autotracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,20 @@
from numpy.typing import DTypeLike
from sentence_transformers import SentenceTransformer

# We need to append ROOT_DIR path so that we can import
# OPENSEARCH_TEST_CLIENT and opensearch_py_ml since this
# python script is not in the root directory.
THIS_DIR = os.path.dirname(__file__)
ROOT_DIR = os.path.join(THIS_DIR, "../..")
sys.path.append(ROOT_DIR)

from opensearch_py_ml.ml_commons import MLCommonClient
from opensearch_py_ml.ml_models.sentencetransformermodel import SentenceTransformerModel
from tests import OPENSEARCH_TEST_CLIENT
from utils.model_uploader.autotracing_utils import (
ATOL_TEST,
BOTH_FORMAT,
DENSE_MODEL_ALGORITHM,
ONNX_FOLDER_PATH,
ONNX_FORMAT,
RTOL_TEST,
Expand All @@ -40,17 +51,6 @@
store_license_verified_variable,
)

# We need to append ROOT_DIR path so that we can import
# OPENSEARCH_TEST_CLIENT and opensearch_py_ml since this
# python script is not in the root directory.
THIS_DIR = os.path.dirname(__file__)
ROOT_DIR = os.path.join(THIS_DIR, "../..")
sys.path.append(ROOT_DIR)

from opensearch_py_ml.ml_commons import MLCommonClient
from opensearch_py_ml.ml_models.sentencetransformermodel import SentenceTransformerModel
from tests import OPENSEARCH_TEST_CLIENT

TEST_SENTENCES = [
"First test sentence",
"This is another sentence used for testing model embedding outputs.",
Expand Down Expand Up @@ -203,7 +203,7 @@ def register_and_deploy_sentence_transformer_model(
ml_client, model_format, model_path, model_config_path
)
# 2.) Check model status
check_model_status(ml_client, model_id, model_format)
check_model_status(ml_client, model_id, model_format, DENSE_MODEL_ALGORITHM)
# 3.) Generate embeddings
try:
embedding_output = ml_client.generate_embedding(model_id, TEST_SENTENCES)
Expand Down
17 changes: 11 additions & 6 deletions utils/model_uploader/sparse_model_autotracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,27 @@
from opensearch_py_ml.ml_commons import MLCommonClient
from opensearch_py_ml.ml_models import SparseEncodingModel
from tests import OPENSEARCH_TEST_CLIENT
from utils.model_uploader import autotracing_utils
from utils.model_uploader.autotracing_utils import (
ATOL_TEST,
BOTH_FORMAT,
ONNX_FOLDER_PATH,
ONNX_FORMAT,
RTOL_TEST,
SPARSE_ALGORITHM,
TEMP_MODEL_PATH,
TORCH_SCRIPT_FORMAT,
TORCHSCRIPT_FOLDER_PATH,
ModelTraceError,
autotracing_warning_filters,
check_model_status,
delete_model,
init_sparse_model,
prepare_files_for_uploading,
preview_model_config,
register_and_deploy_model,
store_description_variable,
store_license_verified_variable,
undeploy_model,
verify_license_by_hfapi,
)

Expand Down Expand Up @@ -116,10 +121,10 @@ def register_and_deploy_sparse_encoding_model(
texts: list[str],
) -> list:
encoding_datas = None
model_id = autotracing_utils.register_and_deploy_model(
model_id = register_and_deploy_model(
ml_client, model_format, model_path, model_config_path
)
autotracing_utils.check_model_status(ml_client, model_id, model_format)
check_model_status(ml_client, model_id, model_format, SPARSE_ALGORITHM)
try:
encoding_output = ml_client.generate_sparse_encoding(model_id, texts)
encoding_datas = [
Expand All @@ -132,8 +137,8 @@ def register_and_deploy_sparse_encoding_model(
assert (
False
), f"Raised Exception in generating sparse encoding with {model_format} model: {e}"
autotracing_utils.undeploy_model(ml_client, model_id, model_format)
autotracing_utils.delete_model(ml_client, model_id, model_format)
undeploy_model(ml_client, model_id, model_format)
delete_model(ml_client, model_id, model_format)
return encoding_datas


Expand Down Expand Up @@ -303,7 +308,7 @@ def main(


if __name__ == "__main__":
autotracing_utils.autotracing_warning_filters()
autotracing_warning_filters()

parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
Expand Down

0 comments on commit 6546ddb

Please sign in to comment.