From 55ef7beeaa7e1a142746824fcc71b2af8718d29f Mon Sep 17 00:00:00 2001 From: "opensearch-trigger-bot[bot]" <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com> Date: Wed, 31 Jan 2024 13:28:52 +0800 Subject: [PATCH] Fix the flaky test due to m_l_limit_exceeded_exception (#150) (#164) * increase the CB threshold, delete model after test * add log * add wait time * enhancement: wait model undeploy before delete; refactor the wait response logic * modify ci yml --------- (cherry picked from commit 0791c34f12f3192d104046842ce70914e7acf7c7) Signed-off-by: zhichao-aws Signed-off-by: github-actions[bot] Co-authored-by: github-actions[bot] --- .github/workflows/ci.yml | 12 ++--- .../integTest/BaseAgentToolsIT.java | 52 +++++++++++++++---- .../integTest/NeuralSparseSearchToolIT.java | 1 + .../integTest/ToolIntegrationTest.java | 5 ++ 4 files changed, 51 insertions(+), 19 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 87f9899c..aa65ae37 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,10 +35,7 @@ jobs: needs: Get-CI-Image-Tag strategy: matrix: - java: - - 11 - - 17 - - 21.0.1 + java: [11, 17, 21] name: Build and Test skills plugin on Linux runs-on: ubuntu-latest container: @@ -71,7 +68,7 @@ jobs: build-MacOS: strategy: matrix: - java: [ 11, 17 ] + java: [11, 17, 21] name: Build and Test skills Plugin on MacOS needs: Get-CI-Image-Tag @@ -95,10 +92,7 @@ jobs: build-windows: strategy: matrix: - java: - - 11 - - 17 - - 21.0.1 + java: [11, 17, 21] name: Build and Test skills plugin on Windows needs: Get-CI-Image-Tag runs-on: windows-latest diff --git a/src/test/java/org/opensearch/integTest/BaseAgentToolsIT.java b/src/test/java/org/opensearch/integTest/BaseAgentToolsIT.java index 21b2a8d4..38d43377 100644 --- a/src/test/java/org/opensearch/integTest/BaseAgentToolsIT.java +++ b/src/test/java/org/opensearch/integTest/BaseAgentToolsIT.java @@ -10,6 +10,7 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.function.Predicate; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; @@ -35,6 +36,7 @@ import org.opensearch.ml.common.MLTask; import org.opensearch.ml.common.MLTaskState; import org.opensearch.ml.common.input.execute.agent.AgentMLInput; +import org.opensearch.ml.common.model.MLModelState; import org.opensearch.ml.common.output.model.ModelTensor; import org.opensearch.ml.common.output.model.ModelTensorOutput; import org.opensearch.ml.common.output.model.ModelTensors; @@ -57,6 +59,7 @@ public void updateClusterSettings() { updateClusterSettings("plugins.ml_commons.only_run_on_ml_node", false); // default threshold for native circuit breaker is 90, it may be not enough on test runner machine updateClusterSettings("plugins.ml_commons.native_memory_threshold", 100); + updateClusterSettings("plugins.ml_commons.jvm_heap_memory_threshold", 100); updateClusterSettings("plugins.ml_commons.allow_registering_model_via_url", true); } @@ -123,26 +126,35 @@ protected String indexMonitor(String monitorAsJsonString) { } @SneakyThrows - protected Map waitTaskComplete(String taskId) { + protected Map waitResponseMeetingCondition( + String method, + String endpoint, + String jsonEntity, + Predicate> condition + ) { for (int i = 0; i < MAX_TASK_RESULT_QUERY_TIME_IN_SECOND; i++) { - Response response = makeRequest(client(), "GET", "/_plugins/_ml/tasks/" + taskId, null, (String) null, null); + Response response = makeRequest(client(), method, endpoint, null, jsonEntity, null); assertEquals(RestStatus.OK, RestStatus.fromCode(response.getStatusLine().getStatusCode())); Map responseInMap = parseResponseToMap(response); - String state = responseInMap.get(MLTask.STATE_FIELD).toString(); - if (state.equals(MLTaskState.COMPLETED.toString())) { + if (condition.test(responseInMap)) { return responseInMap; } - if (state.equals(MLTaskState.FAILED.toString()) - || state.equals(MLTaskState.CANCELLED.toString()) - || state.equals(MLTaskState.COMPLETED_WITH_ERROR.toString())) { - fail("The task failed with state " + state); - } + logger.info("The " + i + "-th response: " + responseInMap.toString()); Thread.sleep(DEFAULT_TASK_RESULT_QUERY_INTERVAL_IN_MILLISECOND); } - fail("The task failed to complete after " + MAX_TASK_RESULT_QUERY_TIME_IN_SECOND + " seconds."); + fail("The response failed to meet condition after " + MAX_TASK_RESULT_QUERY_TIME_IN_SECOND + " seconds."); return null; } + @SneakyThrows + protected Map waitTaskComplete(String taskId) { + Predicate> condition = responseInMap -> { + String state = responseInMap.get(MLTask.STATE_FIELD).toString(); + return state.equals(MLTaskState.COMPLETED.toString()); + }; + return waitResponseMeetingCondition("GET", "/_plugins/_ml/tasks/" + taskId, (String) null, condition); + } + // Register the model then deploy it. Returns the model_id until the model is deployed protected String registerModelThenDeploy(String requestBody) { String registerModelTaskId = registerModel(requestBody); @@ -153,6 +165,26 @@ protected String registerModelThenDeploy(String requestBody) { return modelId; } + @SneakyThrows + private void waitModelUndeployed(String modelId) { + Predicate> condition = responseInMap -> { + String state = responseInMap.get(MLModel.MODEL_STATE_FIELD).toString(); + return !state.equals(MLModelState.DEPLOYED.toString()) + && !state.equals(MLModelState.DEPLOYING.toString()) + && !state.equals(MLModelState.PARTIALLY_DEPLOYED.toString()); + }; + waitResponseMeetingCondition("GET", "/_plugins/_ml/models/" + modelId, (String) null, condition); + return; + } + + @SneakyThrows + protected void deleteModel(String modelId) { + // need to undeploy first as model can be in use + makeRequest(client(), "POST", "/_plugins/_ml/models/" + modelId + "/_undeploy", null, (String) null, null); + waitModelUndeployed(modelId); + makeRequest(client(), "DELETE", "/_plugins/_ml/models/" + modelId, null, (String) null, null); + } + protected void createIndexWithConfiguration(String indexName, String indexConfiguration) throws Exception { Response response = makeRequest(client(), "PUT", indexName, null, indexConfiguration, null); Map responseInMap = parseResponseToMap(response); diff --git a/src/test/java/org/opensearch/integTest/NeuralSparseSearchToolIT.java b/src/test/java/org/opensearch/integTest/NeuralSparseSearchToolIT.java index 2dda2095..58431a0a 100644 --- a/src/test/java/org/opensearch/integTest/NeuralSparseSearchToolIT.java +++ b/src/test/java/org/opensearch/integTest/NeuralSparseSearchToolIT.java @@ -88,6 +88,7 @@ public void setUp() { public void tearDown() { super.tearDown(); deleteExternalIndices(); + deleteModel(modelId); } public void testNeuralSparseSearchToolInFlowAgent() { diff --git a/src/test/java/org/opensearch/integTest/ToolIntegrationTest.java b/src/test/java/org/opensearch/integTest/ToolIntegrationTest.java index aba39573..74c3d5cf 100644 --- a/src/test/java/org/opensearch/integTest/ToolIntegrationTest.java +++ b/src/test/java/org/opensearch/integTest/ToolIntegrationTest.java @@ -68,6 +68,11 @@ public void stopMockLLM() { server.stop(1); } + @After + public void deleteModel() { + deleteModel(modelId); + } + private String setUpConnector() { String url = String.format(Locale.ROOT, "http://127.0.0.1:%d/invoke", server.getAddress().getPort()); return createConnector(