From 20d56d0c43b31f91191b9d7d33526ceffaebfd5b Mon Sep 17 00:00:00 2001 From: Max Hniebergall <137079448+maxhniebergall@users.noreply.github.com> Date: Tue, 24 Sep 2024 09:54:22 -0400 Subject: [PATCH] [ML][backport] Warn for model load failures if they have a status code <500 (#113410) * [ML][backport] Warn for model load failures if they have a status code <500 * On model load failure, warn if the failure status code was less than 500 * Update docs/changelog/113280.yaml # Conflicts: # x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentNodeService.java * Update docs/changelog/113410.yaml * fix merge --- docs/changelog/113410.yaml | 5 +++++ .../assignment/TrainedModelAssignmentNodeService.java | 7 ++++++- 2 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 docs/changelog/113410.yaml diff --git a/docs/changelog/113410.yaml b/docs/changelog/113410.yaml new file mode 100644 index 0000000000000..03dec6624b771 --- /dev/null +++ b/docs/changelog/113410.yaml @@ -0,0 +1,5 @@ +pr: 113410 +summary: "[ML][backport] Warn for model load failures if they have a status code <500" +area: Machine Learning +type: bug +issues: [] diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentNodeService.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentNodeService.java index 7052e6f147b36..f1c9842e2d5c4 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentNodeService.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentNodeService.java @@ -9,6 +9,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ResourceNotFoundException; import org.elasticsearch.action.ActionListener; import 
org.elasticsearch.action.search.SearchPhaseExecutionException; @@ -753,7 +754,11 @@ private void updateStoredState(String deploymentId, RoutingInfoUpdate update, Ac } private void handleLoadFailure(TrainedModelDeploymentTask task, Exception ex) { - logger.error(() -> "[" + task.getDeploymentId() + "] model [" + task.getParams().getModelId() + "] failed to load", ex); + if (ex instanceof ElasticsearchException esEx && esEx.status().getStatus() < 500) { + logger.warn(() -> "[" + task.getDeploymentId() + "] model [" + task.getParams().getModelId() + "] failed to load", ex); + } else { + logger.error(() -> "[" + task.getDeploymentId() + "] model [" + task.getParams().getModelId() + "] failed to load", ex); + } if (task.isStopped()) { logger.debug( () -> format(