diff --git a/.gitignore b/.gitignore index 9ab7de894636a..7514d55cc3c9a 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,9 @@ out/ benchmarks/src/main/generated/* benchmarks/bin/* benchmarks/build-eclipse-default/* +server/bin/* +server/build-eclipse-default/* +test/framework/build-eclipse-default/* # eclipse files .project @@ -61,4 +64,4 @@ testfixtures_shared/ .ci/jobs/ # build files generated -doc-tools/missing-doclet/bin/ +doc-tools/missing-doclet/bin/ \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index fc367be22c1fd..0de0d823aa764 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -101,6 +101,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Adds mock implementation for TelemetryPlugin ([#7545](https://github.com/opensearch-project/OpenSearch/issues/7545)) - Support transport action names when registering NamedRoutes ([#7957](https://github.com/opensearch-project/OpenSearch/pull/7957)) - Create concept of persistent ThreadContext headers that are unstashable ([#8291](https://github.com/opensearch-project/OpenSearch/pull/8291)) +- [Search pipelines] Add global `ignore_failure` option for processors ([#8373](https://github.com/opensearch-project/OpenSearch/pull/8373)) - Enable Partial Flat Object ([#7997](https://github.com/opensearch-project/OpenSearch/pull/7997)) - Add jdk.incubator.vector module support for JDK 20+ ([#8601](https://github.com/opensearch-project/OpenSearch/pull/8601)) @@ -143,6 +144,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `com.google.jimfs:jimfs` from 1.2 to 1.3.0 (#8577, #8571) - Bump `com.networknt:json-schema-validator` from 1.0.85 to 1.0.86 ([#8573](https://github.com/opensearch-project/OpenSearch/pull/8573)) - Bump `com.google.cloud:google-cloud-core-http` from 2.17.0 to 2.21.0 ([#8586](https://github.com/opensearch-project/OpenSearch/pull/8586)) +- Bump `org.apache.logging.log4j:log4j-core` from 2.17.1 to 2.20.0 ([#8307](https://github.com/opensearch-project/OpenSearch/pull/8307)) ### Changed - Replace jboss-annotations-api_1.2_spec with jakarta.annotation-api ([#7836](https://github.com/opensearch-project/OpenSearch/pull/7836)) diff --git a/buildSrc/src/testKit/thirdPartyAudit/sample_jars/build.gradle b/buildSrc/src/testKit/thirdPartyAudit/sample_jars/build.gradle index b31e3b168c587..cb8050d1718c4 100644 --- a/buildSrc/src/testKit/thirdPartyAudit/sample_jars/build.gradle +++ b/buildSrc/src/testKit/thirdPartyAudit/sample_jars/build.gradle @@ -16,7 +16,7 @@ repositories { mavenCentral() } dependencies { - implementation 'org.apache.logging.log4j:log4j-core:2.20.0' + implementation "org.apache.logging.log4j:log4j-core:2.20.0" } ["0.0.1", "0.0.2"].forEach { v -> diff --git a/buildSrc/version.properties b/buildSrc/version.properties index 7a2ddc24aabcf..246f6e3444224 100644 --- a/buildSrc/version.properties +++ b/buildSrc/version.properties @@ -14,8 +14,7 @@ jackson_databind = 2.15.2 snakeyaml = 2.0 icu4j = 70.1 supercsv = 2.4.0 -# Update to 2.17.2+ is breaking OpenSearchJsonLayout (see https://issues.apache.org/jira/browse/LOG4J2-3562) -log4j = 2.17.1 +log4j = 2.20.0 slf4j = 1.7.36 asm = 9.5 jettison = 1.5.4 diff --git a/server/src/main/java/org/opensearch/common/io/InputStreamContainer.java b/libs/common/src/main/java/org/opensearch/common/io/InputStreamContainer.java similarity index 85% rename from server/src/main/java/org/opensearch/common/io/InputStreamContainer.java rename to 
libs/common/src/main/java/org/opensearch/common/io/InputStreamContainer.java index ce5dcff9f5349..eb8a4e1382497 100644 --- a/server/src/main/java/org/opensearch/common/io/InputStreamContainer.java +++ b/libs/common/src/main/java/org/opensearch/common/io/InputStreamContainer.java @@ -19,6 +19,7 @@ public class InputStreamContainer { private final InputStream inputStream; private final long contentLength; + private final long offset; /** * Construct a new stream object @@ -26,9 +27,10 @@ public class InputStreamContainer { * @param inputStream The input stream that is to be encapsulated * @param contentLength The total content length that is to be read from the stream + * @param offset The offset of the source content from which the stream starts */ - public InputStreamContainer(InputStream inputStream, long contentLength) { + public InputStreamContainer(InputStream inputStream, long contentLength, long offset) { this.inputStream = inputStream; this.contentLength = contentLength; + this.offset = offset; } /** @@ -44,4 +46,11 @@ public InputStream getInputStream() { public long getContentLength() { return contentLength; } + + /** + * @return offset of the source content. + */ + public long getOffset() { + return offset; + } } diff --git a/libs/core/licenses/log4j-api-2.17.1.jar.sha1 b/libs/core/licenses/log4j-api-2.17.1.jar.sha1 deleted file mode 100644 index 9d0e5dc631ed5..0000000000000 --- a/libs/core/licenses/log4j-api-2.17.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -d771af8e336e372fb5399c99edabe0919aeaf5b2 \ No newline at end of file diff --git a/libs/core/licenses/log4j-api-2.20.0.jar.sha1 b/libs/core/licenses/log4j-api-2.20.0.jar.sha1 new file mode 100644 index 0000000000000..37154d9861ac0 --- /dev/null +++ b/libs/core/licenses/log4j-api-2.20.0.jar.sha1 @@ -0,0 +1 @@ +1fe6082e660daf07c689a89c94dc0f49c26b44bb \ No newline at end of file diff --git a/modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/FilterQueryRequestProcessor.java b/modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/FilterQueryRequestProcessor.java index d8862aa59cede..c776181a64b26 100644 --- a/modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/FilterQueryRequestProcessor.java +++ b/modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/FilterQueryRequestProcessor.java @@ -21,6 +21,7 @@ import org.opensearch.ingest.ConfigurationUtils; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.search.pipeline.Processor; +import org.opensearch.search.pipeline.AbstractProcessor; import org.opensearch.search.pipeline.SearchRequestProcessor; import java.io.InputStream; @@ -53,12 +54,13 @@ public String getType() { /** * Constructor that takes a filter query. 
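+ * <p>
+ * A minimal usage sketch of the new package-private constructor with the {@code ignoreFailure} flag (illustrative only; null tag/description as in the unit tests below):
+ * <pre>
+ * QueryBuilder filter = new TermQueryBuilder("field", "value");
+ * FilterQueryRequestProcessor processor = new FilterQueryRequestProcessor(null, null, true, filter);
+ * // with ignoreFailure = true, the search pipeline catches a processor failure and continues with the unmodified request
+ * </pre>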
* - * @param tag processor tag - * @param description processor description + * @param tag processor tag + * @param description processor description + * @param ignoreFailure option to ignore failure * @param filterQuery the query that will be added as a filter to incoming queries */ - public FilterQueryRequestProcessor(String tag, String description, QueryBuilder filterQuery) { - super(tag, description); + FilterQueryRequestProcessor(String tag, String description, boolean ignoreFailure, QueryBuilder filterQuery) { + super(tag, description, ignoreFailure); this.filterQuery = filterQuery; } @@ -101,6 +103,7 @@ public FilterQueryRequestProcessor create( Map> processorFactories, String tag, String description, + boolean ignoreFailure, Map config, PipelineContext pipelineContext ) throws Exception { @@ -114,7 +117,7 @@ public FilterQueryRequestProcessor create( XContentParser parser = XContentType.JSON.xContent() .createParser(namedXContentRegistry, LoggingDeprecationHandler.INSTANCE, stream) ) { - return new FilterQueryRequestProcessor(tag, description, parseInnerQueryBuilder(parser)); + return new FilterQueryRequestProcessor(tag, description, ignoreFailure, parseInnerQueryBuilder(parser)); } } } diff --git a/modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/RenameFieldResponseProcessor.java b/modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/RenameFieldResponseProcessor.java index c8b3c06a71562..c410895304f76 100644 --- a/modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/RenameFieldResponseProcessor.java +++ b/modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/RenameFieldResponseProcessor.java @@ -19,6 +19,7 @@ import org.opensearch.ingest.ConfigurationUtils; import org.opensearch.search.SearchHit; import org.opensearch.search.pipeline.Processor; +import org.opensearch.search.pipeline.AbstractProcessor; import org.opensearch.search.pipeline.SearchRequestProcessor; import org.opensearch.search.pipeline.SearchResponseProcessor; @@ -41,14 +42,22 @@ public class RenameFieldResponseProcessor extends AbstractProcessor implements S /** * Constructor that takes a target field to rename and the new name * - * @param tag processor tag - * @param description processor description - * @param oldField name of field to be renamed - * @param newField name of field that will replace the old field + * @param tag processor tag + * @param description processor description + * @param ignoreFailure option to ignore failure + * @param oldField name of field to be renamed + * @param newField name of field that will replace the old field * @param ignoreMissing if true, do not throw error if oldField does not exist within search response */ - public RenameFieldResponseProcessor(String tag, String description, String oldField, String newField, boolean ignoreMissing) { - super(tag, description); + public RenameFieldResponseProcessor( + String tag, + String description, + boolean ignoreFailure, + String oldField, + String newField, + boolean ignoreMissing + ) { + super(tag, description, ignoreFailure); this.oldField = oldField; this.newField = newField; this.ignoreMissing = ignoreMissing; @@ -140,13 +149,14 @@ public RenameFieldResponseProcessor create( Map> processorFactories, String tag, String description, + boolean ignoreFailure, Map config, PipelineContext pipelineContext ) throws Exception { String oldField = ConfigurationUtils.readStringProperty(TYPE, tag, config, "field"); String 
newField = ConfigurationUtils.readStringProperty(TYPE, tag, config, "target_field"); boolean ignoreMissing = ConfigurationUtils.readBooleanProperty(TYPE, tag, config, "ignore_missing", false); - return new RenameFieldResponseProcessor(tag, description, oldField, newField, ignoreMissing); + return new RenameFieldResponseProcessor(tag, description, ignoreFailure, oldField, newField, ignoreMissing); } } } diff --git a/modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/ScriptRequestProcessor.java b/modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/ScriptRequestProcessor.java index 43ab3d4622d6b..e100fe0e3edce 100644 --- a/modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/ScriptRequestProcessor.java +++ b/modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/ScriptRequestProcessor.java @@ -25,6 +25,7 @@ import org.opensearch.script.ScriptType; import org.opensearch.script.SearchScript; import org.opensearch.search.pipeline.Processor; +import org.opensearch.search.pipeline.AbstractProcessor; import org.opensearch.search.pipeline.SearchRequestProcessor; import org.opensearch.search.pipeline.common.helpers.SearchRequestMap; @@ -54,6 +55,7 @@ public final class ScriptRequestProcessor extends AbstractProcessor implements S * * @param tag The processor's tag. * @param description The processor's description. + * @param ignoreFailure The option to ignore failure * @param script The {@link Script} to execute. * @param precompiledSearchScript The {@link Script} precompiled * @param scriptService The {@link ScriptService} used to execute the script. @@ -61,11 +63,12 @@ public final class ScriptRequestProcessor extends AbstractProcessor implements S ScriptRequestProcessor( String tag, String description, + boolean ignoreFailure, Script script, @Nullable SearchScript precompiledSearchScript, ScriptService scriptService ) { - super(tag, description); + super(tag, description, ignoreFailure); this.script = script; this.precompiledSearchScript = precompiledSearchScript; this.scriptService = scriptService; @@ -146,6 +149,7 @@ public ScriptRequestProcessor create( Map> registry, String processorTag, String description, + boolean ignoreFailure, Map config, PipelineContext pipelineContext ) throws Exception { @@ -174,7 +178,7 @@ public ScriptRequestProcessor create( } catch (ScriptException e) { throw newConfigurationException(TYPE, processorTag, null, e); } - return new ScriptRequestProcessor(processorTag, description, script, searchScript, scriptService); + return new ScriptRequestProcessor(processorTag, description, ignoreFailure, script, searchScript, scriptService); } } } diff --git a/modules/search-pipeline-common/src/test/java/org/opensearch/search/pipeline/common/FilterQueryRequestProcessorTests.java b/modules/search-pipeline-common/src/test/java/org/opensearch/search/pipeline/common/FilterQueryRequestProcessorTests.java index ecf746af556a2..576660d05b96d 100644 --- a/modules/search-pipeline-common/src/test/java/org/opensearch/search/pipeline/common/FilterQueryRequestProcessorTests.java +++ b/modules/search-pipeline-common/src/test/java/org/opensearch/search/pipeline/common/FilterQueryRequestProcessorTests.java @@ -23,7 +23,7 @@ public class FilterQueryRequestProcessorTests extends AbstractBuilderTestCase { public void testFilterQuery() throws Exception { QueryBuilder filterQuery = new TermQueryBuilder("field", "value"); - FilterQueryRequestProcessor filterQueryRequestProcessor = 
new FilterQueryRequestProcessor(null, null, filterQuery); + FilterQueryRequestProcessor filterQueryRequestProcessor = new FilterQueryRequestProcessor(null, null, false, filterQuery); QueryBuilder incomingQuery = new TermQueryBuilder("text", "foo"); SearchSourceBuilder source = new SearchSourceBuilder().query(incomingQuery); SearchRequest request = new SearchRequest().source(source); @@ -39,13 +39,13 @@ public void testFilterQuery() throws Exception { public void testFactory() throws Exception { FilterQueryRequestProcessor.Factory factory = new FilterQueryRequestProcessor.Factory(this.xContentRegistry()); Map configMap = new HashMap<>(Map.of("query", Map.of("term", Map.of("field", "value")))); - FilterQueryRequestProcessor processor = factory.create(Collections.emptyMap(), null, null, configMap, null); + FilterQueryRequestProcessor processor = factory.create(Collections.emptyMap(), null, null, false, configMap, null); assertEquals(new TermQueryBuilder("field", "value"), processor.filterQuery); // Missing "query" parameter: expectThrows( IllegalArgumentException.class, - () -> factory.create(Collections.emptyMap(), null, null, Collections.emptyMap(), null) + () -> factory.create(Collections.emptyMap(), null, null, false, Collections.emptyMap(), null) ); } } diff --git a/modules/search-pipeline-common/src/test/java/org/opensearch/search/pipeline/common/RenameFieldResponseProcessorTests.java b/modules/search-pipeline-common/src/test/java/org/opensearch/search/pipeline/common/RenameFieldResponseProcessorTests.java index 7f3a2acfbdc08..b7d1510d065ae 100644 --- a/modules/search-pipeline-common/src/test/java/org/opensearch/search/pipeline/common/RenameFieldResponseProcessorTests.java +++ b/modules/search-pipeline-common/src/test/java/org/opensearch/search/pipeline/common/RenameFieldResponseProcessorTests.java @@ -58,6 +58,7 @@ public void testRenameResponse() throws Exception { RenameFieldResponseProcessor renameFieldResponseProcessor = new RenameFieldResponseProcessor( null, null, + false, "field 0", "new field", false @@ -74,6 +75,7 @@ public void testRenameResponseWithMapping() throws Exception { RenameFieldResponseProcessor renameFieldResponseProcessor = new RenameFieldResponseProcessor( null, null, + false, "field 0", "new field", true @@ -97,6 +99,7 @@ public void testMissingField() throws Exception { RenameFieldResponseProcessor renameFieldResponseProcessor = new RenameFieldResponseProcessor( null, null, + false, "field", "new field", false @@ -115,7 +118,7 @@ public void testFactory() throws Exception { config.put("target_field", newField); RenameFieldResponseProcessor.Factory factory = new RenameFieldResponseProcessor.Factory(); - RenameFieldResponseProcessor processor = factory.create(Collections.emptyMap(), null, null, config, null); + RenameFieldResponseProcessor processor = factory.create(Collections.emptyMap(), null, null, false, config, null); assertEquals(processor.getType(), "rename_field"); assertEquals(processor.getOldField(), oldField); assertEquals(processor.getNewField(), newField); @@ -123,7 +126,7 @@ public void testFactory() throws Exception { expectThrows( OpenSearchParseException.class, - () -> factory.create(Collections.emptyMap(), null, null, Collections.emptyMap(), null) + () -> factory.create(Collections.emptyMap(), null, null, false, Collections.emptyMap(), null) ); } } diff --git a/modules/search-pipeline-common/src/test/java/org/opensearch/search/pipeline/common/ScriptRequestProcessorTests.java 
b/modules/search-pipeline-common/src/test/java/org/opensearch/search/pipeline/common/ScriptRequestProcessorTests.java index 2fb3b2345e7e2..df383e778c7ba 100644 --- a/modules/search-pipeline-common/src/test/java/org/opensearch/search/pipeline/common/ScriptRequestProcessorTests.java +++ b/modules/search-pipeline-common/src/test/java/org/opensearch/search/pipeline/common/ScriptRequestProcessorTests.java @@ -82,7 +82,7 @@ public void setupScripting() { } public void testScriptingWithoutPrecompiledScriptFactory() throws Exception { - ScriptRequestProcessor processor = new ScriptRequestProcessor(randomAlphaOfLength(10), null, script, null, scriptService); + ScriptRequestProcessor processor = new ScriptRequestProcessor(randomAlphaOfLength(10), null, false, script, null, scriptService); SearchRequest searchRequest = new SearchRequest(); searchRequest.source(createSearchSourceBuilder()); @@ -92,7 +92,14 @@ public void testScriptingWithoutPrecompiledScriptFactory() throws Exception { } public void testScriptingWithPrecompiledIngestScript() throws Exception { - ScriptRequestProcessor processor = new ScriptRequestProcessor(randomAlphaOfLength(10), null, script, searchScript, scriptService); + ScriptRequestProcessor processor = new ScriptRequestProcessor( + randomAlphaOfLength(10), + null, + false, + script, + searchScript, + scriptService + ); SearchRequest searchRequest = new SearchRequest(); searchRequest.source(createSearchSourceBuilder()); diff --git a/modules/search-pipeline-common/src/yamlRestTest/resources/rest-api-spec/test/search_pipeline/40_rename_response.yml b/modules/search-pipeline-common/src/yamlRestTest/resources/rest-api-spec/test/search_pipeline/40_rename_response.yml index 3b705f9bd5356..0528440f7584d 100644 --- a/modules/search-pipeline-common/src/yamlRestTest/resources/rest-api-spec/test/search_pipeline/40_rename_response.yml +++ b/modules/search-pipeline-common/src/yamlRestTest/resources/rest-api-spec/test/search_pipeline/40_rename_response.yml @@ -63,6 +63,26 @@ teardown: } - match: { acknowledged: true } + - do: + search_pipeline.put: + id: "my_pipeline_4" + body: > + { + "description": "test pipeline with ignore missing false and ignore failure true", + "response_processors": [ + { + "rename_field": + { + "field": "aa", + "target_field": "b", + "ignore_missing": false, + "ignore_failure": true + } + } + ] + } + - match: { acknowledged: true } + - do: indices.create: index: test @@ -119,8 +139,8 @@ teardown: - match: { hits.total.value: 1 } - match: {hits.hits.0._source: { "a": "foo" } } - # Pipeline with ignore_missing set to true - # Should still pass even though index does not contain field + # Pipeline with ignore_missing set to false + # Should throw illegal_argument_exception - do: catch: bad_request search: @@ -128,6 +148,15 @@ teardown: search_pipeline: "my_pipeline_3" - match: { error.type: "illegal_argument_exception" } + # Pipeline with ignore_missing set to false and ignore_failure set to true + # Should return while catching error + - do: + search: + index: test + search_pipeline: "my_pipeline_4" + - match: { hits.total.value: 1 } + - match: {hits.hits.0._source: { "a": "foo" } } + # No source, using stored_fields - do: search: diff --git a/plugins/discovery-azure-classic/licenses/log4j-1.2-api-2.17.1.jar.sha1 b/plugins/discovery-azure-classic/licenses/log4j-1.2-api-2.17.1.jar.sha1 deleted file mode 100644 index 23aa5c60bd596..0000000000000 --- a/plugins/discovery-azure-classic/licenses/log4j-1.2-api-2.17.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ 
-db3a7e7f07e878b92ac4a8f1100bee8325d5713a \ No newline at end of file diff --git a/plugins/discovery-azure-classic/licenses/log4j-1.2-api-2.20.0.jar.sha1 b/plugins/discovery-azure-classic/licenses/log4j-1.2-api-2.20.0.jar.sha1 new file mode 100644 index 0000000000000..9829576d38ce0 --- /dev/null +++ b/plugins/discovery-azure-classic/licenses/log4j-1.2-api-2.20.0.jar.sha1 @@ -0,0 +1 @@ +689151374756cb809cb029f2501015bdc7733179 \ No newline at end of file diff --git a/plugins/discovery-ec2/licenses/log4j-1.2-api-2.17.1.jar.sha1 b/plugins/discovery-ec2/licenses/log4j-1.2-api-2.17.1.jar.sha1 deleted file mode 100644 index 23aa5c60bd596..0000000000000 --- a/plugins/discovery-ec2/licenses/log4j-1.2-api-2.17.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -db3a7e7f07e878b92ac4a8f1100bee8325d5713a \ No newline at end of file diff --git a/plugins/discovery-ec2/licenses/log4j-1.2-api-2.20.0.jar.sha1 b/plugins/discovery-ec2/licenses/log4j-1.2-api-2.20.0.jar.sha1 new file mode 100644 index 0000000000000..9829576d38ce0 --- /dev/null +++ b/plugins/discovery-ec2/licenses/log4j-1.2-api-2.20.0.jar.sha1 @@ -0,0 +1 @@ +689151374756cb809cb029f2501015bdc7733179 \ No newline at end of file diff --git a/plugins/discovery-gce/licenses/log4j-1.2-api-2.17.1.jar.sha1 b/plugins/discovery-gce/licenses/log4j-1.2-api-2.17.1.jar.sha1 deleted file mode 100644 index 23aa5c60bd596..0000000000000 --- a/plugins/discovery-gce/licenses/log4j-1.2-api-2.17.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -db3a7e7f07e878b92ac4a8f1100bee8325d5713a \ No newline at end of file diff --git a/plugins/discovery-gce/licenses/log4j-1.2-api-2.20.0.jar.sha1 b/plugins/discovery-gce/licenses/log4j-1.2-api-2.20.0.jar.sha1 new file mode 100644 index 0000000000000..9829576d38ce0 --- /dev/null +++ b/plugins/discovery-gce/licenses/log4j-1.2-api-2.20.0.jar.sha1 @@ -0,0 +1 @@ +689151374756cb809cb029f2501015bdc7733179 \ No newline at end of file diff --git a/plugins/repository-gcs/licenses/log4j-1.2-api-2.17.1.jar.sha1 b/plugins/repository-gcs/licenses/log4j-1.2-api-2.17.1.jar.sha1 deleted file mode 100644 index 23aa5c60bd596..0000000000000 --- a/plugins/repository-gcs/licenses/log4j-1.2-api-2.17.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -db3a7e7f07e878b92ac4a8f1100bee8325d5713a \ No newline at end of file diff --git a/plugins/repository-gcs/licenses/log4j-1.2-api-2.20.0.jar.sha1 b/plugins/repository-gcs/licenses/log4j-1.2-api-2.20.0.jar.sha1 new file mode 100644 index 0000000000000..9829576d38ce0 --- /dev/null +++ b/plugins/repository-gcs/licenses/log4j-1.2-api-2.20.0.jar.sha1 @@ -0,0 +1 @@ +689151374756cb809cb029f2501015bdc7733179 \ No newline at end of file diff --git a/plugins/repository-hdfs/licenses/log4j-slf4j-impl-2.17.1.jar.sha1 b/plugins/repository-hdfs/licenses/log4j-slf4j-impl-2.17.1.jar.sha1 deleted file mode 100644 index 894ed8d886c3f..0000000000000 --- a/plugins/repository-hdfs/licenses/log4j-slf4j-impl-2.17.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -84692d456bcce689355d33d68167875e486954dd \ No newline at end of file diff --git a/plugins/repository-hdfs/licenses/log4j-slf4j-impl-2.20.0.jar.sha1 b/plugins/repository-hdfs/licenses/log4j-slf4j-impl-2.20.0.jar.sha1 new file mode 100644 index 0000000000000..800a4aa87ba0e --- /dev/null +++ b/plugins/repository-hdfs/licenses/log4j-slf4j-impl-2.20.0.jar.sha1 @@ -0,0 +1 @@ +7ab4f082fd162f60afcaf2b8744a3d959feab3e8 \ No newline at end of file diff --git a/plugins/repository-s3/licenses/log4j-1.2-api-2.17.1.jar.sha1 b/plugins/repository-s3/licenses/log4j-1.2-api-2.17.1.jar.sha1 deleted file mode 100644 index 
23aa5c60bd596..0000000000000 --- a/plugins/repository-s3/licenses/log4j-1.2-api-2.17.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -db3a7e7f07e878b92ac4a8f1100bee8325d5713a \ No newline at end of file diff --git a/plugins/repository-s3/licenses/log4j-1.2-api-2.20.0.jar.sha1 b/plugins/repository-s3/licenses/log4j-1.2-api-2.20.0.jar.sha1 new file mode 100644 index 0000000000000..9829576d38ce0 --- /dev/null +++ b/plugins/repository-s3/licenses/log4j-1.2-api-2.20.0.jar.sha1 @@ -0,0 +1 @@ +689151374756cb809cb029f2501015bdc7733179 \ No newline at end of file diff --git a/qa/rolling-upgrade/src/test/java/org/opensearch/upgrades/IndexingIT.java b/qa/rolling-upgrade/src/test/java/org/opensearch/upgrades/IndexingIT.java index 93c0bc96a5183..b60ee09d39048 100644 --- a/qa/rolling-upgrade/src/test/java/org/opensearch/upgrades/IndexingIT.java +++ b/qa/rolling-upgrade/src/test/java/org/opensearch/upgrades/IndexingIT.java @@ -247,6 +247,7 @@ public void testIndexing() throws IOException, ParseException { * * @throws Exception */ + @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/8322") public void testIndexingWithSegRep() throws Exception { if (UPGRADE_FROM_VERSION.before(Version.V_2_4_0)) { logger.info("--> Skip test for version {} where segment replication feature is not available", UPGRADE_FROM_VERSION); diff --git a/qa/wildfly/build.gradle b/qa/wildfly/build.gradle index 391d2c78b489b..5d37be47e782e 100644 --- a/qa/wildfly/build.gradle +++ b/qa/wildfly/build.gradle @@ -58,11 +58,10 @@ dependencies { api "com.fasterxml.jackson.jakarta.rs:jackson-jakarta-rs-base:${versions.jackson}" api "com.fasterxml.jackson.jakarta.rs:jackson-jakarta-rs-json-provider:${versions.jackson}" api "com.github.fge:json-patch:1.9" - api "org.apache.logging.log4j:log4j-api:${versions.log4j}" - api "org.apache.logging.log4j:log4j-core:${versions.log4j}" api(project(path: ':client:rest-high-level')) { exclude module: 'jakarta.annotation-api' } + testImplementation "org.apache.logging.log4j:log4j-slf4j-impl:${versions.log4j}" testImplementation(project(':test:framework')) { exclude module: 'jakarta.annotation-api' } diff --git a/qa/wildfly/src/main/resources/log4j2.properties b/qa/wildfly/src/main/resources/log4j2.properties deleted file mode 100644 index 4fd5d69daf65b..0000000000000 --- a/qa/wildfly/src/main/resources/log4j2.properties +++ /dev/null @@ -1,20 +0,0 @@ -# -# SPDX-License-Identifier: Apache-2.0 -# -# The OpenSearch Contributors require contributions made to -# this file be licensed under the Apache-2.0 license or a -# compatible open source license. -# -# Modifications Copyright OpenSearch Contributors. See -# GitHub history for details. 
-# - -status = error - -appender.console.type = Console -appender.console.name = console -appender.console.layout.type = PatternLayout -appender.console.layout.pattern = [%d{ISO8601}][%-5p][%-25c{1.}] %marker%m%n - -rootLogger.level = info -rootLogger.appenderRef.console.ref = console diff --git a/server/build.gradle b/server/build.gradle index 38bbf020d860b..e80aa0f77a164 100644 --- a/server/build.gradle +++ b/server/build.gradle @@ -146,6 +146,7 @@ dependencies { api "org.apache.logging.log4j:log4j-api:${versions.log4j}" api "org.apache.logging.log4j:log4j-jul:${versions.log4j}" api "org.apache.logging.log4j:log4j-core:${versions.log4j}", optional + annotationProcessor "org.apache.logging.log4j:log4j-core:${versions.log4j}" // jna api "net.java.dev.jna:jna:${versions.jna}" @@ -175,6 +176,10 @@ tasks.withType(JavaCompile).configureEach { options.compilerArgs -= '-Xlint:unchecked' } +compileJava { + options.compilerArgs += ['-processor', 'org.apache.logging.log4j.core.config.plugins.processor.PluginProcessor'] +} + tasks.named("internalClusterTest").configure { // TODO: these run faster with C2 only because they run for so, so long jvmArgs -= '-XX:TieredStopAtLevel=1' diff --git a/server/licenses/log4j-api-2.17.1.jar.sha1 b/server/licenses/log4j-api-2.17.1.jar.sha1 deleted file mode 100644 index 9d0e5dc631ed5..0000000000000 --- a/server/licenses/log4j-api-2.17.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -d771af8e336e372fb5399c99edabe0919aeaf5b2 \ No newline at end of file diff --git a/server/licenses/log4j-api-2.20.0.jar.sha1 b/server/licenses/log4j-api-2.20.0.jar.sha1 new file mode 100644 index 0000000000000..37154d9861ac0 --- /dev/null +++ b/server/licenses/log4j-api-2.20.0.jar.sha1 @@ -0,0 +1 @@ +1fe6082e660daf07c689a89c94dc0f49c26b44bb \ No newline at end of file diff --git a/server/licenses/log4j-core-2.17.1.jar.sha1 b/server/licenses/log4j-core-2.17.1.jar.sha1 deleted file mode 100644 index 7d4634f3d4e18..0000000000000 --- a/server/licenses/log4j-core-2.17.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -779f60f3844dadc3ef597976fcb1e5127b1f343d \ No newline at end of file diff --git a/server/licenses/log4j-core-2.20.0.jar.sha1 b/server/licenses/log4j-core-2.20.0.jar.sha1 new file mode 100644 index 0000000000000..49c972626563b --- /dev/null +++ b/server/licenses/log4j-core-2.20.0.jar.sha1 @@ -0,0 +1 @@ +eb2a9a47b1396e00b5eee1264296729a70565cc0 \ No newline at end of file diff --git a/server/licenses/log4j-jul-2.17.1.jar.sha1 b/server/licenses/log4j-jul-2.17.1.jar.sha1 deleted file mode 100644 index 4afb381a696e9..0000000000000 --- a/server/licenses/log4j-jul-2.17.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -881333b463d47828eda7443b19811763367b1916 \ No newline at end of file diff --git a/server/licenses/log4j-jul-2.20.0.jar.sha1 b/server/licenses/log4j-jul-2.20.0.jar.sha1 new file mode 100644 index 0000000000000..a456651e4569e --- /dev/null +++ b/server/licenses/log4j-jul-2.20.0.jar.sha1 @@ -0,0 +1 @@ +8170e6118eac1ab332046c179718a0f107f688e1 \ No newline at end of file diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationBaseIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationBaseIT.java index 52fe85b51cebd..4692210ccc577 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationBaseIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationBaseIT.java @@ -19,12 +19,15 @@ import org.opensearch.common.Nullable; import 
org.opensearch.common.settings.Settings; import org.opensearch.common.lease.Releasable; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.index.Index; import org.opensearch.index.IndexModule; import org.opensearch.index.IndexService; import org.opensearch.index.SegmentReplicationPerGroupStats; import org.opensearch.index.SegmentReplicationShardStats; +import org.opensearch.index.engine.Engine; import org.opensearch.index.shard.IndexShard; +import org.opensearch.index.shard.ShardId; import org.opensearch.index.store.Store; import org.opensearch.index.store.StoreFileMetadata; import org.opensearch.indices.IndicesService; @@ -158,6 +161,7 @@ protected void verifyStoreContent() throws Exception { final String indexName = primaryRouting.getIndexName(); final List replicaRouting = shardRoutingTable.replicaShards(); final IndexShard primaryShard = getIndexShard(clusterState, primaryRouting, indexName); + final int primaryDocCount = getDocCountFromShard(primaryShard); final Map primarySegmentMetadata = primaryShard.getSegmentMetadataMap(); for (ShardRouting replica : replicaRouting) { IndexShard replicaShard = getIndexShard(clusterState, replica, indexName); @@ -165,6 +169,8 @@ protected void verifyStoreContent() throws Exception { primarySegmentMetadata, replicaShard.getSegmentMetadataMap() ); + final int replicaDocCount = getDocCountFromShard(replicaShard); + assertEquals("Doc counts should match", primaryDocCount, replicaDocCount); if (recoveryDiff.missing.isEmpty() == false || recoveryDiff.different.isEmpty() == false) { fail( "Expected no missing or different segments between primary and replica but diff was missing: " @@ -185,10 +191,30 @@ protected void verifyStoreContent() throws Exception { }, 1, TimeUnit.MINUTES); } + private int getDocCountFromShard(IndexShard shard) { + try (final Engine.Searcher searcher = shard.acquireSearcher("test")) { + return searcher.getDirectoryReader().numDocs(); + } + } + private IndexShard getIndexShard(ClusterState state, ShardRouting routing, String indexName) { - return getIndexShard(state.nodes().get(routing.currentNodeId()).getName(), indexName); + return getIndexShard(state.nodes().get(routing.currentNodeId()).getName(), routing.shardId(), indexName); } + /** + * Fetch IndexShard by shardId, multiple shards per node allowed. + */ + protected IndexShard getIndexShard(String node, ShardId shardId, String indexName) { + final Index index = resolveIndex(indexName); + IndicesService indicesService = internalCluster().getInstance(IndicesService.class, node); + IndexService indexService = indicesService.indexServiceSafe(index); + final Optional id = indexService.shardIds().stream().filter(sid -> sid == shardId.id()).findFirst(); + return indexService.getShard(id.get()); + } + + /** + * Fetch IndexShard, assumes only a single shard per node. 
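+     * For example (illustrative; {@code routing} comes from the cluster state routing table, as in {@code verifyStoreContent()} above):
+     * <pre>
+     * IndexShard single = getIndexShard(nodeName, indexName);                  // assumes one shard per node
+     * IndexShard byId = getIndexShard(nodeName, routing.shardId(), indexName); // resolves among several shards per node
+     * </pre>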
+ */ protected IndexShard getIndexShard(String node, String indexName) { final Index index = resolveIndex(indexName); IndicesService indicesService = internalCluster().getInstance(IndicesService.class, node); @@ -197,6 +223,11 @@ protected IndexShard getIndexShard(String node, String indexName) { return indexService.getShard(shardId.get()); } + protected boolean segmentReplicationWithRemoteEnabled() { + return IndexMetadata.INDEX_REMOTE_STORE_ENABLED_SETTING.get(indexSettings()).booleanValue() + && "true".equalsIgnoreCase(featureFlagSettings().get(FeatureFlags.SEGMENT_REPLICATION_EXPERIMENTAL)); + } + protected Releasable blockReplication(List nodes, CountDownLatch latch) { CountDownLatch pauseReplicationLatch = new CountDownLatch(nodes.size()); for (String node : nodes) { @@ -206,7 +237,11 @@ protected Releasable blockReplication(List nodes, CountDownLatch latch) node )); mockTargetTransportService.addSendBehavior((connection, requestId, action, request, options) -> { - if (action.equals(SegmentReplicationSourceService.Actions.UPDATE_VISIBLE_CHECKPOINT)) { + String actionToWaitFor = SegmentReplicationSourceService.Actions.GET_SEGMENT_FILES; + if (segmentReplicationWithRemoteEnabled()) { + actionToWaitFor = SegmentReplicationSourceService.Actions.UPDATE_VISIBLE_CHECKPOINT; + } + if (action.equals(actionToWaitFor)) { try { latch.countDown(); pauseReplicationLatch.await(); diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationIT.java index 873c05843fb56..099c15267c2f7 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationIT.java @@ -49,7 +49,6 @@ import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.common.lease.Releasable; -import org.opensearch.common.util.FeatureFlags; import org.opensearch.index.IndexModule; import org.opensearch.index.SegmentReplicationPerGroupStats; import org.opensearch.index.SegmentReplicationPressureService; @@ -1324,9 +1323,4 @@ public void testPrimaryReceivesDocsDuringReplicaRecovery() throws Exception { ensureGreen(INDEX_NAME); waitForSearchableDocs(2, nodes); } - - private boolean segmentReplicationWithRemoteEnabled() { - return IndexMetadata.INDEX_REMOTE_STORE_ENABLED_SETTING.get(indexSettings()).booleanValue() - && "true".equalsIgnoreCase(featureFlagSettings().get(FeatureFlags.SEGMENT_REPLICATION_EXPERIMENTAL)); - } } diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationSuiteIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationSuiteIT.java new file mode 100644 index 0000000000000..9025c1cc79884 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationSuiteIT.java @@ -0,0 +1,88 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.indices.replication; + +import org.junit.Before; +import org.opensearch.action.admin.indices.delete.DeleteIndexRequest; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.settings.Settings; +import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.test.OpenSearchIntegTestCase; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.SUITE, minNumDataNodes = 2) +public class SegmentReplicationSuiteIT extends SegmentReplicationBaseIT { + + @Before + public void setup() { + internalCluster().startClusterManagerOnlyNode(); + createIndex(INDEX_NAME); + } + + @Override + public Settings indexSettings() { + final Settings.Builder builder = Settings.builder() + .put(super.indexSettings()) + // reset shard & replica count to random values set by OpenSearchIntegTestCase. + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numberOfShards()) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, numberOfReplicas()) + .put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT); + + // TODO: Randomly enable remote store on these tests. + return builder.build(); + } + + public void testBasicReplication() throws Exception { + final int docCount = scaledRandomIntBetween(10, 200); + for (int i = 0; i < docCount; i++) { + client().prepareIndex(INDEX_NAME).setId(Integer.toString(i)).setSource("field", "value" + i).execute().get(); + } + refresh(); + ensureGreen(INDEX_NAME); + verifyStoreContent(); + } + + public void testDropRandomNodeDuringReplication() throws Exception { + internalCluster().ensureAtLeastNumDataNodes(2); + internalCluster().startClusterManagerOnlyNodes(1); + + final int docCount = scaledRandomIntBetween(10, 200); + for (int i = 0; i < docCount; i++) { + client().prepareIndex(INDEX_NAME).setId(Integer.toString(i)).setSource("field", "value" + i).execute().get(); + } + refresh(); + + internalCluster().restartRandomDataNode(); + + ensureYellow(INDEX_NAME); + client().prepareIndex(INDEX_NAME).setId(Integer.toString(docCount)).setSource("field", "value" + docCount).execute().get(); + internalCluster().startDataOnlyNode(); + client().admin().indices().delete(new DeleteIndexRequest(INDEX_NAME)).actionGet(); + } + + public void testDeleteIndexWhileReplicating() throws Exception { + internalCluster().startClusterManagerOnlyNode(); + final int docCount = scaledRandomIntBetween(10, 200); + for (int i = 0; i < docCount; i++) { + client().prepareIndex(INDEX_NAME).setId(Integer.toString(i)).setSource("field", "value" + i).execute().get(); + } + refresh(INDEX_NAME); + client().admin().indices().delete(new DeleteIndexRequest(INDEX_NAME)).actionGet(); + } + + public void testFullRestartDuringReplication() throws Exception { + internalCluster().startNode(); + final int docCount = scaledRandomIntBetween(10, 200); + for (int i = 0; i < docCount; i++) { + client().prepareIndex(INDEX_NAME).setId(Integer.toString(i)).setSource("field", "value" + i).execute().get(); + } + refresh(INDEX_NAME); + internalCluster().fullRestart(); + ensureGreen(INDEX_NAME); + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java index 2b3fcadfc645e..10f01749ab4c5 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java +++ 
b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java @@ -102,9 +102,7 @@ protected void putRepository(Path path) { protected void setupRepo() { internalCluster().startClusterManagerOnlyNode(); absolutePath = randomRepoPath().toAbsolutePath(); - assertAcked( - clusterAdmin().preparePutRepository(REPOSITORY_NAME).setType("fs").setSettings(Settings.builder().put("location", absolutePath)) - ); + putRepository(absolutePath); } @After diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/RemoteStoreMultipartFileCorruptionIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/RemoteStoreMultipartFileCorruptionIT.java new file mode 100644 index 0000000000000..8f375ca6e2b01 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/RemoteStoreMultipartFileCorruptionIT.java @@ -0,0 +1,111 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.remotestore.multipart; + +import org.junit.After; +import org.junit.Before; +import org.opensearch.action.index.IndexResponse; +import org.opensearch.action.support.IndicesOptions; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.UUIDs; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; +import org.opensearch.index.IndexModule; +import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.plugins.Plugin; +import org.opensearch.remotestore.multipart.mocks.MockFsRepository; +import org.opensearch.remotestore.multipart.mocks.MockFsRepositoryPlugin; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.nio.file.Path; +import java.util.Collection; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; + +public class RemoteStoreMultipartFileCorruptionIT extends OpenSearchIntegTestCase { + + protected static final String REPOSITORY_NAME = "test-remore-store-repo"; + private static final String INDEX_NAME = "remote-store-test-idx-1"; + + @Override + protected Collection> nodePlugins() { + return Stream.concat(super.nodePlugins().stream(), Stream.of(MockFsRepositoryPlugin.class)).collect(Collectors.toList()); + } + + @Override + protected Settings featureFlagSettings() { + return Settings.builder().put(super.featureFlagSettings()).put(FeatureFlags.REMOTE_STORE, "true").build(); + } + + @Before + public void setup() { + internalCluster().startClusterManagerOnlyNode(); + Path absolutePath = randomRepoPath().toAbsolutePath(); + putRepository(absolutePath); + } + + protected void putRepository(Path path) { + assertAcked( + clusterAdmin().preparePutRepository(REPOSITORY_NAME) + .setType(MockFsRepositoryPlugin.TYPE) + .setSettings( + Settings.builder() + .put("location", path) + // custom setting for MockFsRepositoryPlugin + .put(MockFsRepository.TRIGGER_DATA_INTEGRITY_FAILURE.getKey(), true) + ) + ); + } + + @After + public void teardown() { + assertAcked(clusterAdmin().prepareDeleteRepository(REPOSITORY_NAME)); + } + + protected Settings remoteStoreIndexSettings() { + return Settings.builder() + .put(super.indexSettings()) + .put("index.refresh_interval", "300s") + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + 
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put(IndexModule.INDEX_QUERY_CACHE_ENABLED_SETTING.getKey(), false) + .put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) + .put(IndexMetadata.SETTING_REMOTE_STORE_ENABLED, true) + .put(IndexMetadata.SETTING_REMOTE_STORE_REPOSITORY, REPOSITORY_NAME) + .build(); + } + + private IndexResponse indexSingleDoc() { + return client().prepareIndex(INDEX_NAME) + .setId(UUIDs.randomBase64UUID()) + .setSource(randomAlphaOfLength(5), randomAlphaOfLength(5)) + .get(); + } + + public void testLocalFileCorruptionDuringUpload() { + internalCluster().startDataOnlyNodes(1); + createIndex(INDEX_NAME, remoteStoreIndexSettings()); + ensureYellowAndNoInitializingShards(INDEX_NAME); + ensureGreen(INDEX_NAME); + + indexSingleDoc(); + + client().admin() + .indices() + .prepareRefresh(INDEX_NAME) + .setIndicesOptions(IndicesOptions.STRICT_EXPAND_OPEN_HIDDEN_FORBID_CLOSED) + .execute() + .actionGet(); + + // ensuring red cluster meaning shard has failed and is unassigned + ensureRed(INDEX_NAME); + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/RemoteStoreMultipartIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/RemoteStoreMultipartIT.java new file mode 100644 index 0000000000000..a523d5c0f5470 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/RemoteStoreMultipartIT.java @@ -0,0 +1,38 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.remotestore.multipart; + +import org.opensearch.common.settings.Settings; +import org.opensearch.plugins.Plugin; +import org.opensearch.remotestore.RemoteStoreIT; +import org.opensearch.remotestore.multipart.mocks.MockFsRepositoryPlugin; + +import java.nio.file.Path; +import java.util.Collection; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; + +public class RemoteStoreMultipartIT extends RemoteStoreIT { + + @Override + protected Collection> nodePlugins() { + return Stream.concat(super.nodePlugins().stream(), Stream.of(MockFsRepositoryPlugin.class)).collect(Collectors.toList()); + } + + @Override + protected void putRepository(Path path) { + assertAcked( + clusterAdmin().preparePutRepository(REPOSITORY_NAME) + .setType(MockFsRepositoryPlugin.TYPE) + .setSettings(Settings.builder().put("location", path)) + ); + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/mocks/MockFsBlobStore.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/mocks/MockFsBlobStore.java new file mode 100644 index 0000000000000..f1d9fbba84528 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/mocks/MockFsBlobStore.java @@ -0,0 +1,36 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.remotestore.multipart.mocks; + +import org.opensearch.OpenSearchException; +import org.opensearch.common.blobstore.BlobContainer; +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.blobstore.fs.FsBlobStore; + +import java.io.IOException; +import java.nio.file.Path; + +public class MockFsBlobStore extends FsBlobStore { + + private final boolean triggerDataIntegrityFailure; + + public MockFsBlobStore(int bufferSizeInBytes, Path path, boolean readonly, boolean triggerDataIntegrityFailure) throws IOException { + super(bufferSizeInBytes, path, readonly); + this.triggerDataIntegrityFailure = triggerDataIntegrityFailure; + } + + @Override + public BlobContainer blobContainer(BlobPath path) { + try { + return new MockFsVerifyingBlobContainer(this, path, buildAndCreate(path), triggerDataIntegrityFailure); + } catch (IOException ex) { + throw new OpenSearchException("failed to create blob container", ex); + } + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/mocks/MockFsRepository.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/mocks/MockFsRepository.java new file mode 100644 index 0000000000000..15a9853477081 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/mocks/MockFsRepository.java @@ -0,0 +1,46 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.remotestore.multipart.mocks; + +import org.opensearch.cluster.metadata.RepositoryMetadata; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.blobstore.BlobStore; +import org.opensearch.common.blobstore.fs.FsBlobStore; +import org.opensearch.common.settings.Setting; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.env.Environment; +import org.opensearch.indices.recovery.RecoverySettings; +import org.opensearch.repositories.fs.FsRepository; + +public class MockFsRepository extends FsRepository { + + public static Setting TRIGGER_DATA_INTEGRITY_FAILURE = Setting.boolSetting( + "mock_fs_repository.trigger_data_integrity_failure", + false + ); + + private final boolean triggerDataIntegrityFailure; + + public MockFsRepository( + RepositoryMetadata metadata, + Environment environment, + NamedXContentRegistry namedXContentRegistry, + ClusterService clusterService, + RecoverySettings recoverySettings + ) { + super(metadata, environment, namedXContentRegistry, clusterService, recoverySettings); + triggerDataIntegrityFailure = TRIGGER_DATA_INTEGRITY_FAILURE.get(metadata.settings()); + } + + @Override + protected BlobStore createBlobStore() throws Exception { + FsBlobStore fsBlobStore = (FsBlobStore) super.createBlobStore(); + return new MockFsBlobStore(fsBlobStore.bufferSizeInBytes(), fsBlobStore.path(), isReadOnly(), triggerDataIntegrityFailure); + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/mocks/MockFsRepositoryPlugin.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/mocks/MockFsRepositoryPlugin.java new file mode 100644 index 0000000000000..ffd53adf4e29e --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/mocks/MockFsRepositoryPlugin.java @@ -0,0 +1,38 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The 
OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.remotestore.multipart.mocks; + +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.env.Environment; +import org.opensearch.indices.recovery.RecoverySettings; +import org.opensearch.plugins.Plugin; +import org.opensearch.plugins.RepositoryPlugin; +import org.opensearch.repositories.Repository; + +import java.util.Collections; +import java.util.Map; + +public class MockFsRepositoryPlugin extends Plugin implements RepositoryPlugin { + + public static final String TYPE = "fs_multipart_repository"; + + @Override + public Map getRepositories( + Environment env, + NamedXContentRegistry namedXContentRegistry, + ClusterService clusterService, + RecoverySettings recoverySettings + ) { + return Collections.singletonMap( + "fs_multipart_repository", + metadata -> new MockFsRepository(metadata, env, namedXContentRegistry, clusterService, recoverySettings) + ); + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/mocks/MockFsVerifyingBlobContainer.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/mocks/MockFsVerifyingBlobContainer.java new file mode 100644 index 0000000000000..8f2814eb7c4c4 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/mocks/MockFsVerifyingBlobContainer.java @@ -0,0 +1,120 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.remotestore.multipart.mocks; + +import org.apache.lucene.index.CorruptIndexException; +import org.opensearch.action.ActionListener; +import org.opensearch.common.blobstore.VerifyingMultiStreamBlobContainer; +import org.opensearch.common.io.InputStreamContainer; +import org.opensearch.common.StreamContext; +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.blobstore.fs.FsBlobContainer; +import org.opensearch.common.blobstore.fs.FsBlobStore; +import org.opensearch.common.blobstore.stream.write.WriteContext; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; + +public class MockFsVerifyingBlobContainer extends FsBlobContainer implements VerifyingMultiStreamBlobContainer { + + private static final int TRANSFER_TIMEOUT_MILLIS = 30000; + + private final boolean triggerDataIntegrityFailure; + + public MockFsVerifyingBlobContainer(FsBlobStore blobStore, BlobPath blobPath, Path path, boolean triggerDataIntegrityFailure) { + super(blobStore, blobPath, path); + this.triggerDataIntegrityFailure = triggerDataIntegrityFailure; + } + + @Override + public void asyncBlobUpload(WriteContext writeContext, ActionListener completionListener) throws IOException { + + int nParts = 10; + long partSize = writeContext.getFileSize() / nParts; + StreamContext streamContext = writeContext.getStreamProvider(partSize); + final Path file = path.resolve(writeContext.getFileName()); + byte[] buffer = new byte[(int) writeContext.getFileSize()]; + AtomicLong totalContentRead = new AtomicLong(); + CountDownLatch latch = new CountDownLatch(streamContext.getNumberOfParts()); + for (int partIdx = 0; partIdx < streamContext.getNumberOfParts(); partIdx++) { + int finalPartIdx = partIdx; + Thread thread = new Thread(() -> { + try { + InputStreamContainer inputStreamContainer = streamContext.provideStream(finalPartIdx); + InputStream inputStream = inputStreamContainer.getInputStream(); + long remainingContentLength = inputStreamContainer.getContentLength(); + long offset = partSize * finalPartIdx; + while (remainingContentLength > 0) { + int readContentLength = inputStream.read(buffer, (int) offset, (int) remainingContentLength); + totalContentRead.addAndGet(readContentLength); + remainingContentLength -= readContentLength; + offset += readContentLength; + } + inputStream.close(); + } catch (IOException e) { + completionListener.onFailure(e); + } finally { + latch.countDown(); + } + }); + thread.start(); + } + try { + if (!latch.await(TRANSFER_TIMEOUT_MILLIS, TimeUnit.MILLISECONDS)) { + throw new IOException("Timed out waiting for file transfer to complete for " + writeContext.getFileName()); + } + } catch (InterruptedException e) { + throw new IOException("Await interrupted on CountDownLatch, transfer failed for " + writeContext.getFileName()); + } + try (OutputStream outputStream = Files.newOutputStream(file, StandardOpenOption.CREATE_NEW)) { + outputStream.write(buffer); + } + if (writeContext.getFileSize() != totalContentRead.get()) { + throw new IOException( + "Incorrect content length read for file " + + writeContext.getFileName() + + ", actual file size: " + + writeContext.getFileSize() + + ", bytes read: " + + totalContentRead.get() + ); + } + + try { + // bulks need to succeed for segment files to be 
generated + if (isSegmentFile(writeContext.getFileName()) && triggerDataIntegrityFailure) { + completionListener.onFailure( + new RuntimeException( + new CorruptIndexException( + "Data integrity check failure for file: " + writeContext.getFileName(), + writeContext.getFileName() + ) + ) + ); + } else { + writeContext.getUploadFinalizer().accept(true); + completionListener.onResponse(null); + } + } catch (Exception e) { + completionListener.onFailure(e); + } + + } + + private boolean isSegmentFile(String filename) { + return !filename.endsWith(".tlog") && !filename.endsWith(".ckp"); + } +} diff --git a/server/src/main/java/org/opensearch/common/blobstore/VerifyingMultiStreamBlobContainer.java b/server/src/main/java/org/opensearch/common/blobstore/VerifyingMultiStreamBlobContainer.java new file mode 100644 index 0000000000000..0dfcc5c50e4b1 --- /dev/null +++ b/server/src/main/java/org/opensearch/common/blobstore/VerifyingMultiStreamBlobContainer.java @@ -0,0 +1,34 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.common.blobstore; + +import org.opensearch.action.ActionListener; +import org.opensearch.common.blobstore.stream.write.WriteContext; + +import java.io.IOException; + +/** + * An extension of {@link BlobContainer} that adds {@link VerifyingMultiStreamBlobContainer#asyncBlobUpload} to allow + * multipart uploads and performs integrity checks on transferred files + * + * @opensearch.internal + */ +public interface VerifyingMultiStreamBlobContainer extends BlobContainer { + + /** + * Reads blob content from multiple streams, each from a specific part of the file, which is provided by the + * StreamContextSupplier in the WriteContext passed to this method. An {@link IOException} is thrown if reading + * any of the input streams fails, or writing to the target blob fails + * + * @param writeContext A WriteContext object encapsulating all information needed to perform the upload + * @param completionListener Listener on which upload events should be published. 
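+     *                           A minimal caller sketch (illustrative; assumes a {@code logger} in scope):
+     *                           <pre>
+     *                           container.asyncBlobUpload(writeContext, ActionListener.wrap(
+     *                               unused -> logger.info("upload of {} finished", writeContext.getFileName()),
+     *                               e -> logger.error("upload failed", e)
+     *                           ));
+     *                           </pre>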
+ * @throws IOException if any of the input streams could not be read, or the target blob could not be written to + */ + void asyncBlobUpload(WriteContext writeContext, ActionListener completionListener) throws IOException; +} diff --git a/server/src/main/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainer.java b/server/src/main/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainer.java index 7864c3ab5c794..ca744efae902d 100644 --- a/server/src/main/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainer.java +++ b/server/src/main/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainer.java @@ -160,7 +160,7 @@ private LocalStreamSupplier getMultipartStreamSupplier( : offsetRangeInputStream; Objects.requireNonNull(inputStreams.get())[streamIdx] = inputStream; - return new InputStreamContainer(inputStream, size); + return new InputStreamContainer(inputStream, size, position); } catch (IOException e) { log.error("Failed to create input stream", e); throw e; diff --git a/server/src/main/java/org/opensearch/common/io/VersionedCodecStreamWrapper.java b/server/src/main/java/org/opensearch/common/io/VersionedCodecStreamWrapper.java index ff0af3954a3a3..9907e2225c64e 100644 --- a/server/src/main/java/org/opensearch/common/io/VersionedCodecStreamWrapper.java +++ b/server/src/main/java/org/opensearch/common/io/VersionedCodecStreamWrapper.java @@ -10,7 +10,11 @@ import java.io.IOException; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.message.ParameterizedMessage; import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; @@ -22,6 +26,8 @@ * @opensearch.internal */ public class VersionedCodecStreamWrapper { + private static final Logger logger = LogManager.getLogger(VersionedCodecStreamWrapper.class); + // TODO This can be updated to hold a streamReadWriteHandlerFactory and get relevant handler based on the stream versions private final IndexIOStreamHandler indexIOStreamHandler; private final int currentVersion; @@ -46,10 +52,21 @@ public VersionedCodecStreamWrapper(IndexIOStreamHandler indexIOStreamHandler, * @return stream content parsed into {@link T} */ public T readStream(IndexInput indexInput) throws IOException { - CodecUtil.checksumEntireFile(indexInput); - int readStreamVersion = checkHeader(indexInput); - T content = getHandlerForVersion(readStreamVersion).readContent(indexInput); - return content; + logger.debug("Reading input stream [{}] of length - [{}]", indexInput.toString(), indexInput.length()); + try { + CodecUtil.checksumEntireFile(indexInput); + int readStreamVersion = checkHeader(indexInput); + return getHandlerForVersion(readStreamVersion).readContent(indexInput); + } catch (CorruptIndexException cie) { + logger.error( + () -> new ParameterizedMessage( + "Error while validating header/footer for [{}]. 
Total data length [{}]", + indexInput.toString(), + indexInput.length() + ) + ); + throw cie; + } } /** diff --git a/server/src/main/java/org/opensearch/common/logging/LogConfigurator.java b/server/src/main/java/org/opensearch/common/logging/LogConfigurator.java index 4438bf53fd62c..b1a338e65746f 100644 --- a/server/src/main/java/org/opensearch/common/logging/LogConfigurator.java +++ b/server/src/main/java/org/opensearch/common/logging/LogConfigurator.java @@ -41,7 +41,6 @@ import org.apache.logging.log4j.core.config.builder.api.ConfigurationBuilderFactory; import org.apache.logging.log4j.core.config.builder.impl.BuiltConfiguration; import org.apache.logging.log4j.core.config.composite.CompositeConfiguration; -import org.apache.logging.log4j.core.config.plugins.util.PluginManager; import org.apache.logging.log4j.core.config.properties.PropertiesConfiguration; import org.apache.logging.log4j.core.config.properties.PropertiesConfigurationFactory; import org.apache.logging.log4j.status.StatusConsoleListener; @@ -140,13 +139,6 @@ public static void configure(final Environment environment) throws IOException, configure(environment.settings(), environment.configDir(), environment.logsDir()); } - /** - * Load logging plugins so we can have {@code node_name} in the pattern. - */ - public static void loadLog4jPlugins() { - PluginManager.addPackage(LogConfigurator.class.getPackage().getName()); - } - /** * Sets the node name. This is called before logging is configured if the * node name is set in opensearch.yml. Otherwise it is called as soon @@ -172,8 +164,6 @@ private static void configure(final Settings settings, final Path configsPath, f Objects.requireNonNull(configsPath); Objects.requireNonNull(logsPath); - loadLog4jPlugins(); - setLogConfigurationSystemProperty(logsPath, settings); // we initialize the status logger immediately otherwise Log4j will complain when we try to get the context configureStatusLogger(); diff --git a/server/src/main/java/org/opensearch/common/logging/OpenSearchJsonLayout.java b/server/src/main/java/org/opensearch/common/logging/OpenSearchJsonLayout.java index 5896aec0ce71c..60df94036cc6b 100644 --- a/server/src/main/java/org/opensearch/common/logging/OpenSearchJsonLayout.java +++ b/server/src/main/java/org/opensearch/common/logging/OpenSearchJsonLayout.java @@ -34,10 +34,12 @@ import org.apache.logging.log4j.core.Layout; import org.apache.logging.log4j.core.LogEvent; +import org.apache.logging.log4j.core.config.Configuration; import org.apache.logging.log4j.core.config.Node; import org.apache.logging.log4j.core.config.plugins.Plugin; import org.apache.logging.log4j.core.config.plugins.PluginAttribute; import org.apache.logging.log4j.core.config.plugins.PluginBuilderFactory; +import org.apache.logging.log4j.core.config.plugins.PluginConfiguration; import org.apache.logging.log4j.core.config.plugins.PluginFactory; import org.apache.logging.log4j.core.layout.AbstractStringLayout; import org.apache.logging.log4j.core.layout.ByteBufferDestination; @@ -94,11 +96,18 @@ public class OpenSearchJsonLayout extends AbstractStringLayout { private final PatternLayout patternLayout; - protected OpenSearchJsonLayout(String typeName, Charset charset, String[] opensearchMessageFields, int maxMessageLength) { + protected OpenSearchJsonLayout( + String typeName, + Charset charset, + String[] opensearchMessageFields, + int maxMessageLength, + Configuration configuration + ) { super(charset); this.patternLayout = PatternLayout.newBuilder() .withPattern(pattern(typeName, 
opensearchMessageFields, maxMessageLength)) .withAlwaysWriteExceptions(false) + .withConfiguration(configuration) .build(); } @@ -173,8 +182,14 @@ private String inQuotes(String s) { } @PluginFactory - public static OpenSearchJsonLayout createLayout(String type, Charset charset, String[] opensearchmessagefields, int maxMessageLength) { - return new OpenSearchJsonLayout(type, charset, opensearchmessagefields, maxMessageLength); + public static OpenSearchJsonLayout createLayout( + String type, + Charset charset, + String[] opensearchmessagefields, + int maxMessageLength, + Configuration configuration + ) { + return new OpenSearchJsonLayout(type, charset, opensearchmessagefields, maxMessageLength, configuration); } PatternLayout getPatternLayout() { @@ -202,6 +217,9 @@ public static class Builder> extends A @PluginAttribute(value = "maxmessagelength", defaultInt = 10000) private int maxMessageLength; + @PluginConfiguration + private Configuration configuration; + public Builder() { setCharset(StandardCharsets.UTF_8); setMaxMessageLength(10000); @@ -210,7 +228,7 @@ public Builder() { @Override public OpenSearchJsonLayout build() { String[] split = Strings.isNullOrEmpty(opensearchMessageFields) ? new String[] {} : opensearchMessageFields.split(","); - return OpenSearchJsonLayout.createLayout(type, charset, split, maxMessageLength); + return OpenSearchJsonLayout.createLayout(type, charset, split, maxMessageLength, configuration); } public Charset getCharset() { @@ -248,6 +266,15 @@ public B setMaxMessageLength(final int maxMessageLength) { this.maxMessageLength = maxMessageLength; return asBuilder(); } + + public Configuration getConfiguration() { + return configuration; + } + + public B setConfiguration(final Configuration configuration) { + this.configuration = configuration; + return asBuilder(); + } } @PluginBuilderFactory diff --git a/server/src/main/java/org/opensearch/common/util/ByteUtils.java b/server/src/main/java/org/opensearch/common/util/ByteUtils.java index 36ae3b1f5bcaa..8c7665d991751 100644 --- a/server/src/main/java/org/opensearch/common/util/ByteUtils.java +++ b/server/src/main/java/org/opensearch/common/util/ByteUtils.java @@ -61,6 +61,16 @@ public static void writeLongLE(long l, byte[] arr, int offset) { assert l == 0; } + /** Convert long to a byte array in big-endian format */ + public static byte[] toByteArrayBE(long l) { + byte[] result = new byte[8]; + for (int i = 7; i >= 0; i--) { + result[i] = (byte) (l & 0xffL); + l >>= 8; + } + return result; + } + /** Write a long in little-endian format. 
*/ public static long readLongLE(byte[] arr, int offset) { long l = arr[offset++] & 0xFFL; diff --git a/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java b/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java index 46d52bc8ca5df..e087bbb265727 100644 --- a/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java +++ b/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java @@ -11,6 +11,7 @@ import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfos; import org.apache.lucene.search.ReferenceManager; @@ -18,11 +19,14 @@ import org.apache.lucene.store.FilterDirectory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; +import org.opensearch.action.ActionListener; +import org.opensearch.action.LatchedActionListener; import org.opensearch.action.bulk.BackoffPolicy; -import org.opensearch.common.CheckedFunction; +import org.opensearch.action.support.GroupedActionListener; import org.opensearch.common.concurrent.GatedCloseable; import org.opensearch.common.logging.Loggers; import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.UploadListener; import org.opensearch.common.util.concurrent.ConcurrentCollections; import org.opensearch.index.engine.EngineException; import org.opensearch.index.engine.InternalEngine; @@ -46,6 +50,7 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; @@ -110,7 +115,7 @@ public final class RemoteStoreRefreshListener implements ReferenceManager.Refres private final SegmentReplicationCheckpointPublisher checkpointPublisher; - private final FileUploader fileUploader; + private final UploadListener statsListener; public RemoteStoreRefreshListener( IndexShard indexShard, @@ -137,7 +142,7 @@ public RemoteStoreRefreshListener( this.segmentTracker = segmentTracker; resetBackOffDelayIterator(); this.checkpointPublisher = checkpointPublisher; - this.fileUploader = new FileUploader(new UploadTracker() { + this.statsListener = new UploadListener() { @Override public void beforeUpload(String file) { // Start tracking the upload bytes started @@ -156,7 +161,7 @@ public void onFailure(String file) { // Track upload failure segmentTracker.addUploadBytesFailed(latestFileNameSizeOnLocalMap.get(file)); } - }, remoteDirectory, storeDirectory, this::getChecksumOfLocalFile, logger); + }; } @Override @@ -190,7 +195,7 @@ private synchronized void syncSegments(boolean isRetry) { long refreshTimeMs = segmentTracker.getLocalRefreshTimeMs(), refreshClockTimeMs = segmentTracker.getLocalRefreshClockTimeMs(); long refreshSeqNo = segmentTracker.getLocalRefreshSeqNo(); long bytesBeforeUpload = segmentTracker.getUploadBytesSucceeded(), startTimeInNS = System.nanoTime(); - boolean shouldRetry = true; + try { if (this.primaryTerm != indexShard.getOperationPrimaryTerm()) { @@ -242,18 +247,51 @@ private synchronized void syncSegments(boolean isRetry) { // Create a map of file name to size and update the refresh segment tracker updateLocalSizeMapAndTracker(localSegmentsPostRefresh); + CountDownLatch latch = new 
CountDownLatch(1); + ActionListener segmentUploadsCompletedListener = new LatchedActionListener<>(new ActionListener<>() { + @Override + public void onResponse(Void unused) { + boolean shouldRetry = true; + try { + // Start metadata file upload + uploadMetadata(localSegmentsPostRefresh, segmentInfos); + clearStaleFilesFromLocalSegmentChecksumMap(localSegmentsPostRefresh); + onSuccessfulSegmentsSync( + refreshTimeMs, + refreshClockTimeMs, + refreshSeqNo, + lastRefreshedCheckpoint, + checkpoint + ); + // At this point since we have uploaded new segments, segment infos and segment metadata file, + // along with marking minSeqNoToKeep, upload has succeeded completely. + shouldRetry = false; + } catch (Exception e) { + // We don't want to fail refresh if upload of new segments fails. The missed segments will be re-tried + // in the next refresh. This should not affect durability of the indexed data after remote trans-log + // integration. + logger.warn("Exception in post new segment upload actions", e); + } finally { + doComplete(shouldRetry); + } + } + + @Override + public void onFailure(Exception e) { + logger.warn("Exception while uploading new segments to the remote segment store", e); + doComplete(true); + } + + private void doComplete(boolean shouldRetry) { + // Update the segment tracker with the final upload status as seen at the end + updateFinalUploadStatusInSegmentTracker(shouldRetry == false, bytesBeforeUpload, startTimeInNS); + afterSegmentsSync(isRetry, shouldRetry); + } + }, latch); // Start the segments files upload - boolean newSegmentsUploadStatus = uploadNewSegments(localSegmentsPostRefresh); - if (newSegmentsUploadStatus) { - // Start metadata file upload - uploadMetadata(localSegmentsPostRefresh, segmentInfos); - clearStaleFilesFromLocalSegmentChecksumMap(localSegmentsPostRefresh); - onSuccessfulSegmentsSync(refreshTimeMs, refreshClockTimeMs, refreshSeqNo, lastRefreshedCheckpoint, checkpoint); - // At this point since we have uploaded new segments, segment infos and segment metadata file, - // along with marking minSeqNoToKeep, upload has succeeded completely. 
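Aside: the LatchedActionListener introduced here has one job — guaranteeing the latch is released on both the success and failure paths, so that syncSegments can stay synchronous while the uploads run asynchronously. A self-contained sketch of that wrapper, using a hand-rolled listener interface in place of the real ActionListener:

```java
import java.util.concurrent.CountDownLatch;

// Stand-in for the ActionListener/LatchedActionListener pair: the wrapper
// counts the latch down whichever path fires, so a caller can block with
// latch.await() until the async chain completes.
interface Listener<T> {
    void onResponse(T response);
    void onFailure(Exception e);
}

final class Latched<T> implements Listener<T> {
    private final Listener<T> delegate;
    private final CountDownLatch latch;

    Latched(Listener<T> delegate, CountDownLatch latch) {
        this.delegate = delegate;
        this.latch = latch;
    }

    @Override public void onResponse(T response) {
        try {
            delegate.onResponse(response);
        } finally {
            latch.countDown(); // released no matter what the delegate does
        }
    }

    @Override public void onFailure(Exception e) {
        try {
            delegate.onFailure(e);
        } finally {
            latch.countDown();
        }
    }
}
```

The caller's pattern is then: create a CountDownLatch(1), wrap the completion listener, start the uploads, and latch.await().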
- shouldRetry = false; - } + uploadNewSegments(localSegmentsPostRefresh, segmentUploadsCompletedListener); + latch.await(); } } catch (EngineException e) { logger.warn("Exception while reading SegmentInfosSnapshot", e); @@ -265,11 +303,7 @@ private synchronized void syncSegments(boolean isRetry) { } } catch (Throwable t) { logger.error("Exception in RemoteStoreRefreshListener.afterRefresh()", t); - } finally { - // Update the segment tracker with the final upload status as seen at the end - updateFinalUploadStatusInSegmentTracker(shouldRetry == false, bytesBeforeUpload, startTimeInNS); } - afterSegmentsSync(isRetry, shouldRetry); } /** @@ -378,17 +412,50 @@ void uploadMetadata(Collection localSegmentsPostRefresh, SegmentInfos se } } - private boolean uploadNewSegments(Collection localSegmentsPostRefresh) throws IOException { - AtomicBoolean uploadSuccess = new AtomicBoolean(true); - localSegmentsPostRefresh.forEach(file -> { - try { - fileUploader.uploadFile(file); - } catch (IOException e) { - uploadSuccess.set(false); - logger.warn(() -> new ParameterizedMessage("Exception while uploading file {} to the remote segment store", file), e); - } - }); - return uploadSuccess.get(); + private void uploadNewSegments(Collection localSegmentsPostRefresh, ActionListener listener) { + Collection filteredFiles = localSegmentsPostRefresh.stream().filter(file -> !skipUpload(file)).collect(Collectors.toList()); + if (filteredFiles.size() == 0) { + listener.onResponse(null); + return; + } + + ActionListener> mappedListener = ActionListener.map(listener, resp -> null); + GroupedActionListener batchUploadListener = new GroupedActionListener<>(mappedListener, filteredFiles.size()); + + for (String src : filteredFiles) { + ActionListener aggregatedListener = ActionListener.wrap(resp -> { + statsListener.onSuccess(src); + batchUploadListener.onResponse(resp); + }, ex -> { + logger.warn(() -> new ParameterizedMessage("Exception: [{}] while uploading segment files", ex), ex); + if (ex instanceof CorruptIndexException) { + indexShard.failShard(ex.getMessage(), ex); + } + statsListener.onFailure(src); + batchUploadListener.onFailure(ex); + }); + statsListener.beforeUpload(src); + remoteDirectory.copyFrom(storeDirectory, src, IOContext.DEFAULT, aggregatedListener); + } + } + + /** + * Whether to upload a file or not depending on whether file is in excluded list or has been already uploaded. + * + * @param file that needs to be uploaded. + * @return true if the upload has to be skipped for the file. + */ + private boolean skipUpload(String file) { + try { + // Exclude files that are already uploaded and the exclude files to come up with the list of files to be uploaded. + return EXCLUDE_FILES.contains(file) || remoteDirectory.containsFile(file, getChecksumOfLocalFile(file)); + } catch (IOException e) { + logger.error( + "Exception while reading checksum of local segment file: {}, ignoring the exception and re-uploading the file", + file + ); + } + return false; } private String getChecksumOfLocalFile(String file) throws IOException { @@ -462,104 +529,4 @@ private void updateFinalUploadStatusInSegmentTracker(boolean uploadStatus, long segmentTracker.incrementTotalUploadsFailed(); } } - - /** - * This class is a wrapper over the copying of file from local to remote store allowing to decorate the actual copy - * method along with adding hooks of code that can be run before, on success and on failure. 
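The new uploadNewSegments fans out one listener per segment file and funnels them through a GroupedActionListener. The contract is easiest to see with standard-library types; a rough analogue (uploadAsync is a placeholder, not an OpenSearch API):

```java
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;

public class BatchUpload {
    // One future per file; the combined future completes once every upload has
    // finished, and completes exceptionally if any of them failed -- roughly the
    // grouped-listener contract used above.
    static CompletableFuture<Void> uploadAll(List<String> files) {
        List<CompletableFuture<Void>> uploads = files.stream()
            .map(BatchUpload::uploadAsync)
            .collect(Collectors.toList());
        return CompletableFuture.allOf(uploads.toArray(new CompletableFuture[0]));
    }

    static CompletableFuture<Void> uploadAsync(String file) {
        return CompletableFuture.runAsync(() -> {
            // placeholder: copy `file` to the remote segment store
        });
    }
}
```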
- * - * @opensearch.internal - */ - private static class FileUploader { - - private final Logger logger; - - private final UploadTracker uploadTracker; - - private final RemoteSegmentStoreDirectory remoteDirectory; - - private final Directory storeDirectory; - - private final CheckedFunction checksumProvider; - - public FileUploader( - UploadTracker uploadTracker, - RemoteSegmentStoreDirectory remoteDirectory, - Directory storeDirectory, - CheckedFunction checksumProvider, - Logger logger - ) { - this.uploadTracker = uploadTracker; - this.remoteDirectory = remoteDirectory; - this.storeDirectory = storeDirectory; - this.checksumProvider = checksumProvider; - this.logger = logger; - } - - /** - * Calling this method will lead to before getting executed and then the actual upload. Based on the upload status, - * the onSuccess or onFailure method gets invoked. - * - * @param file the file which is to be uploaded. - * @throws IOException is thrown if the upload fails. - */ - private void uploadFile(String file) throws IOException { - if (skipUpload(file)) { - return; - } - uploadTracker.beforeUpload(file); - boolean success = false; - try { - performUpload(file); - uploadTracker.onSuccess(file); - success = true; - } finally { - if (!success) { - uploadTracker.onFailure(file); - } - } - } - - /** - * Whether to upload a file or not depending on whether file is in excluded list or has been already uploaded. - * - * @param file that needs to be uploaded. - * @return true if the upload has to be skipped for the file. - */ - private boolean skipUpload(String file) { - try { - // Exclude files that are already uploaded and the exclude files to come up with the list of files to be uploaded. - return EXCLUDE_FILES.contains(file) || remoteDirectory.containsFile(file, checksumProvider.apply(file)); - } catch (IOException e) { - logger.error( - "Exception while reading checksum of local segment file: {}, ignoring the exception and re-uploading the file", - file - ); - } - return false; - } - - /** - * This method does the actual upload. - * - * @param file that needs to be uploaded. - * @throws IOException is thrown if the upload fails. - */ - private void performUpload(String file) throws IOException { - remoteDirectory.copyFrom(storeDirectory, file, file, IOContext.DEFAULT); - } - } - - /** - * A tracker class that is fed to FileUploader. 
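The UploadTracker being deleted here survives as the UploadListener wired in earlier — the same before/success/failure hook trio, now decoupled from the copy logic itself. A standalone sketch of that shape (the counter names are illustrative):

```java
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;

interface UploadHooks {
    void beforeUpload(String file);
    void onSuccess(String file);
    void onFailure(String file);
}

// Stats-only implementation: the hooks never touch the upload itself, they just
// move the file's size between "started", "succeeded" and "failed" buckets.
final class UploadStats implements UploadHooks {
    private final Map<String, Long> fileSizes = new ConcurrentHashMap<>();
    private final AtomicLong started = new AtomicLong();
    private final AtomicLong succeeded = new AtomicLong();
    private final AtomicLong failed = new AtomicLong();

    void trackSize(String file, long bytes) { fileSizes.put(file, bytes); }

    private long sizeOf(String file) { return fileSizes.getOrDefault(file, 0L); }

    @Override public void beforeUpload(String file) { started.addAndGet(sizeOf(file)); }
    @Override public void onSuccess(String file)    { succeeded.addAndGet(sizeOf(file)); }
    @Override public void onFailure(String file)    { failed.addAndGet(sizeOf(file)); }
}
```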
- * - * @opensearch.internal - */ - interface UploadTracker { - - void beforeUpload(String file); - - void onSuccess(String file); - - void onFailure(String file); - } } diff --git a/server/src/main/java/org/opensearch/index/store/RemoteDirectory.java b/server/src/main/java/org/opensearch/index/store/RemoteDirectory.java index 8782808c070ab..f7fe7ca62e6ba 100644 --- a/server/src/main/java/org/opensearch/index/store/RemoteDirectory.java +++ b/server/src/main/java/org/opensearch/index/store/RemoteDirectory.java @@ -45,6 +45,10 @@ public class RemoteDirectory extends Directory { protected final BlobContainer blobContainer; + public BlobContainer getBlobContainer() { + return blobContainer; + } + public RemoteDirectory(BlobContainer blobContainer) { this.blobContainer = blobContainer; } diff --git a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java index e7602203440d2..395ecba442e86 100644 --- a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java +++ b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java @@ -8,9 +8,12 @@ package org.opensearch.index.store; +import com.jcraft.jzlib.JZlib; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.message.ParameterizedMessage; import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.SegmentInfos; import org.apache.lucene.store.ByteBuffersDataOutput; import org.apache.lucene.store.ByteBuffersIndexOutput; @@ -19,10 +22,20 @@ import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; +import org.opensearch.ExceptionsHelper; +import org.opensearch.action.ActionListener; import org.opensearch.common.UUIDs; +import org.opensearch.common.blobstore.VerifyingMultiStreamBlobContainer; +import org.opensearch.common.blobstore.exception.CorruptFileException; +import org.opensearch.common.blobstore.stream.write.WriteContext; +import org.opensearch.common.blobstore.stream.write.WritePriority; +import org.opensearch.common.blobstore.transfer.RemoteTransferContainer; +import org.opensearch.common.blobstore.transfer.stream.OffsetRangeIndexInputStream; import org.opensearch.common.io.VersionedCodecStreamWrapper; import org.opensearch.common.lucene.store.ByteArrayIndexInput; +import org.opensearch.common.util.ByteUtils; import org.opensearch.index.remote.RemoteStoreUtils; +import org.opensearch.index.store.exception.ChecksumCombinationException; import org.opensearch.index.store.lockmanager.FileLockInfo; import org.opensearch.index.store.lockmanager.RemoteStoreCommitLevelLockManager; import org.opensearch.index.store.lockmanager.RemoteStoreLockManager; @@ -44,6 +57,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; +import java.util.zip.CRC32; /** * A RemoteDirectory extension for remote segment store. We need to make sure we don't overwrite a segment file once uploaded. 
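Before the next hunk, the class comment above is worth unpacking: remote segment files are treated as immutable, so every upload writes under a fresh name built from the local name, the "__" separator, and a UUID. A hypothetical sketch of that naming scheme (the real code uses its own UUID helper; this assumes local segment names never contain the separator):

```java
import java.util.UUID;

public class RemoteSegmentNames {
    static final String SEPARATOR = "__";

    // Upload under a fresh name every time so an existing remote file is never overwritten.
    static String newRemoteName(String localName) {
        return localName + SEPARATOR + UUID.randomUUID();
    }

    // Recover the local segment name from the suffixed remote name.
    static String localName(String remoteName) {
        return remoteName.substring(0, remoteName.indexOf(SEPARATOR));
    }
}
```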
@@ -62,6 +76,11 @@ public final class RemoteSegmentStoreDirectory extends FilterDirectory implement */ public static final String SEGMENT_NAME_UUID_SEPARATOR = "__"; + /** + * Number of bytes in the segment file to store checksum + */ + private static final int SEGMENT_CHECKSUM_BYTES = 8; + /** * remoteDataDirectory is used to store segment files at path: cluster_UUID/index_UUID/shardId/segments/data */ @@ -349,6 +368,89 @@ public IndexInput openInput(String name, IOContext context) throws IOException { } } + /** + * Copies a file from the source directory to a remote based on multi-stream upload support. + * If vendor plugin supports uploading multiple parts in parallel, BlobContainer#writeBlobByStreams + * will be used, else, the legacy {@link RemoteSegmentStoreDirectory#copyFrom(Directory, String, String, IOContext)} + * will be called. + * + * @param from The directory for the file to be uploaded + * @param src File to be uploaded + * @param context IOContext to be used to open IndexInput of file during remote upload + * @param listener Listener to handle upload callback events + */ + public void copyFrom(Directory from, String src, IOContext context, ActionListener listener) { + if (remoteDataDirectory.getBlobContainer() instanceof VerifyingMultiStreamBlobContainer) { + try { + String remoteFilename = getNewRemoteSegmentFilename(src); + uploadBlob(from, src, remoteFilename, context, listener); + } catch (Exception e) { + listener.onFailure(e); + } + } else { + try { + copyFrom(from, src, src, context); + listener.onResponse(null); + } catch (Exception e) { + logger.warn(() -> new ParameterizedMessage("Exception while uploading file {} to the remote segment store", src), e); + listener.onFailure(e); + } + } + } + + private void uploadBlob(Directory from, String src, String remoteFileName, IOContext ioContext, ActionListener listener) + throws Exception { + long expectedChecksum = calculateChecksumOfChecksum(from, src); + long contentLength; + try (IndexInput indexInput = from.openInput(src, ioContext)) { + contentLength = indexInput.length(); + } + RemoteTransferContainer remoteTransferContainer = new RemoteTransferContainer( + src, + remoteFileName, + contentLength, + true, + WritePriority.NORMAL, + (size, position) -> new OffsetRangeIndexInputStream(from.openInput(src, ioContext), size, position), + expectedChecksum, + remoteDataDirectory.getBlobContainer() instanceof VerifyingMultiStreamBlobContainer + ); + ActionListener completionListener = ActionListener.wrap(resp -> { + try { + postUpload(from, src, remoteFileName, getChecksumOfLocalFile(from, src)); + listener.onResponse(null); + } catch (Exception e) { + logger.error(() -> new ParameterizedMessage("Exception in segment postUpload for file [{}]", src), e); + listener.onFailure(e); + } + }, ex -> { + logger.error(() -> new ParameterizedMessage("Failed to upload blob {}", src), ex); + IOException corruptIndexException = ExceptionsHelper.unwrapCorruption(ex); + if (corruptIndexException != null) { + listener.onFailure(corruptIndexException); + return; + } + Throwable throwable = ExceptionsHelper.unwrap(ex, CorruptFileException.class); + if (throwable != null) { + CorruptFileException corruptFileException = (CorruptFileException) throwable; + listener.onFailure(new CorruptIndexException(corruptFileException.getMessage(), corruptFileException.getFileName())); + return; + } + listener.onFailure(ex); + }); + + completionListener = ActionListener.runBefore(completionListener, () -> { + try { + remoteTransferContainer.close(); + } catch 
(Exception e) { + logger.warn("Error occurred while closing streams", e); + } + }); + + WriteContext writeContext = remoteTransferContainer.createWriteContext(); + ((VerifyingMultiStreamBlobContainer) remoteDataDirectory.getBlobContainer()).asyncBlobUpload(writeContext, completionListener); + } + /** * This acquires a lock on a given commit by creating a lock file in lock directory using {@code FileLockInfo} * @param primaryTerm Primary Term of index at the time of commit. @@ -425,6 +527,10 @@ public void copyFrom(Directory from, String src, String dest, IOContext context, String remoteFilename; remoteFilename = getNewRemoteSegmentFilename(dest); remoteDataDirectory.copyFrom(from, src, remoteFilename, context); + postUpload(from, src, remoteFilename, checksum); + } + + private void postUpload(Directory from, String src, String remoteFilename, String checksum) throws IOException { UploadedSegmentMetadata segmentMetadata = new UploadedSegmentMetadata(src, remoteFilename, checksum, from.fileLength(src)); segmentsUploadedToRemoteStore.put(src, segmentMetadata); } @@ -528,6 +634,27 @@ private String getChecksumOfLocalFile(Directory directory, String file) throws I } } + private long calculateChecksumOfChecksum(Directory directory, String file) throws IOException { + try (IndexInput indexInput = directory.openInput(file, IOContext.DEFAULT)) { + long storedChecksum = CodecUtil.retrieveChecksum(indexInput); + CRC32 checksumOfChecksum = new CRC32(); + checksumOfChecksum.update(ByteUtils.toByteArrayBE(storedChecksum)); + try { + return JZlib.crc32_combine(storedChecksum, checksumOfChecksum.getValue(), SEGMENT_CHECKSUM_BYTES); + } catch (Exception e) { + throw new ChecksumCombinationException( + "Potentially corrupted file: Checksum combination failed while combining stored checksum " + + "and calculated checksum of stored checksum in segment file: " + + file + + ", directory: " + + directory, + file, + e + ); + } + } + } + private String getExistingRemoteFilename(String localFilename) { if (segmentsUploadedToRemoteStore.containsKey(localFilename)) { return segmentsUploadedToRemoteStore.get(localFilename).uploadedFilename; diff --git a/server/src/main/java/org/opensearch/index/store/exception/ChecksumCombinationException.java b/server/src/main/java/org/opensearch/index/store/exception/ChecksumCombinationException.java index a355473aa2afd..d8e1739fbaa9d 100644 --- a/server/src/main/java/org/opensearch/index/store/exception/ChecksumCombinationException.java +++ b/server/src/main/java/org/opensearch/index/store/exception/ChecksumCombinationException.java @@ -11,7 +11,7 @@ import org.apache.lucene.index.CorruptIndexException; /** - * Exception is raised when combination to two crc checksums fail. + * Exception is raised when combination of two CRC checksums fail. 
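The calculateChecksumOfChecksum method above leans on the CRC32 combine identity: CRC32(A + B) is computable from CRC32(A), CRC32(B), and B's length alone, without re-reading A. Here A is the file body whose checksum Lucene stores in the footer, and B is the 8-byte big-endian encoding of that stored checksum, so the combined value is the CRC of the whole file. The same computation in isolation (method names are illustrative; the JZlib call matches the one in the hunk):

```java
import com.jcraft.jzlib.JZlib;
import java.util.zip.CRC32;

public class Crc32CombineSketch {
    // Derive the whole-file CRC from a Lucene-style file whose last 8 bytes are
    // the (big-endian) checksum of everything that precedes them.
    static long wholeFileCrc(long storedChecksum) {
        byte[] footer = toBytesBE(storedChecksum);
        CRC32 footerCrc = new CRC32();
        footerCrc.update(footer);
        return JZlib.crc32_combine(storedChecksum, footerCrc.getValue(), footer.length);
    }

    static byte[] toBytesBE(long l) {
        byte[] out = new byte[8];
        for (int i = 7; i >= 0; i--) {
            out[i] = (byte) (l & 0xffL);
            l >>= 8;
        }
        return out;
    }
}
```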
* * @opensearch.internal */ diff --git a/server/src/main/java/org/opensearch/index/translog/Checkpoint.java b/server/src/main/java/org/opensearch/index/translog/Checkpoint.java index 56de7e5daf55f..a9f905f52bc3a 100644 --- a/server/src/main/java/org/opensearch/index/translog/Checkpoint.java +++ b/server/src/main/java/org/opensearch/index/translog/Checkpoint.java @@ -233,7 +233,7 @@ public static void write(FileChannel fileChannel, Path checkpointFile, Checkpoin } } - private static byte[] createCheckpointBytes(Path checkpointFile, Checkpoint checkpoint) throws IOException { + public static byte[] createCheckpointBytes(Path checkpointFile, Checkpoint checkpoint) throws IOException { final ByteArrayOutputStream byteOutputStream = new ByteArrayOutputStream(V4_FILE_SIZE) { @Override public synchronized byte[] toByteArray() { diff --git a/server/src/main/java/org/opensearch/index/translog/checked/TranslogCheckedContainer.java b/server/src/main/java/org/opensearch/index/translog/TranslogCheckedContainer.java similarity index 74% rename from server/src/main/java/org/opensearch/index/translog/checked/TranslogCheckedContainer.java rename to server/src/main/java/org/opensearch/index/translog/TranslogCheckedContainer.java index b90794e29d2b1..7e2a38559166f 100644 --- a/server/src/main/java/org/opensearch/index/translog/checked/TranslogCheckedContainer.java +++ b/server/src/main/java/org/opensearch/index/translog/TranslogCheckedContainer.java @@ -6,13 +6,10 @@ * compatible open source license. */ -package org.opensearch.index.translog.checked; +package org.opensearch.index.translog; -import org.opensearch.common.io.Channels; import org.opensearch.common.util.concurrent.ReleasableLock; -import java.io.IOException; -import java.nio.channels.FileChannel; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.ReentrantLock; import java.util.zip.CRC32; @@ -28,21 +25,15 @@ public class TranslogCheckedContainer { private final Checksum checksum; private final AtomicLong contentLength; private final ReleasableLock updateLock = new ReleasableLock(new ReentrantLock()); - private final String file; /** - * Creates TranslogCheckedContainer from provided channel. + * Create TranslogCheckedContainer from provided bytes * - * @param channel {@link FileChannel} to read from - * @param offset offset of channel from which bytes are to be read. - * @param len Length of bytes to be read. 
+ * @param bytes The byte array to read from */ - public TranslogCheckedContainer(FileChannel channel, int offset, int len, String file) throws IOException { + public TranslogCheckedContainer(byte[] bytes) { this.checksum = new CRC32(); this.contentLength = new AtomicLong(); - this.file = file; - - byte[] bytes = Channels.readFromFileChannel(channel, offset, len); updateFromBytes(bytes, 0, bytes.length); } diff --git a/server/src/main/java/org/opensearch/index/translog/TranslogHeader.java b/server/src/main/java/org/opensearch/index/translog/TranslogHeader.java index af6ebcf7b7c66..8067cccb772a2 100644 --- a/server/src/main/java/org/opensearch/index/translog/TranslogHeader.java +++ b/server/src/main/java/org/opensearch/index/translog/TranslogHeader.java @@ -46,6 +46,7 @@ import java.io.EOFException; import java.io.IOException; +import java.io.OutputStream; import java.nio.channels.FileChannel; import java.nio.file.Path; @@ -213,12 +214,10 @@ private static void tryReportOldVersionError(final Path path, final FileChannel /** * Writes this header with the latest format into the file channel */ - void write(final FileChannel channel, boolean fsync) throws IOException { + void write(final OutputStream outputStream) throws IOException { // This output is intentionally not closed because closing it will close the FileChannel. @SuppressWarnings({ "IOResourceOpenedButNotSafelyClosed", "resource" }) - final BufferedChecksumStreamOutput out = new BufferedChecksumStreamOutput( - new OutputStreamStreamOutput(java.nio.channels.Channels.newOutputStream(channel)) - ); + final BufferedChecksumStreamOutput out = new BufferedChecksumStreamOutput(new OutputStreamStreamOutput(outputStream)); CodecUtil.writeHeader(new OutputStreamDataOutput(out), TRANSLOG_CODEC, CURRENT_VERSION); // Write uuid final BytesRef uuid = new BytesRef(translogUUID); @@ -229,6 +228,11 @@ void write(final FileChannel channel, boolean fsync) throws IOException { // Checksum header out.writeInt((int) out.getChecksum()); out.flush(); + } + + void write(final FileChannel channel, boolean fsync) throws IOException { + OutputStream outputStream = java.nio.channels.Channels.newOutputStream(channel); + write(outputStream); if (fsync == true) { channel.force(true); } diff --git a/server/src/main/java/org/opensearch/index/translog/TranslogReader.java b/server/src/main/java/org/opensearch/index/translog/TranslogReader.java index c4a4fb7a460a0..9ea3328587645 100644 --- a/server/src/main/java/org/opensearch/index/translog/TranslogReader.java +++ b/server/src/main/java/org/opensearch/index/translog/TranslogReader.java @@ -33,6 +33,7 @@ package org.opensearch.index.translog; import org.apache.lucene.store.AlreadyClosedException; +import org.opensearch.common.Nullable; import org.opensearch.common.io.Channels; import org.opensearch.common.util.io.IOUtils; import org.opensearch.index.seqno.SequenceNumbers; @@ -59,6 +60,11 @@ public class TranslogReader extends BaseTranslogReader implements Closeable { private final Checkpoint checkpoint; protected final AtomicBoolean closed = new AtomicBoolean(false); + @Nullable + private final Long translogChecksum; + @Nullable + private final Long checkpointChecksum; + /** * Create a translog writer against the specified translog file channel. 
* @@ -67,11 +73,34 @@ public class TranslogReader extends BaseTranslogReader implements Closeable { * @param path the path to the translog * @param header the header of the translog file */ - TranslogReader(final Checkpoint checkpoint, final FileChannel channel, final Path path, final TranslogHeader header) { + TranslogReader( + final Checkpoint checkpoint, + final FileChannel channel, + final Path path, + final TranslogHeader header, + final Long translogChecksum + ) throws IOException { super(checkpoint.generation, channel, path, header); this.length = checkpoint.offset; this.totalOperations = checkpoint.numOps; this.checkpoint = checkpoint; + this.translogChecksum = translogChecksum; + this.checkpointChecksum = (translogChecksum != null) ? calculateCheckpointChecksum(checkpoint, path) : null; + } + + private static Long calculateCheckpointChecksum(Checkpoint checkpoint, Path path) throws IOException { + TranslogCheckedContainer checkpointCheckedContainer = new TranslogCheckedContainer( + Checkpoint.createCheckpointBytes(path.getParent().resolve(Translog.CHECKPOINT_FILE_NAME), checkpoint) + ); + return checkpointCheckedContainer.getChecksum(); + } + + public Long getTranslogChecksum() { + return translogChecksum; + } + + public Long getCheckpointChecksum() { + return checkpointChecksum; } /** @@ -87,7 +116,7 @@ public class TranslogReader extends BaseTranslogReader implements Closeable { public static TranslogReader open(final FileChannel channel, final Path path, final Checkpoint checkpoint, final String translogUUID) throws IOException { final TranslogHeader header = TranslogHeader.read(translogUUID, path, channel); - return new TranslogReader(checkpoint, channel, path, header); + return new TranslogReader(checkpoint, channel, path, header, null); } /** @@ -115,9 +144,9 @@ TranslogReader closeIntoTrimmedReader(long aboveSeqNo, ChannelFactory channelFac IOUtils.fsync(checkpointFile.getParent(), true); - newReader = new TranslogReader(newCheckpoint, channel, path, header); + newReader = new TranslogReader(newCheckpoint, channel, path, header, translogChecksum); } else { - newReader = new TranslogReader(checkpoint, channel, path, header); + newReader = new TranslogReader(checkpoint, channel, path, header, translogChecksum); } toCloseOnFailure = null; return newReader; diff --git a/server/src/main/java/org/opensearch/index/translog/TranslogWriter.java b/server/src/main/java/org/opensearch/index/translog/TranslogWriter.java index e19aece60adc0..e7b08b1dda3d2 100644 --- a/server/src/main/java/org/opensearch/index/translog/TranslogWriter.java +++ b/server/src/main/java/org/opensearch/index/translog/TranslogWriter.java @@ -37,6 +37,7 @@ import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefIterator; +import org.opensearch.common.Nullable; import org.opensearch.common.SuppressForbidden; import org.opensearch.common.bytes.BytesArray; import org.opensearch.common.bytes.BytesReference; @@ -54,6 +55,7 @@ import org.opensearch.index.seqno.SequenceNumbers; import org.opensearch.index.shard.ShardId; +import java.io.ByteArrayOutputStream; import java.io.Closeable; import java.io.IOException; import java.nio.ByteBuffer; @@ -110,6 +112,9 @@ public class TranslogWriter extends BaseTranslogReader implements Closeable { private final Map> seenSequenceNumbers; + @Nullable + private final TranslogCheckedContainer translogCheckedContainer; + private final Boolean remoteTranslogEnabled; private TranslogWriter( @@ -126,6 +131,7 @@ 
private TranslogWriter( final TragicExceptionHolder tragedy, final LongConsumer persistedSequenceNumberConsumer, final BigArrays bigArrays, + TranslogCheckedContainer translogCheckedContainer, Boolean remoteTranslogEnabled ) throws IOException { super(initialCheckpoint.generation, channel, path, header); @@ -151,6 +157,7 @@ private TranslogWriter( this.bigArrays = bigArrays; this.seenSequenceNumbers = Assertions.ENABLED ? new HashMap<>() : null; this.tragedy = tragedy; + this.translogCheckedContainer = translogCheckedContainer; this.remoteTranslogEnabled = remoteTranslogEnabled; } @@ -179,6 +186,12 @@ public static TranslogWriter create( checkpointChannel = channelFactory.open(checkpointFile, StandardOpenOption.WRITE); final TranslogHeader header = new TranslogHeader(translogUUID, primaryTerm); header.write(channel, !Boolean.TRUE.equals(remoteTranslogEnabled)); + TranslogCheckedContainer translogCheckedContainer = null; + if (Boolean.TRUE.equals(remoteTranslogEnabled)) { + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + header.write(byteArrayOutputStream); + translogCheckedContainer = new TranslogCheckedContainer(byteArrayOutputStream.toByteArray()); + } final Checkpoint checkpoint = Checkpoint.emptyTranslogCheckpoint( header.sizeInBytes(), fileGeneration, @@ -214,6 +227,7 @@ public static TranslogWriter create( tragedy, persistedSequenceNumberConsumer, bigArrays, + translogCheckedContainer, remoteTranslogEnabled ); } catch (Exception exception) { @@ -438,7 +452,13 @@ public TranslogReader closeIntoReader() throws IOException { closeWithTragicEvent(ex); throw ex; } - return new TranslogReader(getLastSyncedCheckpoint(), channel, path, header); + return new TranslogReader( + getLastSyncedCheckpoint(), + channel, + path, + header, + (translogCheckedContainer != null) ? translogCheckedContainer.getChecksum() : null + ); } else { throw new AlreadyClosedException( "translog [" + getGeneration() + "] is already closed (path [" + path + "]", @@ -571,6 +591,9 @@ private void writeAndReleaseOps(ReleasableBytesReference toWrite) throws IOExcep while (currentBytesConsumed != current.length) { int nBytesToWrite = Math.min(current.length - currentBytesConsumed, ioBuffer.remaining()); ioBuffer.put(current.bytes, current.offset + currentBytesConsumed, nBytesToWrite); + if (translogCheckedContainer != null) { + translogCheckedContainer.updateFromBytes(current.bytes, current.offset + currentBytesConsumed, nBytesToWrite); + } currentBytesConsumed += nBytesToWrite; if (ioBuffer.hasRemaining() == false) { ioBuffer.flip(); diff --git a/server/src/main/java/org/opensearch/index/translog/checked/package-info.java b/server/src/main/java/org/opensearch/index/translog/checked/package-info.java deleted file mode 100644 index ddb235fdbedce..0000000000000 --- a/server/src/main/java/org/opensearch/index/translog/checked/package-info.java +++ /dev/null @@ -1,10 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ - -/** Contains checksum related utilities for translog files */ -package org.opensearch.index.translog.checked; diff --git a/server/src/main/java/org/opensearch/index/translog/transfer/BlobStoreTransferService.java b/server/src/main/java/org/opensearch/index/translog/transfer/BlobStoreTransferService.java index d9feb1a832681..974e8af42b939 100644 --- a/server/src/main/java/org/opensearch/index/translog/transfer/BlobStoreTransferService.java +++ b/server/src/main/java/org/opensearch/index/translog/transfer/BlobStoreTransferService.java @@ -16,12 +16,22 @@ import org.opensearch.common.blobstore.BlobMetadata; import org.opensearch.common.blobstore.BlobPath; import org.opensearch.common.blobstore.BlobStore; +import org.opensearch.common.blobstore.VerifyingMultiStreamBlobContainer; +import org.opensearch.common.blobstore.stream.write.WriteContext; +import org.opensearch.common.blobstore.stream.write.WritePriority; +import org.opensearch.common.blobstore.transfer.RemoteTransferContainer; +import org.opensearch.common.blobstore.transfer.stream.OffsetRangeFileInputStream; +import org.opensearch.index.translog.ChannelFactory; import org.opensearch.index.translog.transfer.FileSnapshot.TransferFileSnapshot; import org.opensearch.threadpool.ThreadPool; import java.io.IOException; import java.io.InputStream; +import java.nio.channels.FileChannel; +import java.nio.file.StandardOpenOption; import java.util.List; +import java.util.Map; +import java.util.Objects; import java.util.Set; import static org.opensearch.common.blobstore.BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC; @@ -44,18 +54,18 @@ public BlobStoreTransferService(BlobStore blobStore, ThreadPool threadPool) { } @Override - public void uploadBlobAsync( - String threadpoolName, + public void uploadBlob( + String threadPoolName, final TransferFileSnapshot fileSnapshot, Iterable remoteTransferPath, - ActionListener listener + ActionListener listener, + WritePriority writePriority ) { assert remoteTransferPath instanceof BlobPath; BlobPath blobPath = (BlobPath) remoteTransferPath; - threadPool.executor(threadpoolName).execute(ActionRunnable.wrap(listener, l -> { - try (InputStream inputStream = fileSnapshot.inputStream()) { - blobStore.blobContainer(blobPath) - .writeBlobAtomic(fileSnapshot.getName(), inputStream, fileSnapshot.getContentLength(), true); + threadPool.executor(threadPoolName).execute(ActionRunnable.wrap(listener, l -> { + try { + uploadBlob(fileSnapshot, blobPath, writePriority); l.onResponse(fileSnapshot); } catch (Exception e) { logger.error(() -> new ParameterizedMessage("Failed to upload blob {}", fileSnapshot.getName()), e); @@ -65,14 +75,84 @@ public void uploadBlobAsync( } @Override - public void uploadBlob(final TransferFileSnapshot fileSnapshot, Iterable remoteTransferPath) throws IOException { - assert remoteTransferPath instanceof BlobPath; + public void uploadBlob(final TransferFileSnapshot fileSnapshot, Iterable remoteTransferPath, WritePriority writePriority) + throws IOException { BlobPath blobPath = (BlobPath) remoteTransferPath; try (InputStream inputStream = fileSnapshot.inputStream()) { blobStore.blobContainer(blobPath).writeBlobAtomic(fileSnapshot.getName(), inputStream, fileSnapshot.getContentLength(), true); } } + @Override + public void uploadBlobs( + Set fileSnapshots, + final Map blobPaths, + ActionListener listener, + WritePriority writePriority + ) { + fileSnapshots.forEach(fileSnapshot -> { + BlobPath blobPath = blobPaths.get(fileSnapshot.getPrimaryTerm()); + if (!(blobStore.blobContainer(blobPath) 
instanceof VerifyingMultiStreamBlobContainer)) { + uploadBlob(ThreadPool.Names.TRANSLOG_TRANSFER, fileSnapshot, blobPath, listener, writePriority); + } else { + uploadBlob(fileSnapshot, listener, blobPath, writePriority); + } + }); + + } + + private void uploadBlob( + TransferFileSnapshot fileSnapshot, + ActionListener listener, + BlobPath blobPath, + WritePriority writePriority + ) { + + try { + ChannelFactory channelFactory = FileChannel::open; + long contentLength; + try (FileChannel channel = channelFactory.open(fileSnapshot.getPath(), StandardOpenOption.READ)) { + contentLength = channel.size(); + } + RemoteTransferContainer remoteTransferContainer = new RemoteTransferContainer( + fileSnapshot.getName(), + fileSnapshot.getName(), + contentLength, + true, + writePriority, + (size, position) -> new OffsetRangeFileInputStream(fileSnapshot.getPath(), size, position), + Objects.requireNonNull(fileSnapshot.getChecksum()), + blobStore.blobContainer(blobPath) instanceof VerifyingMultiStreamBlobContainer + ); + ActionListener completionListener = ActionListener.wrap(resp -> listener.onResponse(fileSnapshot), ex -> { + logger.error(() -> new ParameterizedMessage("Failed to upload blob {}", fileSnapshot.getName()), ex); + listener.onFailure(new FileTransferException(fileSnapshot, ex)); + }); + + completionListener = ActionListener.runBefore(completionListener, () -> { + try { + remoteTransferContainer.close(); + } catch (Exception e) { + logger.warn("Error occurred while closing streams", e); + } + }); + + WriteContext writeContext = remoteTransferContainer.createWriteContext(); + ((VerifyingMultiStreamBlobContainer) blobStore.blobContainer(blobPath)).asyncBlobUpload(writeContext, completionListener); + + } catch (Exception e) { + logger.error(() -> new ParameterizedMessage("Failed to upload blob {}", fileSnapshot.getName()), e); + listener.onFailure(new FileTransferException(fileSnapshot, e)); + } finally { + try { + fileSnapshot.close(); + } catch (IOException e) { + logger.warn("Error while closing TransferFileSnapshot", e); + } + } + + } + @Override public InputStream downloadBlob(Iterable path, String fileName) throws IOException { return blobStore.blobContainer((BlobPath) path).readBlob(fileName); diff --git a/server/src/main/java/org/opensearch/index/translog/transfer/FileSnapshot.java b/server/src/main/java/org/opensearch/index/translog/transfer/FileSnapshot.java index 239ef7c3c9300..dcec94edd694f 100644 --- a/server/src/main/java/org/opensearch/index/translog/transfer/FileSnapshot.java +++ b/server/src/main/java/org/opensearch/index/translog/transfer/FileSnapshot.java @@ -107,10 +107,12 @@ public void close() throws IOException { public static class TransferFileSnapshot extends FileSnapshot { private final long primaryTerm; + private Long checksum; - public TransferFileSnapshot(Path path, long primaryTerm) throws IOException { + public TransferFileSnapshot(Path path, long primaryTerm, Long checksum) throws IOException { super(path); this.primaryTerm = primaryTerm; + this.checksum = checksum; } public TransferFileSnapshot(String name, byte[] content, long primaryTerm) throws IOException { @@ -118,6 +120,10 @@ public TransferFileSnapshot(String name, byte[] content, long primaryTerm) throw this.primaryTerm = primaryTerm; } + public Long getChecksum() { + return checksum; + } + public long getPrimaryTerm() { return primaryTerm; } @@ -148,8 +154,8 @@ public static final class TranslogFileSnapshot extends TransferFileSnapshot { private final long generation; - public 
TranslogFileSnapshot(long primaryTerm, long generation, Path path) throws IOException { - super(path, primaryTerm); + public TranslogFileSnapshot(long primaryTerm, long generation, Path path, Long checksum) throws IOException { + super(path, primaryTerm, checksum); this.generation = generation; } @@ -185,8 +191,9 @@ public static final class CheckpointFileSnapshot extends TransferFileSnapshot { private final long minTranslogGeneration; - public CheckpointFileSnapshot(long primaryTerm, long generation, long minTranslogGeneration, Path path) throws IOException { - super(path, primaryTerm); + public CheckpointFileSnapshot(long primaryTerm, long generation, long minTranslogGeneration, Path path, Long checksum) + throws IOException { + super(path, primaryTerm, checksum); this.minTranslogGeneration = minTranslogGeneration; this.generation = generation; } diff --git a/server/src/main/java/org/opensearch/index/translog/transfer/TransferService.java b/server/src/main/java/org/opensearch/index/translog/transfer/TransferService.java index 0e6496042e3d8..a240fd38cda11 100644 --- a/server/src/main/java/org/opensearch/index/translog/transfer/TransferService.java +++ b/server/src/main/java/org/opensearch/index/translog/transfer/TransferService.java @@ -10,11 +10,14 @@ import org.opensearch.action.ActionListener; import org.opensearch.common.blobstore.BlobMetadata; +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.blobstore.stream.write.WritePriority; import org.opensearch.index.translog.transfer.FileSnapshot.TransferFileSnapshot; import java.io.IOException; import java.io.InputStream; import java.util.List; +import java.util.Map; import java.util.Set; /** @@ -26,25 +29,40 @@ public interface TransferService { /** * Uploads the {@link TransferFileSnapshot} async, once the upload is complete the callback is invoked - * @param threadpoolName threadpool type which will be used to upload blobs asynchronously + * @param threadPoolName threadpool type which will be used to upload blobs asynchronously * @param fileSnapshot the file snapshot to upload * @param remotePath the remote path where upload should be made * @param listener the callback to be invoked once upload completes successfully/fails */ - void uploadBlobAsync( - String threadpoolName, + void uploadBlob( + String threadPoolName, final TransferFileSnapshot fileSnapshot, Iterable remotePath, - ActionListener listener + ActionListener listener, + WritePriority writePriority ); + /** + * Uploads multiple {@link TransferFileSnapshot}, once the upload is complete the callback is invoked + * @param fileSnapshots the file snapshots to upload + * @param blobPaths Primary term to {@link BlobPath} map + * @param listener the callback to be invoked once uploads complete successfully/fail + */ + void uploadBlobs( + Set fileSnapshots, + final Map blobPaths, + ActionListener listener, + WritePriority writePriority + ) throws Exception; + /** * Uploads the {@link TransferFileSnapshot} blob * @param fileSnapshot the file snapshot to upload * @param remotePath the remote path where upload should be made + * @param writePriority Priority by which content needs to be written. 
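One design note on the widened snapshot constructors: the batch API takes a primary-term-to-path map so that every file lands in the directory of the term that produced it, with the path computed once per term rather than once per file. A hypothetical sketch of that grouping (Snapshot and basePath stand in for the real types):

```java
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class BlobPathGrouping {
    static class Snapshot {
        final String name;
        final long primaryTerm;
        Snapshot(String name, long primaryTerm) { this.name = name; this.primaryTerm = primaryTerm; }
    }

    // Map each distinct primary term to its remote path once, then hand the whole
    // batch to a single uploadBlobs-style call instead of one upload per file.
    static Map<Long, String> groupByPrimaryTerm(List<Snapshot> toUpload, String basePath) {
        Map<Long, String> blobPaths = new HashMap<>();
        for (Snapshot s : toUpload) {
            blobPaths.putIfAbsent(s.primaryTerm, basePath + "/" + s.primaryTerm);
        }
        return blobPaths;
    }
}
```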
* @throws IOException the exception while transferring the data */ - void uploadBlob(final TransferFileSnapshot fileSnapshot, Iterable remotePath) throws IOException; + void uploadBlob(final TransferFileSnapshot fileSnapshot, Iterable remotePath, WritePriority writePriority) throws IOException; void deleteBlobs(Iterable path, List fileNames) throws IOException; diff --git a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogCheckpointTransferSnapshot.java b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogCheckpointTransferSnapshot.java index b34c2282e874f..10dec13c81e1a 100644 --- a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogCheckpointTransferSnapshot.java +++ b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogCheckpointTransferSnapshot.java @@ -145,8 +145,14 @@ public TranslogCheckpointTransferSnapshot build() throws IOException { Path checkpointPath = location.resolve(checkpointGenFileNameMapper.apply(readerGeneration)); generations.add(readerGeneration); translogTransferSnapshot.add( - new TranslogFileSnapshot(readerPrimaryTerm, readerGeneration, translogPath), - new CheckpointFileSnapshot(readerPrimaryTerm, checkpointGeneration, minTranslogGeneration, checkpointPath) + new TranslogFileSnapshot(readerPrimaryTerm, readerGeneration, translogPath, reader.getTranslogChecksum()), + new CheckpointFileSnapshot( + readerPrimaryTerm, + checkpointGeneration, + minTranslogGeneration, + checkpointPath, + reader.getCheckpointChecksum() + ) ); if (readerGeneration > highestGeneration) { highestGeneration = readerGeneration; diff --git a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java index 54140226e3744..0c63a7ffe4cce 100644 --- a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java +++ b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java @@ -17,6 +17,7 @@ import org.opensearch.common.SetOnce; import org.opensearch.common.blobstore.BlobMetadata; import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.blobstore.stream.write.WritePriority; import org.opensearch.common.bytes.BytesReference; import org.opensearch.common.io.VersionedCodecStreamWrapper; import org.opensearch.common.io.stream.BytesStreamOutput; @@ -119,14 +120,16 @@ public boolean transferSnapshot(TransferSnapshot transferSnapshot, TranslogTrans }), latch ); + Map blobPathMap = new HashMap<>(); toUpload.forEach( - fileSnapshot -> transferService.uploadBlobAsync( - ThreadPool.Names.TRANSLOG_TRANSFER, - fileSnapshot, - remoteDataTransferPath.add(String.valueOf(fileSnapshot.getPrimaryTerm())), - latchedActionListener + fileSnapshot -> blobPathMap.put( + fileSnapshot.getPrimaryTerm(), + remoteDataTransferPath.add(String.valueOf(fileSnapshot.getPrimaryTerm())) ) ); + + transferService.uploadBlobs(toUpload, blobPathMap, latchedActionListener, WritePriority.HIGH); + try { if (latch.await(TRANSFER_TIMEOUT_IN_MILLIS, TimeUnit.MILLISECONDS) == false) { Exception ex = new TimeoutException("Timed out waiting for transfer of snapshot " + transferSnapshot + " to complete"); @@ -139,7 +142,7 @@ public boolean transferSnapshot(TransferSnapshot transferSnapshot, TranslogTrans throw ex; } if (exceptionList.isEmpty()) { - transferService.uploadBlob(prepareMetadata(transferSnapshot), remoteMetadataTransferPath); + 
transferService.uploadBlob(prepareMetadata(transferSnapshot), remoteMetadataTransferPath, WritePriority.HIGH); translogTransferListener.onUploadComplete(transferSnapshot); return true; } else { diff --git a/server/src/main/java/org/opensearch/indices/recovery/RecoverySourceHandler.java b/server/src/main/java/org/opensearch/indices/recovery/RecoverySourceHandler.java index 5e278f06cfb8f..0b343fb0b0871 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RecoverySourceHandler.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RecoverySourceHandler.java @@ -835,7 +835,7 @@ void finalizeRecovery(long targetLocalCheckpoint, long trimAboveSeqNo, ActionLis } else { // Force round of segment replication to update its checkpoint to primary's if (shard.indexSettings().isSegRepEnabled()) { - recoveryTarget.forceSegmentFileSync(); + cancellableThreads.execute(recoveryTarget::forceSegmentFileSync); } } stopWatch.stop(); diff --git a/server/src/main/java/org/opensearch/indices/replication/OngoingSegmentReplications.java b/server/src/main/java/org/opensearch/indices/replication/OngoingSegmentReplications.java index 050a66bedcf5d..f32887175d4f3 100644 --- a/server/src/main/java/org/opensearch/indices/replication/OngoingSegmentReplications.java +++ b/server/src/main/java/org/opensearch/indices/replication/OngoingSegmentReplications.java @@ -120,11 +120,7 @@ void startSegmentCopy(GetSegmentFilesRequest request, ActionListener listener) { + // Short circuit when no files to transfer + if (request.getFilesToFetch().isEmpty()) { + // before completion, alert the primary of the replica's state. + shard.updateVisibleCheckpointForShard(request.getTargetAllocationId(), copyState.getCheckpoint()); + listener.onResponse(new GetSegmentFilesResponse(Collections.emptyList())); + return; + } + final ReplicationTimer timer = new ReplicationTimer(); if (isReplicating.compareAndSet(false, true) == false) { throw new OpenSearchException("Replication to {} is already running.", shard.shardId()); @@ -159,10 +168,11 @@ public synchronized void sendFiles(GetSegmentFilesRequest request, ActionListene sendFileStep.whenComplete(r -> { try { + shard.updateVisibleCheckpointForShard(allocationId, copyState.getCheckpoint()); future.onResponse(new GetSegmentFilesResponse(List.of(storeFileMetadata))); + timer.stop(); } finally { IOUtils.close(resources); - timer.stop(); logger.trace( "[replication id {}] Source node completed sending files to target node [{}], timing: {}", request.getReplicationId(), diff --git a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationState.java b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationState.java index 7a996ec7aedaa..226ccbaf01afa 100644 --- a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationState.java +++ b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationState.java @@ -45,8 +45,7 @@ public enum Stage { GET_CHECKPOINT_INFO((byte) 3), FILE_DIFF((byte) 4), GET_FILES((byte) 5), - FINALIZE_REPLICATION((byte) 6), - CANCELLED((byte) 7); + FINALIZE_REPLICATION((byte) 6); private static final Stage[] STAGES = new Stage[Stage.values().length]; @@ -245,14 +244,6 @@ public void setStage(Stage stage) { overallTimer.stop(); timingData.put("OVERALL", overallTimer.time()); break; - case CANCELLED: - if (this.stage == Stage.DONE) { - throw new IllegalStateException("can't move replication to Cancelled state from Done."); - } - this.stage = Stage.CANCELLED; - overallTimer.stop(); - 
timingData.put("OVERALL", overallTimer.time()); - break; default: throw new IllegalArgumentException("unknown SegmentReplicationState.Stage [" + stage + "]"); } diff --git a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java index 9d724d6cc9dcf..f59a7c2368689 100644 --- a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java +++ b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java @@ -12,16 +12,15 @@ import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexFormatTooNewException; import org.apache.lucene.index.IndexFormatTooOldException; -import org.apache.lucene.index.SegmentInfos; +import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.BufferedChecksumIndexInput; import org.apache.lucene.store.ByteBuffersDataInput; import org.apache.lucene.store.ByteBuffersIndexInput; import org.apache.lucene.store.ChecksumIndexInput; -import org.opensearch.ExceptionsHelper; +import org.opensearch.OpenSearchCorruptionException; import org.opensearch.OpenSearchException; import org.opensearch.action.ActionListener; import org.opensearch.action.StepListener; -import org.opensearch.common.CheckedConsumer; import org.opensearch.common.UUIDs; import org.opensearch.common.bytes.BytesReference; import org.opensearch.common.lucene.Lucene; @@ -39,6 +38,8 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.Arrays; +import java.util.List; +import java.util.Locale; /** * Represents the target of a replication event. @@ -102,17 +103,11 @@ public SegmentReplicationTarget retryCopy() { @Override public String description() { - return "Segment replication from " + source.toString(); + return String.format(Locale.ROOT, "Id:[%d] Shard:[%s] Source:[%s]", getId(), shardId(), source.getDescription()); } @Override public void notifyListener(ReplicationFailedException e, boolean sendShardFailure) { - // Cancellations still are passed to our SegmentReplicationListener as failures, if we have failed because of cancellation - // update the stage. - final Throwable cancelledException = ExceptionsHelper.unwrap(e, CancellableThreads.ExecutionCancelledException.class); - if (cancelledException != null) { - state.setStage(SegmentReplicationState.Stage.CANCELLED); - } listener.onFailure(state(), e, sendShardFailure); } @@ -141,141 +136,115 @@ public void writeFileChunk( /** * Start the Replication event. + * * @param listener {@link ActionListener} listener. */ public void startReplication(ActionListener listener) { cancellableThreads.setOnCancel((reason, beforeCancelEx) -> { - // This method only executes when cancellation is triggered by this node and caught by a call to checkForCancel, - // SegmentReplicationSource does not share CancellableThreads. - final CancellableThreads.ExecutionCancelledException executionCancelledException = - new CancellableThreads.ExecutionCancelledException("replication was canceled reason [" + reason + "]"); - notifyListener(new ReplicationFailedException("Segment replication failed", executionCancelledException), false); - throw executionCancelledException; + throw new CancellableThreads.ExecutionCancelledException("replication was canceled reason [" + reason + "]"); }); + // TODO: Remove this useless state. 
state.setStage(SegmentReplicationState.Stage.REPLICATING); final StepListener checkpointInfoListener = new StepListener<>(); final StepListener getFilesListener = new StepListener<>(); - final StepListener finalizeListener = new StepListener<>(); - cancellableThreads.checkForCancel(); - logger.trace("[shardId {}] Replica starting replication [id {}]", shardId().getId(), getId()); + logger.trace(new ParameterizedMessage("Starting Replication Target: {}", description())); // Get list of files to copy from this checkpoint. state.setStage(SegmentReplicationState.Stage.GET_CHECKPOINT_INFO); + cancellableThreads.checkForCancel(); source.getCheckpointMetadata(getId(), checkpoint, checkpointInfoListener); - checkpointInfoListener.whenComplete(checkpointInfo -> getFiles(checkpointInfo, getFilesListener), listener::onFailure); - getFilesListener.whenComplete( - response -> finalizeReplication(checkpointInfoListener.result(), finalizeListener), - listener::onFailure - ); - finalizeListener.whenComplete(r -> listener.onResponse(null), listener::onFailure); + checkpointInfoListener.whenComplete(checkpointInfo -> { + final List filesToFetch = getFiles(checkpointInfo); + state.setStage(SegmentReplicationState.Stage.GET_FILES); + cancellableThreads.checkForCancel(); + source.getSegmentFiles(getId(), checkpointInfo.getCheckpoint(), filesToFetch, indexShard, getFilesListener); + }, listener::onFailure); + + getFilesListener.whenComplete(response -> { + finalizeReplication(checkpointInfoListener.result()); + listener.onResponse(null); + }, listener::onFailure); } - private void getFiles(CheckpointInfoResponse checkpointInfo, StepListener getFilesListener) - throws IOException { + private List getFiles(CheckpointInfoResponse checkpointInfo) throws IOException { cancellableThreads.checkForCancel(); state.setStage(SegmentReplicationState.Stage.FILE_DIFF); final Store.RecoveryDiff diff = Store.segmentReplicationDiff(checkpointInfo.getMetadataMap(), indexShard.getSegmentMetadataMap()); - logger.trace("Replication diff for checkpoint {} {}", checkpointInfo.getCheckpoint(), diff); + logger.trace(() -> new ParameterizedMessage("Replication diff for checkpoint {} {}", checkpointInfo.getCheckpoint(), diff)); /* * Segments are immutable. So if the replica has any segments with the same name that differ from the one in the incoming * snapshot from source that means the local copy of the segment has been corrupted/changed in some way and we throw an * IllegalStateException to fail the shard */ if (diff.different.isEmpty() == false) { - IllegalStateException illegalStateException = new IllegalStateException( + throw new OpenSearchCorruptionException( new ParameterizedMessage( "Shard {} has local copies of segments that differ from the primary {}", indexShard.shardId(), diff.different ).getFormattedMessage() ); - ReplicationFailedException rfe = new ReplicationFailedException( - indexShard.shardId(), - "different segment files", - illegalStateException - ); - fail(rfe, true); - throw rfe; } for (StoreFileMetadata file : diff.missing) { state.getIndex().addFileDetail(file.name(), file.length(), false); } - // always send a req even if not fetching files so the primary can clear the copyState for this shard. 
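Editorial note: the restructured startReplication above flattens the flow into a linear chain, namely fetch checkpoint metadata, compute the missing-file diff, fetch those files, then finalize, with a cancellation check before each remote call. A standalone sketch of that shape, using CompletableFuture purely for illustration (the patch itself chains OpenSearch StepListeners):

```java
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicBoolean;

public class ReplicationChainSketch {
    static final AtomicBoolean cancelled = new AtomicBoolean(false);

    static void checkForCancel() {
        if (cancelled.get()) {
            throw new IllegalStateException("replication was canceled");
        }
    }

    // checkpoint metadata -> missing-file diff -> fetch -> finalize.
    static CompletableFuture<Void> startReplication() {
        return CompletableFuture
            .supplyAsync(() -> {
                checkForCancel();
                return getCheckpointMetadata();         // remote call 1
            })
            .thenApply(metadata -> {
                checkForCancel();
                return getSegmentFiles(diff(metadata)); // remote call 2, only missing files
            })
            .thenAccept(files -> finalizeReplication()); // commit locally
    }

    static List<String> getCheckpointMetadata() { return List.of("_0.si", "_0.cfe"); }
    static List<String> diff(List<String> metadata) { return metadata; }
    static List<String> getSegmentFiles(List<String> missing) { return missing; }
    static void finalizeReplication() {}
}
```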
- state.setStage(SegmentReplicationState.Stage.GET_FILES); - cancellableThreads.checkForCancel(); - source.getSegmentFiles(getId(), checkpointInfo.getCheckpoint(), diff.missing, indexShard, getFilesListener); + return diff.missing; } - private void finalizeReplication(CheckpointInfoResponse checkpointInfoResponse, ActionListener listener) { + private void finalizeReplication(CheckpointInfoResponse checkpointInfoResponse) throws OpenSearchCorruptionException { // TODO: Refactor the logic so that finalize doesn't have to be invoked for remote store as source if (source instanceof RemoteStoreReplicationSource) { - ActionListener.completeWith(listener, () -> { - state.setStage(SegmentReplicationState.Stage.FINALIZE_REPLICATION); - return null; - }); + state.setStage(SegmentReplicationState.Stage.FINALIZE_REPLICATION); return; } - ActionListener.completeWith(listener, () -> { - cancellableThreads.checkForCancel(); - state.setStage(SegmentReplicationState.Stage.FINALIZE_REPLICATION); - Store store = null; + cancellableThreads.checkForCancel(); + state.setStage(SegmentReplicationState.Stage.FINALIZE_REPLICATION); + Store store = null; + try { + store = store(); + store.incRef(); + store.buildInfosFromBytes( + multiFileWriter.getTempFileNames(), + checkpointInfoResponse.getInfosBytes(), + checkpointInfoResponse.getCheckpoint().getSegmentsGen(), + indexShard::finalizeReplication + ); + } catch (CorruptIndexException | IndexFormatTooNewException | IndexFormatTooOldException ex) { + // this is a fatal exception at this stage. + // this means we transferred files from the remote that have not be checksummed and they are + // broken. We have to clean up this shard entirely, remove all files and bubble it up to the + // source shard since this index might be broken there as well? The Source can handle this and checks + // its content on disk if possible. try { - store = store(); - store.incRef(); - CheckedConsumer finalizeReplication = indexShard::finalizeReplication; - store.buildInfosFromBytes( - multiFileWriter.getTempFileNames(), - checkpointInfoResponse.getInfosBytes(), - checkpointInfoResponse.getCheckpoint().getSegmentsGen(), - finalizeReplication - ); - } catch (CorruptIndexException | IndexFormatTooNewException | IndexFormatTooOldException ex) { - // this is a fatal exception at this stage. - // this means we transferred files from the remote that have not be checksummed and they are - // broken. We have to clean up this shard entirely, remove all files and bubble it up to the - // source shard since this index might be broken there as well? The Source can handle this and checks - // its content on disk if possible. try { - try { - store.removeCorruptionMarker(); - } finally { - Lucene.cleanLuceneIndex(store.directory()); // clean up and delete all files - } - } catch (Exception e) { - logger.debug("Failed to clean lucene index", e); - ex.addSuppressed(e); - } - ReplicationFailedException rfe = new ReplicationFailedException( - indexShard.shardId(), - "failed to clean after replication", - ex - ); - fail(rfe, true); - throw rfe; - } catch (OpenSearchException ex) { - /* - Ignore closed replication target as it can happen due to index shard closed event in a separate thread. 
- In such scenario, ignore the exception - */ - assert cancellableThreads.isCancelled() : "Replication target closed but segment replication not cancelled"; - logger.info("Replication target closed", ex); - } catch (Exception ex) { - ReplicationFailedException rfe = new ReplicationFailedException( - indexShard.shardId(), - "failed to clean after replication", - ex - ); - fail(rfe, true); - throw rfe; - } finally { - if (store != null) { - store.decRef(); + store.removeCorruptionMarker(); + } finally { + Lucene.cleanLuceneIndex(store.directory()); // clean up and delete all files } + } catch (Exception e) { + logger.debug("Failed to clean lucene index", e); + ex.addSuppressed(e); } - return null; - }); + throw new OpenSearchCorruptionException(ex); + } catch (AlreadyClosedException ex) { + // In this case the shard is closed at some point while updating the reader. + // This can happen when the engine is closed in a separate thread. + logger.warn("Shard is already closed, closing replication"); + } catch (OpenSearchException ex) { + /* + Ignore closed replication target as it can happen due to index shard closed event in a separate thread. + In such scenario, ignore the exception + */ + assert cancellableThreads.isCancelled() : "Replication target closed but segment replication not cancelled"; + } catch (Exception ex) { + throw new OpenSearchCorruptionException(ex); + } finally { + if (store != null) { + store.decRef(); + } + } } /** @@ -288,10 +257,15 @@ private ChecksumIndexInput toIndexInput(byte[] input) { ); } + /** + * Trigger a cancellation, this method will not close the target a subsequent call to #fail is required from target service. + */ @Override - protected void onCancel(String reason) { - cancellableThreads.cancel(reason); - source.cancel(); - multiFileWriter.close(); + public void cancel(String reason) { + if (finished.get() == false) { + logger.trace(new ParameterizedMessage("Cancelling replication for target {}", description())); + cancellableThreads.cancel(reason); + source.cancel(); + } } } diff --git a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTargetService.java b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTargetService.java index a7e0c0ec887ab..467f499056345 100644 --- a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTargetService.java +++ b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTargetService.java @@ -11,14 +11,15 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; -import org.opensearch.ExceptionsHelper; +import org.opensearch.OpenSearchCorruptionException; import org.opensearch.action.ActionListener; +import org.opensearch.action.support.ChannelActionListener; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.Nullable; import org.opensearch.common.settings.Settings; -import org.opensearch.common.util.CancellableThreads; +import org.opensearch.common.util.concurrent.AbstractRunnable; import org.opensearch.common.util.concurrent.ConcurrentCollections; import org.opensearch.index.shard.IndexEventListener; import org.opensearch.index.shard.IndexShard; @@ -43,10 +44,8 @@ import org.opensearch.transport.TransportResponse; import org.opensearch.transport.TransportService; -import java.io.IOException; import java.util.Map; 
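Editorial note: the rewritten finalizeReplication above separates three failure classes: index-corruption exceptions (clean the store, then surface an OpenSearchCorruptionException), AlreadyClosedException (benign, the shard closed concurrently), and anything else (also wrapped as corruption), always balancing the store refcount. A standalone sketch of that catch ordering, with stand-in exception and store types:

```java
// Stand-in exceptions; the patch uses CorruptIndexException and friends,
// AlreadyClosedException, and OpenSearchCorruptionException.
class CorruptIndexLike extends RuntimeException {}
class AlreadyClosedLike extends RuntimeException {}
class CorruptionWrapper extends RuntimeException {
    CorruptionWrapper(Throwable cause) { super(cause); }
}

public class FinalizeSketch {
    interface StoreHandle {
        void incRef();
        void decRef();
        void commitInfos() throws Exception;
        void removeCorruptionMarker();
        void cleanIndex();
    }

    static void finalizeReplication(StoreHandle store) {
        store.incRef();
        try {
            store.commitInfos();
        } catch (CorruptIndexLike ex) {
            // Fatal: files arrived that fail checksum validation. Remove the
            // marker and wipe the local files before bubbling up corruption.
            try {
                store.removeCorruptionMarker();
            } finally {
                store.cleanIndex();
            }
            throw new CorruptionWrapper(ex);
        } catch (AlreadyClosedLike ex) {
            // Shard closed in another thread while updating the reader: not a failure.
        } catch (Exception ex) {
            throw new CorruptionWrapper(ex);
        } finally {
            store.decRef(); // always balance the reference taken above
        }
    }
}
```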
import java.util.Optional; -import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicLong; import static org.opensearch.indices.replication.SegmentReplicationSourceService.Actions.UPDATE_VISIBLE_CHECKPOINT; @@ -145,7 +144,7 @@ public SegmentReplicationTargetService( @Override public void beforeIndexShardClosed(ShardId shardId, @Nullable IndexShard indexShard, Settings indexSettings) { if (indexShard != null && indexShard.indexSettings().isSegRepEnabled()) { - onGoingReplications.cancelForShard(shardId, "shard closed"); + onGoingReplications.requestCancel(indexShard.shardId(), "Shard closing"); latestReceivedCheckpoint.remove(shardId); } } @@ -167,7 +166,7 @@ public void afterIndexShardStarted(IndexShard indexShard) { @Override public void shardRoutingChanged(IndexShard indexShard, @Nullable ShardRouting oldRouting, ShardRouting newRouting) { if (oldRouting != null && indexShard.indexSettings().isSegRepEnabled() && oldRouting.primary() == false && newRouting.primary()) { - onGoingReplications.cancelForShard(indexShard.shardId(), "shard has been promoted to primary"); + onGoingReplications.requestCancel(indexShard.shardId(), "Shard has been promoted to primary"); latestReceivedCheckpoint.remove(indexShard.shardId()); } } @@ -224,11 +223,13 @@ public synchronized void onNewCheckpoint(final ReplicationCheckpoint receivedChe if (ongoingReplicationTarget != null) { if (ongoingReplicationTarget.getCheckpoint().getPrimaryTerm() < receivedCheckpoint.getPrimaryTerm()) { logger.trace( - "Cancelling ongoing replication from old primary with primary term {}", - ongoingReplicationTarget.getCheckpoint().getPrimaryTerm() + () -> new ParameterizedMessage( + "Cancelling ongoing replication {} from old primary with primary term {}", + ongoingReplicationTarget.description(), + ongoingReplicationTarget.getCheckpoint().getPrimaryTerm() + ) ); - onGoingReplications.cancel(ongoingReplicationTarget.getId(), "Cancelling stuck target after new primary"); - completedReplications.put(replicaShard.shardId(), ongoingReplicationTarget); + ongoingReplicationTarget.cancel("Cancelling stuck target after new primary"); } else { logger.trace( () -> new ParameterizedMessage( @@ -268,21 +269,20 @@ public void onReplicationFailure( ReplicationFailedException e, boolean sendShardFailure ) { - logger.trace( + logger.error( () -> new ParameterizedMessage( "[shardId {}] [replication id {}] Replication failed, timing data: {}", replicaShard.shardId().getId(), state.getReplicationId(), state.getTimingData() - ) + ), + e ); if (sendShardFailure == true) { - logger.error("replication failure", e); - replicaShard.failShard("replication failure", e); + failShard(e, replicaShard); } } }); - } } else { logger.trace( @@ -292,6 +292,11 @@ public void onReplicationFailure( } protected void updateVisibleCheckpoint(long replicationId, IndexShard replicaShard) { + // Update replication checkpoint on source via transport call only supported for remote store integration. 
For node- + // node communication, checkpoint update is piggy-backed to GET_SEGMENT_FILES transport call + if (replicaShard.indexSettings().isRemoteStoreEnabled() == false) { + return; + } ShardRouting primaryShard = clusterService.state().routingTable().shardRoutingTable(replicaShard.shardId()).primaryShard(); final UpdateVisibleCheckpointRequest request = new UpdateVisibleCheckpointRequest( @@ -305,7 +310,14 @@ protected void updateVisibleCheckpoint(long replicationId, IndexShard replicaSha final TransportRequestOptions options = TransportRequestOptions.builder() .withTimeout(recoverySettings.internalActionTimeout()) .build(); - logger.debug("Updating replication checkpoint to {}", request.getCheckpoint()); + logger.trace( + () -> new ParameterizedMessage( + "Updating Primary shard that replica {}-{} is synced to checkpoint {}", + replicaShard.shardId(), + replicaShard.routingEntry().allocationId(), + request.getCheckpoint() + ) + ); RetryableTransportClient transportClient = new RetryableTransportClient( transportService, getPrimaryNode(primaryShard), @@ -315,19 +327,23 @@ protected void updateVisibleCheckpoint(long replicationId, IndexShard replicaSha final ActionListener listener = new ActionListener<>() { @Override public void onResponse(Void unused) { - logger.debug( - "Successfully updated replication checkpoint {} for replica {}", - replicaShard.shardId(), - request.getCheckpoint() + logger.trace( + () -> new ParameterizedMessage( + "Successfully updated replication checkpoint {} for replica {}", + replicaShard.shardId(), + request.getCheckpoint() + ) ); } @Override public void onFailure(Exception e) { logger.error( - "Failed to update visible checkpoint for replica {}, {}: {}", - replicaShard.shardId(), - request.getCheckpoint(), + () -> new ParameterizedMessage( + "Failed to update visible checkpoint for replica {}, {}:", + replicaShard.shardId(), + request.getCheckpoint() + ), e ); } @@ -350,6 +366,13 @@ private DiscoveryNode getPrimaryNode(ShardRouting primaryShard) { protected boolean processLatestReceivedCheckpoint(IndexShard replicaShard, Thread thread) { final ReplicationCheckpoint latestPublishedCheckpoint = latestReceivedCheckpoint.get(replicaShard.shardId()); if (latestPublishedCheckpoint != null && latestPublishedCheckpoint.isAheadOf(replicaShard.getLatestReplicationCheckpoint())) { + logger.trace( + () -> new ParameterizedMessage( + "Processing latest received checkpoint for shard {} {}", + replicaShard.shardId(), + latestPublishedCheckpoint + ) + ); Runnable runnable = () -> onNewCheckpoint(latestReceivedCheckpoint.get(replicaShard.shardId()), replicaShard); // Checks if we are using same thread and forks if necessary. if (thread == Thread.currentThread()) { @@ -381,7 +404,15 @@ public SegmentReplicationTarget startReplication(final IndexShard indexShard, fi // pkg-private for integration tests void startReplication(final SegmentReplicationTarget target) { - final long replicationId = onGoingReplications.start(target, recoverySettings.activityTimeout()); + final long replicationId; + try { + replicationId = onGoingReplications.startSafe(target, recoverySettings.activityTimeout()); + } catch (ReplicationFailedException e) { + // replication already running for shard. 
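Editorial note: per the comment above, the transport-based visible-checkpoint update only applies to remote-store shards; for node-to-node replication the update is piggy-backed on the GET_SEGMENT_FILES call, so updateVisibleCheckpoint now returns early. A trivial sketch of the guard, with a hypothetical stand-in type:

```java
public class CheckpointUpdateSketch {
    interface ReplicaView {
        boolean remoteStoreEnabled();
    }

    // Only remote-store replicas push the checkpoint over a dedicated transport
    // action; node-to-node replicas already reported it during GET_SEGMENT_FILES,
    // so there is nothing to send.
    static void updateVisibleCheckpoint(ReplicaView replica) {
        if (replica.remoteStoreEnabled() == false) {
            return;
        }
        sendUpdateVisibleCheckpointRequest(replica);
    }

    static void sendUpdateVisibleCheckpointRequest(ReplicaView replica) { /* transport call */ }
}
```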
+ target.fail(e, false); + return; + } + logger.trace(() -> new ParameterizedMessage("Added new replication to collection {}", target.description())); threadPool.generic().execute(new ReplicationRunner(replicationId)); } @@ -410,7 +441,7 @@ default void onFailure(ReplicationState state, ReplicationFailedException e, boo /** * Runnable implementation to trigger a replication event. */ - private class ReplicationRunner implements Runnable { + private class ReplicationRunner extends AbstractRunnable { final long replicationId; @@ -419,47 +450,49 @@ public ReplicationRunner(long replicationId) { } @Override - public void run() { + public void onFailure(Exception e) { + try (final ReplicationRef ref = onGoingReplications.get(replicationId)) { + logger.error(() -> new ParameterizedMessage("Error during segment replication, {}", ref.get().description()), e); + } + onGoingReplications.fail(replicationId, new ReplicationFailedException("Unexpected Error during replication", e), false); + } + + @Override + public void doRun() { start(replicationId); } } private void start(final long replicationId) { + final SegmentReplicationTarget target; try (ReplicationRef replicationRef = onGoingReplications.get(replicationId)) { // This check is for handling edge cases where the reference is removed before the ReplicationRunner is started by the // threadpool. if (replicationRef == null) { return; } - SegmentReplicationTarget target = onGoingReplications.getTarget(replicationId); - replicationRef.get().startReplication(new ActionListener<>() { - @Override - public void onResponse(Void o) { - onGoingReplications.markAsDone(replicationId); - if (target.state().getIndex().recoveredFileCount() != 0 && target.state().getIndex().recoveredBytes() != 0) { - completedReplications.put(target.shardId(), target); - } - + target = replicationRef.get(); + } + target.startReplication(new ActionListener<>() { + @Override + public void onResponse(Void o) { + logger.trace(() -> new ParameterizedMessage("Finished replicating {} marking as done.", target.description())); + onGoingReplications.markAsDone(replicationId); + if (target.state().getIndex().recoveredFileCount() != 0 && target.state().getIndex().recoveredBytes() != 0) { + completedReplications.put(target.shardId(), target); } + } - @Override - public void onFailure(Exception e) { - Throwable cause = ExceptionsHelper.unwrapCause(e); - if (cause instanceof CancellableThreads.ExecutionCancelledException) { - if (onGoingReplications.getTarget(replicationId) != null) { - IndexShard indexShard = onGoingReplications.getTarget(replicationId).indexShard(); - // if the target still exists in our collection, the primary initiated the cancellation, fail the replication - // but do not fail the shard. Cancellations initiated by this node from Index events will be removed with - // onGoingReplications.cancel and not appear in the collection when this listener resolves. 
- onGoingReplications.fail(replicationId, new ReplicationFailedException(indexShard, cause), false); - completedReplications.put(target.shardId(), target); - } - } else { - onGoingReplications.fail(replicationId, new ReplicationFailedException("Segment Replication failed", e), false); - } + @Override + public void onFailure(Exception e) { + logger.error(() -> new ParameterizedMessage("Exception replicating {} marking as failed.", target.description()), e); + if (e instanceof OpenSearchCorruptionException) { + onGoingReplications.fail(replicationId, new ReplicationFailedException("Store corruption during replication", e), true); + return; } - }); - } + onGoingReplications.fail(replicationId, new ReplicationFailedException("Segment Replication failed", e), false); + } + }); } private class FileChunkTransportRequestHandler implements TransportRequestHandler { @@ -484,27 +517,31 @@ public void messageReceived(final FileChunkRequest request, TransportChannel cha private class ForceSyncTransportRequestHandler implements TransportRequestHandler { @Override public void messageReceived(final ForceSyncRequest request, TransportChannel channel, Task task) throws Exception { - assert indicesService != null; - final IndexShard indexShard = indicesService.getShardOrNull(request.getShardId()); - // Proceed with round of segment replication only when it is allowed - if (indexShard == null || indexShard.getReplicationEngine().isEmpty()) { - logger.info("Ignore force segment replication sync as it is not allowed"); - channel.sendResponse(TransportResponse.Empty.INSTANCE); - return; - } + forceReplication(request, new ChannelActionListener<>(channel, Actions.FORCE_SYNC, request)); + } + } + + private void forceReplication(ForceSyncRequest request, ActionListener listener) { + final ShardId shardId = request.getShardId(); + assert indicesService != null; + final IndexShard indexShard = indicesService.getShardOrNull(shardId); + // Proceed with round of segment replication only when it is allowed + if (indexShard == null || indexShard.getReplicationEngine().isEmpty()) { + listener.onResponse(TransportResponse.Empty.INSTANCE); + } else { startReplication(indexShard, new SegmentReplicationTargetService.SegmentReplicationListener() { @Override public void onReplicationDone(SegmentReplicationState state) { - logger.trace( - () -> new ParameterizedMessage( - "[shardId {}] [replication id {}] Replication complete to {}, timing data: {}", - indexShard.shardId().getId(), - state.getReplicationId(), - indexShard.getLatestReplicationCheckpoint(), - state.getTimingData() - ) - ); try { + logger.trace( + () -> new ParameterizedMessage( + "[shardId {}] [replication id {}] Force replication Sync complete to {}, timing data: {}", + shardId, + state.getReplicationId(), + indexShard.getLatestReplicationCheckpoint(), + state.getTimingData() + ) + ); // Promote engine type for primary target if (indexShard.recoveryState().getPrimary() == true) { indexShard.resetToWriteableEngine(); @@ -512,33 +549,40 @@ public void onReplicationDone(SegmentReplicationState state) { // Update the replica's checkpoint on primary's replication tracker. 
updateVisibleCheckpoint(state.getReplicationId(), indexShard); } - channel.sendResponse(TransportResponse.Empty.INSTANCE); - } catch (InterruptedException | TimeoutException | IOException e) { - throw new RuntimeException(e); + listener.onResponse(TransportResponse.Empty.INSTANCE); + } catch (Exception e) { + logger.error("Error while marking replication completed", e); + listener.onFailure(e); } } @Override public void onReplicationFailure(SegmentReplicationState state, ReplicationFailedException e, boolean sendShardFailure) { - logger.trace( + logger.error( () -> new ParameterizedMessage( "[shardId {}] [replication id {}] Replication failed, timing data: {}", indexShard.shardId().getId(), state.getReplicationId(), state.getTimingData() - ) + ), + e ); - if (sendShardFailure == true) { - indexShard.failShard("replication failure", e); - } - try { - channel.sendResponse(e); - } catch (IOException ex) { - throw new RuntimeException(ex); + if (sendShardFailure) { + failShard(e, indexShard); } + listener.onFailure(e); } }); } } + private void failShard(ReplicationFailedException e, IndexShard indexShard) { + try { + indexShard.failShard("unrecoverable replication failure", e); + } catch (Exception inner) { + logger.error("Error attempting to fail shard", inner); + e.addSuppressed(inner); + } + } + } diff --git a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationCollection.java b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationCollection.java index e918ac0a79691..c65ef27969154 100644 --- a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationCollection.java +++ b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationCollection.java @@ -70,6 +70,26 @@ public ReplicationCollection(Logger logger, ThreadPool threadPool) { this.threadPool = threadPool; } + /** + * Starts a new target event for a given shard, fails the given target if this shard is already replicating. + * @param target ReplicationTarget to start + * @param activityTimeout timeout for entire replication event + * @return The replication id + */ + public long startSafe(T target, TimeValue activityTimeout) { + synchronized (onGoingTargetEvents) { + final boolean isPresent = onGoingTargetEvents.values() + .stream() + .map(ReplicationTarget::shardId) + .anyMatch(t -> t.equals(target.shardId())); + if (isPresent) { + throw new ReplicationFailedException("Shard " + target.shardId() + " is already replicating"); + } else { + return start(target, activityTimeout); + } + } + } + /** * Starts a new target event for the given shard, source node and state * @@ -234,6 +254,22 @@ public boolean cancelForShard(ShardId shardId, String reason) { return cancelled; } + /** + * Trigger cancel on the target but do not remove it from the collection. + * This is intended to be called to ensure replication events are removed from the collection + * only when the target has closed. 
+ * + * @param shardId {@link ShardId} shard events to cancel + * @param reason {@link String} reason for cancellation + */ + public void requestCancel(ShardId shardId, String reason) { + for (T value : onGoingTargetEvents.values()) { + if (value.shardId().equals(shardId)) { + value.cancel(reason); + } + } + } + /** * Get target for shard * diff --git a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java index 4d75ff4896706..344a4040be119 100644 --- a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java +++ b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java @@ -173,6 +173,7 @@ public void cancel(String reason) { public void fail(ReplicationFailedException e, boolean sendShardFailure) { if (finished.compareAndSet(false, true)) { try { + logger.debug("marking target " + description() + " as failed", e); notifyListener(e, sendShardFailure); } finally { try { diff --git a/server/src/main/java/org/opensearch/ingest/ConfigurationUtils.java b/server/src/main/java/org/opensearch/ingest/ConfigurationUtils.java index dc41d1985fe42..b212e67a59434 100644 --- a/server/src/main/java/org/opensearch/ingest/ConfigurationUtils.java +++ b/server/src/main/java/org/opensearch/ingest/ConfigurationUtils.java @@ -67,6 +67,7 @@ public final class ConfigurationUtils { public static final String TAG_KEY = "tag"; public static final String DESCRIPTION_KEY = "description"; + public static final String IGNORE_FAILURE_KEY = "ignore_failure"; private ConfigurationUtils() {} @@ -194,7 +195,7 @@ public static String readOptionalStringOrIntProperty( return readStringOrInt(processorType, processorTag, propertyName, value); } - public static Boolean readBooleanProperty( + public static boolean readBooleanProperty( String processorType, String processorTag, Map configuration, @@ -214,7 +215,7 @@ private static Boolean readBoolean(String processorType, String processorTag, St return null; } if (value instanceof Boolean) { - return (Boolean) value; + return (boolean) value; } throw newConfigurationException( processorType, @@ -530,10 +531,11 @@ public static Processor readProcessor( ) throws Exception { String tag = ConfigurationUtils.readOptionalStringProperty(null, null, config, TAG_KEY); String description = ConfigurationUtils.readOptionalStringProperty(null, tag, config, DESCRIPTION_KEY); + boolean ignoreFailure = ConfigurationUtils.readBooleanProperty(null, null, config, IGNORE_FAILURE_KEY, false); Script conditionalScript = extractConditional(config); Processor.Factory factory = processorFactories.get(type); + if (factory != null) { - boolean ignoreFailure = ConfigurationUtils.readBooleanProperty(null, null, config, "ignore_failure", false); List> onFailureProcessorConfigs = ConfigurationUtils.readOptionalList( null, null, diff --git a/modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/AbstractProcessor.java b/server/src/main/java/org/opensearch/search/pipeline/AbstractProcessor.java similarity index 60% rename from modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/AbstractProcessor.java rename to server/src/main/java/org/opensearch/search/pipeline/AbstractProcessor.java index e62497cb54db5..d197051c5952b 100644 --- a/modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/AbstractProcessor.java +++ 
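Editorial note: the two ReplicationCollection additions above work as a pair: startSafe rejects a second concurrent event for the same shard under the collection lock, and requestCancel asks matching targets to cancel without removing them (removal happens only once the target closes, via a later call to fail). A standalone sketch of both, with simplified stand-in types:

```java
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;

public class ReplicationCollectionSketch {
    record Target(long id, String shardId) {
        void cancel(String reason) { /* trigger cooperative cancellation */ }
    }

    private final Map<Long, Target> onGoing = new ConcurrentHashMap<>();
    private final AtomicLong ids = new AtomicLong();

    // Fails fast if this shard already has an in-flight replication event.
    public long startSafe(String shardId) {
        synchronized (onGoing) {
            boolean present = onGoing.values().stream().anyMatch(t -> t.shardId().equals(shardId));
            if (present) {
                throw new IllegalStateException("Shard " + shardId + " is already replicating");
            }
            long id = ids.incrementAndGet();
            onGoing.put(id, new Target(id, shardId));
            return id;
        }
    }

    // Cancel without removing: the entry stays until the target itself closes.
    public void requestCancel(String shardId, String reason) {
        for (Target t : onGoing.values()) {
            if (t.shardId().equals(shardId)) {
                t.cancel(reason);
            }
        }
    }
}
```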
b/server/src/main/java/org/opensearch/search/pipeline/AbstractProcessor.java @@ -6,20 +6,20 @@ * compatible open source license. */ -package org.opensearch.search.pipeline.common; - -import org.opensearch.search.pipeline.Processor; +package org.opensearch.search.pipeline; /** * Base class for common processor behavior. */ -abstract class AbstractProcessor implements Processor { +public abstract class AbstractProcessor implements Processor { private final String tag; private final String description; + private final boolean ignoreFailure; - protected AbstractProcessor(String tag, String description) { + protected AbstractProcessor(String tag, String description, boolean ignoreFailure) { this.tag = tag; this.description = description; + this.ignoreFailure = ignoreFailure; } @Override @@ -31,4 +31,9 @@ public String getTag() { public String getDescription() { return description; } + + @Override + public boolean isIgnoreFailure() { + return ignoreFailure; + } } diff --git a/server/src/main/java/org/opensearch/search/pipeline/Pipeline.java b/server/src/main/java/org/opensearch/search/pipeline/Pipeline.java index 92826eee5a4f4..5ada11b59556b 100644 --- a/server/src/main/java/org/opensearch/search/pipeline/Pipeline.java +++ b/server/src/main/java/org/opensearch/search/pipeline/Pipeline.java @@ -8,6 +8,8 @@ package org.opensearch.search.pipeline; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.opensearch.action.search.SearchPhaseContext; import org.opensearch.action.search.SearchPhaseResults; import org.opensearch.action.search.SearchRequest; @@ -32,6 +34,9 @@ class Pipeline { public static final String REQUEST_PROCESSORS_KEY = "request_processors"; public static final String RESPONSE_PROCESSORS_KEY = "response_processors"; public static final String PHASE_PROCESSORS_KEY = "phase_results_processors"; + + private static final Logger logger = LogManager.getLogger(Pipeline.class); + private final String id; private final String description; private final Integer version; @@ -132,7 +137,18 @@ SearchRequest transformRequest(SearchRequest request) throws SearchPipelineProce request = processor.processRequest(request); } catch (Exception e) { onRequestProcessorFailed(processor); - throw e; + if (processor.isIgnoreFailure()) { + logger.warn( + "The exception from request processor [" + + processor.getType() + + "] in the search pipeline [" + + id + + "] was ignored", + e + ); + } else { + throw e; + } } finally { long took = TimeUnit.NANOSECONDS.toMillis(relativeTimeSupplier.getAsLong() - start); afterRequestProcessor(processor, took); @@ -161,7 +177,18 @@ SearchResponse transformResponse(SearchRequest request, SearchResponse response) response = processor.processResponse(request, response); } catch (Exception e) { onResponseProcessorFailed(processor); - throw e; + if (processor.isIgnoreFailure()) { + logger.warn( + "The exception from response processor [" + + processor.getType() + + "] in the search pipeline [" + + id + + "] was ignored", + e + ); + } else { + throw e; + } } finally { long took = TimeUnit.NANOSECONDS.toMillis(relativeTimeSupplier.getAsLong() - start); afterResponseProcessor(processor, took); @@ -184,12 +211,27 @@ void runSearchPhaseResultsTransformer( String currentPhase, String nextPhase ) throws SearchPipelineProcessingException { - try { for (SearchPhaseResultsProcessor searchPhaseResultsProcessor : searchPhaseResultsProcessors) { if (currentPhase.equals(searchPhaseResultsProcessor.getBeforePhase().getName()) && 
nextPhase.equals(searchPhaseResultsProcessor.getAfterPhase().getName())) { - searchPhaseResultsProcessor.process(searchPhaseResult, context); + try { + searchPhaseResultsProcessor.process(searchPhaseResult, context); + } catch (Exception e) { + if (searchPhaseResultsProcessor.isIgnoreFailure()) { + logger.warn( + "The exception from search phase results processor [" + + searchPhaseResultsProcessor.getType() + + "] in the search pipeline [" + + id + + "] was ignored", + e + ); + } else { + throw e; + } + } + } } } catch (RuntimeException e) { diff --git a/server/src/main/java/org/opensearch/search/pipeline/PipelineWithMetrics.java b/server/src/main/java/org/opensearch/search/pipeline/PipelineWithMetrics.java index 060894a37e5ed..3e473f070d9c6 100644 --- a/server/src/main/java/org/opensearch/search/pipeline/PipelineWithMetrics.java +++ b/server/src/main/java/org/opensearch/search/pipeline/PipelineWithMetrics.java @@ -22,6 +22,7 @@ import java.util.function.LongSupplier; import static org.opensearch.ingest.ConfigurationUtils.TAG_KEY; +import static org.opensearch.ingest.ConfigurationUtils.IGNORE_FAILURE_KEY; import static org.opensearch.ingest.Pipeline.DESCRIPTION_KEY; import static org.opensearch.ingest.Pipeline.VERSION_KEY; @@ -150,8 +151,11 @@ private static List readProcessors( } Map config = (Map) entry.getValue(); String tag = ConfigurationUtils.readOptionalStringProperty(null, null, config, TAG_KEY); + boolean ignoreFailure = ConfigurationUtils.readBooleanProperty(null, null, config, IGNORE_FAILURE_KEY, false); String description = ConfigurationUtils.readOptionalStringProperty(null, tag, config, DESCRIPTION_KEY); - processors.add(processorFactories.get(type).create(processorFactories, tag, description, config, pipelineContext)); + processors.add( + processorFactories.get(type).create(processorFactories, tag, description, ignoreFailure, config, pipelineContext) + ); if (config.isEmpty() == false) { String processorName = type; if (tag != null) { diff --git a/server/src/main/java/org/opensearch/search/pipeline/Processor.java b/server/src/main/java/org/opensearch/search/pipeline/Processor.java index cc96132479c74..fb33f46acada4 100644 --- a/server/src/main/java/org/opensearch/search/pipeline/Processor.java +++ b/server/src/main/java/org/opensearch/search/pipeline/Processor.java @@ -43,6 +43,11 @@ public interface Processor { */ String getDescription(); + /** + * Gets the setting of ignoreFailure of a processor. + */ + boolean isIgnoreFailure(); + /** * A factory that knows how to construct a processor based on a map of maps. 
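Editorial note: transformRequest, transformResponse, and runSearchPhaseResultsTransformer now all repeat the same catch block: if the processor was registered with ignore_failure, log a warning and continue, otherwise rethrow. One way to read (and potentially factor) that pattern; this helper is a suggestion, not part of the patch:

```java
import java.util.logging.Logger;

public class IgnoreFailureSketch {
    private static final Logger logger = Logger.getLogger("search-pipeline");

    interface Processor {
        String getType();
        boolean isIgnoreFailure();
    }

    interface Step<T> {
        T run() throws Exception;
    }

    // Run one processor step; swallow (but log) the failure when the processor
    // was declared with "ignore_failure": true, otherwise rethrow.
    static <T> T runStep(Processor p, String pipelineId, T lastGood, Step<T> step) throws Exception {
        try {
            return step.run();
        } catch (Exception e) {
            if (p.isIgnoreFailure()) {
                logger.warning("The exception from processor [" + p.getType()
                    + "] in the search pipeline [" + pipelineId + "] was ignored: " + e);
                return lastGood; // keep the last good value and continue the chain
            }
            throw e;
        }
    }
}
```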
*/ @@ -63,6 +68,7 @@ T create( Map> processorFactories, String tag, String description, + boolean ignoreFailure, Map config, PipelineContext pipelineContext ) throws Exception; diff --git a/server/src/main/java/org/opensearch/threadpool/ThreadPool.java b/server/src/main/java/org/opensearch/threadpool/ThreadPool.java index ebc68c288e25a..d9f73a9b41658 100644 --- a/server/src/main/java/org/opensearch/threadpool/ThreadPool.java +++ b/server/src/main/java/org/opensearch/threadpool/ThreadPool.java @@ -46,10 +46,10 @@ import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.util.concurrent.OpenSearchExecutors; -import org.opensearch.core.concurrency.OpenSearchRejectedExecutionException; import org.opensearch.common.util.concurrent.OpenSearchThreadPoolExecutor; import org.opensearch.common.util.concurrent.ThreadContext; import org.opensearch.common.util.concurrent.XRejectedExecutionHandler; +import org.opensearch.core.concurrency.OpenSearchRejectedExecutionException; import org.opensearch.core.xcontent.ToXContentFragment; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.node.Node; diff --git a/server/src/test/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainerTests.java b/server/src/test/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainerTests.java index 1ebec46042247..48940a0d401fd 100644 --- a/server/src/test/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainerTests.java +++ b/server/src/test/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainerTests.java @@ -9,6 +9,7 @@ package org.opensearch.common.blobstore.transfer; import org.junit.Before; +import org.opensearch.common.blobstore.stream.write.WriteContext; import org.opensearch.common.io.InputStreamContainer; import org.opensearch.common.StreamContext; import org.opensearch.common.blobstore.stream.write.WritePriority; @@ -21,6 +22,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardOpenOption; +import java.util.UUID; public class RemoteTransferContainerTests extends OpenSearchTestCase { @@ -140,6 +142,45 @@ public void testTypeOfProvidedStreamsAllCases() throws IOException { testTypeOfProvidedStreams(false); } + public void testCreateWriteContextAllCases() throws IOException { + testCreateWriteContext(true); + testCreateWriteContext(false); + } + + private void testCreateWriteContext(boolean doRemoteDataIntegrityCheck) throws IOException { + String remoteFileName = testFile.getFileName().toString() + UUID.randomUUID(); + Long expectedChecksum = randomLong(); + try ( + RemoteTransferContainer remoteTransferContainer = new RemoteTransferContainer( + testFile.getFileName().toString(), + remoteFileName, + TEST_FILE_SIZE_BYTES, + true, + WritePriority.HIGH, + new RemoteTransferContainer.OffsetRangeInputStreamSupplier() { + @Override + public OffsetRangeInputStream get(long size, long position) throws IOException { + return new OffsetRangeFileInputStream(testFile, size, position); + } + }, + expectedChecksum, + doRemoteDataIntegrityCheck + ) + ) { + WriteContext writeContext = remoteTransferContainer.createWriteContext(); + assertEquals(remoteFileName, writeContext.getFileName()); + assertTrue(writeContext.isFailIfAlreadyExists()); + assertEquals(TEST_FILE_SIZE_BYTES, writeContext.getFileSize()); + assertEquals(WritePriority.HIGH, writeContext.getWritePriority()); + assertEquals(doRemoteDataIntegrityCheck, writeContext.doRemoteDataIntegrityCheck()); + 
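Editorial note: with ignoreFailure threaded through AbstractProcessor's constructor and Processor.Factory#create as shown above, a concrete processor picks the flag up roughly as follows. The interfaces below are trimmed stand-ins for the org.opensearch.search.pipeline types, and UppercaseProcessor itself is hypothetical:

```java
import java.util.Map;

public class ProcessorFactorySketch {
    interface Processor {
        String getType();
        String getTag();
        String getDescription();
        boolean isIgnoreFailure();
    }

    // Mirrors the new three-argument constructor in the patch.
    abstract static class AbstractProcessor implements Processor {
        private final String tag;
        private final String description;
        private final boolean ignoreFailure;

        protected AbstractProcessor(String tag, String description, boolean ignoreFailure) {
            this.tag = tag;
            this.description = description;
            this.ignoreFailure = ignoreFailure;
        }

        public String getTag() { return tag; }
        public String getDescription() { return description; }
        public boolean isIgnoreFailure() { return ignoreFailure; }
    }

    static final class UppercaseProcessor extends AbstractProcessor {
        UppercaseProcessor(String tag, String description, boolean ignoreFailure) {
            super(tag, description, ignoreFailure);
        }

        public String getType() { return "uppercase"; }

        // Factory mirrors the new create(...) parameter order, which now
        // receives ignoreFailure parsed from the pipeline definition.
        static UppercaseProcessor create(String tag, String description,
                                         boolean ignoreFailure, Map<String, Object> config) {
            return new UppercaseProcessor(tag, description, ignoreFailure);
        }
    }
}
```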
if (doRemoteDataIntegrityCheck) { + assertEquals(expectedChecksum, writeContext.getExpectedChecksum()); + } else { + assertNull(writeContext.getExpectedChecksum()); + } + } + } + private void testTypeOfProvidedStreams(boolean isRemoteDataIntegritySupported) throws IOException { try ( RemoteTransferContainer remoteTransferContainer = new RemoteTransferContainer( diff --git a/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java b/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java index 21a9393408529..ed1438cbc3b08 100644 --- a/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java +++ b/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java @@ -13,6 +13,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.FilterDirectory; import org.apache.lucene.tests.store.BaseDirectoryWrapper; +import org.apache.lucene.tests.util.LuceneTestCase; import org.junit.After; import org.opensearch.action.ActionListener; import org.opensearch.cluster.metadata.IndexMetadata; @@ -47,6 +48,7 @@ import static org.mockito.Mockito.when; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REPLICATION_TYPE; +@LuceneTestCase.AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/8549") public class RemoteStoreRefreshListenerTests extends IndexShardTestCase { private IndexShard indexShard; private ClusterService clusterService; diff --git a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java index b3876a8ea8fd0..1f5980ba9bfe0 100644 --- a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java @@ -10,6 +10,7 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.index.SegmentInfos; +import org.apache.lucene.store.AlreadyClosedException; import org.junit.Assert; import org.opensearch.ExceptionsHelper; import org.opensearch.action.ActionListener; @@ -86,11 +87,12 @@ import static org.hamcrest.Matchers.instanceOf; import static org.mockito.Mockito.any; import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; -import static org.mockito.Mockito.spy; public class SegmentReplicationIndexShardTests extends OpenSearchIndexLevelReplicationTestCase { @@ -1156,6 +1158,125 @@ public void getSegmentFiles( } } + public void testCloseShardDuringFinalize() throws Exception { + try (ReplicationGroup shards = createGroup(1, settings, new NRTReplicationEngineFactory())) { + shards.startAll(); + IndexShard primary = shards.getPrimary(); + final IndexShard replica = shards.getReplicas().get(0); + final IndexShard replicaSpy = spy(replica); + + primary.refresh("Test"); + + doThrow(AlreadyClosedException.class).when(replicaSpy).finalizeReplication(any()); + + replicateSegments(primary, List.of(replicaSpy)); + } + } + + public void testCloseShardWhileGettingCheckpoint() throws Exception { + try (ReplicationGroup shards = createGroup(1, settings, new NRTReplicationEngineFactory())) { + shards.startAll(); + IndexShard primary = shards.getPrimary(); + final IndexShard replica = 
shards.getReplicas().get(0); + + primary.refresh("Test"); + + final SegmentReplicationSourceFactory sourceFactory = mock(SegmentReplicationSourceFactory.class); + final SegmentReplicationTargetService targetService = newTargetService(sourceFactory); + SegmentReplicationSource source = new TestReplicationSource() { + + ActionListener listener; + + @Override + public void getCheckpointMetadata( + long replicationId, + ReplicationCheckpoint checkpoint, + ActionListener listener + ) { + // set the listener, we will only fail it once we cancel the source. + this.listener = listener; + // shard is closing while we are copying files. + targetService.beforeIndexShardClosed(replica.shardId, replica, Settings.EMPTY); + } + + @Override + public void getSegmentFiles( + long replicationId, + ReplicationCheckpoint checkpoint, + List filesToFetch, + IndexShard indexShard, + ActionListener listener + ) { + Assert.fail("Unreachable"); + } + + @Override + public void cancel() { + // simulate listener resolving, but only after we have issued a cancel from beforeIndexShardClosed . + final RuntimeException exception = new CancellableThreads.ExecutionCancelledException("retryable action was cancelled"); + listener.onFailure(exception); + } + }; + when(sourceFactory.get(any())).thenReturn(source); + startReplicationAndAssertCancellation(replica, targetService); + + shards.removeReplica(replica); + closeShards(replica); + } + } + + public void testBeforeIndexShardClosedWhileCopyingFiles() throws Exception { + try (ReplicationGroup shards = createGroup(1, settings, new NRTReplicationEngineFactory())) { + shards.startAll(); + IndexShard primary = shards.getPrimary(); + final IndexShard replica = shards.getReplicas().get(0); + + primary.refresh("Test"); + + final SegmentReplicationSourceFactory sourceFactory = mock(SegmentReplicationSourceFactory.class); + final SegmentReplicationTargetService targetService = newTargetService(sourceFactory); + SegmentReplicationSource source = new TestReplicationSource() { + + ActionListener listener; + + @Override + public void getCheckpointMetadata( + long replicationId, + ReplicationCheckpoint checkpoint, + ActionListener listener + ) { + resolveCheckpointInfoResponseListener(listener, primary); + } + + @Override + public void getSegmentFiles( + long replicationId, + ReplicationCheckpoint checkpoint, + List filesToFetch, + IndexShard indexShard, + ActionListener listener + ) { + // set the listener, we will only fail it once we cancel the source. + this.listener = listener; + // shard is closing while we are copying files. + targetService.beforeIndexShardClosed(replica.shardId, replica, Settings.EMPTY); + } + + @Override + public void cancel() { + // simulate listener resolving, but only after we have issued a cancel from beforeIndexShardClosed . 
+ final RuntimeException exception = new CancellableThreads.ExecutionCancelledException("retryable action was cancelled"); + listener.onFailure(exception); + } + }; + when(sourceFactory.get(any())).thenReturn(source); + startReplicationAndAssertCancellation(replica, targetService); + + shards.removeReplica(replica); + closeShards(replica); + } + } + public void testPrimaryCancelsExecution() throws Exception { try (ReplicationGroup shards = createGroup(1, settings, new NRTReplicationEngineFactory())) { shards.startAll(); @@ -1245,7 +1366,6 @@ public void onReplicationDone(SegmentReplicationState state) { @Override public void onReplicationFailure(SegmentReplicationState state, ReplicationFailedException e, boolean sendShardFailure) { assertFalse(sendShardFailure); - assertEquals(SegmentReplicationState.Stage.CANCELLED, state.getStage()); latch.countDown(); } } diff --git a/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java b/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java index c37893877253e..ea092fffa3a9a 100644 --- a/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java +++ b/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java @@ -23,8 +23,12 @@ import org.apache.lucene.tests.util.LuceneTestCase; import org.junit.After; import org.junit.Before; +import org.mockito.Mockito; +import org.opensearch.action.ActionListener; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.UUIDs; +import org.opensearch.common.blobstore.VerifyingMultiStreamBlobContainer; +import org.opensearch.common.blobstore.stream.write.WriteContext; import org.opensearch.common.bytes.BytesReference; import org.opensearch.common.io.VersionedCodecStreamWrapper; import org.opensearch.common.io.stream.BytesStreamOutput; @@ -48,6 +52,10 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.HashMap; +import java.util.Collection; import java.util.concurrent.ExecutorService; import static org.mockito.Mockito.mock; @@ -68,6 +76,7 @@ public class RemoteSegmentStoreDirectoryTests extends IndexShardTestCase { private RemoteStoreMetadataLockManager mdLockManager; private RemoteSegmentStoreDirectory remoteSegmentStoreDirectory; + private TestUploadListener testUploadTracker; private IndexShard indexShard; private SegmentInfos segmentInfos; private ThreadPool threadPool; @@ -89,6 +98,7 @@ public void setup() throws IOException { mdLockManager, threadPool ); + testUploadTracker = new TestUploadListener(); Settings indexSettings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, org.opensearch.Version.CURRENT).build(); ExecutorService executorService = OpenSearchExecutors.newDirectExecutorService(); @@ -503,6 +513,82 @@ public void testCopyFrom() throws IOException { storeDirectory.close(); } + public void testCopyFilesFromMultipart() throws Exception { + String filename = "_100.si"; + populateMetadata(); + remoteSegmentStoreDirectory.init(); + + Directory storeDirectory = LuceneTestCase.newDirectory(); + IndexOutput indexOutput = storeDirectory.createOutput(filename, IOContext.DEFAULT); + indexOutput.writeString("Hello World!"); + CodecUtil.writeFooter(indexOutput); + indexOutput.close(); + storeDirectory.sync(List.of(filename)); + + assertFalse(remoteSegmentStoreDirectory.getSegmentsUploadedToRemoteStore().containsKey(filename)); + + 
VerifyingMultiStreamBlobContainer blobContainer = mock(VerifyingMultiStreamBlobContainer.class); + when(remoteDataDirectory.getBlobContainer()).thenReturn(blobContainer); + Mockito.doAnswer(invocation -> { + ActionListener completionListener = invocation.getArgument(1); + completionListener.onResponse(null); + return null; + }).when(blobContainer).asyncBlobUpload(any(WriteContext.class), any()); + + CountDownLatch latch = new CountDownLatch(1); + ActionListener completionListener = new ActionListener() { + @Override + public void onResponse(Void unused) { + latch.countDown(); + } + + @Override + public void onFailure(Exception e) {} + }; + remoteSegmentStoreDirectory.copyFrom(storeDirectory, filename, IOContext.DEFAULT, completionListener); + assertTrue(latch.await(5000, TimeUnit.SECONDS)); + assertTrue(remoteSegmentStoreDirectory.getSegmentsUploadedToRemoteStore().containsKey(filename)); + storeDirectory.close(); + } + + public void testCopyFilesFromMultipartIOException() throws Exception { + String filename = "_100.si"; + populateMetadata(); + remoteSegmentStoreDirectory.init(); + + Directory storeDirectory = LuceneTestCase.newDirectory(); + IndexOutput indexOutput = storeDirectory.createOutput(filename, IOContext.DEFAULT); + indexOutput.writeString("Hello World!"); + CodecUtil.writeFooter(indexOutput); + indexOutput.close(); + storeDirectory.sync(List.of(filename)); + + assertFalse(remoteSegmentStoreDirectory.getSegmentsUploadedToRemoteStore().containsKey(filename)); + + VerifyingMultiStreamBlobContainer blobContainer = mock(VerifyingMultiStreamBlobContainer.class); + when(remoteDataDirectory.getBlobContainer()).thenReturn(blobContainer); + Mockito.doAnswer(invocation -> { + ActionListener completionListener = invocation.getArgument(1); + completionListener.onFailure(new Exception("Test exception")); + return null; + }).when(blobContainer).asyncBlobUpload(any(WriteContext.class), any()); + CountDownLatch latch = new CountDownLatch(1); + ActionListener completionListener = new ActionListener<>() { + @Override + public void onResponse(Void unused) {} + + @Override + public void onFailure(Exception e) { + latch.countDown(); + } + }; + remoteSegmentStoreDirectory.copyFrom(storeDirectory, filename, IOContext.DEFAULT, completionListener); + assertTrue(latch.await(5000, TimeUnit.SECONDS)); + assertFalse(remoteSegmentStoreDirectory.getSegmentsUploadedToRemoteStore().containsKey(filename)); + + storeDirectory.close(); + } + public void testCopyFromException() throws IOException { String filename = "_100.si"; Directory storeDirectory = LuceneTestCase.newDirectory(); diff --git a/server/src/test/java/org/opensearch/index/store/TestUploadListener.java b/server/src/test/java/org/opensearch/index/store/TestUploadListener.java new file mode 100644 index 0000000000000..a2a61a93371e8 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/store/TestUploadListener.java @@ -0,0 +1,43 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.store; + +import org.opensearch.common.util.UploadListener; + +import java.util.concurrent.ConcurrentHashMap; + +public class TestUploadListener implements UploadListener { + + private final ConcurrentHashMap uploadStatusMap = new ConcurrentHashMap<>(); + + enum UploadStatus { + BEFORE_UPLOAD, + UPLOAD_SUCCESS, + UPLOAD_FAILURE + } + + @Override + public void beforeUpload(String file) { + uploadStatusMap.put(file, UploadStatus.BEFORE_UPLOAD); + } + + @Override + public void onSuccess(String file) { + uploadStatusMap.put(file, UploadStatus.UPLOAD_SUCCESS); + } + + @Override + public void onFailure(String file) { + uploadStatusMap.put(file, UploadStatus.UPLOAD_FAILURE); + } + + public UploadStatus getUploadStatus(String file) { + return uploadStatusMap.get(file); + } +} diff --git a/server/src/test/java/org/opensearch/index/translog/transfer/BlobStoreTransferServiceMockRepositoryTests.java b/server/src/test/java/org/opensearch/index/translog/transfer/BlobStoreTransferServiceMockRepositoryTests.java new file mode 100644 index 0000000000000..1175716679d0f --- /dev/null +++ b/server/src/test/java/org/opensearch/index/translog/transfer/BlobStoreTransferServiceMockRepositoryTests.java @@ -0,0 +1,189 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.translog.transfer; + +import org.mockito.Mockito; +import org.opensearch.action.ActionListener; +import org.opensearch.action.LatchedActionListener; +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.blobstore.BlobStore; +import org.opensearch.common.blobstore.VerifyingMultiStreamBlobContainer; +import org.opensearch.common.blobstore.stream.write.WriteContext; +import org.opensearch.common.blobstore.stream.write.WritePriority; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.Collections; +import java.util.HashMap; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +public class BlobStoreTransferServiceMockRepositoryTests extends OpenSearchTestCase { + + private ThreadPool threadPool; + + private BlobStore blobStore; + + @Override + public void setUp() throws Exception { + super.setUp(); + blobStore = mock(BlobStore.class); + threadPool = new TestThreadPool(getClass().getName()); + } + + public void testUploadBlobs() throws Exception { + Path testFile = createTempFile(); + Files.write(testFile, randomByteArrayOfLength(128), StandardOpenOption.APPEND); + FileSnapshot.TransferFileSnapshot transferFileSnapshot = new FileSnapshot.TransferFileSnapshot( + testFile, + randomNonNegativeLong(), + 0L + ); + + VerifyingMultiStreamBlobContainer blobContainer = mock(VerifyingMultiStreamBlobContainer.class); + Mockito.doAnswer(invocation -> { + ActionListener completionListener = invocation.getArgument(1); + 
+            completionListener.onResponse(null);
+            return null;
+        }).when(blobContainer).asyncBlobUpload(any(WriteContext.class), any());
+        when(blobStore.blobContainer(any(BlobPath.class))).thenReturn(blobContainer);
+
+        TransferService transferService = new BlobStoreTransferService(blobStore, threadPool);
+        CountDownLatch latch = new CountDownLatch(1);
+        AtomicBoolean onResponseCalled = new AtomicBoolean(false);
+        AtomicReference<Exception> exceptionRef = new AtomicReference<>();
+        AtomicReference<FileSnapshot.TransferFileSnapshot> fileSnapshotRef = new AtomicReference<>();
+        transferService.uploadBlobs(Collections.singleton(transferFileSnapshot), new HashMap<>() {
+            {
+                put(transferFileSnapshot.getPrimaryTerm(), new BlobPath().add("sample_path"));
+            }
+        }, new LatchedActionListener<>(new ActionListener<>() {
+            @Override
+            public void onResponse(FileSnapshot.TransferFileSnapshot fileSnapshot) {
+                onResponseCalled.set(true);
+                fileSnapshotRef.set(fileSnapshot);
+            }
+
+            @Override
+            public void onFailure(Exception e) {
+                exceptionRef.set(e);
+            }
+        }, latch), WritePriority.HIGH);
+
+        assertTrue(latch.await(1000, TimeUnit.MILLISECONDS));
+        verify(blobContainer).asyncBlobUpload(any(WriteContext.class), any());
+        assertTrue(onResponseCalled.get());
+        assertEquals(transferFileSnapshot.getPrimaryTerm(), fileSnapshotRef.get().getPrimaryTerm());
+        assertEquals(transferFileSnapshot.getName(), fileSnapshotRef.get().getName());
+        assertNull(exceptionRef.get());
+    }
+
+    public void testUploadBlobsIOException() throws Exception {
+        Path testFile = createTempFile();
+        Files.write(testFile, randomByteArrayOfLength(128), StandardOpenOption.APPEND);
+        FileSnapshot.TransferFileSnapshot transferFileSnapshot = new FileSnapshot.TransferFileSnapshot(
+            testFile,
+            randomNonNegativeLong(),
+            0L
+        );
+
+        VerifyingMultiStreamBlobContainer blobContainer = mock(VerifyingMultiStreamBlobContainer.class);
+        doThrow(new IOException()).when(blobContainer).asyncBlobUpload(any(WriteContext.class), any());
+        when(blobStore.blobContainer(any(BlobPath.class))).thenReturn(blobContainer);
+
+        TransferService transferService = new BlobStoreTransferService(blobStore, threadPool);
+        CountDownLatch latch = new CountDownLatch(1);
+        AtomicBoolean onResponseCalled = new AtomicBoolean(false);
+        AtomicReference<Exception> exceptionRef = new AtomicReference<>();
+        transferService.uploadBlobs(Collections.singleton(transferFileSnapshot), new HashMap<>() {
+            {
+                put(transferFileSnapshot.getPrimaryTerm(), new BlobPath().add("sample_path"));
+            }
+        }, new LatchedActionListener<>(new ActionListener<>() {
+            @Override
+            public void onResponse(FileSnapshot.TransferFileSnapshot fileSnapshot) {
+                onResponseCalled.set(true);
+            }
+
+            @Override
+            public void onFailure(Exception e) {
+                exceptionRef.set(e);
+            }
+        }, latch), WritePriority.HIGH);
+
+        assertTrue(latch.await(1000, TimeUnit.MILLISECONDS));
+        verify(blobContainer).asyncBlobUpload(any(WriteContext.class), any());
+        assertFalse(onResponseCalled.get());
+        assertTrue(exceptionRef.get() instanceof FileTransferException);
+    }
+
+    public void testUploadBlobsUploadFutureCompletedExceptionally() throws Exception {
+        Path testFile = createTempFile();
+        Files.write(testFile, randomByteArrayOfLength(128), StandardOpenOption.APPEND);
+        FileSnapshot.TransferFileSnapshot transferFileSnapshot = new FileSnapshot.TransferFileSnapshot(
+            testFile,
+            randomNonNegativeLong(),
+            0L
+        );
+
+        VerifyingMultiStreamBlobContainer blobContainer = mock(VerifyingMultiStreamBlobContainer.class);
+        Mockito.doAnswer(invocation -> {
+            ActionListener<Void> completionListener = invocation.getArgument(1);
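+            // failing the listener simulates an upload future that completed exceptionally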
+            completionListener.onFailure(new Exception("Test exception"));
+            return null;
+        }).when(blobContainer).asyncBlobUpload(any(WriteContext.class), any());
+
+        when(blobStore.blobContainer(any(BlobPath.class))).thenReturn(blobContainer);
+
+        TransferService transferService = new BlobStoreTransferService(blobStore, threadPool);
+        CountDownLatch latch = new CountDownLatch(1);
+        AtomicBoolean onResponseCalled = new AtomicBoolean(false);
+        AtomicReference<Exception> exceptionRef = new AtomicReference<>();
+        LatchedActionListener<FileSnapshot.TransferFileSnapshot> listener = new LatchedActionListener<>(new ActionListener<>() {
+            @Override
+            public void onResponse(FileSnapshot.TransferFileSnapshot fileSnapshot) {
+                onResponseCalled.set(true);
+            }
+
+            @Override
+            public void onFailure(Exception e) {
+                exceptionRef.set(e);
+            }
+        }, latch);
+        transferService.uploadBlobs(Collections.singleton(transferFileSnapshot), new HashMap<>() {
+            {
+                put(transferFileSnapshot.getPrimaryTerm(), new BlobPath().add("sample_path"));
+            }
+        }, listener, WritePriority.HIGH);
+
+        assertTrue(latch.await(1000, TimeUnit.MILLISECONDS));
+        verify(blobContainer).asyncBlobUpload(any(WriteContext.class), any());
+        assertFalse(onResponseCalled.get());
+        assertTrue(exceptionRef.get() instanceof FileTransferException);
+    }
+
+    @Override
+    public void tearDown() throws Exception {
+        super.tearDown();
+        ThreadPool.terminate(threadPool, 10, TimeUnit.SECONDS);
+    }
+}
diff --git a/server/src/test/java/org/opensearch/index/translog/transfer/BlobStoreTransferServiceTests.java b/server/src/test/java/org/opensearch/index/translog/transfer/BlobStoreTransferServiceTests.java
index 196fbd58c2c20..5502dc3089c62 100644
--- a/server/src/test/java/org/opensearch/index/translog/transfer/BlobStoreTransferServiceTests.java
+++ b/server/src/test/java/org/opensearch/index/translog/transfer/BlobStoreTransferServiceTests.java
@@ -12,6 +12,7 @@
 import org.opensearch.action.LatchedActionListener;
 import org.opensearch.cluster.metadata.RepositoryMetadata;
 import org.opensearch.cluster.service.ClusterService;
+import org.opensearch.common.blobstore.stream.write.WritePriority;
 import org.opensearch.common.settings.ClusterSettings;
 import org.opensearch.common.settings.Settings;
 import org.opensearch.env.Environment;
@@ -49,9 +50,13 @@ public void setUp() throws Exception {
     public void testUploadBlob() throws IOException {
         Path testFile = createTempFile();
         Files.write(testFile, randomByteArrayOfLength(128), StandardOpenOption.APPEND);
-        FileSnapshot.TransferFileSnapshot transferFileSnapshot = new FileSnapshot.TransferFileSnapshot(testFile, randomNonNegativeLong());
+        FileSnapshot.TransferFileSnapshot transferFileSnapshot = new FileSnapshot.TransferFileSnapshot(
+            testFile,
+            randomNonNegativeLong(),
+            null
+        );
         TransferService transferService = new BlobStoreTransferService(repository.blobStore(), threadPool);
-        transferService.uploadBlob(transferFileSnapshot, repository.basePath());
+        transferService.uploadBlob(transferFileSnapshot, repository.basePath(), WritePriority.HIGH);
     }

     public void testUploadBlobFromByteArray() throws IOException {
@@ -61,17 +66,21 @@ public void testUploadBlobFromByteArray() throws IOException {
             1
         );
         TransferService transferService = new BlobStoreTransferService(repository.blobStore(), threadPool);
-        transferService.uploadBlob(transferFileSnapshot, repository.basePath());
+        transferService.uploadBlob(transferFileSnapshot, repository.basePath(), WritePriority.NORMAL);
     }

     public void testUploadBlobAsync() throws IOException, InterruptedException {
         Path testFile = createTempFile();
         Files.write(testFile, randomByteArrayOfLength(128), StandardOpenOption.APPEND);
         AtomicBoolean succeeded = new AtomicBoolean(false);
-        FileSnapshot.TransferFileSnapshot transferFileSnapshot = new FileSnapshot.TransferFileSnapshot(testFile, randomNonNegativeLong());
+        FileSnapshot.TransferFileSnapshot transferFileSnapshot = new FileSnapshot.TransferFileSnapshot(
+            testFile,
+            randomNonNegativeLong(),
+            null
+        );
         CountDownLatch latch = new CountDownLatch(1);
         TransferService transferService = new BlobStoreTransferService(repository.blobStore(), threadPool);
-        transferService.uploadBlobAsync(
+        transferService.uploadBlob(
             ThreadPool.Names.TRANSLOG_TRANSFER,
             transferFileSnapshot,
             repository.basePath(),
@@ -87,7 +96,8 @@ public void onResponse(FileSnapshot.TransferFileSnapshot fileSnapshot) {
             public void onFailure(Exception e) {
                 throw new AssertionError("Failed to perform uploadBlobAsync", e);
             }
-            }, latch)
+            }, latch),
+            WritePriority.HIGH
         );
         assertTrue(latch.await(1000, TimeUnit.MILLISECONDS));
         assertTrue(succeeded.get());
diff --git a/server/src/test/java/org/opensearch/index/translog/transfer/FileSnapshotTests.java b/server/src/test/java/org/opensearch/index/translog/transfer/FileSnapshotTests.java
index 6d2fb3794b107..8d07af5927135 100644
--- a/server/src/test/java/org/opensearch/index/translog/transfer/FileSnapshotTests.java
+++ b/server/src/test/java/org/opensearch/index/translog/transfer/FileSnapshotTests.java
@@ -28,15 +28,15 @@ public void tearDown() throws Exception {
     public void testFileSnapshotPath() throws IOException {
         Path file = createTempFile();
         Files.writeString(file, "hello");
-        fileSnapshot = new FileSnapshot.TransferFileSnapshot(file, 12);
+        fileSnapshot = new FileSnapshot.TransferFileSnapshot(file, 12, null);

         assertFileSnapshotProperties(file);

-        try (FileSnapshot sameFileSnapshot = new FileSnapshot.TransferFileSnapshot(file, 12)) {
+        try (FileSnapshot sameFileSnapshot = new FileSnapshot.TransferFileSnapshot(file, 12, null)) {
             assertEquals(sameFileSnapshot, fileSnapshot);
         }

-        try (FileSnapshot sameFileDiffPTSnapshot = new FileSnapshot.TransferFileSnapshot(file, 34)) {
+        try (FileSnapshot sameFileDiffPTSnapshot = new FileSnapshot.TransferFileSnapshot(file, 34, null)) {
             assertNotEquals(sameFileDiffPTSnapshot, fileSnapshot);
         }
     }
diff --git a/server/src/test/java/org/opensearch/index/translog/transfer/FileTransferTrackerTests.java b/server/src/test/java/org/opensearch/index/translog/transfer/FileTransferTrackerTests.java
index be14e4a7bd380..fd0d44564ef6b 100644
--- a/server/src/test/java/org/opensearch/index/translog/transfer/FileTransferTrackerTests.java
+++ b/server/src/test/java/org/opensearch/index/translog/transfer/FileTransferTrackerTests.java
@@ -34,7 +34,8 @@ public void testOnSuccess() throws IOException {
         try (
             FileSnapshot.TransferFileSnapshot transferFileSnapshot = new FileSnapshot.TransferFileSnapshot(
                 testFile,
-                randomNonNegativeLong()
+                randomNonNegativeLong(),
+                null
             )
         ) {
             fileTransferTracker.onSuccess(transferFileSnapshot);
@@ -58,11 +59,13 @@ public void testOnFailure() throws IOException {
         try (
             FileSnapshot.TransferFileSnapshot transferFileSnapshot = new FileSnapshot.TransferFileSnapshot(
                 testFile,
-                randomNonNegativeLong()
+                randomNonNegativeLong(),
+                null
             );
             FileSnapshot.TransferFileSnapshot transferFileSnapshot2 = new FileSnapshot.TransferFileSnapshot(
                 testFile2,
-                randomNonNegativeLong()
+                randomNonNegativeLong(),
+                null
             )
         ) {

@@ -82,7 +85,8 @@ public void testUploaded() throws IOException {
         try (
             FileSnapshot.TransferFileSnapshot transferFileSnapshot = new FileSnapshot.TransferFileSnapshot(
                 testFile,
-                randomNonNegativeLong()
+                randomNonNegativeLong(),
+                null
             );
         ) {
diff --git a/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java b/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java
index 5f8aa64457896..66cd257299e25 100644
--- a/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java
+++ b/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java
@@ -17,6 +17,7 @@
 import org.opensearch.common.blobstore.BlobPath;
 import org.opensearch.common.blobstore.BlobStore;
 import org.opensearch.common.blobstore.support.PlainBlobMetadata;
+import org.opensearch.common.blobstore.stream.write.WritePriority;
 import org.opensearch.index.Index;
 import org.opensearch.index.shard.ShardId;
 import org.opensearch.index.translog.Translog;
@@ -41,6 +42,8 @@
 import java.util.concurrent.atomic.AtomicInteger;

 import static org.mockito.ArgumentMatchers.anyInt;
+import static org.mockito.ArgumentMatchers.anyMap;
+import static org.mockito.ArgumentMatchers.anySet;
 import static org.mockito.Mockito.any;
 import static org.mockito.Mockito.doAnswer;
 import static org.mockito.Mockito.doNothing;
@@ -80,20 +83,24 @@ public void tearDown() throws Exception {
     }

     @SuppressWarnings("unchecked")
-    public void testTransferSnapshot() throws IOException {
+    public void testTransferSnapshot() throws Exception {
         AtomicInteger fileTransferSucceeded = new AtomicInteger();
         AtomicInteger fileTransferFailed = new AtomicInteger();
         AtomicInteger translogTransferSucceeded = new AtomicInteger();
         AtomicInteger translogTransferFailed = new AtomicInteger();
         doNothing().when(transferService)
-            .uploadBlob(any(TransferFileSnapshot.class), Mockito.eq(remoteBaseTransferPath.add(String.valueOf(primaryTerm))));
+            .uploadBlob(
+                any(TransferFileSnapshot.class),
+                Mockito.eq(remoteBaseTransferPath.add(String.valueOf(primaryTerm))),
+                any(WritePriority.class)
+            );
         doAnswer(invocationOnMock -> {
-            ActionListener<TransferFileSnapshot> listener = (ActionListener<TransferFileSnapshot>) invocationOnMock.getArguments()[3];
-            listener.onResponse((TransferFileSnapshot) invocationOnMock.getArguments()[1]);
+            ActionListener<TransferFileSnapshot> listener = (ActionListener<TransferFileSnapshot>) invocationOnMock.getArguments()[2];
+            Set<TransferFileSnapshot> transferFileSnapshots = (Set<TransferFileSnapshot>) invocationOnMock.getArguments()[0];
+            transferFileSnapshots.forEach(listener::onResponse);
             return null;
-        }).when(transferService)
-            .uploadBlobAsync(any(String.class), any(TransferFileSnapshot.class), any(BlobPath.class), any(ActionListener.class));
+        }).when(transferService).uploadBlobs(anySet(), anyMap(), any(ActionListener.class), any(WritePriority.class));

         FileTransferTracker fileTransferTracker = new FileTransferTracker(new ShardId("index", "indexUUid", 0)) {
             @Override
@@ -145,13 +152,15 @@ public Set<TransferFileSnapshot> getCheckpointFileSnapshots() {
                     primaryTerm,
                     generation,
                     minTranslogGeneration,
-                    createTempFile(Translog.TRANSLOG_FILE_PREFIX + generation, Translog.CHECKPOINT_SUFFIX)
+                    createTempFile(Translog.TRANSLOG_FILE_PREFIX + generation, Translog.CHECKPOINT_SUFFIX),
+                    null
                 ),
                 new CheckpointFileSnapshot(
                     primaryTerm,
                     generation,
                     minTranslogGeneration,
-                    createTempFile(Translog.TRANSLOG_FILE_PREFIX + (generation - 1), Translog.CHECKPOINT_SUFFIX)
+                    createTempFile(Translog.TRANSLOG_FILE_PREFIX + (generation - 1), Translog.CHECKPOINT_SUFFIX),
+                    null
                 )
             );
         } catch (IOException e) {
@@ -166,12 +175,14 @@ public Set<TransferFileSnapshot> getTranslogFileSnapshots() {
                 new TranslogFileSnapshot(
                     primaryTerm,
                     generation,
-                    createTempFile(Translog.TRANSLOG_FILE_PREFIX + generation, Translog.TRANSLOG_FILE_SUFFIX)
+                    createTempFile(Translog.TRANSLOG_FILE_PREFIX + generation, Translog.TRANSLOG_FILE_SUFFIX),
+                    null
                 ),
                 new TranslogFileSnapshot(
                     primaryTerm,
                     generation - 1,
-                    createTempFile(Translog.TRANSLOG_FILE_PREFIX + (generation - 1), Translog.TRANSLOG_FILE_SUFFIX)
+                    createTempFile(Translog.TRANSLOG_FILE_PREFIX + (generation - 1), Translog.TRANSLOG_FILE_SUFFIX),
+                    null
                 )
             );
         } catch (IOException e) {
diff --git a/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationSourceHandlerTests.java b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationSourceHandlerTests.java
index 607f9dd91e35e..b4e9166f377ec 100644
--- a/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationSourceHandlerTests.java
+++ b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationSourceHandlerTests.java
@@ -196,11 +196,13 @@ public void testReplicationAlreadyRunning() throws IOException {
             1
         );

+        final List<StoreFileMetadata> expectedFiles = List.of(new StoreFileMetadata("_0.si", 20, "test", Version.CURRENT.luceneVersion));
+
         final GetSegmentFilesRequest getSegmentFilesRequest = new GetSegmentFilesRequest(
             1L,
             replica.routingEntry().allocationId().getId(),
             replicaDiscoveryNode,
-            Collections.emptyList(),
+            expectedFiles,
             latestReplicationCheckpoint
         );

@@ -224,11 +226,12 @@ public void testCancelReplication() throws IOException, InterruptedException {
             1
         );

+        final List<StoreFileMetadata> expectedFiles = List.of(new StoreFileMetadata("_0.si", 20, "test", Version.CURRENT.luceneVersion));
         final GetSegmentFilesRequest getSegmentFilesRequest = new GetSegmentFilesRequest(
             1L,
             replica.routingEntry().allocationId().getId(),
             replicaDiscoveryNode,
-            Collections.emptyList(),
+            expectedFiles,
             latestReplicationCheckpoint
         );
diff --git a/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetServiceTests.java b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetServiceTests.java
index c632f2843cba2..32203c28c8ed8 100644
--- a/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetServiceTests.java
+++ b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetServiceTests.java
@@ -8,8 +8,8 @@

 package org.opensearch.indices.replication;

+import org.apache.lucene.store.AlreadyClosedException;
 import org.junit.Assert;
-import org.mockito.Mockito;
 import org.opensearch.OpenSearchException;
 import org.opensearch.Version;
 import org.opensearch.action.ActionListener;
@@ -26,6 +27,7 @@
 import org.opensearch.index.replication.TestReplicationSource;
 import org.opensearch.index.shard.IndexShard;
 import org.opensearch.index.shard.IndexShardTestCase;
+import org.opensearch.index.shard.ShardId;
 import org.opensearch.index.store.StoreFileMetadata;
 import org.opensearch.indices.IndicesService;
 import org.opensearch.indices.recovery.ForceSyncRequest;
@@ -49,6 +50,7 @@
 import java.util.concurrent.TimeUnit;

 import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.anyLong;
 import static org.mockito.Mockito.atLeastOnce;
 import static org.mockito.Mockito.doAnswer;
 import static org.mockito.Mockito.doNothing;
@@ -62,7 +64,6 @@
 import static org.mockito.Mockito.verify;
 import static org.mockito.Mockito.verifyNoInteractions;
 import static org.mockito.Mockito.when;
-import static org.opensearch.indices.replication.SegmentReplicationState.Stage.CANCELLED;

 public class SegmentReplicationTargetServiceTests extends IndexShardTestCase {
@@ -240,71 +241,81 @@ public void testAlreadyOnNewCheckpoint() {
         verify(spy, times(0)).startReplication(any(), any());
     }

-    public void testShardAlreadyReplicating() throws InterruptedException {
-        // Create a spy of Target Service so that we can verify invocation of startReplication call with specific checkpoint on it.
-        SegmentReplicationTargetService serviceSpy = spy(sut);
-        final SegmentReplicationTarget target = new SegmentReplicationTarget(
-            replicaShard,
-            replicationSource,
-            mock(SegmentReplicationTargetService.SegmentReplicationListener.class)
-        );
-        // Create a Mockito spy of target to stub response of few method calls.
-        final SegmentReplicationTarget targetSpy = Mockito.spy(target);
-        CountDownLatch latch = new CountDownLatch(1);
-        // Mocking response when startReplication is called on targetSpy we send a new checkpoint to serviceSpy and later reduce countdown
-        // of latch.
-        doAnswer(invocation -> {
-            final ActionListener<Void> listener = invocation.getArgument(0);
-            // a new checkpoint arrives before we've completed.
-            serviceSpy.onNewCheckpoint(aheadCheckpoint, replicaShard);
-            listener.onResponse(null);
-            latch.countDown();
-            return null;
-        }).when(targetSpy).startReplication(any());
-        doNothing().when(targetSpy).onDone();
-
-        // start replication of this shard the first time.
-        serviceSpy.startReplication(targetSpy);
+    public void testShardAlreadyReplicating() {
+        sut.startReplication(replicaShard, mock(SegmentReplicationTargetService.SegmentReplicationListener.class));
+        sut.startReplication(replicaShard, new SegmentReplicationTargetService.SegmentReplicationListener() {
+            @Override
+            public void onReplicationDone(SegmentReplicationState state) {
+                Assert.fail("Should not succeed");
+            }

-        // wait for the new checkpoint to arrive, before the listener completes.
-        latch.await(30, TimeUnit.SECONDS);
-        verify(targetSpy, times(0)).cancel(any());
-        verify(serviceSpy, times(0)).startReplication(eq(replicaShard), any());
+            @Override
+            public void onReplicationFailure(SegmentReplicationState state, ReplicationFailedException e, boolean sendShardFailure) {
+                assertEquals("Shard " + replicaShard.shardId() + " is already replicating", e.getMessage());
+                assertFalse(sendShardFailure);
+            }
+        });
     }

-    public void testOnNewCheckpointFromNewPrimaryCancelOngoingReplication() throws IOException, InterruptedException {
+    public void testOnNewCheckpointFromNewPrimaryCancelOngoingReplication() throws InterruptedException {
         // Create a spy of Target Service so that we can verify invocation of startReplication call with specific checkpoint on it.
         SegmentReplicationTargetService serviceSpy = spy(sut);
+        doNothing().when(serviceSpy).updateVisibleCheckpoint(anyLong(), any());
+        // skip post-replication actions so we can assert execution counts. This will continue to process because the replica's
+        // primary term is not advanced post-replication.
+        doReturn(true).when(serviceSpy).processLatestReceivedCheckpoint(any(), any());
         // Create a Mockito spy of target to stub response of few method calls.
-        final SegmentReplicationTarget targetSpy = spy(
-            new SegmentReplicationTarget(
-                replicaShard,
-                replicationSource,
-                mock(SegmentReplicationTargetService.SegmentReplicationListener.class)
-            )
-        );
         CountDownLatch latch = new CountDownLatch(1);
-        // Mocking response when startReplication is called on targetSpy we send a new checkpoint to serviceSpy and later reduce countdown
-        // of latch.
-        doAnswer(invocation -> {
-            // short circuit loop on new checkpoint request
-            doReturn(null).when(serviceSpy).startReplication(eq(replicaShard), any());
-            // a new checkpoint arrives before we've completed.
-            serviceSpy.onNewCheckpoint(newPrimaryCheckpoint, replicaShard);
-            try {
-                invocation.callRealMethod();
-            } catch (CancellableThreads.ExecutionCancelledException e) {
+        SegmentReplicationSource source = new TestReplicationSource() {
+
+            ActionListener<CheckpointInfoResponse> listener;
+
+            @Override
+            public void getCheckpointMetadata(
+                long replicationId,
+                ReplicationCheckpoint checkpoint,
+                ActionListener<CheckpointInfoResponse> listener
+            ) {
+                // set the listener, we will only fail it once we cancel the source.
+                this.listener = listener;
+                latch.countDown();
+                // do not resolve this listener yet, wait for cancel to hit.
+            }
-                return null;
-            }).when(targetSpy).startReplication(any());
+
+            @Override
+            public void getSegmentFiles(
+                long replicationId,
+                ReplicationCheckpoint checkpoint,
+                List<StoreFileMetadata> filesToFetch,
+                IndexShard indexShard,
+                ActionListener<GetSegmentFilesResponse> listener
+            ) {
+                Assert.fail("Unreachable");
+            }
+
+            @Override
+            public void cancel() {
+                // simulate the listener resolving, but only after we have issued a cancel from beforeIndexShardClosed.
+                final RuntimeException exception = new CancellableThreads.ExecutionCancelledException("retryable action was cancelled");
+                listener.onFailure(exception);
+            }
+        };
+
+        final SegmentReplicationTarget targetSpy = spy(
+            new SegmentReplicationTarget(replicaShard, source, mock(SegmentReplicationTargetService.SegmentReplicationListener.class))
+        );

         // start replication. This adds the target to the ongoing replication collection
         serviceSpy.startReplication(targetSpy);
+
+        // wait until we get to the getCheckpoint step.
         latch.await();
-        // wait for the new checkpoint to arrive, before the listener completes.
-        assertEquals(CANCELLED, targetSpy.state().getStage());
+
+        // a new checkpoint arrives with a higher primary term.
+        serviceSpy.onNewCheckpoint(newPrimaryCheckpoint, replicaShard);
+
+        // ensure the old target is cancelled and a new iteration kicks off.
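+        // cancel() on the test source resolves the pending checkpoint listener with an ExecutionCancelledException, unblocking the target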
+        verify(targetSpy, times(1)).cancel("Cancelling stuck target after new primary");
+        verify(serviceSpy, times(1)).startReplication(eq(replicaShard), any());
     }

@@ -467,6 +478,7 @@ public void testForceSegmentSyncHandler() throws Exception {
     }

     public void testForceSegmentSyncHandlerWithFailure() throws Exception {
+        allowShardFailures();
         IndexShard spyReplicaShard = spy(replicaShard);
         ForceSyncRequest forceSyncRequest = new ForceSyncRequest(1L, 1L, replicaShard.shardId());
         when(indicesService.getShardOrNull(forceSyncRequest.getShardId())).thenReturn(spyReplicaShard);
@@ -488,4 +500,57 @@
         assertTrue(nestedException instanceof IOException);
         assertTrue(nestedException.getMessage().contains("dummy failure"));
     }
+
+    public void testForceSync_ShardDoesNotExist() {
+        ForceSyncRequest forceSyncRequest = new ForceSyncRequest(1L, 1L, new ShardId("no", "", 0));
+        when(indicesService.getShardOrNull(forceSyncRequest.getShardId())).thenReturn(null);
+        transportService.submitRequest(
+            localNode,
+            SegmentReplicationTargetService.Actions.FORCE_SYNC,
+            forceSyncRequest,
+            TransportRequestOptions.builder().withTimeout(TRANSPORT_TIMEOUT).build(),
+            EmptyTransportResponseHandler.INSTANCE_SAME
+        ).txGet();
+    }
+
+    public void testForceSegmentSyncHandlerWithFailure_AlreadyClosedException_swallowed() throws Exception {
+        IndexShard spyReplicaShard = spy(replicaShard);
+        ForceSyncRequest forceSyncRequest = new ForceSyncRequest(1L, 1L, replicaShard.shardId());
+        when(indicesService.getShardOrNull(forceSyncRequest.getShardId())).thenReturn(spyReplicaShard);
+
+        AlreadyClosedException exception = new AlreadyClosedException("shard closed");
+        doThrow(exception).when(spyReplicaShard).finalizeReplication(any());
+
+        // prevent shard failure to avoid test setup assertion
+        doNothing().when(spyReplicaShard).failShard(eq("replication failure"), any());
+        transportService.submitRequest(
+            localNode,
+            SegmentReplicationTargetService.Actions.FORCE_SYNC,
+            forceSyncRequest,
+            TransportRequestOptions.builder().withTimeout(TRANSPORT_TIMEOUT).build(),
+            EmptyTransportResponseHandler.INSTANCE_SAME
+        ).txGet();
+    }
+
+    public void testTargetCancelledBeforeStartInvoked() {
+        final SegmentReplicationTarget target = new SegmentReplicationTarget(
+            replicaShard,
+            mock(SegmentReplicationSource.class),
+            new SegmentReplicationTargetService.SegmentReplicationListener() {
+                @Override
+                public void onReplicationDone(SegmentReplicationState state) {
+                    Assert.fail();
+                }
+
+                @Override
+                public void onReplicationFailure(SegmentReplicationState state, ReplicationFailedException e, boolean sendShardFailure) {
+                    // failures leave state object in last entered stage.
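+                    // the cancel fired before start, so the target never advanced past GET_CHECKPOINT_INFO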
+                    assertEquals(SegmentReplicationState.Stage.GET_CHECKPOINT_INFO, state.getStage());
+                    assertTrue(e.getCause() instanceof CancellableThreads.ExecutionCancelledException);
+                }
+            }
+        );
+        target.cancel("test");
+        sut.startReplication(target);
+    }
 }
diff --git a/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetTests.java b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetTests.java
index ac8904527f7fb..4fb1edb4e496e 100644
--- a/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetTests.java
+++ b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetTests.java
@@ -27,6 +27,7 @@
 import org.junit.Assert;
 import org.mockito.Mockito;
 import org.opensearch.ExceptionsHelper;
+import org.opensearch.OpenSearchCorruptionException;
 import org.opensearch.action.ActionListener;
 import org.opensearch.cluster.metadata.IndexMetadata;
 import org.opensearch.common.settings.Settings;
@@ -373,8 +374,9 @@ public void onResponse(Void replicationResponse) {

             @Override
             public void onFailure(Exception e) {
-                assert (e instanceof ReplicationFailedException);
-                assert (e.getMessage().contains("different segment files"));
+                assertTrue(e instanceof OpenSearchCorruptionException);
+                assertTrue(e.getMessage().contains("has local copies of segments that differ from the primary"));
+                segrepTarget.fail(new ReplicationFailedException(e), false);
             }
         });
     }
diff --git a/server/src/test/java/org/opensearch/recovery/ReplicationCollectionTests.java b/server/src/test/java/org/opensearch/recovery/ReplicationCollectionTests.java
index 75ac1075e8ee0..0bb1bc210d946 100644
--- a/server/src/test/java/org/opensearch/recovery/ReplicationCollectionTests.java
+++ b/server/src/test/java/org/opensearch/recovery/ReplicationCollectionTests.java
@@ -38,12 +38,15 @@
 import org.opensearch.index.shard.IndexShard;
 import org.opensearch.index.shard.ShardId;
 import org.opensearch.index.store.Store;
+import org.opensearch.indices.replication.SegmentReplicationSource;
+import org.opensearch.indices.replication.SegmentReplicationTarget;
 import org.opensearch.indices.replication.common.ReplicationCollection;
 import org.opensearch.indices.replication.common.ReplicationFailedException;
 import org.opensearch.indices.replication.common.ReplicationListener;
 import org.opensearch.indices.replication.common.ReplicationState;
 import org.opensearch.indices.recovery.RecoveryState;
 import org.opensearch.indices.recovery.RecoveryTarget;
+import org.opensearch.indices.replication.common.ReplicationTarget;

 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
@@ -51,6 +54,7 @@

 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.lessThan;
+import static org.mockito.Mockito.mock;

 public class ReplicationCollectionTests extends OpenSearchIndexLevelReplicationTestCase {
     static final ReplicationListener listener = new ReplicationListener() {
@@ -108,7 +112,30 @@ public void onFailure(ReplicationState state, ReplicationFailedException e, bool
         }
     }

-    public void testMultiReplicationsForSingleShard() throws Exception {
+    public void testStartMultipleReplicationsForSingleShard() throws Exception {
+        try (ReplicationGroup shards = createGroup(0)) {
+            shards.startAll();
+            final ReplicationCollection<SegmentReplicationTarget> collection = new ReplicationCollection<>(logger, threadPool);
+            final IndexShard shard = shards.addReplica();
+            shards.recoverReplica(shard);
+            final SegmentReplicationTarget target1 = new SegmentReplicationTarget(
+                shard,
+                mock(SegmentReplicationSource.class),
+                mock(ReplicationListener.class)
+            );
+            final SegmentReplicationTarget target2 = new SegmentReplicationTarget(
+                shard,
+                mock(SegmentReplicationSource.class),
+                mock(ReplicationListener.class)
+            );
+            collection.startSafe(target1, TimeValue.timeValueMinutes(30));
+            assertThrows(ReplicationFailedException.class, () -> collection.startSafe(target2, TimeValue.timeValueMinutes(30)));
+            target1.decRef();
+            target2.decRef();
+        }
+    }
+
+    public void testGetReplicationTargetMultiReplicationsForSingleShard() throws Exception {
         try (ReplicationGroup shards = createGroup(0)) {
             final ReplicationCollection<SegmentReplicationTarget> collection = new ReplicationCollection<>(logger, threadPool);
             final IndexShard shard1 = shards.addReplica();
diff --git a/server/src/test/java/org/opensearch/search/pipeline/SearchPipelineServiceTests.java b/server/src/test/java/org/opensearch/search/pipeline/SearchPipelineServiceTests.java
index 84f39e4bdab42..0103d4c677b00 100644
--- a/server/src/test/java/org/opensearch/search/pipeline/SearchPipelineServiceTests.java
+++ b/server/src/test/java/org/opensearch/search/pipeline/SearchPipelineServiceTests.java
@@ -78,16 +78,16 @@ public class SearchPipelineServiceTests extends OpenSearchTestCase {
     private static final SearchPipelinePlugin DUMMY_PLUGIN = new SearchPipelinePlugin() {
         @Override
         public Map<String, Processor.Factory<SearchRequestProcessor>> getRequestProcessors(Parameters parameters) {
-            return Map.of("foo", (factories, tag, description, config, ctx) -> null);
+            return Map.of("foo", (factories, tag, description, ignoreFailure, config, ctx) -> null);
         }

         public Map<String, Processor.Factory<SearchResponseProcessor>> getResponseProcessors(Parameters parameters) {
-            return Map.of("bar", (factories, tag, description, config, ctx) -> null);
+            return Map.of("bar", (factories, tag, description, ignoreFailure, config, ctx) -> null);
         }

         @Override
         public Map<String, Processor.Factory<SearchPhaseResultsProcessor>> getSearchPhaseResultsProcessors(Parameters parameters) {
-            return Map.of("zoe", (factories, tag, description, config, ctx) -> null);
+            return Map.of("zoe", (factories, tag, description, ignoreFailure, config, ctx) -> null);
         }
     };

@@ -208,39 +208,15 @@ public void testResolveIndexDefaultPipeline() throws Exception {
         assertEquals(5, pipelinedRequest.source().size());
     }

-    private static abstract class FakeProcessor implements Processor {
-        private final String type;
-        private final String tag;
-        private final String description;
-
-        protected FakeProcessor(String type, String tag, String description) {
-            this.type = type;
-            this.tag = tag;
-            this.description = description;
-        }
-
-        @Override
-        public String getType() {
-            return type;
-        }
-
-        @Override
-        public String getTag() {
-            return tag;
-        }
-
-        @Override
-        public String getDescription() {
-            return description;
-        }
-    }
-
-    private static class FakeRequestProcessor extends FakeProcessor implements SearchRequestProcessor {
+    private static class FakeRequestProcessor extends AbstractProcessor implements SearchRequestProcessor {
         private final Consumer<SearchRequest> executor;

-        public FakeRequestProcessor(String type, String tag, String description, Consumer<SearchRequest> executor) {
-            super(type, tag, description);
+        private final String type;
+
+        public FakeRequestProcessor(String type, String tag, String description, boolean ignoreFailure, Consumer<SearchRequest> executor) {
+            super(tag, description, ignoreFailure);
             this.executor = executor;
+            this.type = type;
         }

         @Override
@@ -248,14 +224,30 @@ public SearchRequest processRequest(SearchRequest request) throws Exception {
             executor.accept(request);
             return request;
         }
+
+        /**
+         * Gets the type of processor
+         */
+        @Override
+        public String getType() {
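+            // the type is captured at construction so pipeline stats can attribute executions to this processor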
+            return this.type;
+        }
     }

-    private static class FakeResponseProcessor extends FakeProcessor implements SearchResponseProcessor {
+    private static class FakeResponseProcessor extends AbstractProcessor implements SearchResponseProcessor {
         private final Consumer<SearchResponse> executor;
+        private String type;

-        public FakeResponseProcessor(String type, String tag, String description, Consumer<SearchResponse> executor) {
-            super(type, tag, description);
+        public FakeResponseProcessor(
+            String type,
+            String tag,
+            String description,
+            boolean ignoreFailure,
+            Consumer<SearchResponse> executor
+        ) {
+            super(tag, description, ignoreFailure);
             this.executor = executor;
+            this.type = type;
         }

         @Override
@@ -263,19 +255,31 @@ public SearchResponse processResponse(SearchRequest request, SearchResponse resp
             executor.accept(response);
             return response;
         }
+
+        /**
+         * Gets the type of processor
+         */
+        @Override
+        public String getType() {
+            return this.type;
+        }
     }

-    private static class FakeSearchPhaseResultsProcessor extends FakeProcessor implements SearchPhaseResultsProcessor {
+    private static class FakeSearchPhaseResultsProcessor extends AbstractProcessor implements SearchPhaseResultsProcessor {
         private Consumer<SearchPhaseResult> querySearchResultConsumer;
+        private String type;
+
         public FakeSearchPhaseResultsProcessor(
             String type,
             String tag,
             String description,
+            boolean ignoreFailure,
             Consumer<SearchPhaseResult> querySearchResultConsumer
         ) {
-            super(type, tag, description);
+            super(tag, description, ignoreFailure);
             this.querySearchResultConsumer = querySearchResultConsumer;
+            this.type = type;
         }

         @Override
@@ -297,30 +301,45 @@ public SearchPhaseName getBeforePhase() {
         public SearchPhaseName getAfterPhase() {
             return SearchPhaseName.FETCH;
         }
+
+        /**
+         * Gets the type of processor
+         */
+        @Override
+        public String getType() {
+            return this.type;
+        }
     }

     private SearchPipelineService createWithProcessors() {
         Map<String, Processor.Factory<SearchRequestProcessor>> requestProcessors = new HashMap<>();
-        requestProcessors.put("scale_request_size", (processorFactories, tag, description, config, ctx) -> {
+        requestProcessors.put("scale_request_size", (processorFactories, tag, description, ignoreFailure, config, ctx) -> {
            float scale = ((Number) config.remove("scale")).floatValue();
             return new FakeRequestProcessor(
                 "scale_request_size",
                 tag,
                 description,
+                ignoreFailure,
                 req -> req.source().size((int) (req.source().size() * scale))
             );
         });

         Map<String, Processor.Factory<SearchResponseProcessor>> responseProcessors = new HashMap<>();
-        responseProcessors.put("fixed_score", (processorFactories, tag, description, config, ctx) -> {
+        responseProcessors.put("fixed_score", (processorFactories, tag, description, ignoreFailure, config, ctx) -> {
             float score = ((Number) config.remove("score")).floatValue();
-            return new FakeResponseProcessor("fixed_score", tag, description, rsp -> rsp.getHits().forEach(h -> h.score(score)));
+            return new FakeResponseProcessor(
+                "fixed_score",
+                tag,
+                description,
+                ignoreFailure,
+                rsp -> rsp.getHits().forEach(h -> h.score(score))
+            );
         });

         Map<String, Processor.Factory<SearchPhaseResultsProcessor>> searchPhaseProcessors = new HashMap<>();
-        searchPhaseProcessors.put("max_score", (processorFactories, tag, description, config, context) -> {
+        searchPhaseProcessors.put("max_score", (processorFactories, tag, description, ignoreFailure, config, context) -> {
((Number) config.remove("score")).floatValue() : 100f; final Consumer querySearchResultConsumer = (result) -> result.queryResult().topDocs().maxScore = finalScore; - return new FakeSearchPhaseResultsProcessor("max_score", tag, description, querySearchResultConsumer); + return new FakeSearchPhaseResultsProcessor("max_score", tag, description, ignoreFailure, querySearchResultConsumer); }); return createWithProcessors(requestProcessors, responseProcessors, searchPhaseProcessors); @@ -893,7 +912,7 @@ public void testInfo() { } public void testExceptionOnPipelineCreation() { - Map> badFactory = Map.of("bad_factory", (pf, t, f, c, ctx) -> { + Map> badFactory = Map.of("bad_factory", (pf, t, i, f, c, ctx) -> { throw new RuntimeException(); }); SearchPipelineService searchPipelineService = createWithProcessors(badFactory, Collections.emptyMap(), Collections.emptyMap()); @@ -910,12 +929,12 @@ public void testExceptionOnPipelineCreation() { } public void testExceptionOnRequestProcessing() { - SearchRequestProcessor throwingRequestProcessor = new FakeRequestProcessor("throwing_request", null, null, r -> { + SearchRequestProcessor throwingRequestProcessor = new FakeRequestProcessor("throwing_request", null, null, false, r -> { throw new RuntimeException(); }); Map> throwingRequestProcessorFactory = Map.of( "throwing_request", - (pf, t, f, c, ctx) -> throwingRequestProcessor + (pf, t, i, f, c, ctx) -> throwingRequestProcessor ); SearchPipelineService searchPipelineService = createWithProcessors( @@ -935,12 +954,12 @@ public void testExceptionOnRequestProcessing() { } public void testExceptionOnResponseProcessing() throws Exception { - SearchResponseProcessor throwingResponseProcessor = new FakeResponseProcessor("throwing_response", null, null, r -> { + SearchResponseProcessor throwingResponseProcessor = new FakeResponseProcessor("throwing_response", null, null, false, r -> { throw new RuntimeException(); }); Map> throwingResponseProcessorFactory = Map.of( "throwing_response", - (pf, t, f, c, ctx) -> throwingResponseProcessor + (pf, t, i, f, c, ctx) -> throwingResponseProcessor ); SearchPipelineService searchPipelineService = createWithProcessors( @@ -962,61 +981,103 @@ public void testExceptionOnResponseProcessing() throws Exception { expectThrows(SearchPipelineProcessingException.class, () -> pipelinedRequest.transformResponse(response)); } + public void testCatchExceptionOnRequestProcessing() throws IllegalAccessException { + SearchRequestProcessor throwingRequestProcessor = new FakeRequestProcessor("throwing_request", null, null, true, r -> { + throw new RuntimeException(); + }); + Map> throwingRequestProcessorFactory = Map.of( + "throwing_request", + (pf, t, i, f, c, ctx) -> throwingRequestProcessor + ); + + SearchPipelineService searchPipelineService = createWithProcessors( + throwingRequestProcessorFactory, + Collections.emptyMap(), + Collections.emptyMap() + ); + + Map pipelineSourceMap = new HashMap<>(); + pipelineSourceMap.put(Pipeline.REQUEST_PROCESSORS_KEY, List.of(Map.of("throwing_request", Collections.emptyMap()))); + + SearchSourceBuilder sourceBuilder = SearchSourceBuilder.searchSource().searchPipelineSource(pipelineSourceMap); + SearchRequest searchRequest = new SearchRequest().source(sourceBuilder); + + // Caught Exception thrown when processing the request and produced warn level logging message + try (MockLogAppender mockAppender = MockLogAppender.createForLoggers(LogManager.getLogger(Pipeline.class))) { + mockAppender.addExpectation( + new MockLogAppender.SeenEventExpectation( + 
"test1", + Pipeline.class.getCanonicalName(), + Level.WARN, + "The exception from request processor [throwing_request] in the search pipeline [_ad_hoc_pipeline] was ignored" + ) + ); + PipelinedRequest pipelinedRequest = searchPipelineService.resolvePipeline(searchRequest); + mockAppender.assertAllExpectationsMatched(); + } + } + + public void testCatchExceptionOnResponseProcessing() throws Exception { + SearchResponseProcessor throwingResponseProcessor = new FakeResponseProcessor("throwing_response", null, null, true, r -> { + throw new RuntimeException(); + }); + Map> throwingResponseProcessorFactory = Map.of( + "throwing_response", + (pf, t, i, f, c, ctx) -> throwingResponseProcessor + ); + + SearchPipelineService searchPipelineService = createWithProcessors( + Collections.emptyMap(), + throwingResponseProcessorFactory, + Collections.emptyMap() + ); + + Map pipelineSourceMap = new HashMap<>(); + pipelineSourceMap.put(Pipeline.RESPONSE_PROCESSORS_KEY, List.of(Map.of("throwing_response", Collections.emptyMap()))); + + SearchSourceBuilder sourceBuilder = SearchSourceBuilder.searchSource().size(100).searchPipelineSource(pipelineSourceMap); + SearchRequest searchRequest = new SearchRequest().source(sourceBuilder); + + PipelinedRequest pipelinedRequest = searchPipelineService.resolvePipeline(searchRequest); + + SearchResponse response = new SearchResponse(null, null, 0, 0, 0, 0, null, null); + + // Caught Exception thrown when processing response and produced warn level logging message + try (MockLogAppender mockAppender = MockLogAppender.createForLoggers(LogManager.getLogger(Pipeline.class))) { + mockAppender.addExpectation( + new MockLogAppender.SeenEventExpectation( + "test1", + Pipeline.class.getCanonicalName(), + Level.WARN, + "The exception from response processor [throwing_response] in the search pipeline [_ad_hoc_pipeline] was ignored" + ) + ); + pipelinedRequest.transformResponse(response); + mockAppender.assertAllExpectationsMatched(); + } + } + public void testStats() throws Exception { - SearchRequestProcessor throwingRequestProcessor = new FakeRequestProcessor("throwing_request", "1", null, r -> { + SearchRequestProcessor throwingRequestProcessor = new FakeRequestProcessor("throwing_request", "1", null, false, r -> { throw new RuntimeException(); }); Map> requestProcessors = Map.of( "successful_request", - (pf, t, f, c, ctx) -> new FakeRequestProcessor("successful_request", "2", null, r -> {}), + (pf, t, i, f, c, ctx) -> new FakeRequestProcessor("successful_request", "2", null, false, r -> {}), "throwing_request", - (pf, t, f, c, ctx) -> throwingRequestProcessor + (pf, t, i, f, c, ctx) -> throwingRequestProcessor ); - SearchResponseProcessor throwingResponseProcessor = new FakeResponseProcessor("throwing_response", "3", null, r -> { + SearchResponseProcessor throwingResponseProcessor = new FakeResponseProcessor("throwing_response", "3", null, false, r -> { throw new RuntimeException(); }); Map> responseProcessors = Map.of( "successful_response", - (pf, t, f, c, ctx) -> new FakeResponseProcessor("successful_response", "4", null, r -> {}), + (pf, t, i, f, c, ctx) -> new FakeResponseProcessor("successful_response", "4", null, false, r -> {}), "throwing_response", - (pf, t, f, c, ctx) -> throwingResponseProcessor + (pf, t, i, f, c, ctx) -> throwingResponseProcessor ); - SearchPipelineService searchPipelineService = createWithProcessors(requestProcessors, responseProcessors, Collections.emptyMap()); - SearchPipelineMetadata metadata = new SearchPipelineMetadata( - Map.of( - 
"good_response_pipeline", - new PipelineConfiguration( - "good_response_pipeline", - new BytesArray("{\"response_processors\" : [ { \"successful_response\": {} } ] }"), - XContentType.JSON - ), - "bad_response_pipeline", - new PipelineConfiguration( - "bad_response_pipeline", - new BytesArray("{\"response_processors\" : [ { \"throwing_response\": {} } ] }"), - XContentType.JSON - ), - "good_request_pipeline", - new PipelineConfiguration( - "good_request_pipeline", - new BytesArray("{\"request_processors\" : [ { \"successful_request\": {} } ] }"), - XContentType.JSON - ), - "bad_request_pipeline", - new PipelineConfiguration( - "bad_request_pipeline", - new BytesArray("{\"request_processors\" : [ { \"throwing_request\": {} } ] }"), - XContentType.JSON - ) - ) - ); - ClusterState clusterState = ClusterState.builder(new ClusterName("_name")).build(); - ClusterState previousState = clusterState; - clusterState = ClusterState.builder(clusterState) - .metadata(Metadata.builder().putCustom(SearchPipelineMetadata.TYPE, metadata)) - .build(); - searchPipelineService.applyClusterState(new ClusterChangedEvent("", clusterState, previousState)); + SearchPipelineService searchPipelineService = getSearchPipelineService(requestProcessors, responseProcessors); SearchRequest request = new SearchRequest(); SearchResponse response = new SearchResponse(null, null, 0, 0, 0, 0, null, null); @@ -1079,6 +1140,142 @@ public void testStats() throws Exception { } } + public void testStatsEnabledIgnoreFailure() throws Exception { + SearchRequestProcessor throwingRequestProcessor = new FakeRequestProcessor("throwing_request", "1", null, true, r -> { + throw new RuntimeException(); + }); + Map> requestProcessorsEnableIgnoreFailure = Map.of( + "successful_request", + (pf, t, i, f, c, ctx) -> new FakeRequestProcessor("successful_request", "2", null, true, r -> {}), + "throwing_request", + (pf, t, i, f, c, ctx) -> throwingRequestProcessor + ); + SearchResponseProcessor throwingResponseProcessor = new FakeResponseProcessor("throwing_response", "3", null, true, r -> { + throw new RuntimeException(); + }); + Map> responseProcessorsEnableIgnoreFailure = Map.of( + "successful_response", + (pf, t, i, f, c, ctx) -> new FakeResponseProcessor("successful_response", "4", null, true, r -> {}), + "throwing_response", + (pf, t, i, f, c, ctx) -> throwingResponseProcessor + ); + + SearchPipelineService searchPipelineService = getSearchPipelineService( + requestProcessorsEnableIgnoreFailure, + responseProcessorsEnableIgnoreFailure + ); + + SearchRequest request = new SearchRequest(); + SearchResponse response = new SearchResponse(null, null, 0, 0, 0, 0, null, null); + + searchPipelineService.resolvePipeline(request.pipeline("good_request_pipeline")).transformResponse(response); + // Caught Exception here + searchPipelineService.resolvePipeline(request.pipeline("bad_request_pipeline")).transformResponse(response); + searchPipelineService.resolvePipeline(request.pipeline("good_response_pipeline")).transformResponse(response); + // Caught Exception here + searchPipelineService.resolvePipeline(request.pipeline("bad_response_pipeline")).transformResponse(response); + + // when ignoreFailure enabled, the search pipelines will all succeed. 
+        SearchPipelineStats stats = searchPipelineService.stats();
+        assertPipelineStats(stats.getTotalRequestStats(), 2, 0);
+        assertPipelineStats(stats.getTotalResponseStats(), 2, 0);
+
+        for (SearchPipelineStats.PerPipelineStats perPipelineStats : stats.getPipelineStats()) {
+            SearchPipelineStats.PipelineDetailStats detailStats = stats.getPerPipelineProcessorStats()
+                .get(perPipelineStats.getPipelineId());
+            switch (perPipelineStats.getPipelineId()) {
+                case "good_request_pipeline":
+                    assertPipelineStats(perPipelineStats.getRequestStats(), 1, 0);
+                    assertPipelineStats(perPipelineStats.getResponseStats(), 0, 0);
+                    assertEquals(1, detailStats.requestProcessorStats().size());
+                    assertEquals(0, detailStats.responseProcessorStats().size());
+                    assertEquals("successful_request:2", detailStats.requestProcessorStats().get(0).getProcessorName());
+                    assertEquals("successful_request", detailStats.requestProcessorStats().get(0).getProcessorType());
+                    assertPipelineStats(detailStats.requestProcessorStats().get(0).getStats(), 1, 0);
+                    break;
+                case "bad_request_pipeline":
+                    // the pipeline succeeds when ignoreFailure is true
+                    assertPipelineStats(perPipelineStats.getRequestStats(), 1, 0);
+                    assertPipelineStats(perPipelineStats.getResponseStats(), 0, 0);
+                    assertEquals(1, detailStats.requestProcessorStats().size());
+                    assertEquals(0, detailStats.responseProcessorStats().size());
+                    assertEquals("throwing_request:1", detailStats.requestProcessorStats().get(0).getProcessorName());
+                    assertEquals("throwing_request", detailStats.requestProcessorStats().get(0).getProcessorType());
+                    // the processor stats record 1 invocation and 1 failure
+                    assertPipelineStats(detailStats.requestProcessorStats().get(0).getStats(), 1, 1);
+                    break;
+                case "good_response_pipeline":
+                    assertPipelineStats(perPipelineStats.getRequestStats(), 0, 0);
+                    assertPipelineStats(perPipelineStats.getResponseStats(), 1, 0);
+                    assertEquals(0, detailStats.requestProcessorStats().size());
+                    assertEquals(1, detailStats.responseProcessorStats().size());
+                    assertEquals("successful_response:4", detailStats.responseProcessorStats().get(0).getProcessorName());
+                    assertEquals("successful_response", detailStats.responseProcessorStats().get(0).getProcessorType());
+                    assertPipelineStats(detailStats.responseProcessorStats().get(0).getStats(), 1, 0);
+                    break;
+                case "bad_response_pipeline":
+                    // the pipeline succeeds when ignoreFailure is true
+                    assertPipelineStats(perPipelineStats.getRequestStats(), 0, 0);
+                    assertPipelineStats(perPipelineStats.getResponseStats(), 1, 0);
+                    assertEquals(0, detailStats.requestProcessorStats().size());
+                    assertEquals(1, detailStats.responseProcessorStats().size());
+                    assertEquals("throwing_response:3", detailStats.responseProcessorStats().get(0).getProcessorName());
+                    assertEquals("throwing_response", detailStats.responseProcessorStats().get(0).getProcessorType());
+                    // the processor stats record 1 invocation and 1 failure
+                    assertPipelineStats(detailStats.responseProcessorStats().get(0).getStats(), 1, 1);
+                    break;
+            }
+        }
+
+    }
+
+    private SearchPipelineService getSearchPipelineService(
+        Map<String, Processor.Factory<SearchRequestProcessor>> requestProcessorsEnableIgnoreFailure,
+        Map<String, Processor.Factory<SearchResponseProcessor>> responseProcessorsEnableIgnoreFailure
+    ) {
+        SearchPipelineService searchPipelineService = createWithProcessors(
+            requestProcessorsEnableIgnoreFailure,
+            responseProcessorsEnableIgnoreFailure,
+            Collections.emptyMap()
+        );
+
+        SearchPipelineMetadata metadata = new SearchPipelineMetadata(
+            Map.of(
+                "good_response_pipeline",
+                new PipelineConfiguration(
+                    "good_response_pipeline",
{ \"successful_response\": {} } ] }"), + XContentType.JSON + ), + "bad_response_pipeline", + new PipelineConfiguration( + "bad_response_pipeline", + new BytesArray("{\"response_processors\" : [ { \"throwing_response\": {} } ] }"), + XContentType.JSON + ), + "good_request_pipeline", + new PipelineConfiguration( + "good_request_pipeline", + new BytesArray("{\"request_processors\" : [ { \"successful_request\": {} } ] }"), + XContentType.JSON + ), + "bad_request_pipeline", + new PipelineConfiguration( + "bad_request_pipeline", + new BytesArray("{\"request_processors\" : [ { \"throwing_request\": {} } ] }"), + XContentType.JSON + ) + ) + ); + ClusterState clusterState = ClusterState.builder(new ClusterName("_name")).build(); + ClusterState previousState = clusterState; + clusterState = ClusterState.builder(clusterState) + .metadata(Metadata.builder().putCustom(SearchPipelineMetadata.TYPE, metadata)) + .build(); + searchPipelineService.applyClusterState(new ClusterChangedEvent("", clusterState, previousState)); + return searchPipelineService; + } + private static void assertPipelineStats(OperationStats stats, long count, long failed) { assertEquals(stats.getCount(), count); assertEquals(stats.getFailedCount(), failed); @@ -1086,11 +1283,11 @@ private static void assertPipelineStats(OperationStats stats, long count, long f public void testAdHocRejectingProcessor() { String processorType = "ad_hoc_rejecting"; - Map> requestProcessorFactories = Map.of(processorType, (pf, t, d, c, ctx) -> { + Map> requestProcessorFactories = Map.of(processorType, (pf, t, d, i, c, ctx) -> { if (ctx.getPipelineSource() == Processor.PipelineSource.SEARCH_REQUEST) { throw new IllegalArgumentException(processorType + " cannot be created as part of a pipeline defined in a search request"); } - return new FakeRequestProcessor(processorType, t, d, r -> {}); + return new FakeRequestProcessor(processorType, t, d, i, r -> {}); }); SearchPipelineService searchPipelineService = createWithProcessors( diff --git a/test/framework/build.gradle b/test/framework/build.gradle index 2532fdf1938fd..c65bf51c6af36 100644 --- a/test/framework/build.gradle +++ b/test/framework/build.gradle @@ -49,11 +49,14 @@ dependencies { api "org.mockito:mockito-core:${versions.mockito}" api "net.bytebuddy:byte-buddy:${versions.bytebuddy}" api "org.objenesis:objenesis:${versions.objenesis}" + + annotationProcessor "org.apache.logging.log4j:log4j-core:${versions.log4j}" } compileJava.options.compilerArgs -= '-Xlint:cast' compileJava.options.compilerArgs -= '-Xlint:rawtypes' compileJava.options.compilerArgs -= '-Xlint:unchecked' +compileJava.options.compilerArgs += ['-processor', 'org.apache.logging.log4j.core.config.plugins.processor.PluginProcessor'] compileTestJava.options.compilerArgs -= '-Xlint:rawtypes' // the main files are actually test files, so use the appropriate forbidden api sigs diff --git a/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java b/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java index ec397a2baa640..1bea22852df4c 100644 --- a/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java +++ b/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java @@ -83,7 +83,6 @@ import org.opensearch.common.logging.DeprecatedMessage; import org.opensearch.common.logging.HeaderWarning; import org.opensearch.common.logging.HeaderWarningAppender; -import org.opensearch.common.logging.LogConfigurator; import org.opensearch.common.logging.Loggers; import 
 import org.opensearch.common.settings.Setting;
 import org.opensearch.common.settings.Settings;
@@ -239,7 +238,6 @@ public void tearDown() throws Exception {
     static {
         TEST_WORKER_VM_ID = System.getProperty(TEST_WORKER_SYS_PROPERTY, DEFAULT_TEST_WORKER_ID);
         setTestSysProps();
-        LogConfigurator.loadLog4jPlugins();
         String leakLoggerName = "io.netty.util.ResourceLeakDetector";
         Logger leakLogger = LogManager.getLogger(leakLoggerName);