diff --git a/.ci/bwcVersions b/.ci/bwcVersions index 980e08c696d54..0f59fec3d824a 100644 --- a/.ci/bwcVersions +++ b/.ci/bwcVersions @@ -38,3 +38,4 @@ BWC_VERSION: - "2.16.0" - "2.16.1" - "2.17.0" + - "2.18.0" diff --git a/.github/benchmark-configs.json b/.github/benchmark-configs.json index 8f4bad040fe44..4ada715d21495 100644 --- a/.github/benchmark-configs.json +++ b/.github/benchmark-configs.json @@ -167,5 +167,59 @@ "data_instance_config": "4vCPU, 32G Mem, 16G Heap" }, "baseline_cluster_config": "x64-r5.xlarge-single-node-1-shard-0-replica-baseline" + }, + "id_11": { + "description": "Search only test-procedure for big5 with concurrent segment search setting enabled", + "supported_major_versions": ["3"], + "cluster-benchmark-configs": { + "SINGLE_NODE_CLUSTER": "true", + "MIN_DISTRIBUTION": "true", + "TEST_WORKLOAD": "big5", + "ADDITIONAL_CONFIG": "search.concurrent_segment_search.enabled:true", + "WORKLOAD_PARAMS": "{\"snapshot_repo_name\":\"benchmark-workloads-repo-300\",\"snapshot_bucket_name\":\"benchmark-workload-snapshots\",\"snapshot_region\":\"us-east-1\",\"snapshot_base_path\":\"workload-snapshots-300\",\"snapshot_name\":\"big5_1_shard\"}", + "CAPTURE_NODE_STAT": "true", + "TEST_PROCEDURE": "restore-from-snapshot" + }, + "cluster_configuration": { + "size": "Single-Node", + "data_instance_config": "4vCPU, 32G Mem, 16G Heap" + }, + "baseline_cluster_config": "x64-r5.xlarge-1-shard-0-replica-snapshot-baseline" + }, + "id_12": { + "description": "Search only test-procedure for big5 with concurrent segment search mode as all", + "supported_major_versions": ["3"], + "cluster-benchmark-configs": { + "SINGLE_NODE_CLUSTER": "true", + "MIN_DISTRIBUTION": "true", + "TEST_WORKLOAD": "big5", + "ADDITIONAL_CONFIG": "search.concurrent_segment_search.mode:all", + "WORKLOAD_PARAMS": "{\"snapshot_repo_name\":\"benchmark-workloads-repo-300\",\"snapshot_bucket_name\":\"benchmark-workload-snapshots\",\"snapshot_region\":\"us-east-1\",\"snapshot_base_path\":\"workload-snapshots-300\",\"snapshot_name\":\"big5_1_shard\"}", + "CAPTURE_NODE_STAT": "true", + "TEST_PROCEDURE": "restore-from-snapshot" + }, + "cluster_configuration": { + "size": "Single-Node", + "data_instance_config": "4vCPU, 32G Mem, 16G Heap" + }, + "baseline_cluster_config": "x64-r5.xlarge-1-shard-0-replica-snapshot-baseline" + }, + "id_13": { + "description": "Search only test-procedure for big5 with concurrent segment search mode as auto", + "supported_major_versions": ["3"], + "cluster-benchmark-configs": { + "SINGLE_NODE_CLUSTER": "true", + "MIN_DISTRIBUTION": "true", + "TEST_WORKLOAD": "big5", + "ADDITIONAL_CONFIG": "search.concurrent_segment_search.mode:auto", + "WORKLOAD_PARAMS": "{\"snapshot_repo_name\":\"benchmark-workloads-repo-300\",\"snapshot_bucket_name\":\"benchmark-workload-snapshots\",\"snapshot_region\":\"us-east-1\",\"snapshot_base_path\":\"workload-snapshots-300\",\"snapshot_name\":\"big5_1_shard\"}", + "CAPTURE_NODE_STAT": "true", + "TEST_PROCEDURE": "restore-from-snapshot" + }, + "cluster_configuration": { + "size": "Single-Node", + "data_instance_config": "4vCPU, 32G Mem, 16G Heap" + }, + "baseline_cluster_config": "x64-r5.xlarge-1-shard-0-replica-snapshot-baseline" } } diff --git a/.github/workflows/assemble.yml b/.github/workflows/assemble.yml index 200d9cf2d788b..d90b05c323cf1 100644 --- a/.github/workflows/assemble.yml +++ b/.github/workflows/assemble.yml @@ -30,11 +30,14 @@ jobs: - name: Setup docker (missing on MacOS) id: setup_docker if: runner.os == 'macos' - uses: 
douglascamata/setup-docker-macos-action@main continue-on-error: true - with: - upgrade-qemu: true - colima: v0.6.8 + run: | + # Force QEMU 9.0.2 usage + curl https://raw.githubusercontent.com/Homebrew/homebrew-core/f1a9cf104a9a51779c7a532b658c490f69974839/Formula/q/qemu.rb > qemu.rb + brew install qemu.rb + HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1 HOMEBREW_NO_AUTO_UPDATE=1 brew install docker colima coreutils + gtimeout 15m colima start + shell: bash - name: Run Gradle (assemble) if: runner.os == 'macos' && steps.setup_docker.outcome != 'success' run: | diff --git a/.github/workflows/version.yml b/.github/workflows/version.yml index 2de54716256ff..959c5c394be7d 100644 --- a/.github/workflows/version.yml +++ b/.github/workflows/version.yml @@ -62,7 +62,7 @@ jobs: - name: Create PR for BASE id: base_pr - uses: peter-evans/create-pull-request@v6 + uses: peter-evans/create-pull-request@v7 with: base: ${{ env.BASE }} branch: 'create-pull-request/patch-${{ env.BASE }}' @@ -88,7 +88,7 @@ jobs: - name: Create PR for BASE_X id: base_x_pr - uses: peter-evans/create-pull-request@v6 + uses: peter-evans/create-pull-request@v7 with: base: ${{ env.BASE_X }} branch: 'create-pull-request/patch-${{ env.BASE_X }}' @@ -114,7 +114,7 @@ jobs: - name: Create PR for main id: main_pr - uses: peter-evans/create-pull-request@v6 + uses: peter-evans/create-pull-request@v7 with: base: main branch: 'create-pull-request/patch-main' diff --git a/CHANGELOG.md b/CHANGELOG.md index 402e8372871bf..57a269af712b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,87 +5,29 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [Unreleased 2.x] ### Added -- [Offline Nodes] Adds offline-tasks library containing various interfaces to be used for Offline Background Tasks. 
([#13574](https://github.com/opensearch-project/OpenSearch/pull/13574)) -- Fix for hasInitiatedFetching to fix allocation explain and manual reroute APIs (([#14972](https://github.com/opensearch-project/OpenSearch/pull/14972)) -- [Workload Management] Add queryGroupId to Task ([14708](https://github.com/opensearch-project/OpenSearch/pull/14708)) -- Add setting to ignore throttling nodes for allocation of unassigned primaries in remote restore ([#14991](https://github.com/opensearch-project/OpenSearch/pull/14991)) -- [Workload Management] Add Delete QueryGroup API Logic ([#14735](https://github.com/opensearch-project/OpenSearch/pull/14735)) -- [Streaming Indexing] Enhance RestClient with a new streaming API support ([#14437](https://github.com/opensearch-project/OpenSearch/pull/14437)) -- Add basic aggregation support for derived fields ([#14618](https://github.com/opensearch-project/OpenSearch/pull/14618)) -- [Workload Management] Add Create QueryGroup API Logic ([#14680](https://github.com/opensearch-project/OpenSearch/pull/14680))- [Workload Management] Add Create QueryGroup API Logic ([#14680](https://github.com/opensearch-project/OpenSearch/pull/14680)) -- Add ThreadContextPermission for markAsSystemContext and allow core to perform the method ([#15016](https://github.com/opensearch-project/OpenSearch/pull/15016)) -- Add ThreadContextPermission for stashAndMergeHeaders and stashWithOrigin ([#15039](https://github.com/opensearch-project/OpenSearch/pull/15039)) -- [Concurrent Segment Search] Support composite aggregations with scripting ([#15072](https://github.com/opensearch-project/OpenSearch/pull/15072)) -- Add `rangeQuery` and `regexpQuery` for `constant_keyword` field type ([#14711](https://github.com/opensearch-project/OpenSearch/pull/14711)) -- Add took time to request nodes stats ([#15054](https://github.com/opensearch-project/OpenSearch/pull/15054)) -- [Workload Management] Add Get QueryGroup API Logic ([14709](https://github.com/opensearch-project/OpenSearch/pull/14709)) -- [Workload Management] Add Settings for Workload Management feature ([#15028](https://github.com/opensearch-project/OpenSearch/pull/15028)) -- [Workload Management] QueryGroup resource tracking framework changes ([#13897](https://github.com/opensearch-project/OpenSearch/pull/13897)) -- Support filtering on a large list encoded by bitmap ([#14774](https://github.com/opensearch-project/OpenSearch/pull/14774)) -- Add slice execution listeners to SearchOperationListener interface ([#15153](https://github.com/opensearch-project/OpenSearch/pull/15153)) -- Make balanced shards allocator timebound ([#15239](https://github.com/opensearch-project/OpenSearch/pull/15239)) -- Add allowlist setting for ingest-geoip and ingest-useragent ([#15325](https://github.com/opensearch-project/OpenSearch/pull/15325)) -- Adding access to noSubMatches and noOverlappingMatches in Hyphenation ([#13895](https://github.com/opensearch-project/OpenSearch/pull/13895)) -- Add support for index level max slice count setting for concurrent segment search ([#15336](https://github.com/opensearch-project/OpenSearch/pull/15336)) -- [Streaming Indexing] Introduce bulk HTTP API streaming flavor ([#15381](https://github.com/opensearch-project/OpenSearch/pull/15381)) -- Add support for centralize snapshot creation with pinned timestamp ([#15124](https://github.com/opensearch-project/OpenSearch/pull/15124)) -- Add concurrent search support for Derived Fields ([#15326](https://github.com/opensearch-project/OpenSearch/pull/15326)) -- [Workload Management] 
Add query group stats constructs ([#15343](https://github.com/opensearch-project/OpenSearch/pull/15343))) -- Add runAs to Subject interface and introduce IdentityAwarePlugin extension point ([#14630](https://github.com/opensearch-project/OpenSearch/pull/14630)) -- Optimize NodeIndicesStats output behind flag ([#14454](https://github.com/opensearch-project/OpenSearch/pull/14454)) -- [Workload Management] Add rejection logic for co-ordinator and shard level requests ([#15428](https://github.com/opensearch-project/OpenSearch/pull/15428))) +- MultiTermQueries in keyword fields now default to `indexed` approach and gated behind cluster setting ([#15637](https://github.com/opensearch-project/OpenSearch/pull/15637)) +- [Workload Management] QueryGroup resource cancellation framework changes ([#15651](https://github.com/opensearch-project/OpenSearch/pull/15651)) +- Fallback to Remote cluster-state on Term-Version check mismatch - ([#15424](https://github.com/opensearch-project/OpenSearch/pull/15424)) ### Dependencies -- Bump `netty` from 4.1.111.Final to 4.1.112.Final ([#15081](https://github.com/opensearch-project/OpenSearch/pull/15081)) -- Bump `org.apache.commons:commons-lang3` from 3.14.0 to 3.16.0 ([#14861](https://github.com/opensearch-project/OpenSearch/pull/14861), [#15205](https://github.com/opensearch-project/OpenSearch/pull/15205)) -- OpenJDK Update (July 2024 Patch releases) ([#14998](https://github.com/opensearch-project/OpenSearch/pull/14998)) -- Bump `com.microsoft.azure:msal4j` from 1.16.1 to 1.17.0 ([#14995](https://github.com/opensearch-project/OpenSearch/pull/14995), [#15420](https://github.com/opensearch-project/OpenSearch/pull/15420)) -- Bump `actions/github-script` from 6 to 7 ([#14997](https://github.com/opensearch-project/OpenSearch/pull/14997)) -- Bump `org.tukaani:xz` from 1.9 to 1.10 ([#15110](https://github.com/opensearch-project/OpenSearch/pull/15110)) -- Bump `actions/setup-java` from 1 to 4 ([#15104](https://github.com/opensearch-project/OpenSearch/pull/15104)) -- Bump `org.apache.avro:avro` from 1.11.3 to 1.12.0 in /plugins/repository-hdfs ([#15119](https://github.com/opensearch-project/OpenSearch/pull/15119)) -- Bump `org.bouncycastle:bcpg-fips` from 1.0.7.1 to 2.0.9 ([#15103](https://github.com/opensearch-project/OpenSearch/pull/15103), [#15299](https://github.com/opensearch-project/OpenSearch/pull/15299)) -- Bump `com.azure:azure-core` from 1.49.1 to 1.51.0 ([#15111](https://github.com/opensearch-project/OpenSearch/pull/15111)) -- Bump `org.xerial.snappy:snappy-java` from 1.1.10.5 to 1.1.10.6 ([#15207](https://github.com/opensearch-project/OpenSearch/pull/15207)) -- Bump `com.azure:azure-xml` from 1.0.0 to 1.1.0 ([#15206](https://github.com/opensearch-project/OpenSearch/pull/15206)) -- Bump `reactor` from 3.5.19 to 3.5.20 ([#15262](https://github.com/opensearch-project/OpenSearch/pull/15262)) -- Bump `reactor-netty` from 1.1.21 to 1.1.22 ([#15262](https://github.com/opensearch-project/OpenSearch/pull/15262)) -- Bump `org.apache.kerby:kerb-admin` from 2.0.3 to 2.1.0 ([#15301](https://github.com/opensearch-project/OpenSearch/pull/15301)) -- Bump `com.azure:azure-core-http-netty` from 1.15.1 to 1.15.3 ([#15300](https://github.com/opensearch-project/OpenSearch/pull/15300)) -- Bump `com.gradle.develocity` from 3.17.6 to 3.18 ([#15297](https://github.com/opensearch-project/OpenSearch/pull/15297)) -- Bump `commons-cli:commons-cli` from 1.8.0 to 1.9.0 ([#15298](https://github.com/opensearch-project/OpenSearch/pull/15298)) -- Bump `opentelemetry` from 1.40.0 to 1.41.0 
([#15361](https://github.com/opensearch-project/OpenSearch/pull/15361)) -- Bump `opentelemetry-semconv` from 1.26.0-alpha to 1.27.0-alpha ([#15361](https://github.com/opensearch-project/OpenSearch/pull/15361)) -- Bump `tj-actions/changed-files` from 44 to 45 ([#15422](https://github.com/opensearch-project/OpenSearch/pull/15422)) -- Bump `dnsjava:dnsjava` from 3.6.0 to 3.6.1 ([#15418](https://github.com/opensearch-project/OpenSearch/pull/15418)) -- Bump `com.netflix.nebula.ospackage-base` from 11.9.1 to 11.10.0 ([#15419](https://github.com/opensearch-project/OpenSearch/pull/15419)) -- Bump `org.roaringbitmap:RoaringBitmap` from 1.1.0 to 1.2.1 ([#15423](https://github.com/opensearch-project/OpenSearch/pull/15423)) -- Bump `icu4j` from 70.1 to 75.1 ([#15469](https://github.com/opensearch-project/OpenSearch/pull/15469)) +- Bump `com.azure:azure-identity` from 1.13.0 to 1.13.2 ([#15578](https://github.com/opensearch-project/OpenSearch/pull/15578)) +- Bump `protobuf` from 3.22.3 to 3.25.4 ([#15684](https://github.com/opensearch-project/OpenSearch/pull/15684)) +- Bump `org.apache.logging.log4j:log4j-core` from 2.23.1 to 2.24.0 ([#15858](https://github.com/opensearch-project/OpenSearch/pull/15858)) +- Bump `peter-evans/create-pull-request` from 6 to 7 ([#15863](https://github.com/opensearch-project/OpenSearch/pull/15863)) +- Bump `com.nimbusds:oauth2-oidc-sdk` from 11.9.1 to 11.19.1 ([#15862](https://github.com/opensearch-project/OpenSearch/pull/15862)) ### Changed -- Add lower limit for primary and replica batch allocators timeout ([#14979](https://github.com/opensearch-project/OpenSearch/pull/14979)) -- Optimize regexp-based include/exclude on aggregations when pattern matches prefixes ([#14371](https://github.com/opensearch-project/OpenSearch/pull/14371)) -- Replace and block usages of org.apache.logging.log4j.util.Strings ([#15238](https://github.com/opensearch-project/OpenSearch/pull/15238)) + ### Deprecated ### Removed ### Fixed -- Fix constraint bug which allows more primary shards than average primary shards per index ([#14908](https://github.com/opensearch-project/OpenSearch/pull/14908)) -- Fix NPE when bulk ingest with empty pipeline ([#15033](https://github.com/opensearch-project/OpenSearch/pull/15033)) -- Fix missing value of FieldSort for unsigned_long ([#14963](https://github.com/opensearch-project/OpenSearch/pull/14963)) -- Fix delete index template failed when the index template matches a data stream but is unused ([#15080](https://github.com/opensearch-project/OpenSearch/pull/15080)) -- Fix array_index_out_of_bounds_exception when indexing documents with field name containing only dot ([#15126](https://github.com/opensearch-project/OpenSearch/pull/15126)) -- Fixed array field name omission in flat_object function for nested JSON ([#13620](https://github.com/opensearch-project/OpenSearch/pull/13620)) -- Fix range aggregation optimization ignoring top level queries ([#15194](https://github.com/opensearch-project/OpenSearch/pull/15194)) -- Fix incorrect parameter names in MinHash token filter configuration handling ([#15233](https://github.com/opensearch-project/OpenSearch/pull/15233)) -- Fix indexing error when flat_object field is explicitly null ([#15375](https://github.com/opensearch-project/OpenSearch/pull/15375)) -- Fix split response processor not included in allowlist ([#15393](https://github.com/opensearch-project/OpenSearch/pull/15393)) -- Fix unchecked cast in dynamic action map getter ([#15394](https://github.com/opensearch-project/OpenSearch/pull/15394)) +- Fix wildcard 
query containing escaped character ([#15737](https://github.com/opensearch-project/OpenSearch/pull/15737)) +- Fix case-insensitive query on wildcard field ([#15882](https://github.com/opensearch-project/OpenSearch/pull/15882)) - Add validation for the search backpressure cancellation settings ([#15501](https://github.com/opensearch-project/OpenSearch/pull/15501)) ### Security -[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.15...2.x +[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.17...2.x diff --git a/TESTING.md b/TESTING.md index de7ab3eefe2f8..9127f005ced75 100644 --- a/TESTING.md +++ b/TESTING.md @@ -39,7 +39,7 @@ OpenSearch uses [jUnit](https://junit.org/junit5/) for testing, it also uses ran - [Expect a specific segment topology](#expect-a-specific-segment-topology) - [Leave environment in an unstable state after test](#leave-environment-in-an-unstable-state-after-test) - [Test coverage analysis](#test-coverage-analysis) -- [Building with extra plugins](#building-with-extra-plugins) +- [Testing with plugins](#testing-with-plugins) - [Environment misc](#environment-misc) # Requirements @@ -552,11 +552,17 @@ Apart from using Gradle, it is also possible to gain insight in code coverage us Please read your IDE documentation for how to attach a debugger to a JVM process. -# Building with extra plugins +# Testing with plugins -Additional plugins may be built alongside OpenSearch, where their dependency on OpenSearch will be substituted with the local OpenSearch build. To add your plugin, create a directory called `opensearch-extra` as a sibling of OpenSearch. Checkout your plugin underneath `opensearch-extra` and the build will automatically pick it up. You can verify the plugin is included as part of the build by checking the projects of the build. +To test a plugin with a custom build of OpenSearch, build OpenSearch and use the `customDistributionUrl` setting supported by each plugin to override the OpenSearch distribution. - ./gradlew projects +For example, in your OpenSearch repository, assemble a custom distribution:
+ + ./gradlew :distribution:archives:linux-tar:assemble + +Then in your plugin repository, substitute in your OpenSearch build + + ./gradlew run -PcustomDistributionUrl="/distribution/archives/linux-tar/build/distributions/opensearch-min-3.0.0-SNAPSHOT-linux-x64.tar.gz" # Environment misc diff --git a/buildSrc/src/main/java/org/opensearch/gradle/testclusters/OpenSearchNode.java b/buildSrc/src/main/java/org/opensearch/gradle/testclusters/OpenSearchNode.java index 268de50340cbf..cd22560af9a96 100644 --- a/buildSrc/src/main/java/org/opensearch/gradle/testclusters/OpenSearchNode.java +++ b/buildSrc/src/main/java/org/opensearch/gradle/testclusters/OpenSearchNode.java @@ -116,7 +116,12 @@ public class OpenSearchNode implements TestClusterConfiguration { private static final TimeUnit NODE_UP_TIMEOUT_UNIT = TimeUnit.MINUTES; private static final int ADDITIONAL_CONFIG_TIMEOUT = 15; private static final TimeUnit ADDITIONAL_CONFIG_TIMEOUT_UNIT = TimeUnit.SECONDS; - private static final List OVERRIDABLE_SETTINGS = Arrays.asList("path.repo", "discovery.seed_providers", "discovery.seed_hosts"); + private static final List OVERRIDABLE_SETTINGS = Arrays.asList( + "path.repo", + "discovery.seed_providers", + "discovery.seed_hosts", + "indices.breaker.total.use_real_memory" + ); private static final int TAIL_LOG_MESSAGES_COUNT = 40; private static final List MESSAGES_WE_DONT_CARE_ABOUT = Arrays.asList( diff --git a/buildSrc/src/testKit/thirdPartyAudit/sample_jars/build.gradle b/buildSrc/src/testKit/thirdPartyAudit/sample_jars/build.gradle index 48dfb206375ca..59063b3789c70 100644 --- a/buildSrc/src/testKit/thirdPartyAudit/sample_jars/build.gradle +++ b/buildSrc/src/testKit/thirdPartyAudit/sample_jars/build.gradle @@ -17,7 +17,7 @@ repositories { } dependencies { - implementation "org.apache.logging.log4j:log4j-core:2.23.1" + implementation "org.apache.logging.log4j:log4j-core:2.24.0" } ["0.0.1", "0.0.2"].forEach { v -> diff --git a/buildSrc/version.properties b/buildSrc/version.properties index 0f066de481ea2..0c6d0536dd1d5 100644 --- a/buildSrc/version.properties +++ b/buildSrc/version.properties @@ -20,7 +20,7 @@ woodstox = 6.4.0 kotlin = 1.7.10 antlr4 = 4.13.1 guava = 32.1.1-jre -protobuf = 3.22.3 +protobuf = 3.25.4 jakarta_annotation = 1.3.5 google_http_client = 1.44.1 tdigest = 3.3 diff --git a/client/rest-high-level/src/main/java/org/opensearch/client/indices/GetIndexResponse.java b/client/rest-high-level/src/main/java/org/opensearch/client/indices/GetIndexResponse.java index 6ec1c312c9ba9..1ceaeab6c0064 100644 --- a/client/rest-high-level/src/main/java/org/opensearch/client/indices/GetIndexResponse.java +++ b/client/rest-high-level/src/main/java/org/opensearch/client/indices/GetIndexResponse.java @@ -34,6 +34,7 @@ import org.apache.lucene.util.CollectionUtil; import org.opensearch.cluster.metadata.AliasMetadata; +import org.opensearch.cluster.metadata.Context; import org.opensearch.cluster.metadata.MappingMetadata; import org.opensearch.common.settings.Settings; import org.opensearch.core.xcontent.XContentParser; @@ -61,6 +62,7 @@ public class GetIndexResponse { private Map settings; private Map defaultSettings; private Map dataStreams; + private Map contexts; private String[] indices; GetIndexResponse( @@ -69,7 +71,8 @@ public class GetIndexResponse { Map> aliases, Map settings, Map defaultSettings, - Map dataStreams + Map dataStreams, + Map contexts ) { this.indices = indices; // to have deterministic order @@ -89,6 +92,9 @@ public class GetIndexResponse { if (dataStreams != null) { this.dataStreams 
= dataStreams; } + if (contexts != null) { + this.contexts = contexts; + } } public String[] getIndices() { @@ -123,6 +129,10 @@ public Map getDataStreams() { return dataStreams; } + public Map contexts() { + return contexts; + } + /** * Returns the string value for the specified index and setting. If the includeDefaults flag was not set or set to * false on the {@link GetIndexRequest}, this method will only return a value where the setting was explicitly set @@ -167,6 +177,7 @@ private static IndexEntry parseIndexEntry(XContentParser parser) throws IOExcept Settings indexSettings = null; Settings indexDefaultSettings = null; String dataStream = null; + Context context = null; // We start at START_OBJECT since fromXContent ensures that while (parser.nextToken() != Token.END_OBJECT) { ensureExpectedToken(Token.FIELD_NAME, parser.currentToken(), parser); @@ -185,6 +196,9 @@ private static IndexEntry parseIndexEntry(XContentParser parser) throws IOExcept case "defaults": indexDefaultSettings = Settings.fromXContent(parser); break; + case "context": + context = Context.fromXContent(parser); + break; default: parser.skipChildren(); } @@ -197,7 +211,7 @@ private static IndexEntry parseIndexEntry(XContentParser parser) throws IOExcept parser.skipChildren(); } } - return new IndexEntry(indexAliases, indexMappings, indexSettings, indexDefaultSettings, dataStream); + return new IndexEntry(indexAliases, indexMappings, indexSettings, indexDefaultSettings, dataStream, context); } // This is just an internal container to make stuff easier for returning @@ -207,19 +221,22 @@ private static class IndexEntry { Settings indexSettings = Settings.EMPTY; Settings indexDefaultSettings = Settings.EMPTY; String dataStream; + Context context; IndexEntry( List indexAliases, MappingMetadata indexMappings, Settings indexSettings, Settings indexDefaultSettings, - String dataStream + String dataStream, + Context context ) { if (indexAliases != null) this.indexAliases = indexAliases; if (indexMappings != null) this.indexMappings = indexMappings; if (indexSettings != null) this.indexSettings = indexSettings; if (indexDefaultSettings != null) this.indexDefaultSettings = indexDefaultSettings; if (dataStream != null) this.dataStream = dataStream; + if (context != null) this.context = context; } } @@ -229,6 +246,7 @@ public static GetIndexResponse fromXContent(XContentParser parser) throws IOExce Map settings = new HashMap<>(); Map defaultSettings = new HashMap<>(); Map dataStreams = new HashMap<>(); + Map contexts = new HashMap<>(); List indices = new ArrayList<>(); if (parser.currentToken() == null) { @@ -254,12 +272,15 @@ public static GetIndexResponse fromXContent(XContentParser parser) throws IOExce if (indexEntry.dataStream != null) { dataStreams.put(indexName, indexEntry.dataStream); } + if (indexEntry.context != null) { + contexts.put(indexName, indexEntry.context); + } } else if (parser.currentToken() == Token.START_ARRAY) { parser.skipChildren(); } else { parser.nextToken(); } } - return new GetIndexResponse(indices.toArray(new String[0]), mappings, aliases, settings, defaultSettings, dataStreams); + return new GetIndexResponse(indices.toArray(new String[0]), mappings, aliases, settings, defaultSettings, dataStreams, contexts); } } diff --git a/client/rest-high-level/src/test/java/org/opensearch/client/SnapshotRequestConvertersTests.java b/client/rest-high-level/src/test/java/org/opensearch/client/SnapshotRequestConvertersTests.java index af178ad2a5d47..8b6910ffebe4a 100644 --- 
a/client/rest-high-level/src/test/java/org/opensearch/client/SnapshotRequestConvertersTests.java +++ b/client/rest-high-level/src/test/java/org/opensearch/client/SnapshotRequestConvertersTests.java @@ -230,6 +230,7 @@ public void testSnapshotsStatus() { Map expectedParams = new HashMap<>(); String repository = RequestConvertersTests.randomIndicesNames(1, 1)[0]; String[] snapshots = RequestConvertersTests.randomIndicesNames(1, 5); + String[] indices = RequestConvertersTests.randomIndicesNames(1, 5); StringBuilder snapshotNames = new StringBuilder(snapshots[0]); for (int idx = 1; idx < snapshots.length; idx++) { snapshotNames.append(",").append(snapshots[idx]); @@ -237,7 +238,7 @@ public void testSnapshotsStatus() { boolean ignoreUnavailable = randomBoolean(); String endpoint = "/_snapshot/" + repository + "/" + snapshotNames.toString() + "/_status"; - SnapshotsStatusRequest snapshotsStatusRequest = new SnapshotsStatusRequest(repository, snapshots); + SnapshotsStatusRequest snapshotsStatusRequest = (new SnapshotsStatusRequest(repository, snapshots)).indices(indices); RequestConvertersTests.setRandomClusterManagerTimeout(snapshotsStatusRequest, expectedParams); snapshotsStatusRequest.ignoreUnavailable(ignoreUnavailable); expectedParams.put("ignore_unavailable", Boolean.toString(ignoreUnavailable)); diff --git a/client/rest-high-level/src/test/java/org/opensearch/client/indices/GetIndexResponseTests.java b/client/rest-high-level/src/test/java/org/opensearch/client/indices/GetIndexResponseTests.java index a00f0487116dc..fa313e68f8a35 100644 --- a/client/rest-high-level/src/test/java/org/opensearch/client/indices/GetIndexResponseTests.java +++ b/client/rest-high-level/src/test/java/org/opensearch/client/indices/GetIndexResponseTests.java @@ -36,6 +36,7 @@ import org.opensearch.client.AbstractResponseTestCase; import org.opensearch.client.GetAliasesResponseTests; import org.opensearch.cluster.metadata.AliasMetadata; +import org.opensearch.cluster.metadata.Context; import org.opensearch.cluster.metadata.MappingMetadata; import org.opensearch.common.settings.IndexScopedSettings; import org.opensearch.common.settings.Settings; @@ -66,6 +67,7 @@ protected org.opensearch.action.admin.indices.get.GetIndexResponse createServerT final Map settings = new HashMap<>(); final Map defaultSettings = new HashMap<>(); final Map dataStreams = new HashMap<>(); + final Map contexts = new HashMap<>(); IndexScopedSettings indexScopedSettings = IndexScopedSettings.DEFAULT_SCOPED_SETTINGS; boolean includeDefaults = randomBoolean(); for (String index : indices) { @@ -90,6 +92,10 @@ protected org.opensearch.action.admin.indices.get.GetIndexResponse createServerT if (randomBoolean()) { dataStreams.put(index, randomAlphaOfLength(5).toLowerCase(Locale.ROOT)); } + + if (randomBoolean()) { + contexts.put(index, new Context(randomAlphaOfLength(5).toLowerCase(Locale.ROOT))); + } } return new org.opensearch.action.admin.indices.get.GetIndexResponse( indices, @@ -97,7 +103,8 @@ protected org.opensearch.action.admin.indices.get.GetIndexResponse createServerT aliases, settings, defaultSettings, - dataStreams + dataStreams, + null ); } @@ -116,6 +123,7 @@ protected void assertInstances( assertEquals(serverTestInstance.getSettings(), clientInstance.getSettings()); assertEquals(serverTestInstance.defaultSettings(), clientInstance.getDefaultSettings()); assertEquals(serverTestInstance.getAliases(), clientInstance.getAliases()); + assertEquals(serverTestInstance.contexts(), clientInstance.contexts()); } private static MappingMetadata 
createMappingsForIndex() { diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index 39a291b258efb..5c6205ebf24d4 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -11,7 +11,7 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-8.10-all.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.1-all.zip zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists -distributionSha256Sum=682b4df7fe5accdca84a4d1ef6a3a6ab096b3efd5edf7de2bd8c758d95a93703 +distributionSha256Sum=fdfca5dbc2834f0ece5020465737538e5ba679deeff5ab6c09621d67f8bb1a15 diff --git a/libs/core/src/main/java/org/opensearch/Version.java b/libs/core/src/main/java/org/opensearch/Version.java index 28cb989185ff7..ca5dd306cf907 100644 --- a/libs/core/src/main/java/org/opensearch/Version.java +++ b/libs/core/src/main/java/org/opensearch/Version.java @@ -109,6 +109,7 @@ public class Version implements Comparable, ToXContentFragment { public static final Version V_2_16_0 = new Version(2160099, org.apache.lucene.util.Version.LUCENE_9_11_1); public static final Version V_2_16_1 = new Version(2160199, org.apache.lucene.util.Version.LUCENE_9_11_1); public static final Version V_2_17_0 = new Version(2170099, org.apache.lucene.util.Version.LUCENE_9_11_1); + public static final Version V_2_18_0 = new Version(2180099, org.apache.lucene.util.Version.LUCENE_9_11_1); public static final Version V_3_0_0 = new Version(3000099, org.apache.lucene.util.Version.LUCENE_9_12_0); public static final Version CURRENT = V_3_0_0; diff --git a/libs/core/src/main/java/org/opensearch/core/common/io/stream/BufferedChecksumStreamOutput.java b/libs/core/src/main/java/org/opensearch/core/common/io/stream/BufferedChecksumStreamOutput.java index 422f956c0cd47..18bd53dc5d77c 100644 --- a/libs/core/src/main/java/org/opensearch/core/common/io/stream/BufferedChecksumStreamOutput.java +++ b/libs/core/src/main/java/org/opensearch/core/common/io/stream/BufferedChecksumStreamOutput.java @@ -33,9 +33,18 @@ package org.opensearch.core.common.io.stream; import org.apache.lucene.store.BufferedChecksum; +import org.opensearch.common.Nullable; import org.opensearch.common.annotation.PublicApi; import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; +import java.util.stream.Collectors; import java.util.zip.CRC32; import java.util.zip.Checksum; @@ -90,4 +99,75 @@ public void reset() throws IOException { public void resetDigest() { digest.reset(); } + + @Override + public void writeMap(@Nullable Map map) throws IOException { + Map newMap = new TreeMap<>(map); + writeGenericValue(newMap); + } + + @Override + public void writeMap(Map map, final Writeable.Writer keyWriter, final Writeable.Writer valueWriter) + throws IOException { + writeVInt(map.size()); + map.keySet().stream().sorted().forEachOrdered(key -> { + try { + keyWriter.write(this, key); + valueWriter.write(this, map.get(key)); + } catch (IOException e) { + throw new RuntimeException("Failed to write map values.", e); + } + }); + } + + public void writeMapValues(Map map, final Writeable.Writer valueWriter) throws IOException { + writeVInt(map.size()); + map.keySet().stream().sorted().forEachOrdered(key -> { + try { + valueWriter.write(this, map.get(key)); + } catch 
(IOException e) { + throw new RuntimeException("Failed to write map values.", e); + } + }); + } + + @Override + public void writeStringArray(String[] array) throws IOException { + String[] copyArray = Arrays.copyOf(array, array.length); + Arrays.sort(copyArray); + super.writeStringArray(copyArray); + } + + @Override + public void writeVLongArray(long[] values) throws IOException { + long[] copyValues = Arrays.copyOf(values, values.length); + Arrays.sort(copyValues); + super.writeVLongArray(copyValues); + } + + @Override + public void writeCollection(final Collection collection) throws IOException { + List sortedList = collection.stream().sorted().collect(Collectors.toList()); + super.writeCollection(sortedList, (o, v) -> v.writeTo(o)); + } + + @Override + public void writeStringCollection(final Collection collection) throws IOException { + List listCollection = new ArrayList<>(collection); + Collections.sort(listCollection); + writeCollection(listCollection, StreamOutput::writeString); + } + + @Override + public void writeOptionalStringCollection(final Collection collection) throws IOException { + if (collection != null) { + List listCollection = new ArrayList<>(collection); + Collections.sort(listCollection); + writeBoolean(true); + writeCollection(listCollection, StreamOutput::writeString); + } else { + writeBoolean(false); + } + } + } diff --git a/libs/core/src/main/java/org/opensearch/core/common/io/stream/StreamOutput.java b/libs/core/src/main/java/org/opensearch/core/common/io/stream/StreamOutput.java index b7599265aece3..cac8ddc8f94e3 100644 --- a/libs/core/src/main/java/org/opensearch/core/common/io/stream/StreamOutput.java +++ b/libs/core/src/main/java/org/opensearch/core/common/io/stream/StreamOutput.java @@ -633,7 +633,7 @@ public final void writeMapOfLists(final Map> map, final Writer * @param keyWriter The key writer * @param valueWriter The value writer */ - public final void writeMap(final Map map, final Writer keyWriter, final Writer valueWriter) throws IOException { + public void writeMap(final Map map, final Writer keyWriter, final Writer valueWriter) throws IOException { writeVInt(map.size()); for (final Map.Entry entry : map.entrySet()) { keyWriter.write(this, entry.getKey()); @@ -969,9 +969,13 @@ public void writeOptionalArray(@Nullable T[] array) throws } public void writeOptionalWriteable(@Nullable Writeable writeable) throws IOException { + writeOptionalWriteable((out, writable) -> writable.writeTo(out), writeable); + } + + public void writeOptionalWriteable(final Writer writer, @Nullable T writeable) throws IOException { if (writeable != null) { writeBoolean(true); - writeable.writeTo(this); + writer.write(this, writeable); } else { writeBoolean(false); } diff --git a/libs/core/src/main/java/org/opensearch/core/common/io/stream/VerifiableWriteable.java b/libs/core/src/main/java/org/opensearch/core/common/io/stream/VerifiableWriteable.java new file mode 100644 index 0000000000000..6683606e27195 --- /dev/null +++ b/libs/core/src/main/java/org/opensearch/core/common/io/stream/VerifiableWriteable.java @@ -0,0 +1,20 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.core.common.io.stream; + +import java.io.IOException; + +/** + * Provides a method for serialization which will give ordered stream, creating same byte array on every invocation. 
+ * This should be invoked with a stream that provides ordered serialization. + */ +public interface VerifiableWriteable extends Writeable { + + void writeVerifiableTo(BufferedChecksumStreamOutput out) throws IOException; +} diff --git a/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java b/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java index 3115dce6c10a5..b46b58f415cfd 100644 --- a/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java @@ -49,6 +49,7 @@ import org.opensearch.common.xcontent.support.XContentMapValues; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.core.xcontent.XContentParser.Token; +import org.opensearch.index.compositeindex.datacube.DimensionType; import org.opensearch.index.fielddata.FieldData; import org.opensearch.index.fielddata.IndexFieldData; import org.opensearch.index.fielddata.IndexNumericFieldData; @@ -71,10 +72,12 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.function.Supplier; /** A {@link FieldMapper} for scaled floats. Values are internally multiplied - * by a scaling factor and rounded to the closest long. */ + * by a scaling factor and rounded to the closest long. + */ public class ScaledFloatFieldMapper extends ParametrizedFieldMapper { public static final String CONTENT_TYPE = "scaled_float"; @@ -162,11 +165,21 @@ public ScaledFloatFieldMapper build(BuilderContext context) { ); return new ScaledFloatFieldMapper(name, type, multiFieldsBuilder.build(this, context), copyTo.build(), this); } + + @Override + public Optional getSupportedDataCubeDimensionType() { + return Optional.of(DimensionType.NUMERIC); + } + + @Override + public boolean isDataCubeMetricSupported() { + return true; + } } public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.getSettings())); - public static final class ScaledFloatFieldType extends SimpleMappedFieldType implements NumericPointEncoder { + public static final class ScaledFloatFieldType extends SimpleMappedFieldType implements NumericPointEncoder, FieldValueConverter { private final double scalingFactor; private final Double nullValue; @@ -340,6 +353,12 @@ public DocValueFormat docValueFormat(String format, ZoneId timeZone) { private double scale(Object input) { return new BigDecimal(Double.toString(parse(input))).multiply(BigDecimal.valueOf(scalingFactor)).doubleValue(); } + + @Override + public double toDoubleValue(long value) { + double inverseScalingFactor = 1d / scalingFactor; + return value * inverseScalingFactor; + } } private final Explicit ignoreMalformed; diff --git a/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/ScaledFloatFieldMapperTests.java b/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/ScaledFloatFieldMapperTests.java index 45507867f6691..c3d62b088ced7 100644 --- a/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/ScaledFloatFieldMapperTests.java +++ b/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/ScaledFloatFieldMapperTests.java @@ -34,11 +34,16 @@ import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexableField; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.xcontent.XContentFactory; import 
org.opensearch.core.common.bytes.BytesReference; import org.opensearch.core.xcontent.MediaTypeRegistry; import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeIndexSettings; import org.opensearch.plugins.Plugin; +import org.junit.AfterClass; +import org.junit.BeforeClass; import java.io.IOException; import java.util.Arrays; @@ -46,6 +51,7 @@ import java.util.List; import static java.util.Collections.singletonList; +import static org.opensearch.common.util.FeatureFlags.STAR_TREE_INDEX; import static org.hamcrest.Matchers.containsString; public class ScaledFloatFieldMapperTests extends MapperTestCase { @@ -91,24 +97,112 @@ public void testExistsQueryDocValuesDisabled() throws IOException { assertParseMinimalWarnings(); } - public void testDefaults() throws Exception { - XContentBuilder mapping = fieldMapping(b -> b.field("type", "scaled_float").field("scaling_factor", 10.0)); + @BeforeClass + public static void createMapper() { + FeatureFlags.initializeFeatureFlags(Settings.builder().put(STAR_TREE_INDEX, "true").build()); + } + + @AfterClass + public static void clearMapper() { + FeatureFlags.initializeFeatureFlags(Settings.EMPTY); + } + + public void testScaledFloatWithStarTree() throws Exception { + + double scalingFactorField1 = randomDouble() * 100; + double scalingFactorField2 = randomDouble() * 100; + double scalingFactorField3 = randomDouble() * 100; + + XContentBuilder mapping = getStarTreeMappingWithScaledFloat(scalingFactorField1, scalingFactorField2, scalingFactorField3); DocumentMapper mapper = createDocumentMapper(mapping); - assertEquals(mapping.toString(), mapper.mappingSource().toString()); + assertTrue(mapping.toString().contains("startree")); - ParsedDocument doc = mapper.parse(source(b -> b.field("field", 123))); - IndexableField[] fields = doc.rootDoc().getFields("field"); + long randomLongField1 = randomLong(); + long randomLongField2 = randomLong(); + long randomLongField3 = randomLong(); + ParsedDocument doc = mapper.parse( + source(b -> b.field("field1", randomLongField1).field("field2", randomLongField2).field("field3", randomLongField3)) + ); + validateScaledFloatFields(doc, "field1", randomLongField1, scalingFactorField1); + validateScaledFloatFields(doc, "field2", randomLongField2, scalingFactorField2); + validateScaledFloatFields(doc, "field3", randomLongField3, scalingFactorField3); + } + + @Override + protected Settings getIndexSettings() { + return Settings.builder() + .put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), true) + .put(super.getIndexSettings()) + .build(); + } + + private static void validateScaledFloatFields(ParsedDocument doc, String field, long value, double scalingFactor) { + IndexableField[] fields = doc.rootDoc().getFields(field); assertEquals(2, fields.length); IndexableField pointField = fields[0]; assertEquals(1, pointField.fieldType().pointDimensionCount()); assertFalse(pointField.fieldType().stored()); - assertEquals(1230, pointField.numericValue().longValue()); + assertEquals((long) (value * scalingFactor), pointField.numericValue().longValue()); IndexableField dvField = fields[1]; assertEquals(DocValuesType.SORTED_NUMERIC, dvField.fieldType().docValuesType()); - assertEquals(1230, dvField.numericValue().longValue()); + assertEquals((long) (value * scalingFactor), dvField.numericValue().longValue()); assertFalse(dvField.fieldType().stored()); } + private XContentBuilder getStarTreeMappingWithScaledFloat( + double scalingFactorField1, + double 
scalingFactorField2, + double scalingFactorField3 + ) throws IOException { + return topMapping(b -> { + b.startObject("composite"); + b.startObject("startree"); + b.field("type", "star_tree"); + b.startObject("config"); + b.field("max_leaf_docs", 100); + b.startArray("ordered_dimensions"); + b.startObject(); + b.field("name", "field1"); + b.endObject(); + b.startObject(); + b.field("name", "field2"); + b.endObject(); + b.endArray(); + b.startArray("metrics"); + b.startObject(); + b.field("name", "field3"); + b.startArray("stats"); + b.value("sum"); + b.value("value_count"); + b.endArray(); + b.endObject(); + b.endArray(); + b.endObject(); + b.endObject(); + b.endObject(); + b.startObject("properties"); + b.startObject("field1"); + b.field("type", "scaled_float").field("scaling_factor", scalingFactorField1); + b.endObject(); + b.startObject("field2"); + b.field("type", "scaled_float").field("scaling_factor", scalingFactorField2); + b.endObject(); + b.startObject("field3"); + b.field("type", "scaled_float").field("scaling_factor", scalingFactorField3); + b.endObject(); + b.endObject(); + }); + } + + public void testDefaults() throws Exception { + XContentBuilder mapping = fieldMapping(b -> b.field("type", "scaled_float").field("scaling_factor", 10.0)); + DocumentMapper mapper = createDocumentMapper(mapping); + assertEquals(mapping.toString(), mapper.mappingSource().toString()); + + ParsedDocument doc = mapper.parse(source(b -> b.field("field", 123))); + validateScaledFloatFields(doc, "field", 123, 10.0); + } + public void testMissingScalingFactor() { Exception e = expectThrows( MapperParsingException.class, diff --git a/modules/repository-url/src/internalClusterTest/java/org/opensearch/repositories/url/URLSnapshotRestoreIT.java b/modules/repository-url/src/internalClusterTest/java/org/opensearch/repositories/url/URLSnapshotRestoreIT.java index 9c61bca316a56..fd0c6b309c4fd 100644 --- a/modules/repository-url/src/internalClusterTest/java/org/opensearch/repositories/url/URLSnapshotRestoreIT.java +++ b/modules/repository-url/src/internalClusterTest/java/org/opensearch/repositories/url/URLSnapshotRestoreIT.java @@ -67,19 +67,11 @@ public void testUrlRepository() throws Exception { logger.info("--> creating repository"); Path repositoryLocation = randomRepoPath(); - assertAcked( - client.admin() - .cluster() - .preparePutRepository("test-repo") - .setType(FsRepository.TYPE) - .setSettings( - Settings.builder() - .put(FsRepository.LOCATION_SETTING.getKey(), repositoryLocation) - .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) - .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) - ) - ); - + Settings.Builder settings = Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), repositoryLocation) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES); + createRepository("test-repo", FsRepository.TYPE, settings); createIndex("test-idx"); ensureGreen(); @@ -115,17 +107,10 @@ public void testUrlRepository() throws Exception { cluster().wipeIndices("test-idx"); logger.info("--> create read-only URL repository"); - assertAcked( - client.admin() - .cluster() - .preparePutRepository("url-repo") - .setType(URLRepository.TYPE) - .setSettings( - Settings.builder() - .put(URLRepository.URL_SETTING.getKey(), repositoryLocation.toUri().toURL().toString()) - .put("list_directories", randomBoolean()) - ) - ); + Settings.Builder settingsBuilder 
= Settings.builder() + .put(URLRepository.URL_SETTING.getKey(), repositoryLocation.toUri().toURL().toString()) + .put("list_directories", randomBoolean()); + createRepository("url-repo", URLRepository.TYPE, settingsBuilder); logger.info("--> restore index after deletion"); RestoreSnapshotResponse restoreSnapshotResponse = client.admin() .cluster() diff --git a/plugins/events-correlation-engine/src/main/java/org/opensearch/plugin/correlation/core/index/query/CorrelationQueryBuilder.java b/plugins/events-correlation-engine/src/main/java/org/opensearch/plugin/correlation/core/index/query/CorrelationQueryBuilder.java index 806ac0389b5f3..e95b68e855cca 100644 --- a/plugins/events-correlation-engine/src/main/java/org/opensearch/plugin/correlation/core/index/query/CorrelationQueryBuilder.java +++ b/plugins/events-correlation-engine/src/main/java/org/opensearch/plugin/correlation/core/index/query/CorrelationQueryBuilder.java @@ -23,6 +23,7 @@ import org.opensearch.index.query.AbstractQueryBuilder; import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryShardContext; +import org.opensearch.index.query.WithFieldName; import org.opensearch.plugin.correlation.core.index.mapper.VectorFieldMapper; import java.io.IOException; @@ -36,7 +37,7 @@ * * @opensearch.internal */ -public class CorrelationQueryBuilder extends AbstractQueryBuilder { +public class CorrelationQueryBuilder extends AbstractQueryBuilder implements WithFieldName { private static final Logger log = LogManager.getLogger(CorrelationQueryBuilder.class); protected static final ParseField VECTOR_FIELD = new ParseField("vector"); @@ -205,6 +206,7 @@ public void setFieldName(String fieldName) { * get field name * @return field name */ + @Override public String fieldName() { return fieldName; } diff --git a/plugins/repository-azure/build.gradle b/plugins/repository-azure/build.gradle index e76556f24cc23..5669c5e559ac8 100644 --- a/plugins/repository-azure/build.gradle +++ b/plugins/repository-azure/build.gradle @@ -57,12 +57,12 @@ dependencies { api "io.netty:netty-transport-native-unix-common:${versions.netty}" implementation project(':modules:transport-netty4') api 'com.azure:azure-storage-blob:12.23.0' - api 'com.azure:azure-identity:1.13.0' + api 'com.azure:azure-identity:1.13.2' // Start of transitive dependencies for azure-identity api 'com.microsoft.azure:msal4j-persistence-extension:1.3.0' api "net.java.dev.jna:jna-platform:${versions.jna}" api 'com.microsoft.azure:msal4j:1.17.0' - api 'com.nimbusds:oauth2-oidc-sdk:11.9.1' + api 'com.nimbusds:oauth2-oidc-sdk:11.19.1' api 'com.nimbusds:nimbus-jose-jwt:9.40' api 'com.nimbusds:content-type:2.3' api 'com.nimbusds:lang-tag:1.7' diff --git a/plugins/repository-azure/licenses/azure-identity-1.13.0.jar.sha1 b/plugins/repository-azure/licenses/azure-identity-1.13.0.jar.sha1 deleted file mode 100644 index b59c2a3be5c92..0000000000000 --- a/plugins/repository-azure/licenses/azure-identity-1.13.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -54b44a74636322d06e9dc42d611a9f12a0966790 \ No newline at end of file diff --git a/plugins/repository-azure/licenses/azure-identity-1.13.2.jar.sha1 b/plugins/repository-azure/licenses/azure-identity-1.13.2.jar.sha1 new file mode 100644 index 0000000000000..7c98a9ccba592 --- /dev/null +++ b/plugins/repository-azure/licenses/azure-identity-1.13.2.jar.sha1 @@ -0,0 +1 @@ +50a1daef3eb5c6ab2e1351a3e3f5a7649a8fe464 \ No newline at end of file diff --git a/plugins/repository-azure/licenses/oauth2-oidc-sdk-11.19.1.jar.sha1 
b/plugins/repository-azure/licenses/oauth2-oidc-sdk-11.19.1.jar.sha1 new file mode 100644 index 0000000000000..7d83b0e8ca639 --- /dev/null +++ b/plugins/repository-azure/licenses/oauth2-oidc-sdk-11.19.1.jar.sha1 @@ -0,0 +1 @@ +58db85a807a56ae76baffa519772271ad5808195 \ No newline at end of file diff --git a/plugins/repository-azure/licenses/oauth2-oidc-sdk-11.9.1.jar.sha1 b/plugins/repository-azure/licenses/oauth2-oidc-sdk-11.9.1.jar.sha1 deleted file mode 100644 index 96d9a196a172a..0000000000000 --- a/plugins/repository-azure/licenses/oauth2-oidc-sdk-11.9.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -fa9a2e447e2cef4dfda40a854dd7ec35624a7799 \ No newline at end of file diff --git a/plugins/repository-azure/src/internalClusterTest/java/org/opensearch/repositories/azure/AzureStorageCleanupThirdPartyTests.java b/plugins/repository-azure/src/internalClusterTest/java/org/opensearch/repositories/azure/AzureStorageCleanupThirdPartyTests.java index 176e60a667aef..7f32f09602164 100644 --- a/plugins/repository-azure/src/internalClusterTest/java/org/opensearch/repositories/azure/AzureStorageCleanupThirdPartyTests.java +++ b/plugins/repository-azure/src/internalClusterTest/java/org/opensearch/repositories/azure/AzureStorageCleanupThirdPartyTests.java @@ -38,7 +38,6 @@ import com.azure.storage.blob.models.BlobStorageException; import org.opensearch.action.ActionRunnable; import org.opensearch.action.support.PlainActionFuture; -import org.opensearch.action.support.master.AcknowledgedResponse; import org.opensearch.common.collect.Tuple; import org.opensearch.common.settings.MockSecureSettings; import org.opensearch.common.settings.SecureSettings; @@ -47,6 +46,7 @@ import org.opensearch.plugins.Plugin; import org.opensearch.repositories.AbstractThirdPartyRepositoryTestCase; import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.test.OpenSearchIntegTestCase; import org.junit.AfterClass; import java.net.HttpURLConnection; @@ -56,7 +56,6 @@ import reactor.core.scheduler.Schedulers; import static org.hamcrest.Matchers.blankOrNullString; -import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.not; public class AzureStorageCleanupThirdPartyTests extends AbstractThirdPartyRepositoryTestCase { @@ -103,17 +102,11 @@ protected SecureSettings credentials() { @Override protected void createRepository(String repoName) { - AcknowledgedResponse putRepositoryResponse = client().admin() - .cluster() - .preparePutRepository(repoName) - .setType("azure") - .setSettings( - Settings.builder() - .put("container", System.getProperty("test.azure.container")) - .put("base_path", System.getProperty("test.azure.base")) - ) - .get(); - assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true)); + Settings.Builder settings = Settings.builder() + .put("container", System.getProperty("test.azure.container")) + .put("base_path", System.getProperty("test.azure.base")); + + OpenSearchIntegTestCase.putRepository(client().admin().cluster(), repoName, "azure", settings); if (Strings.hasText(System.getProperty("test.azure.sas_token"))) { ensureSasTokenPermissions(); } diff --git a/plugins/repository-gcs/build.gradle b/plugins/repository-gcs/build.gradle index 110df89f25de8..94d25af94e67f 100644 --- a/plugins/repository-gcs/build.gradle +++ b/plugins/repository-gcs/build.gradle @@ -149,9 +149,6 @@ thirdPartyAudit { 'com.google.appengine.api.urlfetch.URLFetchService', 'com.google.appengine.api.urlfetch.URLFetchServiceFactory', 'com.google.auth.oauth2.GdchCredentials', - 
'com.google.protobuf.MapFieldBuilder', - 'com.google.protobuf.MapFieldBuilder$Converter', - 'com.google.protobuf.MapFieldReflectionAccessor', 'com.google.protobuf.util.JsonFormat', 'com.google.protobuf.util.JsonFormat$Parser', 'com.google.protobuf.util.JsonFormat$Printer', diff --git a/plugins/repository-gcs/src/internalClusterTest/java/org/opensearch/repositories/gcs/GoogleCloudStorageThirdPartyTests.java b/plugins/repository-gcs/src/internalClusterTest/java/org/opensearch/repositories/gcs/GoogleCloudStorageThirdPartyTests.java index 1e11b1d111d8f..860b30fdef9ca 100644 --- a/plugins/repository-gcs/src/internalClusterTest/java/org/opensearch/repositories/gcs/GoogleCloudStorageThirdPartyTests.java +++ b/plugins/repository-gcs/src/internalClusterTest/java/org/opensearch/repositories/gcs/GoogleCloudStorageThirdPartyTests.java @@ -32,19 +32,18 @@ package org.opensearch.repositories.gcs; -import org.opensearch.action.support.master.AcknowledgedResponse; import org.opensearch.common.settings.MockSecureSettings; import org.opensearch.common.settings.SecureSettings; import org.opensearch.common.settings.Settings; import org.opensearch.core.common.Strings; import org.opensearch.plugins.Plugin; import org.opensearch.repositories.AbstractThirdPartyRepositoryTestCase; +import org.opensearch.test.OpenSearchIntegTestCase; import java.util.Base64; import java.util.Collection; import static org.hamcrest.Matchers.blankOrNullString; -import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.not; public class GoogleCloudStorageThirdPartyTests extends AbstractThirdPartyRepositoryTestCase { @@ -84,16 +83,9 @@ protected SecureSettings credentials() { @Override protected void createRepository(final String repoName) { - AcknowledgedResponse putRepositoryResponse = client().admin() - .cluster() - .preparePutRepository("test-repo") - .setType("gcs") - .setSettings( - Settings.builder() - .put("bucket", System.getProperty("test.google.bucket")) - .put("base_path", System.getProperty("test.google.base", "/")) - ) - .get(); - assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true)); + Settings.Builder settings = Settings.builder() + .put("bucket", System.getProperty("test.google.bucket")) + .put("base_path", System.getProperty("test.google.base", "/")); + OpenSearchIntegTestCase.putRepository(client().admin().cluster(), "test-repo", "gcs", settings); } } diff --git a/plugins/repository-hdfs/build.gradle b/plugins/repository-hdfs/build.gradle index eeb5e2cd88317..15c4a1647fd38 100644 --- a/plugins/repository-hdfs/build.gradle +++ b/plugins/repository-hdfs/build.gradle @@ -76,7 +76,7 @@ dependencies { api "org.apache.commons:commons-compress:${versions.commonscompress}" api 'org.apache.commons:commons-configuration2:2.11.0' api "commons-io:commons-io:${versions.commonsio}" - api 'org.apache.commons:commons-lang3:3.16.0' + api 'org.apache.commons:commons-lang3:3.17.0' implementation 'com.google.re2j:re2j:1.7' api 'javax.servlet:servlet-api:2.5' api "org.slf4j:slf4j-api:${versions.slf4j}" diff --git a/plugins/repository-hdfs/licenses/commons-lang3-3.16.0.jar.sha1 b/plugins/repository-hdfs/licenses/commons-lang3-3.16.0.jar.sha1 deleted file mode 100644 index ef4f1c1fc2002..0000000000000 --- a/plugins/repository-hdfs/licenses/commons-lang3-3.16.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -3eb54effe40946dfb06dc5cd6c7ce4116cd51ea4 \ No newline at end of file diff --git a/plugins/repository-hdfs/licenses/commons-lang3-3.17.0.jar.sha1 b/plugins/repository-hdfs/licenses/commons-lang3-3.17.0.jar.sha1 
new file mode 100644 index 0000000000000..073922fda1dbe --- /dev/null +++ b/plugins/repository-hdfs/licenses/commons-lang3-3.17.0.jar.sha1 @@ -0,0 +1 @@ +b17d2136f0460dcc0d2016ceefca8723bdf4ee70 \ No newline at end of file diff --git a/plugins/repository-hdfs/src/test/java/org/opensearch/repositories/hdfs/HdfsRepositoryTests.java b/plugins/repository-hdfs/src/test/java/org/opensearch/repositories/hdfs/HdfsRepositoryTests.java index ab10691240649..60fdbea011a44 100644 --- a/plugins/repository-hdfs/src/test/java/org/opensearch/repositories/hdfs/HdfsRepositoryTests.java +++ b/plugins/repository-hdfs/src/test/java/org/opensearch/repositories/hdfs/HdfsRepositoryTests.java @@ -34,12 +34,12 @@ import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; import org.opensearch.action.admin.cluster.repositories.cleanup.CleanupRepositoryResponse; -import org.opensearch.action.support.master.AcknowledgedResponse; import org.opensearch.common.settings.MockSecureSettings; import org.opensearch.common.settings.SecureSettings; import org.opensearch.common.settings.Settings; import org.opensearch.plugins.Plugin; import org.opensearch.repositories.AbstractThirdPartyRepositoryTestCase; +import org.opensearch.test.OpenSearchIntegTestCase; import java.util.Collection; @@ -61,20 +61,13 @@ protected SecureSettings credentials() { @Override protected void createRepository(String repoName) { - AcknowledgedResponse putRepositoryResponse = client().admin() - .cluster() - .preparePutRepository(repoName) - .setType("hdfs") - .setSettings( - Settings.builder() - .put("uri", "hdfs:///") - .put("conf.fs.AbstractFileSystem.hdfs.impl", TestingFs.class.getName()) - .put("path", "foo") - .put("chunk_size", randomIntBetween(100, 1000) + "k") - .put("compress", randomBoolean()) - ) - .get(); - assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true)); + Settings.Builder settings = Settings.builder() + .put("uri", "hdfs:///") + .put("conf.fs.AbstractFileSystem.hdfs.impl", TestingFs.class.getName()) + .put("path", "foo") + .put("chunk_size", randomIntBetween(100, 1000) + "k") + .put("compress", randomBoolean()); + OpenSearchIntegTestCase.putRepository(client().admin().cluster(), repoName, "hdfs", settings); } // HDFS repository doesn't have precise cleanup stats so we only check whether or not any blobs were removed diff --git a/plugins/repository-hdfs/src/test/java/org/opensearch/repositories/hdfs/HdfsTests.java b/plugins/repository-hdfs/src/test/java/org/opensearch/repositories/hdfs/HdfsTests.java index ce456f26af3a4..130bbbf1d2198 100644 --- a/plugins/repository-hdfs/src/test/java/org/opensearch/repositories/hdfs/HdfsTests.java +++ b/plugins/repository-hdfs/src/test/java/org/opensearch/repositories/hdfs/HdfsTests.java @@ -35,7 +35,6 @@ import org.opensearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; import org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse; -import org.opensearch.action.support.master.AcknowledgedResponse; import org.opensearch.client.Client; import org.opensearch.cluster.ClusterState; import org.opensearch.common.settings.Settings; @@ -45,6 +44,7 @@ import org.opensearch.repositories.blobstore.BlobStoreRepository; import org.opensearch.repositories.blobstore.BlobStoreTestUtil; import org.opensearch.snapshots.SnapshotState; +import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.test.OpenSearchSingleNodeTestCase; import org.opensearch.threadpool.ThreadPool; @@ -63,21 +63,13 @@ protected Collection> getPlugins() { public void 
testSimpleWorkflow() { Client client = client(); - - AcknowledgedResponse putRepositoryResponse = client.admin() - .cluster() - .preparePutRepository("test-repo") - .setType("hdfs") - .setSettings( - Settings.builder() - .put("uri", "hdfs:///") - .put("conf.fs.AbstractFileSystem.hdfs.impl", TestingFs.class.getName()) - .put("path", "foo") - .put("chunk_size", randomIntBetween(100, 1000) + "k") - .put("compress", randomBoolean()) - ) - .get(); - assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true)); + Settings.Builder settings = Settings.builder() + .put("uri", "hdfs:///") + .put("conf.fs.AbstractFileSystem.hdfs.impl", TestingFs.class.getName()) + .put("path", "foo") + .put("chunk_size", randomIntBetween(100, 1000) + "k") + .put("compress", randomBoolean()); + OpenSearchIntegTestCase.putRepository(client.admin().cluster(), "test-repo", "hdfs", settings); createIndex("test-idx-1"); createIndex("test-idx-2"); @@ -168,7 +160,7 @@ public void testSimpleWorkflow() { public void testMissingUri() { try { - client().admin().cluster().preparePutRepository("test-repo").setType("hdfs").setSettings(Settings.EMPTY).get(); + OpenSearchIntegTestCase.putRepository(client().admin().cluster(), "test-repo", "hdfs", Settings.builder()); fail(); } catch (RepositoryException e) { assertTrue(e.getCause() instanceof IllegalArgumentException); @@ -178,12 +170,8 @@ public void testMissingUri() { public void testEmptyUri() { try { - client().admin() - .cluster() - .preparePutRepository("test-repo") - .setType("hdfs") - .setSettings(Settings.builder().put("uri", "/path").build()) - .get(); + Settings.Builder settings = Settings.builder().put("uri", "/path"); + OpenSearchIntegTestCase.putRepository(client().admin().cluster(), "test-repo", "hdfs", settings); fail(); } catch (RepositoryException e) { assertTrue(e.getCause() instanceof IllegalArgumentException); @@ -193,12 +181,8 @@ public void testEmptyUri() { public void testNonHdfsUri() { try { - client().admin() - .cluster() - .preparePutRepository("test-repo") - .setType("hdfs") - .setSettings(Settings.builder().put("uri", "file:///").build()) - .get(); + Settings.Builder settings = Settings.builder().put("uri", "file:///"); + OpenSearchIntegTestCase.putRepository(client().admin().cluster(), "test-repo", "hdfs", settings); fail(); } catch (RepositoryException e) { assertTrue(e.getCause() instanceof IllegalArgumentException); @@ -208,12 +192,8 @@ public void testNonHdfsUri() { public void testPathSpecifiedInHdfs() { try { - client().admin() - .cluster() - .preparePutRepository("test-repo") - .setType("hdfs") - .setSettings(Settings.builder().put("uri", "hdfs:///some/path").build()) - .get(); + Settings.Builder settings = Settings.builder().put("uri", "hdfs:///some/path"); + OpenSearchIntegTestCase.putRepository(client().admin().cluster(), "test-repo", "hdfs", settings); fail(); } catch (RepositoryException e) { assertTrue(e.getCause() instanceof IllegalArgumentException); @@ -223,12 +203,8 @@ public void testPathSpecifiedInHdfs() { public void testMissingPath() { try { - client().admin() - .cluster() - .preparePutRepository("test-repo") - .setType("hdfs") - .setSettings(Settings.builder().put("uri", "hdfs:///").build()) - .get(); + Settings.Builder settings = Settings.builder().put("uri", "hdfs:///"); + OpenSearchIntegTestCase.putRepository(client().admin().cluster(), "test-repo", "hdfs", settings); fail(); } catch (RepositoryException e) { assertTrue(e.getCause() instanceof IllegalArgumentException); diff --git 
a/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3RepositoryThirdPartyTests.java b/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3RepositoryThirdPartyTests.java index f7a84864a8569..7db9a0d3ba790 100644 --- a/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3RepositoryThirdPartyTests.java +++ b/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3RepositoryThirdPartyTests.java @@ -33,7 +33,6 @@ import software.amazon.awssdk.services.s3.model.StorageClass; -import org.opensearch.action.support.master.AcknowledgedResponse; import org.opensearch.common.SuppressForbidden; import org.opensearch.common.blobstore.BlobMetadata; import org.opensearch.common.blobstore.BlobPath; @@ -43,6 +42,7 @@ import org.opensearch.plugins.Plugin; import org.opensearch.repositories.AbstractThirdPartyRepositoryTestCase; import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.test.OpenSearchIntegTestCase; import org.junit.Before; import java.util.Collection; @@ -51,7 +51,6 @@ import java.util.concurrent.TimeUnit; import static org.hamcrest.Matchers.blankOrNullString; -import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.not; public class S3RepositoryThirdPartyTests extends AbstractThirdPartyRepositoryTestCase { @@ -111,13 +110,7 @@ protected void createRepository(String repoName) { settings.put("storage_class", storageClass); } } - AcknowledgedResponse putRepositoryResponse = client().admin() - .cluster() - .preparePutRepository("test-repo") - .setType("s3") - .setSettings(settings) - .get(); - assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true)); + OpenSearchIntegTestCase.putRepository(client().admin().cluster(), "test-repo", "s3", settings); } @Override diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/RepositoryCredentialsTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/RepositoryCredentialsTests.java index 573a4f3f51a41..21017160d77e5 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/RepositoryCredentialsTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/RepositoryCredentialsTests.java @@ -55,6 +55,7 @@ import org.opensearch.rest.RestRequest; import org.opensearch.rest.RestResponse; import org.opensearch.rest.action.admin.cluster.RestGetRepositoriesAction; +import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.test.OpenSearchSingleNodeTestCase; import org.opensearch.test.rest.FakeRestRequest; @@ -68,7 +69,6 @@ import static org.opensearch.repositories.s3.S3ClientSettings.ACCESS_KEY_SETTING; import static org.opensearch.repositories.s3.S3ClientSettings.SECRET_KEY_SETTING; -import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; @@ -277,14 +277,8 @@ public void sendResponse(RestResponse response) { } private void createRepository(final String name, final Settings repositorySettings) { - assertAcked( - client().admin() - .cluster() - .preparePutRepository(name) - .setType(S3Repository.TYPE) - .setVerify(false) - .setSettings(repositorySettings) - ); + Settings.Builder settings = Settings.builder().put(repositorySettings); + OpenSearchIntegTestCase.putRepository(client().admin().cluster(), name, 
S3Repository.TYPE, false, settings); } /** diff --git a/plugins/transport-reactor-netty4/src/javaRestTest/java/org/opensearch/rest/ReactorNetty4BadRequestIT.java b/plugins/transport-reactor-netty4/src/javaRestTest/java/org/opensearch/rest/ReactorNetty4BadRequestIT.java index 62834483b5e9b..b1d21fe4eee09 100644 --- a/plugins/transport-reactor-netty4/src/javaRestTest/java/org/opensearch/rest/ReactorNetty4BadRequestIT.java +++ b/plugins/transport-reactor-netty4/src/javaRestTest/java/org/opensearch/rest/ReactorNetty4BadRequestIT.java @@ -112,4 +112,16 @@ public void testInvalidHeaderValue() throws IOException { assertThat(map.get("type"), equalTo("content_type_header_exception")); assertThat(map.get("reason"), equalTo("java.lang.IllegalArgumentException: invalid Content-Type header []")); } + + public void testUnsupportedContentType() throws IOException { + final Request request = new Request("POST", "/_bulk/stream"); + final RequestOptions.Builder options = request.getOptions().toBuilder(); + request.setOptions(options); + final ResponseException e = expectThrows(ResponseException.class, () -> client().performRequest(request)); + final Response response = e.getResponse(); + assertThat(response.getStatusLine().getStatusCode(), equalTo(406)); + final ObjectPath objectPath = ObjectPath.createFromResponse(response); + final String error = objectPath.evaluate("error"); + assertThat(error, equalTo("Content-Type header [] is not supported")); + } } diff --git a/plugins/transport-reactor-netty4/src/javaRestTest/java/org/opensearch/rest/ReactorNetty4StreamingStressIT.java b/plugins/transport-reactor-netty4/src/javaRestTest/java/org/opensearch/rest/ReactorNetty4StreamingStressIT.java index a978af1b11db4..a853ed56fad32 100644 --- a/plugins/transport-reactor-netty4/src/javaRestTest/java/org/opensearch/rest/ReactorNetty4StreamingStressIT.java +++ b/plugins/transport-reactor-netty4/src/javaRestTest/java/org/opensearch/rest/ReactorNetty4StreamingStressIT.java @@ -8,7 +8,6 @@ package org.opensearch.rest; -import org.apache.hc.core5.http.ConnectionClosedException; import org.opensearch.client.Request; import org.opensearch.client.Response; import org.opensearch.client.StreamingRequest; @@ -16,24 +15,20 @@ import org.opensearch.test.rest.OpenSearchRestTestCase; import org.junit.After; +import java.io.IOException; import java.io.InterruptedIOException; +import java.io.UncheckedIOException; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.time.Duration; -import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Stream; import reactor.core.publisher.Flux; -import reactor.test.subscriber.TestSubscriber; +import reactor.test.StepVerifier; +import reactor.test.scheduler.VirtualTimeScheduler; -import static org.hamcrest.CoreMatchers.anyOf; import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.instanceOf; -import static org.hamcrest.CoreMatchers.not; -import static org.hamcrest.collection.IsEmptyCollection.empty; public class ReactorNetty4StreamingStressIT extends OpenSearchRestTestCase { @After @@ -49,6 +44,8 @@ public void tearDown() throws Exception { } public void testCloseClientStreamingRequest() throws Exception { + final VirtualTimeScheduler scheduler = VirtualTimeScheduler.create(true); + final AtomicInteger id = new AtomicInteger(0); final Stream stream = Stream.generate( () -> "{ \"index\": { 
\"_index\": \"test-stress-streaming\", \"_id\": \"" @@ -57,39 +54,28 @@ public void testCloseClientStreamingRequest() throws Exception { + "{ \"name\": \"josh\" }\n" ); + final Duration delay = Duration.ofMillis(1); final StreamingRequest streamingRequest = new StreamingRequest<>( "POST", "/_bulk/stream", - Flux.fromStream(stream).delayElements(Duration.ofMillis(500)).map(s -> ByteBuffer.wrap(s.getBytes(StandardCharsets.UTF_8))) + Flux.fromStream(stream).delayElements(delay, scheduler).map(s -> ByteBuffer.wrap(s.getBytes(StandardCharsets.UTF_8))) ); streamingRequest.addParameter("refresh", "true"); final StreamingResponse streamingResponse = client().streamRequest(streamingRequest); - TestSubscriber subscriber = TestSubscriber.create(); - streamingResponse.getBody().subscribe(subscriber); - - final ScheduledExecutorService executor = Executors.newSingleThreadScheduledExecutor(); - try { - // Await for subscriber to receive at least one chunk - assertBusy(() -> assertThat(subscriber.getReceivedOnNext(), not(empty()))); - - // Close client forceably - executor.schedule(() -> { - client().close(); - return null; - }, 2, TimeUnit.SECONDS); + scheduler.advanceTimeBy(delay); /* emit first element */ - // Await for subscriber to terminate - subscriber.block(Duration.ofSeconds(10)); - assertThat( - subscriber.expectTerminalError(), - anyOf(instanceOf(InterruptedIOException.class), instanceOf(ConnectionClosedException.class)) - ); - } finally { - executor.shutdown(); - if (executor.awaitTermination(1, TimeUnit.SECONDS) == false) { - executor.shutdownNow(); - } - } + StepVerifier.create(Flux.from(streamingResponse.getBody()).map(b -> new String(b.array(), StandardCharsets.UTF_8))) + .expectNextMatches(s -> s.contains("\"result\":\"created\"") && s.contains("\"_id\":\"1\"")) + .then(() -> { + try { + client().close(); + } catch (final IOException ex) { + throw new UncheckedIOException(ex); + } + }) + .then(() -> scheduler.advanceTimeBy(delay)) + .expectErrorMatches(t -> t instanceof InterruptedIOException) + .verify(); } } diff --git a/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/WorkloadManagementPlugin.java b/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/WorkloadManagementPlugin.java index 64f510fa1db67..c86490552f2f2 100644 --- a/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/WorkloadManagementPlugin.java +++ b/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/WorkloadManagementPlugin.java @@ -24,9 +24,12 @@ import org.opensearch.plugin.wlm.action.TransportCreateQueryGroupAction; import org.opensearch.plugin.wlm.action.TransportDeleteQueryGroupAction; import org.opensearch.plugin.wlm.action.TransportGetQueryGroupAction; +import org.opensearch.plugin.wlm.action.TransportUpdateQueryGroupAction; +import org.opensearch.plugin.wlm.action.UpdateQueryGroupAction; import org.opensearch.plugin.wlm.rest.RestCreateQueryGroupAction; import org.opensearch.plugin.wlm.rest.RestDeleteQueryGroupAction; import org.opensearch.plugin.wlm.rest.RestGetQueryGroupAction; +import org.opensearch.plugin.wlm.rest.RestUpdateQueryGroupAction; import org.opensearch.plugin.wlm.service.QueryGroupPersistenceService; import org.opensearch.plugins.ActionPlugin; import org.opensearch.plugins.Plugin; @@ -52,7 +55,8 @@ public WorkloadManagementPlugin() {} return List.of( new ActionPlugin.ActionHandler<>(CreateQueryGroupAction.INSTANCE, TransportCreateQueryGroupAction.class), new ActionPlugin.ActionHandler<>(GetQueryGroupAction.INSTANCE, 
TransportGetQueryGroupAction.class), - new ActionPlugin.ActionHandler<>(DeleteQueryGroupAction.INSTANCE, TransportDeleteQueryGroupAction.class) + new ActionPlugin.ActionHandler<>(DeleteQueryGroupAction.INSTANCE, TransportDeleteQueryGroupAction.class), + new ActionPlugin.ActionHandler<>(UpdateQueryGroupAction.INSTANCE, TransportUpdateQueryGroupAction.class) ); } @@ -66,7 +70,12 @@ public List getRestHandlers( IndexNameExpressionResolver indexNameExpressionResolver, Supplier nodesInCluster ) { - return List.of(new RestCreateQueryGroupAction(), new RestGetQueryGroupAction(), new RestDeleteQueryGroupAction()); + return List.of( + new RestCreateQueryGroupAction(), + new RestGetQueryGroupAction(), + new RestDeleteQueryGroupAction(), + new RestUpdateQueryGroupAction() + ); } @Override diff --git a/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/action/CreateQueryGroupRequest.java b/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/action/CreateQueryGroupRequest.java index ff6422be36885..d92283391dd3b 100644 --- a/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/action/CreateQueryGroupRequest.java +++ b/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/action/CreateQueryGroupRequest.java @@ -40,7 +40,7 @@ public class CreateQueryGroupRequest extends ActionRequest { * Constructor for CreateQueryGroupRequest * @param queryGroup - A {@link QueryGroup} object */ - public CreateQueryGroupRequest(QueryGroup queryGroup) { + CreateQueryGroupRequest(QueryGroup queryGroup) { this.queryGroup = queryGroup; } @@ -48,7 +48,7 @@ public CreateQueryGroupRequest(QueryGroup queryGroup) { * Constructor for CreateQueryGroupRequest * @param in - A {@link StreamInput} object */ - public CreateQueryGroupRequest(StreamInput in) throws IOException { + CreateQueryGroupRequest(StreamInput in) throws IOException { super(in); queryGroup = new QueryGroup(in); } diff --git a/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/action/TransportUpdateQueryGroupAction.java b/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/action/TransportUpdateQueryGroupAction.java new file mode 100644 index 0000000000000..a6aa2da8fdc08 --- /dev/null +++ b/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/action/TransportUpdateQueryGroupAction.java @@ -0,0 +1,51 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.plugin.wlm.action; + +import org.opensearch.action.support.ActionFilters; +import org.opensearch.action.support.HandledTransportAction; +import org.opensearch.common.inject.Inject; +import org.opensearch.core.action.ActionListener; +import org.opensearch.plugin.wlm.service.QueryGroupPersistenceService; +import org.opensearch.tasks.Task; +import org.opensearch.transport.TransportService; + +/** + * Transport action to update QueryGroup + * + * @opensearch.experimental + */ +public class TransportUpdateQueryGroupAction extends HandledTransportAction { + + private final QueryGroupPersistenceService queryGroupPersistenceService; + + /** + * Constructor for TransportUpdateQueryGroupAction + * + * @param actionName - action name + * @param transportService - a {@link TransportService} object + * @param actionFilters - a {@link ActionFilters} object + * @param queryGroupPersistenceService - a {@link QueryGroupPersistenceService} object + */ + @Inject + public TransportUpdateQueryGroupAction( + String actionName, + TransportService transportService, + ActionFilters actionFilters, + QueryGroupPersistenceService queryGroupPersistenceService + ) { + super(UpdateQueryGroupAction.NAME, transportService, actionFilters, UpdateQueryGroupRequest::new); + this.queryGroupPersistenceService = queryGroupPersistenceService; + } + + @Override + protected void doExecute(Task task, UpdateQueryGroupRequest request, ActionListener listener) { + queryGroupPersistenceService.updateInClusterStateMetadata(request, listener); + } +} diff --git a/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/action/UpdateQueryGroupAction.java b/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/action/UpdateQueryGroupAction.java new file mode 100644 index 0000000000000..ff472f206131c --- /dev/null +++ b/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/action/UpdateQueryGroupAction.java @@ -0,0 +1,36 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.plugin.wlm.action; + +import org.opensearch.action.ActionType; + +/** + * Transport action to update QueryGroup + * + * @opensearch.experimental + */ +public class UpdateQueryGroupAction extends ActionType { + + /** + * An instance of UpdateQueryGroupAction + */ + public static final UpdateQueryGroupAction INSTANCE = new UpdateQueryGroupAction(); + + /** + * Name for UpdateQueryGroupAction + */ + public static final String NAME = "cluster:admin/opensearch/wlm/query_group/_update"; + + /** + * Default constructor + */ + private UpdateQueryGroupAction() { + super(NAME, UpdateQueryGroupResponse::new); + } +} diff --git a/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/action/UpdateQueryGroupRequest.java b/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/action/UpdateQueryGroupRequest.java new file mode 100644 index 0000000000000..048b599f095fd --- /dev/null +++ b/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/action/UpdateQueryGroupRequest.java @@ -0,0 +1,83 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.plugin.wlm.action; + +import org.opensearch.action.ActionRequest; +import org.opensearch.action.ActionRequestValidationException; +import org.opensearch.cluster.metadata.QueryGroup; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.wlm.MutableQueryGroupFragment; + +import java.io.IOException; + +/** + * A request for update QueryGroup + * + * @opensearch.experimental + */ +public class UpdateQueryGroupRequest extends ActionRequest { + private final String name; + private final MutableQueryGroupFragment mutableQueryGroupFragment; + + /** + * Constructor for UpdateQueryGroupRequest + * @param name - QueryGroup name for UpdateQueryGroupRequest + * @param mutableQueryGroupFragment - MutableQueryGroupFragment for UpdateQueryGroupRequest + */ + UpdateQueryGroupRequest(String name, MutableQueryGroupFragment mutableQueryGroupFragment) { + this.name = name; + this.mutableQueryGroupFragment = mutableQueryGroupFragment; + } + + /** + * Constructor for UpdateQueryGroupRequest + * @param in - A {@link StreamInput} object + */ + UpdateQueryGroupRequest(StreamInput in) throws IOException { + this(in.readString(), new MutableQueryGroupFragment(in)); + } + + /** + * Generate a UpdateQueryGroupRequest from XContent + * @param parser - A {@link XContentParser} object + * @param name - name of the QueryGroup to be updated + */ + public static UpdateQueryGroupRequest fromXContent(XContentParser parser, String name) throws IOException { + QueryGroup.Builder builder = QueryGroup.Builder.fromXContent(parser); + return new UpdateQueryGroupRequest(name, builder.getMutableQueryGroupFragment()); + } + + @Override + public ActionRequestValidationException validate() { + QueryGroup.validateName(name); + return null; + } + + /** + * name getter + */ + public String getName() { + return name; + } + + /** + * mutableQueryGroupFragment getter + */ + public MutableQueryGroupFragment getmMutableQueryGroupFragment() { + return mutableQueryGroupFragment; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(name); + mutableQueryGroupFragment.writeTo(out); + } +} diff --git a/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/action/UpdateQueryGroupResponse.java b/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/action/UpdateQueryGroupResponse.java new file mode 100644 index 0000000000000..9071f52ecb5a7 --- /dev/null +++ b/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/action/UpdateQueryGroupResponse.java @@ -0,0 +1,74 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.plugin.wlm.action; + +import org.opensearch.cluster.metadata.QueryGroup; +import org.opensearch.core.action.ActionResponse; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.rest.RestStatus; +import org.opensearch.core.xcontent.ToXContent; +import org.opensearch.core.xcontent.ToXContentObject; +import org.opensearch.core.xcontent.XContentBuilder; + +import java.io.IOException; + +/** + * Response for the update API for QueryGroup + * + * @opensearch.experimental + */ +public class UpdateQueryGroupResponse extends ActionResponse implements ToXContent, ToXContentObject { + private final QueryGroup queryGroup; + private final RestStatus restStatus; + + /** + * Constructor for UpdateQueryGroupResponse + * @param queryGroup - the QueryGroup to be updated + * @param restStatus - the rest status for the response + */ + public UpdateQueryGroupResponse(final QueryGroup queryGroup, RestStatus restStatus) { + this.queryGroup = queryGroup; + this.restStatus = restStatus; + } + + /** + * Constructor for UpdateQueryGroupResponse + * @param in - a {@link StreamInput} object + */ + public UpdateQueryGroupResponse(StreamInput in) throws IOException { + queryGroup = new QueryGroup(in); + restStatus = RestStatus.readFrom(in); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + queryGroup.writeTo(out); + RestStatus.writeTo(out, restStatus); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + return queryGroup.toXContent(builder, params); + } + + /** + * queryGroup getter + */ + public QueryGroup getQueryGroup() { + return queryGroup; + } + + /** + * restStatus getter + */ + public RestStatus getRestStatus() { + return restStatus; + } +} diff --git a/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rest/RestGetQueryGroupAction.java b/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rest/RestGetQueryGroupAction.java index c250bd2979e98..c87973e113138 100644 --- a/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rest/RestGetQueryGroupAction.java +++ b/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rest/RestGetQueryGroupAction.java @@ -27,7 +27,7 @@ import static org.opensearch.rest.RestRequest.Method.GET; /** - * Rest action to get a QueryGroup0 + * Rest action to get a QueryGroup * * @opensearch.experimental */ diff --git a/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rest/RestUpdateQueryGroupAction.java b/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rest/RestUpdateQueryGroupAction.java new file mode 100644 index 0000000000000..55b4bc5a295c4 --- /dev/null +++ b/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rest/RestUpdateQueryGroupAction.java @@ -0,0 +1,72 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.plugin.wlm.rest; + +import org.opensearch.client.node.NodeClient; +import org.opensearch.core.rest.RestStatus; +import org.opensearch.core.xcontent.ToXContent; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.plugin.wlm.action.UpdateQueryGroupAction; +import org.opensearch.plugin.wlm.action.UpdateQueryGroupRequest; +import org.opensearch.plugin.wlm.action.UpdateQueryGroupResponse; +import org.opensearch.rest.BaseRestHandler; +import org.opensearch.rest.BytesRestResponse; +import org.opensearch.rest.RestChannel; +import org.opensearch.rest.RestRequest; +import org.opensearch.rest.RestResponse; +import org.opensearch.rest.action.RestResponseListener; + +import java.io.IOException; +import java.util.List; + +import static org.opensearch.rest.RestRequest.Method.POST; +import static org.opensearch.rest.RestRequest.Method.PUT; + +/** + * Rest action to update a QueryGroup + * + * @opensearch.experimental + */ +public class RestUpdateQueryGroupAction extends BaseRestHandler { + + /** + * Constructor for RestUpdateQueryGroupAction + */ + public RestUpdateQueryGroupAction() {} + + @Override + public String getName() { + return "update_query_group"; + } + + /** + * The list of {@link Route}s that this RestHandler is responsible for handling. + */ + @Override + public List routes() { + return List.of(new Route(POST, "_wlm/query_group/{name}"), new Route(PUT, "_wlm/query_group/{name}")); + } + + @Override + protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) throws IOException { + try (XContentParser parser = request.contentParser()) { + UpdateQueryGroupRequest updateQueryGroupRequest = UpdateQueryGroupRequest.fromXContent(parser, request.param("name")); + return channel -> client.execute(UpdateQueryGroupAction.INSTANCE, updateQueryGroupRequest, updateQueryGroupResponse(channel)); + } + } + + private RestResponseListener updateQueryGroupResponse(final RestChannel channel) { + return new RestResponseListener<>(channel) { + @Override + public RestResponse buildResponse(final UpdateQueryGroupResponse response) throws Exception { + return new BytesRestResponse(RestStatus.OK, response.toXContent(channel.newBuilder(), ToXContent.EMPTY_PARAMS)); + } + }; + } +} diff --git a/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/service/QueryGroupPersistenceService.java b/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/service/QueryGroupPersistenceService.java index 7561a2f6f99c3..f9332ff3022dc 100644 --- a/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/service/QueryGroupPersistenceService.java +++ b/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/service/QueryGroupPersistenceService.java @@ -29,6 +29,9 @@ import org.opensearch.core.rest.RestStatus; import org.opensearch.plugin.wlm.action.CreateQueryGroupResponse; import org.opensearch.plugin.wlm.action.DeleteQueryGroupRequest; +import org.opensearch.plugin.wlm.action.UpdateQueryGroupRequest; +import org.opensearch.plugin.wlm.action.UpdateQueryGroupResponse; +import org.opensearch.wlm.MutableQueryGroupFragment; import org.opensearch.wlm.ResourceType; import java.util.Collection; @@ -37,6 +40,8 @@ import java.util.Optional; import java.util.stream.Collectors; +import static org.opensearch.cluster.metadata.QueryGroup.updateExistingQueryGroup; + /** * This class defines the functions for QueryGroup persistence */ @@ -44,6 +49,7 @@ public class QueryGroupPersistenceService { static final String SOURCE 
= "query-group-persistence-service"; private static final String CREATE_QUERY_GROUP_THROTTLING_KEY = "create-query-group"; private static final String DELETE_QUERY_GROUP_THROTTLING_KEY = "delete-query-group"; + private static final String UPDATE_QUERY_GROUP_THROTTLING_KEY = "update-query-group"; private static final Logger logger = LogManager.getLogger(QueryGroupPersistenceService.class); /** * max QueryGroup count setting name @@ -72,6 +78,7 @@ public class QueryGroupPersistenceService { private volatile int maxQueryGroupCount; final ThrottlingKey createQueryGroupThrottlingKey; final ThrottlingKey deleteQueryGroupThrottlingKey; + final ThrottlingKey updateQueryGroupThrottlingKey; /** * Constructor for QueryGroupPersistenceService @@ -89,6 +96,7 @@ public QueryGroupPersistenceService( this.clusterService = clusterService; this.createQueryGroupThrottlingKey = clusterService.registerClusterManagerTask(CREATE_QUERY_GROUP_THROTTLING_KEY, true); this.deleteQueryGroupThrottlingKey = clusterService.registerClusterManagerTask(DELETE_QUERY_GROUP_THROTTLING_KEY, true); + this.updateQueryGroupThrottlingKey = clusterService.registerClusterManagerTask(UPDATE_QUERY_GROUP_THROTTLING_KEY, true); setMaxQueryGroupCount(MAX_QUERY_GROUP_COUNT.get(settings)); clusterSettings.addSettingsUpdateConsumer(MAX_QUERY_GROUP_COUNT, this::setMaxQueryGroupCount); } @@ -169,39 +177,13 @@ ClusterState saveQueryGroupInClusterState(final QueryGroup queryGroup, final Clu } // check if there's any resource allocation that exceed limit of 1.0 - Map totalUsageMap = calculateTotalUsage(existingQueryGroups, queryGroup); - for (ResourceType resourceType : queryGroup.getResourceLimits().keySet()) { - if (totalUsageMap.get(resourceType) > 1) { - logger.warn("Total resource allocation for {} will go above the max limit of 1.0.", resourceType.getName()); - throw new IllegalArgumentException( - "Total resource allocation for " + resourceType.getName() + " will go above the max limit of 1.0." 
- ); - } - } + validateTotalUsage(existingQueryGroups, groupName, queryGroup.getResourceLimits()); return ClusterState.builder(currentClusterState) .metadata(Metadata.builder(currentClusterState.metadata()).put(queryGroup).build()) .build(); } - /** - * This method calculates the existing total usage of the all the resource limits - * @param existingQueryGroups - existing QueryGroups in the system - * @param queryGroup - the QueryGroup we're creating or updating - */ - private Map calculateTotalUsage(Map existingQueryGroups, QueryGroup queryGroup) { - final Map map = new EnumMap<>(ResourceType.class); - map.putAll(queryGroup.getResourceLimits()); - for (QueryGroup currGroup : existingQueryGroups.values()) { - if (!currGroup.getName().equals(queryGroup.getName())) { - for (ResourceType resourceType : queryGroup.getResourceLimits().keySet()) { - map.compute(resourceType, (k, v) -> v + currGroup.getResourceLimits().get(resourceType)); - } - } - } - return map; - } - /** * Get the QueryGroups with the specified name from cluster state * @param name - the QueryGroup name we are getting @@ -264,10 +246,113 @@ ClusterState deleteQueryGroupInClusterState(final String name, final ClusterStat return ClusterState.builder(currentClusterState).metadata(Metadata.builder(metadata).remove(queryGroupToRemove).build()).build(); } + /** + * Modify cluster state to update the QueryGroup + * @param toUpdateGroup {@link QueryGroup} - the QueryGroup that we want to update + * @param listener - ActionListener for UpdateQueryGroupResponse + */ + public void updateInClusterStateMetadata(UpdateQueryGroupRequest toUpdateGroup, ActionListener listener) { + clusterService.submitStateUpdateTask(SOURCE, new ClusterStateUpdateTask(Priority.NORMAL) { + @Override + public ClusterState execute(ClusterState currentState) { + return updateQueryGroupInClusterState(toUpdateGroup, currentState); + } + + @Override + public ThrottlingKey getClusterManagerThrottlingKey() { + return updateQueryGroupThrottlingKey; + } + + @Override + public void onFailure(String source, Exception e) { + logger.warn("Failed to update QueryGroup due to error: {}, for source: {}", e.getMessage(), source); + listener.onFailure(e); + } + + @Override + public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { + String name = toUpdateGroup.getName(); + Optional findUpdatedGroup = newState.metadata() + .queryGroups() + .values() + .stream() + .filter(group -> group.getName().equals(name)) + .findFirst(); + assert findUpdatedGroup.isPresent(); + QueryGroup updatedGroup = findUpdatedGroup.get(); + UpdateQueryGroupResponse response = new UpdateQueryGroupResponse(updatedGroup, RestStatus.OK); + listener.onResponse(response); + } + }); + } + + /** + * Modify cluster state to update the existing QueryGroup + * @param updateQueryGroupRequest {@link QueryGroup} - the QueryGroup that we want to update + * @param currentState - current cluster state + */ + ClusterState updateQueryGroupInClusterState(UpdateQueryGroupRequest updateQueryGroupRequest, ClusterState currentState) { + final Metadata metadata = currentState.metadata(); + final Map existingGroups = currentState.metadata().queryGroups(); + String name = updateQueryGroupRequest.getName(); + MutableQueryGroupFragment mutableQueryGroupFragment = updateQueryGroupRequest.getmMutableQueryGroupFragment(); + + final QueryGroup existingGroup = existingGroups.values() + .stream() + .filter(group -> group.getName().equals(name)) + .findFirst() + .orElseThrow(() -> new 
ResourceNotFoundException("No QueryGroup exists with the provided name: " + name)); + + validateTotalUsage(existingGroups, name, mutableQueryGroupFragment.getResourceLimits()); + return ClusterState.builder(currentState) + .metadata( + Metadata.builder(metadata) + .remove(existingGroup) + .put(updateExistingQueryGroup(existingGroup, mutableQueryGroupFragment)) + .build() + ) + .build(); + } + + /** + * This method checks if there's any resource allocation that exceed limit of 1.0 + * @param existingQueryGroups - existing QueryGroups in the system + * @param resourceLimits - the QueryGroup we're creating or updating + */ + private void validateTotalUsage(Map existingQueryGroups, String name, Map resourceLimits) { + if (resourceLimits == null || resourceLimits.isEmpty()) { + return; + } + final Map totalUsage = new EnumMap<>(ResourceType.class); + totalUsage.putAll(resourceLimits); + for (QueryGroup currGroup : existingQueryGroups.values()) { + if (!currGroup.getName().equals(name)) { + for (ResourceType resourceType : resourceLimits.keySet()) { + totalUsage.compute(resourceType, (k, v) -> v + currGroup.getResourceLimits().getOrDefault(resourceType, 0.0)); + } + } + } + totalUsage.forEach((resourceType, total) -> { + if (total > 1.0) { + logger.warn("Total resource allocation for {} will go above the max limit of 1.0.", resourceType.getName()); + throw new IllegalArgumentException( + "Total resource allocation for " + resourceType.getName() + " will go above the max limit of 1.0." + ); + } + }); + } + /** * maxQueryGroupCount getter */ public int getMaxQueryGroupCount() { return maxQueryGroupCount; } + + /** + * clusterService getter + */ + public ClusterService getClusterService() { + return clusterService; + } } diff --git a/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/QueryGroupTestUtils.java b/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/QueryGroupTestUtils.java index e165645775d5c..c6eb3140e943d 100644 --- a/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/QueryGroupTestUtils.java +++ b/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/QueryGroupTestUtils.java @@ -21,6 +21,8 @@ import org.opensearch.common.settings.Settings; import org.opensearch.plugin.wlm.service.QueryGroupPersistenceService; import org.opensearch.threadpool.ThreadPool; +import org.opensearch.wlm.MutableQueryGroupFragment; +import org.opensearch.wlm.ResourceType; import java.util.ArrayList; import java.util.Collection; @@ -31,7 +33,6 @@ import java.util.Set; import static org.opensearch.cluster.metadata.QueryGroup.builder; -import static org.opensearch.wlm.ResourceType.fromName; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -43,21 +44,21 @@ public class QueryGroupTestUtils { public static final String _ID_ONE = "AgfUO5Ja9yfsYlONlYi3TQ=="; public static final String _ID_TWO = "G5iIqHy4g7eK1qIAAAAIH53=1"; public static final String NAME_NONE_EXISTED = "query_group_none_existed"; - public static final String MEMORY_STRING = "memory"; - public static final String MONITOR_STRING = "monitor"; public static final long TIMESTAMP_ONE = 4513232413L; public static final long TIMESTAMP_TWO = 4513232415L; public static final QueryGroup queryGroupOne = builder().name(NAME_ONE) ._id(_ID_ONE) - .mode(MONITOR_STRING) - .resourceLimits(Map.of(fromName(MEMORY_STRING), 0.3)) + .mutableQueryGroupFragment( + new 
MutableQueryGroupFragment(MutableQueryGroupFragment.ResiliencyMode.MONITOR, Map.of(ResourceType.MEMORY, 0.3)) + ) .updatedAt(TIMESTAMP_ONE) .build(); public static final QueryGroup queryGroupTwo = builder().name(NAME_TWO) ._id(_ID_TWO) - .mode(MONITOR_STRING) - .resourceLimits(Map.of(fromName(MEMORY_STRING), 0.6)) + .mutableQueryGroupFragment( + new MutableQueryGroupFragment(MutableQueryGroupFragment.ResiliencyMode.MONITOR, Map.of(ResourceType.MEMORY, 0.6)) + ) .updatedAt(TIMESTAMP_TWO) .build(); @@ -131,14 +132,35 @@ public static Tuple preparePersisten return new Tuple(queryGroupPersistenceService, clusterState); } - public static void assertEqualQueryGroups(Collection collectionOne, Collection collectionTwo) { + public static void assertEqualResourceLimits( + Map resourceLimitMapOne, + Map resourceLimitMapTwo + ) { + assertTrue(resourceLimitMapOne.keySet().containsAll(resourceLimitMapTwo.keySet())); + assertTrue(resourceLimitMapOne.values().containsAll(resourceLimitMapTwo.values())); + } + + public static void assertEqualQueryGroups( + Collection collectionOne, + Collection collectionTwo, + boolean assertUpdateAt + ) { assertEquals(collectionOne.size(), collectionTwo.size()); List listOne = new ArrayList<>(collectionOne); List listTwo = new ArrayList<>(collectionTwo); listOne.sort(Comparator.comparing(QueryGroup::getName)); listTwo.sort(Comparator.comparing(QueryGroup::getName)); for (int i = 0; i < listOne.size(); i++) { - assertTrue(listOne.get(i).equals(listTwo.get(i))); + if (assertUpdateAt) { + QueryGroup one = listOne.get(i); + QueryGroup two = listTwo.get(i); + assertEquals(one.getName(), two.getName()); + assertEquals(one.getResourceLimits(), two.getResourceLimits()); + assertEquals(one.getResiliencyMode(), two.getResiliencyMode()); + assertEquals(one.get_id(), two.get_id()); + } else { + assertEquals(listOne.get(i), listTwo.get(i)); + } } } } diff --git a/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/action/CreateQueryGroupRequestTests.java b/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/action/CreateQueryGroupRequestTests.java index b0fa96a46df80..dd9de4bf8fb1a 100644 --- a/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/action/CreateQueryGroupRequestTests.java +++ b/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/action/CreateQueryGroupRequestTests.java @@ -35,6 +35,6 @@ public void testSerialization() throws IOException { List list2 = new ArrayList<>(); list1.add(queryGroupOne); list2.add(otherRequest.getQueryGroup()); - assertEqualQueryGroups(list1, list2); + assertEqualQueryGroups(list1, list2, false); } } diff --git a/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/action/CreateQueryGroupResponseTests.java b/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/action/CreateQueryGroupResponseTests.java index ecb9a6b2dc0d2..3a2ce215d21b5 100644 --- a/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/action/CreateQueryGroupResponseTests.java +++ b/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/action/CreateQueryGroupResponseTests.java @@ -42,7 +42,7 @@ public void testSerialization() throws IOException { List listTwo = new ArrayList<>(); listOne.add(responseGroup); listTwo.add(otherResponseGroup); - QueryGroupTestUtils.assertEqualQueryGroups(listOne, listTwo); + QueryGroupTestUtils.assertEqualQueryGroups(listOne, listTwo, false); } /** @@ -56,10 +56,10 @@ public void testToXContentCreateQueryGroup() throws IOException { 
+ " \"_id\" : \"AgfUO5Ja9yfsYlONlYi3TQ==\",\n" + " \"name\" : \"query_group_one\",\n" + " \"resiliency_mode\" : \"monitor\",\n" - + " \"updated_at\" : 4513232413,\n" + " \"resource_limits\" : {\n" + " \"memory\" : 0.3\n" - + " }\n" + + " },\n" + + " \"updated_at\" : 4513232413\n" + "}"; assertEquals(expected, actual); } diff --git a/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/action/GetQueryGroupResponseTests.java b/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/action/GetQueryGroupResponseTests.java index 774f4b2d8db52..1a2ac282d86a4 100644 --- a/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/action/GetQueryGroupResponseTests.java +++ b/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/action/GetQueryGroupResponseTests.java @@ -41,7 +41,7 @@ public void testSerializationSingleQueryGroup() throws IOException { GetQueryGroupResponse otherResponse = new GetQueryGroupResponse(streamInput); assertEquals(response.getRestStatus(), otherResponse.getRestStatus()); - QueryGroupTestUtils.assertEqualQueryGroups(response.getQueryGroups(), otherResponse.getQueryGroups()); + QueryGroupTestUtils.assertEqualQueryGroups(response.getQueryGroups(), otherResponse.getQueryGroups(), false); } /** @@ -58,7 +58,7 @@ public void testSerializationMultipleQueryGroup() throws IOException { GetQueryGroupResponse otherResponse = new GetQueryGroupResponse(streamInput); assertEquals(response.getRestStatus(), otherResponse.getRestStatus()); assertEquals(2, otherResponse.getQueryGroups().size()); - QueryGroupTestUtils.assertEqualQueryGroups(response.getQueryGroups(), otherResponse.getQueryGroups()); + QueryGroupTestUtils.assertEqualQueryGroups(response.getQueryGroups(), otherResponse.getQueryGroups(), false); } /** @@ -93,10 +93,10 @@ public void testToXContentGetSingleQueryGroup() throws IOException { + " \"_id\" : \"AgfUO5Ja9yfsYlONlYi3TQ==\",\n" + " \"name\" : \"query_group_one\",\n" + " \"resiliency_mode\" : \"monitor\",\n" - + " \"updated_at\" : 4513232413,\n" + " \"resource_limits\" : {\n" + " \"memory\" : 0.3\n" - + " }\n" + + " },\n" + + " \"updated_at\" : 4513232413\n" + " }\n" + " ]\n" + "}"; @@ -119,19 +119,19 @@ public void testToXContentGetMultipleQueryGroup() throws IOException { + " \"_id\" : \"AgfUO5Ja9yfsYlONlYi3TQ==\",\n" + " \"name\" : \"query_group_one\",\n" + " \"resiliency_mode\" : \"monitor\",\n" - + " \"updated_at\" : 4513232413,\n" + " \"resource_limits\" : {\n" + " \"memory\" : 0.3\n" - + " }\n" + + " },\n" + + " \"updated_at\" : 4513232413\n" + " },\n" + " {\n" + " \"_id\" : \"G5iIqHy4g7eK1qIAAAAIH53=1\",\n" + " \"name\" : \"query_group_two\",\n" + " \"resiliency_mode\" : \"monitor\",\n" - + " \"updated_at\" : 4513232415,\n" + " \"resource_limits\" : {\n" + " \"memory\" : 0.6\n" - + " }\n" + + " },\n" + + " \"updated_at\" : 4513232415\n" + " }\n" + " ]\n" + "}"; diff --git a/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/action/QueryGroupActionTestUtils.java b/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/action/QueryGroupActionTestUtils.java new file mode 100644 index 0000000000000..08d128ca7ed59 --- /dev/null +++ b/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/action/QueryGroupActionTestUtils.java @@ -0,0 +1,17 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.plugin.wlm.action; + +import org.opensearch.wlm.MutableQueryGroupFragment; + +public class QueryGroupActionTestUtils { + public static UpdateQueryGroupRequest updateQueryGroupRequest(String name, MutableQueryGroupFragment mutableQueryGroupFragment) { + return new UpdateQueryGroupRequest(name, mutableQueryGroupFragment); + } +} diff --git a/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/action/UpdateQueryGroupRequestTests.java b/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/action/UpdateQueryGroupRequestTests.java new file mode 100644 index 0000000000000..b99f079e81984 --- /dev/null +++ b/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/action/UpdateQueryGroupRequestTests.java @@ -0,0 +1,97 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.plugin.wlm.action; + +import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.wlm.MutableQueryGroupFragment; +import org.opensearch.wlm.MutableQueryGroupFragment.ResiliencyMode; +import org.opensearch.wlm.ResourceType; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import static org.opensearch.plugin.wlm.QueryGroupTestUtils.NAME_ONE; +import static org.opensearch.plugin.wlm.QueryGroupTestUtils.queryGroupOne; + +public class UpdateQueryGroupRequestTests extends OpenSearchTestCase { + + /** + * Test case to verify the serialization and deserialization of UpdateQueryGroupRequest. + */ + public void testSerialization() throws IOException { + UpdateQueryGroupRequest request = new UpdateQueryGroupRequest(NAME_ONE, queryGroupOne.getMutableQueryGroupFragment()); + BytesStreamOutput out = new BytesStreamOutput(); + request.writeTo(out); + StreamInput streamInput = out.bytes().streamInput(); + UpdateQueryGroupRequest otherRequest = new UpdateQueryGroupRequest(streamInput); + assertEquals(request.getName(), otherRequest.getName()); + assertEquals(request.getmMutableQueryGroupFragment(), otherRequest.getmMutableQueryGroupFragment()); + } + + /** + * Test case to verify the serialization and deserialization of UpdateQueryGroupRequest with only name field. + */ + public void testSerializationOnlyName() throws IOException { + UpdateQueryGroupRequest request = new UpdateQueryGroupRequest(NAME_ONE, new MutableQueryGroupFragment(null, new HashMap<>())); + BytesStreamOutput out = new BytesStreamOutput(); + request.writeTo(out); + StreamInput streamInput = out.bytes().streamInput(); + UpdateQueryGroupRequest otherRequest = new UpdateQueryGroupRequest(streamInput); + assertEquals(request.getName(), otherRequest.getName()); + assertEquals(request.getmMutableQueryGroupFragment(), otherRequest.getmMutableQueryGroupFragment()); + } + + /** + * Test case to verify the serialization and deserialization of UpdateQueryGroupRequest with only resourceLimits field. 
+ */ + public void testSerializationOnlyResourceLimit() throws IOException { + UpdateQueryGroupRequest request = new UpdateQueryGroupRequest( + NAME_ONE, + new MutableQueryGroupFragment(null, Map.of(ResourceType.MEMORY, 0.4)) + ); + BytesStreamOutput out = new BytesStreamOutput(); + request.writeTo(out); + StreamInput streamInput = out.bytes().streamInput(); + UpdateQueryGroupRequest otherRequest = new UpdateQueryGroupRequest(streamInput); + assertEquals(request.getName(), otherRequest.getName()); + assertEquals(request.getmMutableQueryGroupFragment(), otherRequest.getmMutableQueryGroupFragment()); + } + + /** + * Tests invalid ResourceType. + */ + public void testInvalidResourceLimitList() { + assertThrows( + IllegalArgumentException.class, + () -> new UpdateQueryGroupRequest( + NAME_ONE, + new MutableQueryGroupFragment( + ResiliencyMode.MONITOR, + Map.of(ResourceType.MEMORY, 0.3, ResourceType.fromName("random"), 0.4) + ) + ) + ); + } + + /** + * Tests invalid resiliencyMode. + */ + public void testInvalidEnforcement() { + assertThrows( + IllegalArgumentException.class, + () -> new UpdateQueryGroupRequest( + NAME_ONE, + new MutableQueryGroupFragment(ResiliencyMode.fromName("random"), Map.of(ResourceType.fromName("memory"), 0.3)) + ) + ); + } +} diff --git a/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/action/UpdateQueryGroupResponseTests.java b/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/action/UpdateQueryGroupResponseTests.java new file mode 100644 index 0000000000000..a7ab4c6a682ef --- /dev/null +++ b/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/action/UpdateQueryGroupResponseTests.java @@ -0,0 +1,67 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.plugin.wlm.action; + +import org.opensearch.cluster.metadata.QueryGroup; +import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.common.xcontent.json.JsonXContent; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.rest.RestStatus; +import org.opensearch.core.xcontent.ToXContent; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.plugin.wlm.QueryGroupTestUtils; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import static org.opensearch.plugin.wlm.QueryGroupTestUtils.queryGroupOne; +import static org.mockito.Mockito.mock; + +public class UpdateQueryGroupResponseTests extends OpenSearchTestCase { + + /** + * Test case to verify the serialization and deserialization of UpdateQueryGroupResponse. 
+ */ + public void testSerialization() throws IOException { + UpdateQueryGroupResponse response = new UpdateQueryGroupResponse(queryGroupOne, RestStatus.OK); + BytesStreamOutput out = new BytesStreamOutput(); + response.writeTo(out); + StreamInput streamInput = out.bytes().streamInput(); + UpdateQueryGroupResponse otherResponse = new UpdateQueryGroupResponse(streamInput); + assertEquals(response.getRestStatus(), otherResponse.getRestStatus()); + QueryGroup responseGroup = response.getQueryGroup(); + QueryGroup otherResponseGroup = otherResponse.getQueryGroup(); + List list1 = new ArrayList<>(); + List list2 = new ArrayList<>(); + list1.add(responseGroup); + list2.add(otherResponseGroup); + QueryGroupTestUtils.assertEqualQueryGroups(list1, list2, false); + } + + /** + * Test case to verify the toXContent method of UpdateQueryGroupResponse. + */ + public void testToXContentUpdateSingleQueryGroup() throws IOException { + XContentBuilder builder = JsonXContent.contentBuilder().prettyPrint(); + UpdateQueryGroupResponse otherResponse = new UpdateQueryGroupResponse(queryGroupOne, RestStatus.OK); + String actual = otherResponse.toXContent(builder, mock(ToXContent.Params.class)).toString(); + String expected = "{\n" + + " \"_id\" : \"AgfUO5Ja9yfsYlONlYi3TQ==\",\n" + + " \"name\" : \"query_group_one\",\n" + + " \"resiliency_mode\" : \"monitor\",\n" + + " \"resource_limits\" : {\n" + + " \"memory\" : 0.3\n" + + " },\n" + + " \"updated_at\" : 4513232413\n" + + "}"; + assertEquals(expected, actual); + } +} diff --git a/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/service/QueryGroupPersistenceServiceTests.java b/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/service/QueryGroupPersistenceServiceTests.java index 5cb3d8fc6d11f..08b51fd46cfcf 100644 --- a/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/service/QueryGroupPersistenceServiceTests.java +++ b/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/service/QueryGroupPersistenceServiceTests.java @@ -24,8 +24,12 @@ import org.opensearch.plugin.wlm.QueryGroupTestUtils; import org.opensearch.plugin.wlm.action.CreateQueryGroupResponse; import org.opensearch.plugin.wlm.action.DeleteQueryGroupRequest; +import org.opensearch.plugin.wlm.action.UpdateQueryGroupRequest; +import org.opensearch.plugin.wlm.action.UpdateQueryGroupResponse; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.threadpool.ThreadPool; +import org.opensearch.wlm.MutableQueryGroupFragment; +import org.opensearch.wlm.MutableQueryGroupFragment.ResiliencyMode; import org.opensearch.wlm.ResourceType; import java.util.ArrayList; @@ -33,14 +37,13 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; import org.mockito.ArgumentCaptor; import static org.opensearch.cluster.metadata.QueryGroup.builder; -import static org.opensearch.plugin.wlm.QueryGroupTestUtils.MEMORY_STRING; -import static org.opensearch.plugin.wlm.QueryGroupTestUtils.MONITOR_STRING; import static org.opensearch.plugin.wlm.QueryGroupTestUtils.NAME_NONE_EXISTED; import static org.opensearch.plugin.wlm.QueryGroupTestUtils.NAME_ONE; import static org.opensearch.plugin.wlm.QueryGroupTestUtils.NAME_TWO; @@ -55,6 +58,7 @@ import static org.opensearch.plugin.wlm.QueryGroupTestUtils.queryGroupOne; import static org.opensearch.plugin.wlm.QueryGroupTestUtils.queryGroupPersistenceService; import static 
org.opensearch.plugin.wlm.QueryGroupTestUtils.queryGroupTwo; +import static org.opensearch.plugin.wlm.action.QueryGroupActionTestUtils.updateQueryGroupRequest; import static org.opensearch.plugin.wlm.service.QueryGroupPersistenceService.QUERY_GROUP_COUNT_SETTING_NAME; import static org.opensearch.plugin.wlm.service.QueryGroupPersistenceService.SOURCE; import static org.mockito.ArgumentMatchers.any; @@ -83,7 +87,7 @@ public void testCreateQueryGroup() { List listTwo = new ArrayList<>(); listOne.add(queryGroupOne); listTwo.add(updatedGroupsMap.get(_ID_ONE)); - assertEqualQueryGroups(listOne, listTwo); + assertEqualQueryGroups(listOne, listTwo, false); } /** @@ -99,7 +103,7 @@ public void testCreateAnotherQueryGroup() { assertEquals(2, updatedGroups.size()); assertTrue(updatedGroups.containsKey(_ID_TWO)); Collection values = updatedGroups.values(); - assertEqualQueryGroups(queryGroupList(), new ArrayList<>(values)); + assertEqualQueryGroups(queryGroupList(), new ArrayList<>(values), false); } /** @@ -111,8 +115,7 @@ public void testCreateQueryGroupDuplicateName() { ClusterState clusterState = setup.v2(); QueryGroup toCreate = builder().name(NAME_ONE) ._id("W5iIqHyhgi4K1qIAAAAIHw==") - .mode(MONITOR_STRING) - .resourceLimits(Map.of(ResourceType.fromName(MEMORY_STRING), 0.3)) + .mutableQueryGroupFragment(new MutableQueryGroupFragment(ResiliencyMode.MONITOR, Map.of(ResourceType.MEMORY, 0.3))) .updatedAt(1690934400000L) .build(); assertThrows(RuntimeException.class, () -> queryGroupPersistenceService1.saveQueryGroupInClusterState(toCreate, clusterState)); @@ -126,8 +129,7 @@ public void testCreateQueryGroupOverflowAllocation() { Tuple setup = preparePersistenceServiceSetup(Map.of(_ID_TWO, queryGroupTwo)); QueryGroup toCreate = builder().name(NAME_ONE) ._id("W5iIqHyhgi4K1qIAAAAIHw==") - .mode(MONITOR_STRING) - .resourceLimits(Map.of(ResourceType.fromName(MEMORY_STRING), 0.41)) + .mutableQueryGroupFragment(new MutableQueryGroupFragment(ResiliencyMode.MONITOR, Map.of(ResourceType.MEMORY, 0.41))) .updatedAt(1690934400000L) .build(); @@ -143,8 +145,7 @@ public void testCreateQueryGroupOverflowAllocation() { public void testCreateQueryGroupOverflowCount() { QueryGroup toCreate = builder().name(NAME_NONE_EXISTED) ._id("W5iIqHyhgi4K1qIAAAAIHw==") - .mode(MONITOR_STRING) - .resourceLimits(Map.of(ResourceType.fromName(MEMORY_STRING), 0.5)) + .mutableQueryGroupFragment(new MutableQueryGroupFragment(ResiliencyMode.MONITOR, Map.of(ResourceType.MEMORY, 0.5))) .updatedAt(1690934400000L) .build(); Metadata metadata = Metadata.builder().queryGroups(Map.of(_ID_ONE, queryGroupOne, _ID_TWO, queryGroupTwo)).build(); @@ -267,7 +268,7 @@ public void testGetSingleQueryGroup() { List listTwo = new ArrayList<>(); listOne.add(QueryGroupTestUtils.queryGroupOne); listTwo.add(queryGroup); - QueryGroupTestUtils.assertEqualQueryGroups(listOne, listTwo); + QueryGroupTestUtils.assertEqualQueryGroups(listOne, listTwo, false); } /** @@ -281,7 +282,7 @@ public void testGetAllQueryGroups() { Set currentNAME = res.stream().map(QueryGroup::getName).collect(Collectors.toSet()); assertTrue(currentNAME.contains(QueryGroupTestUtils.NAME_ONE)); assertTrue(currentNAME.contains(QueryGroupTestUtils.NAME_TWO)); - QueryGroupTestUtils.assertEqualQueryGroups(QueryGroupTestUtils.queryGroupList(), res); + QueryGroupTestUtils.assertEqualQueryGroups(QueryGroupTestUtils.queryGroupList(), res, false); } /** @@ -316,7 +317,7 @@ public void testDeleteSingleQueryGroup() { assertEquals(1, afterDeletionGroups.size()); List oldQueryGroups = new ArrayList<>(); 
oldQueryGroups.add(queryGroupOne); - assertEqualQueryGroups(new ArrayList<>(afterDeletionGroups.values()), oldQueryGroups); + assertEqualQueryGroups(new ArrayList<>(afterDeletionGroups.values()), oldQueryGroups, false); } /** @@ -356,4 +357,159 @@ public void testDeleteInClusterStateMetadata() throws Exception { queryGroupPersistenceService.deleteInClusterStateMetadata(request, listener); verify(clusterService).submitStateUpdateTask(eq(SOURCE), any(AckedClusterStateUpdateTask.class)); } + + /** + * Tests updating a QueryGroup with all fields + */ + public void testUpdateQueryGroupAllFields() { + QueryGroup updated = builder().name(NAME_ONE) + ._id(_ID_ONE) + .mutableQueryGroupFragment(new MutableQueryGroupFragment(ResiliencyMode.ENFORCED, Map.of(ResourceType.MEMORY, 0.15))) + .updatedAt(1690934400000L) + .build(); + UpdateQueryGroupRequest updateQueryGroupRequest = updateQueryGroupRequest(NAME_ONE, updated.getMutableQueryGroupFragment()); + ClusterState newClusterState = queryGroupPersistenceService().updateQueryGroupInClusterState( + updateQueryGroupRequest, + clusterState() + ); + List updatedQueryGroups = new ArrayList<>(newClusterState.getMetadata().queryGroups().values()); + assertEquals(2, updatedQueryGroups.size()); + List expectedList = new ArrayList<>(); + expectedList.add(queryGroupTwo); + expectedList.add(updated); + assertEqualQueryGroups(expectedList, updatedQueryGroups, true); + } + + /** + * Tests updating a QueryGroup with only updated resourceLimits + */ + public void testUpdateQueryGroupResourceLimitsOnly() { + QueryGroup updated = builder().name(NAME_ONE) + ._id(_ID_ONE) + .mutableQueryGroupFragment(new MutableQueryGroupFragment(ResiliencyMode.MONITOR, Map.of(ResourceType.MEMORY, 0.15))) + .updatedAt(1690934400000L) + .build(); + UpdateQueryGroupRequest updateQueryGroupRequest = updateQueryGroupRequest(NAME_ONE, updated.getMutableQueryGroupFragment()); + ClusterState newClusterState = queryGroupPersistenceService().updateQueryGroupInClusterState( + updateQueryGroupRequest, + clusterState() + ); + List updatedQueryGroups = new ArrayList<>(newClusterState.getMetadata().queryGroups().values()); + assertEquals(2, updatedQueryGroups.size()); + Optional findUpdatedGroupOne = newClusterState.metadata() + .queryGroups() + .values() + .stream() + .filter(group -> group.getName().equals(NAME_ONE)) + .findFirst(); + Optional findUpdatedGroupTwo = newClusterState.metadata() + .queryGroups() + .values() + .stream() + .filter(group -> group.getName().equals(NAME_TWO)) + .findFirst(); + assertTrue(findUpdatedGroupOne.isPresent()); + assertTrue(findUpdatedGroupTwo.isPresent()); + List list1 = new ArrayList<>(); + list1.add(updated); + List list2 = new ArrayList<>(); + list2.add(findUpdatedGroupOne.get()); + assertEqualQueryGroups(list1, list2, true); + } + + /** + * Tests updating a QueryGroup with invalid name + */ + public void testUpdateQueryGroupNonExistedName() { + QueryGroupPersistenceService queryGroupPersistenceService = queryGroupPersistenceService(); + UpdateQueryGroupRequest updateQueryGroupRequest = updateQueryGroupRequest( + NAME_NONE_EXISTED, + new MutableQueryGroupFragment(ResiliencyMode.MONITOR, Map.of(ResourceType.MEMORY, 0.15)) + ); + assertThrows( + RuntimeException.class, + () -> queryGroupPersistenceService.updateQueryGroupInClusterState(updateQueryGroupRequest, clusterState()) + ); + List updatedQueryGroups = new ArrayList<>( + queryGroupPersistenceService.getClusterService().state().metadata().queryGroups().values() + ); + assertEquals(2, 
updatedQueryGroups.size()); + List expectedList = new ArrayList<>(); + expectedList.add(queryGroupTwo); + expectedList.add(queryGroupOne); + assertEqualQueryGroups(expectedList, updatedQueryGroups, true); + } + + /** + * Tests UpdateInClusterStateMetadata function + */ + public void testUpdateInClusterStateMetadata() { + ClusterService clusterService = mock(ClusterService.class); + @SuppressWarnings("unchecked") + ActionListener listener = mock(ActionListener.class); + QueryGroupPersistenceService queryGroupPersistenceService = new QueryGroupPersistenceService( + clusterService, + QueryGroupTestUtils.settings(), + clusterSettings() + ); + queryGroupPersistenceService.updateInClusterStateMetadata(null, listener); + verify(clusterService).submitStateUpdateTask(eq(SOURCE), any()); + } + + /** + * Tests UpdateInClusterStateMetadata function with inner functions + */ + public void testUpdateInClusterStateMetadataInner() { + ClusterService clusterService = mock(ClusterService.class); + @SuppressWarnings("unchecked") + ActionListener listener = mock(ActionListener.class); + QueryGroupPersistenceService queryGroupPersistenceService = new QueryGroupPersistenceService( + clusterService, + QueryGroupTestUtils.settings(), + clusterSettings() + ); + UpdateQueryGroupRequest updateQueryGroupRequest = updateQueryGroupRequest( + NAME_TWO, + new MutableQueryGroupFragment(ResiliencyMode.SOFT, new HashMap<>()) + ); + ArgumentCaptor captor = ArgumentCaptor.forClass(ClusterStateUpdateTask.class); + queryGroupPersistenceService.updateInClusterStateMetadata(updateQueryGroupRequest, listener); + verify(clusterService, times(1)).submitStateUpdateTask(eq(SOURCE), captor.capture()); + ClusterStateUpdateTask capturedTask = captor.getValue(); + assertEquals(queryGroupPersistenceService.updateQueryGroupThrottlingKey, capturedTask.getClusterManagerThrottlingKey()); + + doAnswer(invocation -> { + ClusterStateUpdateTask task = invocation.getArgument(1); + task.clusterStateProcessed(SOURCE, clusterState(), clusterState()); + return null; + }).when(clusterService).submitStateUpdateTask(anyString(), any()); + queryGroupPersistenceService.updateInClusterStateMetadata(updateQueryGroupRequest, listener); + verify(listener).onResponse(any(UpdateQueryGroupResponse.class)); + } + + /** + * Tests UpdateInClusterStateMetadata function with failure + */ + public void testUpdateInClusterStateMetadataFailure() { + ClusterService clusterService = mock(ClusterService.class); + @SuppressWarnings("unchecked") + ActionListener listener = mock(ActionListener.class); + QueryGroupPersistenceService queryGroupPersistenceService = new QueryGroupPersistenceService( + clusterService, + QueryGroupTestUtils.settings(), + clusterSettings() + ); + UpdateQueryGroupRequest updateQueryGroupRequest = updateQueryGroupRequest( + NAME_TWO, + new MutableQueryGroupFragment(ResiliencyMode.SOFT, new HashMap<>()) + ); + doAnswer(invocation -> { + ClusterStateUpdateTask task = invocation.getArgument(1); + Exception exception = new RuntimeException("Test Exception"); + task.onFailure(SOURCE, exception); + return null; + }).when(clusterService).submitStateUpdateTask(anyString(), any()); + queryGroupPersistenceService.updateInClusterStateMetadata(updateQueryGroupRequest, listener); + verify(listener).onFailure(any(RuntimeException.class)); + } } diff --git a/plugins/workload-management/src/yamlRestTest/resources/rest-api-spec/api/update_query_group_context.json b/plugins/workload-management/src/yamlRestTest/resources/rest-api-spec/api/update_query_group_context.json 
new file mode 100644 index 0000000000000..fbfa2dde292ee --- /dev/null +++ b/plugins/workload-management/src/yamlRestTest/resources/rest-api-spec/api/update_query_group_context.json @@ -0,0 +1,23 @@ +{ + "update_query_group_context": { + "stability": "experimental", + "url": { + "paths": [ + { + "path": "/_wlm/query_group/{name}", + "methods": ["PUT", "POST"], + "parts": { + "name": { + "type": "string", + "description": "QueryGroup name" + } + } + } + ] + }, + "params":{}, + "body":{ + "description":"The updated QueryGroup schema" + } + } +} diff --git a/plugins/workload-management/src/yamlRestTest/resources/rest-api-spec/test/wlm/10_query_group.yml b/plugins/workload-management/src/yamlRestTest/resources/rest-api-spec/test/wlm/10_query_group.yml index a00314986a5cf..40ec665351094 100644 --- a/plugins/workload-management/src/yamlRestTest/resources/rest-api-spec/test/wlm/10_query_group.yml +++ b/plugins/workload-management/src/yamlRestTest/resources/rest-api-spec/test/wlm/10_query_group.yml @@ -29,6 +29,48 @@ - match: { query_groups.0.resource_limits.cpu: 0.4 } - match: { query_groups.0.resource_limits.memory: 0.2 } + - do: + update_query_group_context: + name: "analytics" + body: + { + "resiliency_mode": "monitor", + "resource_limits": { + "cpu": 0.42, + "memory": 0.22 + } + } + + - match: { name: "analytics" } + - match: { resiliency_mode: "monitor" } + - match: { resource_limits.cpu: 0.42 } + - match: { resource_limits.memory: 0.22 } + + - do: + catch: /resource_not_found_exception/ + update_query_group_context: + name: "analytics5" + body: + { + "resiliency_mode": "monitor", + "resource_limits": { + "cpu": 0.42, + "memory": 0.22 + } + } + + - do: + catch: /illegal_argument_exception/ + update_query_group_context: + name: "analytics" + body: + { + "resiliency_mode": "monitor", + "resource_limits": { + "cpu": 1.1 + } + } + - do: catch: /illegal_argument_exception/ create_query_group_context: diff --git a/qa/repository-multi-version/src/test/java/org/opensearch/upgrades/MultiVersionRepositoryAccessIT.java b/qa/repository-multi-version/src/test/java/org/opensearch/upgrades/MultiVersionRepositoryAccessIT.java index 7c27f2ea71942..c47e0065b708b 100644 --- a/qa/repository-multi-version/src/test/java/org/opensearch/upgrades/MultiVersionRepositoryAccessIT.java +++ b/qa/repository-multi-version/src/test/java/org/opensearch/upgrades/MultiVersionRepositoryAccessIT.java @@ -32,6 +32,7 @@ package org.opensearch.upgrades; +import com.sun.jna.StringArray; import org.opensearch.action.admin.cluster.repositories.put.PutRepositoryRequest; import org.opensearch.action.admin.cluster.snapshots.delete.DeleteSnapshotRequest; import org.opensearch.action.admin.cluster.snapshots.status.SnapshotStatus; @@ -44,6 +45,7 @@ import org.opensearch.client.RestClient; import org.opensearch.client.RestHighLevelClient; import org.opensearch.common.settings.Settings; +import org.opensearch.core.common.Strings; import org.opensearch.core.xcontent.DeprecationHandler; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.common.xcontent.json.JsonXContent; @@ -141,14 +143,14 @@ public void testCreateAndRestoreSnapshot() throws IOException { case STEP2_NEW_CLUSTER: case STEP4_NEW_CLUSTER: assertSnapshotStatusSuccessful(client, repoName, - snapshots.stream().map(sn -> (String) sn.get("snapshot")).toArray(String[]::new)); + snapshots.stream().map(sn -> (String) sn.get("snapshot")).toArray(String[]::new), Strings.EMPTY_ARRAY); break; case STEP1_OLD_CLUSTER: - assertSnapshotStatusSuccessful(client, repoName, 
"snapshot-" + TEST_STEP); + assertSnapshotStatusSuccessful(client, repoName, new String[] {"snapshot-" + TEST_STEP}, Strings.EMPTY_ARRAY); break; case STEP3_OLD_CLUSTER: assertSnapshotStatusSuccessful( - client, repoName, "snapshot-" + TEST_STEP, "snapshot-" + TestStep.STEP3_OLD_CLUSTER); + client, repoName, new String[] {"snapshot-" + TEST_STEP, "snapshot-" + TestStep.STEP3_OLD_CLUSTER}, Strings.EMPTY_ARRAY); break; } if (TEST_STEP == TestStep.STEP3_OLD_CLUSTER) { @@ -186,10 +188,10 @@ public void testReadOnlyRepo() throws IOException { break; } if (TEST_STEP == TestStep.STEP1_OLD_CLUSTER || TEST_STEP == TestStep.STEP3_OLD_CLUSTER) { - assertSnapshotStatusSuccessful(client, repoName, "snapshot-" + TestStep.STEP1_OLD_CLUSTER); + assertSnapshotStatusSuccessful(client, repoName, new String[] {"snapshot-" + TestStep.STEP1_OLD_CLUSTER}, Strings.EMPTY_ARRAY); } else { assertSnapshotStatusSuccessful(client, repoName, - "snapshot-" + TestStep.STEP1_OLD_CLUSTER, "snapshot-" + TestStep.STEP2_NEW_CLUSTER); + new String[] {"snapshot-" + TestStep.STEP1_OLD_CLUSTER, "snapshot-" + TestStep.STEP2_NEW_CLUSTER}, Strings.EMPTY_ARRAY); } if (TEST_STEP == TestStep.STEP3_OLD_CLUSTER) { ensureSnapshotRestoreWorks(repoName, "snapshot-" + TestStep.STEP1_OLD_CLUSTER, shards); @@ -214,7 +216,7 @@ public void testUpgradeMovesRepoToNewMetaVersion() throws IOException { // Every step creates one snapshot assertThat(snapshots, hasSize(TEST_STEP.ordinal() + 1)); assertSnapshotStatusSuccessful(client, repoName, - snapshots.stream().map(sn -> (String) sn.get("snapshot")).toArray(String[]::new)); + snapshots.stream().map(sn -> (String) sn.get("snapshot")).toArray(String[]::new), Strings.EMPTY_ARRAY); if (TEST_STEP == TestStep.STEP1_OLD_CLUSTER) { ensureSnapshotRestoreWorks(repoName, "snapshot-" + TestStep.STEP1_OLD_CLUSTER, shards); } else { @@ -239,9 +241,9 @@ public void testUpgradeMovesRepoToNewMetaVersion() throws IOException { } private static void assertSnapshotStatusSuccessful(RestHighLevelClient client, String repoName, - String... 
snapshots) throws IOException { + String[] snapshots, String[] indices) throws IOException { final SnapshotsStatusResponse statusResponse = client.snapshot() - .status(new SnapshotsStatusRequest(repoName, snapshots), RequestOptions.DEFAULT); + .status((new SnapshotsStatusRequest(repoName, snapshots)).indices(indices), RequestOptions.DEFAULT); for (SnapshotStatus status : statusResponse.getSnapshots()) { assertThat(status.getShardsStats().getFailedShards(), is(0)); } } diff --git a/release-notes/opensearch.release-notes-2.17.0.md b/release-notes/opensearch.release-notes-2.17.0.md new file mode 100644 index 0000000000000..e374b222104b0 --- /dev/null +++ b/release-notes/opensearch.release-notes-2.17.0.md @@ -0,0 +1,108 @@ +## 2024-09-17 Version 2.17.0 Release Notes + +## [2.17.0] +### Added +- [Workload Management] Add Settings for Workload Management feature ([#15028](https://github.com/opensearch-project/OpenSearch/pull/15028)) +- Fix for hasInitiatedFetching to fix allocation explain and manual reroute APIs ([#14972](https://github.com/opensearch-project/OpenSearch/pull/14972)) +- [Workload Management] Add queryGroupId to Task ([#14708](https://github.com/opensearch-project/OpenSearch/pull/14708)) +- Add setting to ignore throttling nodes for allocation of unassigned primaries in remote restore ([#14991](https://github.com/opensearch-project/OpenSearch/pull/14991)) +- [Workload Management] Add Delete QueryGroup API Logic ([#14735](https://github.com/opensearch-project/OpenSearch/pull/14735)) +- [Streaming Indexing] Enhance RestClient with a new streaming API support ([#14437](https://github.com/opensearch-project/OpenSearch/pull/14437)) +- Add basic aggregation support for derived fields ([#14618](https://github.com/opensearch-project/OpenSearch/pull/14618)) +- [Workload Management] Add Create QueryGroup API Logic ([#14680](https://github.com/opensearch-project/OpenSearch/pull/14680)) +- Add ThreadContextPermission for markAsSystemContext and allow core to perform the method ([#15016](https://github.com/opensearch-project/OpenSearch/pull/15016)) +- Add ThreadContextPermission for stashAndMergeHeaders and stashWithOrigin ([#15039](https://github.com/opensearch-project/OpenSearch/pull/15039)) +- [Concurrent Segment Search] Support composite aggregations with scripting ([#15072](https://github.com/opensearch-project/OpenSearch/pull/15072)) +- Add `rangeQuery` and `regexpQuery` for `constant_keyword` field type ([#14711](https://github.com/opensearch-project/OpenSearch/pull/14711)) +- Add took time to request nodes stats ([#15054](https://github.com/opensearch-project/OpenSearch/pull/15054)) +- [Workload Management] Add Get QueryGroup API Logic ([#14709](https://github.com/opensearch-project/OpenSearch/pull/14709)) +- [Workload Management] Add Update QueryGroup API Logic ([#14775](https://github.com/opensearch-project/OpenSearch/pull/14775)) +- [Workload Management] QueryGroup resource tracking framework changes ([#13897](https://github.com/opensearch-project/OpenSearch/pull/13897)) +- Support filtering on a large list encoded by bitmap ([#14774](https://github.com/opensearch-project/OpenSearch/pull/14774)) +- Add slice execution listeners to SearchOperationListener interface ([#15153](https://github.com/opensearch-project/OpenSearch/pull/15153)) +- Make balanced shards allocator timebound ([#15239](https://github.com/opensearch-project/OpenSearch/pull/15239)) +- Add
allowlist setting for ingest-geoip and ingest-useragent ([#15325](https://github.com/opensearch-project/OpenSearch/pull/15325)) +- Adding access to noSubMatches and noOverlappingMatches in Hyphenation ([#13895](https://github.com/opensearch-project/OpenSearch/pull/13895)) +- Star tree mapping changes ([#14605](https://github.com/opensearch-project/OpenSearch/pull/14605)) +- Add support for index level max slice count setting for concurrent segment search ([#15336](https://github.com/opensearch-project/OpenSearch/pull/15336)) +- Support cancellation for cat shards and node stats API. ([#13966](https://github.com/opensearch-project/OpenSearch/pull/13966)) +- [Streaming Indexing] Introduce bulk HTTP API streaming flavor ([#15381](https://github.com/opensearch-project/OpenSearch/pull/15381)) +- Add support for centralize snapshot creation with pinned timestamp ([#15124](https://github.com/opensearch-project/OpenSearch/pull/15124)) +- Add concurrent search support for Derived Fields ([#15326](https://github.com/opensearch-project/OpenSearch/pull/15326)) +- [Workload Management] Add query group stats constructs ([#15343](https://github.com/opensearch-project/OpenSearch/pull/15343)) +- Add limit on number of processors for Ingest pipeline ([#15460](https://github.com/opensearch-project/OpenSearch/pull/15465)). +- Add runAs to Subject interface and introduce IdentityAwarePlugin extension point ([#14630](https://github.com/opensearch-project/OpenSearch/pull/14630)) +- [Workload Management] Add rejection logic for co-ordinator and shard level requests ([#15428](https://github.com/opensearch-project/OpenSearch/pull/15428)) +- Adding translog durability validation in index templates ([#15494](https://github.com/opensearch-project/OpenSearch/pull/15494)) +- [Range Queries] Add new approximateable query framework to short-circuit range queries ([#13788](https://github.com/opensearch-project/OpenSearch/pull/13788)) +- [Workload Management] Add query group level failure tracking ([#15227](https://github.com/opensearch-project/OpenSearch/pull/15527)) +- [Reader Writer Separation] Add experimental search replica shard type to achieve reader writer separation ([#15237](https://github.com/opensearch-project/OpenSearch/pull/15237)) +- Add index creation using the context field ([#15290](https://github.com/opensearch-project/OpenSearch/pull/15290)) +- [Remote Publication] Add remote download stats ([#15291](https://github.com/opensearch-project/OpenSearch/pull/15291)) +- Add support to upload snapshot shard blobs with hashed prefix ([#15426](https://github.com/opensearch-project/OpenSearch/pull/15426)) +- Add prefix support to hashed prefix & infix path types on remote store ([#15557](https://github.com/opensearch-project/OpenSearch/pull/15557)) +- Add canRemain method to TargetPoolAllocationDecider to move shards from local to remote pool for hot to warm tiering ([#15010](https://github.com/opensearch-project/OpenSearch/pull/15010)) +- Add support for pluggable deciders for concurrent search ([#15363](https://github.com/opensearch-project/OpenSearch/pull/15363)) +- Optimise snapshot deletion to speed up snapshot deletion and creation ([#15568](https://github.com/opensearch-project/OpenSearch/pull/15568)) +- [Remote Publication] Added checksum validation for cluster state behind a cluster setting ([#15218](https://github.com/opensearch-project/OpenSearch/pull/15218)) +- Optimize NodeIndicesStats output behind flag ([#14454](https://github.com/opensearch-project/OpenSearch/pull/14454)) +- Add support for
comma-separated list of index names to be used with Snapshot Status API ([#15409](https://github.com/opensearch-project/OpenSearch/pull/15409)) +- ClusterManagerTaskThrottler Improvements ([#15508](https://github.com/opensearch-project/OpenSearch/pull/15508)) +- Relax the join validation for Remote State publication ([#15471](https://github.com/opensearch-project/OpenSearch/pull/15471)) +- Reset DiscoveryNodes in all transport node actions request ([#15131](https://github.com/opensearch-project/OpenSearch/pull/15131)) +- [Remote Publication] Upload incremental cluster state on master re-election ([#15145](https://github.com/opensearch-project/OpenSearch/pull/15145)) +- Static RemotePublication setting added, removed experimental feature flag ([#15478](https://github.com/opensearch-project/OpenSearch/pull/15478)) +- Making _cat/allocation API use indexLevelStats ([#15292](https://github.com/opensearch-project/OpenSearch/pull/15292)) +- Memory optimisations in _cluster/health API ([#15492](https://github.com/opensearch-project/OpenSearch/pull/15492)) + +### Dependencies +- Bump `netty` from 4.1.111.Final to 4.1.112.Final ([#15081](https://github.com/opensearch-project/OpenSearch/pull/15081)) +- Bump `org.apache.commons:commons-lang3` from 3.14.0 to 3.15.0 ([#14861](https://github.com/opensearch-project/OpenSearch/pull/14861)) +- OpenJDK Update (July 2024 Patch releases) ([#14998](https://github.com/opensearch-project/OpenSearch/pull/14998)) +- Bump `com.microsoft.azure:msal4j` from 1.16.1 to 1.17.0 ([#14995](https://github.com/opensearch-project/OpenSearch/pull/14995), [#15420](https://github.com/opensearch-project/OpenSearch/pull/15420)) +- Bump `actions/github-script` from 6 to 7 ([#14997](https://github.com/opensearch-project/OpenSearch/pull/14997)) +- Bump `org.tukaani:xz` from 1.9 to 1.10 ([#15110](https://github.com/opensearch-project/OpenSearch/pull/15110)) +- Bump `org.apache.avro:avro` from 1.11.3 to 1.12.0 in /plugins/repository-hdfs ([#15119](https://github.com/opensearch-project/OpenSearch/pull/15119)) +- Bump `org.bouncycastle:bcpg-fips` from 1.0.7.1 to 2.0.9 ([#15103](https://github.com/opensearch-project/OpenSearch/pull/15103), [#15299](https://github.com/opensearch-project/OpenSearch/pull/15299)) +- Bump `com.azure:azure-core` from 1.49.1 to 1.51.0 ([#15111](https://github.com/opensearch-project/OpenSearch/pull/15111)) +- Bump `org.xerial.snappy:snappy-java` from 1.1.10.5 to 1.1.10.6 ([#15207](https://github.com/opensearch-project/OpenSearch/pull/15207)) +- Bump `com.azure:azure-xml` from 1.0.0 to 1.1.0 ([#15206](https://github.com/opensearch-project/OpenSearch/pull/15206)) +- Bump `reactor` from 3.5.19 to 3.5.20 ([#15262](https://github.com/opensearch-project/OpenSearch/pull/15262)) +- Bump `reactor-netty` from 1.1.21 to 1.1.22 ([#15262](https://github.com/opensearch-project/OpenSearch/pull/15262)) +- Bump `org.apache.kerby:kerb-admin` from 2.0.3 to 2.1.0 ([#15301](https://github.com/opensearch-project/OpenSearch/pull/15301)) +- Bump `com.azure:azure-core-http-netty` from 1.15.1 to 1.15.3 ([#15300](https://github.com/opensearch-project/OpenSearch/pull/15300)) +- Bump `com.gradle.develocity` from 3.17.6 to 3.18 ([#15297](https://github.com/opensearch-project/OpenSearch/pull/15297)) +- Bump `commons-cli:commons-cli` from 1.8.0 to 1.9.0 ([#15298](https://github.com/opensearch-project/OpenSearch/pull/15298)) +- Bump `opentelemetry` from 1.40.0 to 1.41.0 ([#15361](https://github.com/opensearch-project/OpenSearch/pull/15361)) +- 
Bump `opentelemetry-semconv` from 1.26.0-alpha to 1.27.0-alpha ([#15361](https://github.com/opensearch-project/OpenSearch/pull/15361)) +- Bump `tj-actions/changed-files` from 44 to 45 ([#15422](https://github.com/opensearch-project/OpenSearch/pull/15422)) +- Bump `dnsjava:dnsjava` from 3.6.0 to 3.6.1 ([#15418](https://github.com/opensearch-project/OpenSearch/pull/15418)) +- Bump `com.netflix.nebula.ospackage-base` from 11.9.1 to 11.10.0 ([#15419](https://github.com/opensearch-project/OpenSearch/pull/15419)) +- Bump `org.roaringbitmap:RoaringBitmap` from 1.1.0 to 1.2.1 ([#15423](https://github.com/opensearch-project/OpenSearch/pull/15423)) +- Bump `icu4j` from 70.1 to 75.1 ([#15469](https://github.com/opensearch-project/OpenSearch/pull/15469)) + +### Changed +- Add lower limit for primary and replica batch allocators timeout ([#14979](https://github.com/opensearch-project/OpenSearch/pull/14979)) +- Optimize regexp-based include/exclude on aggregations when pattern matches prefixes ([#14371](https://github.com/opensearch-project/OpenSearch/pull/14371)) +- Replace and block usages of org.apache.logging.log4j.util.Strings ([#15238](https://github.com/opensearch-project/OpenSearch/pull/15238)) +- Remote publication using minimum node version for backward compatibility ([#15216](https://github.com/opensearch-project/OpenSearch/pull/15216)) + +### Deprecated + +### Removed +- Remove some unused code in the search backpressure package ([#15518](https://github.com/opensearch-project/OpenSearch/pull/15518)) + +### Fixed +- Fix constraint bug which allows more primary shards than average primary shards per index ([#14908](https://github.com/opensearch-project/OpenSearch/pull/14908)) +- Fix NPE when bulk ingest with empty pipeline ([#15033](https://github.com/opensearch-project/OpenSearch/pull/15033)) +- Fix missing value of FieldSort for unsigned_long ([#14963](https://github.com/opensearch-project/OpenSearch/pull/14963)) +- Fix delete index template failed when the index template matches a data stream but is unused ([#15080](https://github.com/opensearch-project/OpenSearch/pull/15080)) +- Fix array_index_out_of_bounds_exception when indexing documents with field name containing only dot ([#15126](https://github.com/opensearch-project/OpenSearch/pull/15126)) +- Fixed array field name omission in flat_object function for nested JSON ([#13620](https://github.com/opensearch-project/OpenSearch/pull/13620)) +- Fix incorrect parameter names in MinHash token filter configuration handling ([#15233](https://github.com/opensearch-project/OpenSearch/pull/15233)) +- Fix range aggregation optimization ignoring top level queries ([#15287](https://github.com/opensearch-project/OpenSearch/pull/15287)) +- Fix indexing error when flat_object field is explicitly null ([#15375](https://github.com/opensearch-project/OpenSearch/pull/15375)) +- Fix split response processor not included in allowlist ([#15393](https://github.com/opensearch-project/OpenSearch/pull/15393)) +- Fix unchecked cast in dynamic action map getter ([#15394](https://github.com/opensearch-project/OpenSearch/pull/15394)) +- Fix null values indexed as "null" strings in flat_object field ([#14069](https://github.com/opensearch-project/OpenSearch/pull/14069)) +- Fix terms query on wildcard field returns nothing ([#15607](https://github.com/opensearch-project/OpenSearch/pull/15607)) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/snapshot.status.json b/rest-api-spec/src/main/resources/rest-api-spec/api/snapshot.status.json index 
1ac6042941013..354d3c35d2bda 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/snapshot.status.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/snapshot.status.json @@ -40,6 +40,26 @@ "description":"A comma-separated list of snapshot names" } } + }, + { + "path":"/_snapshot/{repository}/{snapshot}/{index}/_status", + "methods":[ + "GET" + ], + "parts":{ + "repository":{ + "type":"string", + "description":"A repository name" + }, + "snapshot":{ + "type":"string", + "description":"A snapshot name" + }, + "index":{ + "type": "list", + "description":"A comma-separated list of index names" + } + } } ] }, @@ -58,7 +78,7 @@ }, "ignore_unavailable":{ "type":"boolean", - "description":"Whether to ignore unavailable snapshots, defaults to false which means a SnapshotMissingException is thrown" + "description":"Whether to ignore unavailable snapshots and indices, defaults to false which means a SnapshotMissingException or IndexNotFoundException is thrown" } } } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/index/91_flat_object_null_value.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/index/91_flat_object_null_value.yml index 98abd58a54e4b..716b6fb51cb43 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/index/91_flat_object_null_value.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/index/91_flat_object_null_value.yml @@ -16,12 +16,15 @@ setup: properties: record: type: "flat_object" + order: + type: "integer" - do: index: index: flat_object_null_value id: 1 body: { - "record": null + "record": null, + "order" : 1 } - do: @@ -31,7 +34,8 @@ setup: body: { "record": { "name": null - } + }, + "order" : 2 } - do: @@ -43,7 +47,8 @@ setup: "name": null, "age":"5", "name1": null - } + }, + "order" : 3 } - do: @@ -60,7 +65,8 @@ setup: } } ] - } + }, + "order" : 4 } - do: @@ -77,7 +83,8 @@ setup: }, null ] - } + }, + "order" : 5 } - do: @@ -97,7 +104,8 @@ setup: } } ] - } + }, + "order" : 6 } - do: @@ -108,7 +116,8 @@ setup: "record": { "name": null, "age":"3" - } + }, + "order" : 7 } - do: @@ -119,7 +128,8 @@ setup: "record": { "age":"3", "name": null - } + }, + "order" : 8 } - do: @@ -133,7 +143,8 @@ setup: 3 ], "age": 4 - } + }, + "order" : 9 } - do: @@ -147,7 +158,8 @@ setup: null, 3 ] - } + }, + "order" : 10 } - do: @@ -157,7 +169,8 @@ setup: body: { "record": { "name": null - } + }, + "order": 11 } - do: @@ -171,7 +184,8 @@ setup: null ] } - } + }, + "order": 12 } - do: @@ -183,7 +197,8 @@ setup: "labels": [ null ] - } + }, + "order": 13 } - do: @@ -198,7 +213,8 @@ setup: null ] } - } + }, + "order": 14 } - do: @@ -211,7 +227,8 @@ setup: "labels": [ null ] - } + }, + "order": 15 } - do: @@ -224,7 +241,8 @@ setup: null ], "age": "4" - } + }, + "order": 16 } - do: @@ -239,7 +257,8 @@ setup: "dsdsdsd" ] } - } + }, + "order": 17 } - do: @@ -253,7 +272,8 @@ setup: "name2": null } } - } + }, + "order": 18 } - do: @@ -271,7 +291,8 @@ setup: ] ] } - } + }, + "order": 19 } - do: @@ -299,7 +320,7 @@ teardown: - is_true: flat_object_null_value.mappings - match: { flat_object_null_value.mappings.properties.record.type: flat_object } # https://github.com/opensearch-project/OpenSearch/tree/main/rest-api-spec/src/main/resources/rest-api-spec/test#length - - length: { flat_object_null_value.mappings.properties: 1 } + - length: { flat_object_null_value.mappings.properties: 2 } --- @@ -328,7 +349,8 @@ teardown: size: 30, query: { exists: { "field": "record" } - } + }, + sort: [{ order: asc}] } - length: { hits.hits: 12 } @@ -352,7 +374,8 
@@ teardown: _source: true, query: { exists: { "field": "record.d" } - } + }, + sort: [{ order: asc}] } - length: { hits.hits: 3 } @@ -367,7 +390,8 @@ teardown: _source: true, query: { term: { record: "dsdsdsd" } - } + }, + sort: [{ order: asc}] } - length: { hits.hits: 2 } @@ -381,7 +405,8 @@ teardown: _source: true, query: { term: { record.name.name1: "dsdsdsd" } - } + }, + sort: [{ order: asc}] } - length: { hits.hits: 2 } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/270_wildcard_fieldtype_queries.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/270_wildcard_fieldtype_queries.yml index 05b6b2e5ed712..d92538824232d 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/270_wildcard_fieldtype_queries.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/270_wildcard_fieldtype_queries.yml @@ -56,6 +56,12 @@ setup: id: 6 body: other_field: "test" + - do: + index: + index: test + id: 7 + body: + my_field: "ABCD" - do: indices.refresh: {} @@ -90,8 +96,9 @@ setup: query: term: my_field.lower: "abcd" - - match: { hits.total.value: 1 } + - match: { hits.total.value: 2 } - match: { hits.hits.0._id: "5" } + - match: { hits.hits.1._id: "7" } - do: search: @@ -100,8 +107,9 @@ setup: query: term: my_field.lower: "ABCD" - - match: { hits.total.value: 1 } + - match: { hits.total.value: 2 } - match: { hits.hits.0._id: "5" } + - match: { hits.hits.1._id: "7" } - do: search: @@ -215,7 +223,7 @@ setup: wildcard: my_field: value: "*" - - match: { hits.total.value: 5 } + - match: { hits.total.value: 6 } --- "regexp match-all works": - do: @@ -226,4 +234,39 @@ setup: regexp: my_field: value: ".*" - - match: { hits.total.value: 5 } + - match: { hits.total.value: 6 } +--- +"terms query on wildcard field matches": + - do: + search: + index: test + body: + query: + terms: { my_field: ["AbCd"] } + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "5" } +--- +"case insensitive query on wildcard field": + - do: + search: + index: test + body: + query: + wildcard: + my_field: + value: "AbCd" + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "5" } + + - do: + search: + index: test + body: + query: + wildcard: + my_field: + value: "AbCd" + case_insensitive: true + - match: { hits.total.value: 2 } + - match: { hits.hits.0._id: "5" } + - match: { hits.hits.1._id: "7" } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/370_approximate_range.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/370_approximate_range.yml new file mode 100644 index 0000000000000..ba896dfcad506 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/370_approximate_range.yml @@ -0,0 +1,72 @@ +--- +"search with approximate range": + - do: + indices.create: + index: test + body: + mappings: + properties: + date: + type: date + index: true + doc_values: true + + - do: + bulk: + index: test + refresh: true + body: + - '{"index": {"_index": "test", "_id": "1" }}' + - '{ "date": "2018-10-29T12:12:12.987Z" }' + - '{ "index": { "_index": "test", "_id": "2" }}' + - '{ "date": "2020-10-29T12:12:12.987Z" }' + - '{ "index": { "_index": "test", "_id": "3" } }' + - '{ "date": "2024-10-29T12:12:12.987Z" }' + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + range: { + date: { + gte: "2018-10-29T12:12:12.987Z" + }, + } + + - match: { hits.total: 3 } + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + sort: [{ date: asc }] + query: + range: { + 
date: { + gte: "2018-10-29T12:12:12.987Z" + }, + } + + + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + sort: [{ date: desc }] + query: + range: { + date: { + gte: "2018-10-29T12:12:12.987Z", + lte: "2020-10-29T12:12:12.987Z" + }, + } + + - match: { hits.total: 2 } + - match: { hits.hits.0._id: "2" } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/snapshot.status/10_basic.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/snapshot.status/10_basic.yml index c35f2419bdc91..819f04407b219 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/snapshot.status/10_basic.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/snapshot.status/10_basic.yml @@ -25,38 +25,40 @@ setup: snapshot: test_snapshot wait_for_completion: true - - do: - snapshot.status: - repository: test_repo_status_1 - snapshot: test_snapshot - - - is_true: snapshots - - match: { snapshots.0.snapshot: test_snapshot } - - match: { snapshots.0.state: SUCCESS } - - gt: { snapshots.0.stats.incremental.file_count: 0 } - - gt: { snapshots.0.stats.incremental.size_in_bytes: 0 } - - gt: { snapshots.0.stats.total.file_count: 0 } - - gt: { snapshots.0.stats.total.size_in_bytes: 0 } - - is_true: snapshots.0.stats.start_time_in_millis -## fast in memory snapshots can take less than one millisecond to complete. - - gte: { snapshots.0.stats.time_in_millis: 0 } - ---- -"Get missing snapshot status throws an exception": - - - do: - catch: /snapshot_missing_exception.+ is missing/ - snapshot.status: - repository: test_repo_status_1 - snapshot: test_nonexistent_snapshot - ---- -"Get missing snapshot status succeeds when ignoreUnavailable is true": - - - do: - snapshot.status: - repository: test_repo_status_1 - snapshot: test_nonexistent_snapshot - ignore_unavailable: true +# TODO: fix and unmute tests - - is_true: snapshots +# - do: +# snapshot.status: +# repository: test_repo_status_1 +# snapshot: test_snapshot +# +# - is_true: snapshots +# - match: { snapshots.0.snapshot: test_snapshot } +# - match: { snapshots.0.state: SUCCESS } +# - gt: { snapshots.0.stats.incremental.file_count: 0 } +# - gt: { snapshots.0.stats.incremental.size_in_bytes: 0 } +# - gt: { snapshots.0.stats.total.file_count: 0 } +# - gt: { snapshots.0.stats.total.size_in_bytes: 0 } +# - is_true: snapshots.0.stats.start_time_in_millis +### fast in memory snapshots can take less than one millisecond to complete. 
+# - gte: { snapshots.0.stats.time_in_millis: 0 } +# +#--- +#"Get missing snapshot status throws an exception": +# +# - do: +# catch: /snapshot_missing_exception.+ is missing/ +# snapshot.status: +# repository: test_repo_status_1 +# snapshot: test_nonexistent_snapshot +# +#--- +#"Get missing snapshot status succeeds when ignoreUnavailable is true": +# +# - do: +# snapshot.status: +# repository: test_repo_status_1 +# snapshot: test_nonexistent_snapshot +# ignore_unavailable: true +# +# - is_true: snapshots diff --git a/server/licenses/protobuf-java-3.22.3.jar.sha1 b/server/licenses/protobuf-java-3.22.3.jar.sha1 deleted file mode 100644 index 80feeec023e7b..0000000000000 --- a/server/licenses/protobuf-java-3.22.3.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -fdee98b8f6abab73f146a4edb4c09e56f8278d03 \ No newline at end of file diff --git a/server/licenses/protobuf-java-3.25.4.jar.sha1 b/server/licenses/protobuf-java-3.25.4.jar.sha1 new file mode 100644 index 0000000000000..21b6fdfd24dc8 --- /dev/null +++ b/server/licenses/protobuf-java-3.25.4.jar.sha1 @@ -0,0 +1 @@ +43fcb86e4a411516c7fc681450f1516de0b862a2 \ No newline at end of file diff --git a/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/repositories/RepositoryBlocksIT.java b/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/repositories/RepositoryBlocksIT.java index 36fe3748e9d10..6c0a156eb6752 100644 --- a/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/repositories/RepositoryBlocksIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/repositories/RepositoryBlocksIT.java @@ -55,13 +55,17 @@ public void testPutRepositoryWithBlocks() { logger.info("--> registering a repository is blocked when the cluster is read only"); try { setClusterReadOnly(true); + Settings.Builder settings = Settings.builder().put("location", randomRepoPath()); assertBlocked( - client().admin() - .cluster() - .preparePutRepository("test-repo-blocks") - .setType("fs") - .setVerify(false) - .setSettings(Settings.builder().put("location", randomRepoPath())), + OpenSearchIntegTestCase.putRepositoryRequestBuilder( + client().admin().cluster(), + "test-repo-blocks", + "fs", + false, + settings, + null, + false + ), Metadata.CLUSTER_READ_ONLY_BLOCK ); } finally { @@ -69,25 +73,13 @@ public void testPutRepositoryWithBlocks() { } logger.info("--> registering a repository is allowed when the cluster is not read only"); - assertAcked( - client().admin() - .cluster() - .preparePutRepository("test-repo-blocks") - .setType("fs") - .setVerify(false) - .setSettings(Settings.builder().put("location", randomRepoPath())) - ); + Settings.Builder settings = Settings.builder().put("location", randomRepoPath()); + OpenSearchIntegTestCase.putRepository(client().admin().cluster(), "test-repo-blocks", "fs", false, settings); } public void testVerifyRepositoryWithBlocks() { - assertAcked( - client().admin() - .cluster() - .preparePutRepository("test-repo-blocks") - .setType("fs") - .setVerify(false) - .setSettings(Settings.builder().put("location", randomRepoPath())) - ); + Settings.Builder settings = Settings.builder().put("location", randomRepoPath()); + OpenSearchIntegTestCase.putRepository(client().admin().cluster(), "test-repo-blocks", "fs", false, settings); // This test checks that the Get Repository operation is never blocked, even if the cluster is read only. 
try { @@ -104,14 +96,8 @@ public void testVerifyRepositoryWithBlocks() { } public void testDeleteRepositoryWithBlocks() { - assertAcked( - client().admin() - .cluster() - .preparePutRepository("test-repo-blocks") - .setType("fs") - .setVerify(false) - .setSettings(Settings.builder().put("location", randomRepoPath())) - ); + Settings.Builder settings = Settings.builder().put("location", randomRepoPath()); + OpenSearchIntegTestCase.putRepository(client().admin().cluster(), "test-repo-blocks", "fs", false, settings); logger.info("--> deleting a repository is blocked when the cluster is read only"); try { @@ -126,14 +112,8 @@ public void testDeleteRepositoryWithBlocks() { } public void testGetRepositoryWithBlocks() { - assertAcked( - client().admin() - .cluster() - .preparePutRepository("test-repo-blocks") - .setType("fs") - .setVerify(false) - .setSettings(Settings.builder().put("location", randomRepoPath())) - ); + Settings.Builder settings = Settings.builder().put("location", randomRepoPath()); + OpenSearchIntegTestCase.putRepository(client().admin().cluster(), "test-repo-blocks", "fs", false, settings); // This test checks that the Get Repository operation is never blocked, even if the cluster is read only. try { diff --git a/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/shards/TransportCatShardsActionIT.java b/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/shards/TransportCatShardsActionIT.java new file mode 100644 index 0000000000000..b86521dedf739 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/shards/TransportCatShardsActionIT.java @@ -0,0 +1,129 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.action.admin.cluster.shards; + +import org.opensearch.action.admin.cluster.state.ClusterStateResponse; +import org.opensearch.action.admin.indices.stats.IndicesStatsResponse; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.action.ActionListener; +import org.opensearch.core.common.Strings; +import org.opensearch.core.tasks.TaskCancelledException; +import org.opensearch.test.InternalTestCluster; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.io.IOException; +import java.util.List; +import java.util.concurrent.CountDownLatch; + +import static org.opensearch.cluster.routing.UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING; +import static org.opensearch.common.unit.TimeValue.timeValueMillis; +import static org.opensearch.search.SearchService.NO_TIMEOUT; + +@OpenSearchIntegTestCase.ClusterScope(numDataNodes = 0, scope = OpenSearchIntegTestCase.Scope.TEST) +public class TransportCatShardsActionIT extends OpenSearchIntegTestCase { + + public void testCatShardsWithSuccessResponse() throws InterruptedException { + internalCluster().startClusterManagerOnlyNodes(1); + List nodes = internalCluster().startDataOnlyNodes(3); + createIndex( + "test", + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 2) + .put(INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), "60m") + .build() + ); + ensureGreen("test"); + + final CatShardsRequest shardsRequest = new CatShardsRequest(); + shardsRequest.setCancelAfterTimeInterval(NO_TIMEOUT); + shardsRequest.setIndices(Strings.EMPTY_ARRAY); + CountDownLatch latch = new CountDownLatch(1); + client().execute(CatShardsAction.INSTANCE, shardsRequest, new ActionListener() { + @Override + public void onResponse(CatShardsResponse catShardsResponse) { + ClusterStateResponse clusterStateResponse = catShardsResponse.getClusterStateResponse(); + IndicesStatsResponse indicesStatsResponse = catShardsResponse.getIndicesStatsResponse(); + for (ShardRouting shard : clusterStateResponse.getState().routingTable().allShards()) { + assertEquals("test", shard.getIndexName()); + assertNotNull(indicesStatsResponse.asMap().get(shard)); + } + latch.countDown(); + } + + @Override + public void onFailure(Exception e) { + fail(); + latch.countDown(); + } + }); + latch.await(); + } + + public void testCatShardsWithTimeoutException() throws IOException, AssertionError, InterruptedException { + List masterNodes = internalCluster().startClusterManagerOnlyNodes(1); + List nodes = internalCluster().startDataOnlyNodes(3); + createIndex( + "test", + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 2) + .put(INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), "60m") + .build() + ); + ensureGreen("test"); + + Settings clusterManagerDataPathSettings = internalCluster().dataPathSettings(masterNodes.get(0)); + // Dropping master node to delay in cluster state call. + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(masterNodes.get(0))); + + CountDownLatch latch = new CountDownLatch(2); + new Thread(() -> { + try { + // Ensures the cancellation timeout expires. + Thread.sleep(2000); + // Starting master node to proceed in cluster state call. 
+ internalCluster().startClusterManagerOnlyNode( + Settings.builder().put("node.name", masterNodes.get(0)).put(clusterManagerDataPathSettings).build() + ); + latch.countDown(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + }).start(); + + final CatShardsRequest shardsRequest = new CatShardsRequest(); + TimeValue timeoutInterval = timeValueMillis(1000); + shardsRequest.setCancelAfterTimeInterval(timeoutInterval); + shardsRequest.clusterManagerNodeTimeout(timeValueMillis(2500)); + shardsRequest.setIndices(Strings.EMPTY_ARRAY); + client().execute(CatShardsAction.INSTANCE, shardsRequest, new ActionListener() { + @Override + public void onResponse(CatShardsResponse catShardsResponse) { + // onResponse should not be called. + latch.countDown(); + throw new AssertionError( + "The cat shards action is expected to fail with a TaskCancelledException, but it received a successful response instead." + ); + } + + @Override + public void onFailure(Exception e) { + assertSame(e.getClass(), TaskCancelledException.class); + assertEquals(e.getMessage(), "Cancellation timeout of " + timeoutInterval + " is expired"); + latch.countDown(); + } + }); + latch.await(); + } + +} diff --git a/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/snapshots/SnapshotBlocksIT.java b/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/snapshots/SnapshotBlocksIT.java index 78fb01b07b6b1..0f29f02b284a4 100644 --- a/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/snapshots/SnapshotBlocksIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/snapshots/SnapshotBlocksIT.java @@ -80,13 +80,8 @@ protected void setUpRepository() throws Exception { logger.info("--> register a repository"); - assertAcked( - client().admin() - .cluster() - .preparePutRepository(REPOSITORY_NAME) - .setType("fs") - .setSettings(Settings.builder().put("location", randomRepoPath())) - ); + Settings.Builder settings = Settings.builder().put("location", randomRepoPath()); + OpenSearchIntegTestCase.putRepository(client().admin().cluster(), REPOSITORY_NAME, "fs", settings); logger.info("--> verify the repository"); VerifyRepositoryResponse verifyResponse = client().admin().cluster().prepareVerifyRepository(REPOSITORY_NAME).get(); diff --git a/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/CreateIndexIT.java b/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/CreateIndexIT.java index fbe713d9e22c4..bd3c9e1456074 100644 --- a/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/CreateIndexIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/CreateIndexIT.java @@ -41,16 +41,24 @@ import org.opensearch.action.support.IndicesOptions; import org.opensearch.action.support.master.AcknowledgedResponse; import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.applicationtemplates.ClusterStateSystemTemplateLoader; +import org.opensearch.cluster.applicationtemplates.SystemTemplate; +import org.opensearch.cluster.applicationtemplates.SystemTemplateMetadata; +import org.opensearch.cluster.applicationtemplates.TemplateRepositoryMetadata; +import org.opensearch.cluster.metadata.Context; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.MappingMetadata; import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.service.ClusterService; import 
org.opensearch.common.settings.Settings; import org.opensearch.common.settings.SettingsException; import org.opensearch.common.unit.TimeValue; import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.core.action.ActionListener; +import org.opensearch.core.common.bytes.BytesReference; import org.opensearch.index.IndexNotFoundException; import org.opensearch.index.IndexService; +import org.opensearch.index.IndexSettings; import org.opensearch.index.mapper.MapperParsingException; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.query.RangeQueryBuilder; @@ -59,7 +67,10 @@ import org.opensearch.test.OpenSearchIntegTestCase.ClusterScope; import org.opensearch.test.OpenSearchIntegTestCase.Scope; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; import java.util.Map; +import java.util.UUID; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiFunction; @@ -430,4 +441,53 @@ public void testCreateIndexWithNullReplicaCountPickUpClusterReplica() { ); } } + + public void testCreateIndexWithContextSettingsAndTemplate() throws Exception { + int numReplicas = 1; + String indexName = "test-idx-1"; + Settings settings = Settings.builder() + .put(IndexMetadata.INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 1) + .put(IndexMetadata.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), (String) null) + .build(); + Context context = new Context("test"); + + String templateContent = "{\n" + + " \"template\": {\n" + + " \"settings\": {\n" + + " \"merge.policy\": \"log_byte_size\"\n" + + " }\n" + + " },\n" + + " \"_meta\": {\n" + + " \"_type\": \"@abc_template\",\n" + + " \"_version\": 1\n" + + " },\n" + + " \"version\": 1\n" + + "}\n"; + + ClusterStateSystemTemplateLoader loader = new ClusterStateSystemTemplateLoader( + internalCluster().clusterManagerClient(), + () -> internalCluster().getInstance(ClusterService.class).state() + ); + loader.loadTemplate( + new SystemTemplate( + BytesReference.fromByteBuffer(ByteBuffer.wrap(templateContent.getBytes(StandardCharsets.UTF_8))), + SystemTemplateMetadata.fromComponentTemplateInfo("test", 1L), + new TemplateRepositoryMetadata(UUID.randomUUID().toString(), 1L) + ) + ); + + assertAcked(client().admin().indices().prepareCreate(indexName).setSettings(settings).setContext(context).get()); + + IndicesService indicesService = internalCluster().getInstance(IndicesService.class, internalCluster().getClusterManagerName()); + + for (IndexService indexService : indicesService) { + assertEquals(indexName, indexService.index().getName()); + assertEquals( + numReplicas, + (int) indexService.getIndexSettings().getSettings().getAsInt(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, null) + ); + assertEquals(context, indexService.getMetadata().context()); + assertEquals("log_byte_size", indexService.getMetadata().getSettings().get(IndexSettings.INDEX_MERGE_POLICY.getKey())); + } + } } diff --git a/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/RemoteCloneIndexIT.java b/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/RemoteCloneIndexIT.java index 009f5111078de..abce2fc878f27 100644 --- a/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/RemoteCloneIndexIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/RemoteCloneIndexIT.java @@ -224,7 +224,7 @@ protected void setLowPriorityUploadRate(String repoName, String value) throws Ex Settings.Builder 
settings = Settings.builder() .put("location", rmd.settings().get("location")) .put("max_remote_low_priority_upload_bytes_per_sec", value); - assertAcked(client().admin().cluster().preparePutRepository(repoName).setType(rmd.type()).setSettings(settings).get()); + createRepository(repoName, rmd.type(), settings); } public void testCreateCloneIndexFailure() throws ExecutionException, InterruptedException { diff --git a/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/RemoteSplitIndexIT.java b/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/RemoteSplitIndexIT.java index 928c9e33e19cb..3038db4583cf4 100644 --- a/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/RemoteSplitIndexIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/RemoteSplitIndexIT.java @@ -66,6 +66,7 @@ import org.opensearch.index.seqno.SeqNoStats; import org.opensearch.index.shard.IndexShard; import org.opensearch.indices.IndicesService; +import org.opensearch.indices.RemoteStoreSettings; import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.remotestore.RemoteStoreBaseIntegTestCase; import org.opensearch.test.OpenSearchIntegTestCase; @@ -109,13 +110,16 @@ public void cleanUp() throws Exception { assertAcked( client().admin().indices().prepareDelete("*").setIndicesOptions(IndicesOptions.LENIENT_EXPAND_OPEN_CLOSED_HIDDEN).get() ); - assertBusy(() -> { - try { - assertEquals(0, getFileCount(translogRepoPath)); - } catch (IOException e) { - fail(); - } - }, 30, TimeUnit.SECONDS); + // With pinned timestamp, we can have tlog files even after deletion. + if (RemoteStoreSettings.isPinnedTimestampsEnabled() == false) { + assertBusy(() -> { + try { + assertEquals(0, getFileCount(translogRepoPath)); + } catch (IOException e) { + fail(); + } + }, 30, TimeUnit.SECONDS); + } super.teardown(); } diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/ClusterHealthIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/ClusterHealthIT.java index 0304e00a49070..33f101cce2382 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/ClusterHealthIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/ClusterHealthIT.java @@ -37,6 +37,8 @@ import org.opensearch.action.support.IndicesOptions; import org.opensearch.action.support.PlainActionFuture; import org.opensearch.cluster.health.ClusterHealthStatus; +import org.opensearch.cluster.health.ClusterIndexHealth; +import org.opensearch.cluster.health.ClusterShardHealth; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.routing.UnassignedInfo; import org.opensearch.cluster.service.ClusterService; @@ -49,6 +51,7 @@ import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.concurrent.atomic.AtomicBoolean; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; @@ -439,4 +442,164 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS completionFuture.actionGet(TimeValue.timeValueSeconds(30)); } } + + public void testHealthWithClusterLevelAppliedAtTransportLayer() { + createIndex( + "test1", + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build() + ); + ensureGreen(); + ClusterHealthResponse healthResponse = client().admin() + .cluster() + .prepareHealth() + .setApplyLevelAtTransportLayer(true) + 
.execute() + .actionGet(); + assertEquals(ClusterHealthStatus.GREEN, healthResponse.getStatus()); + assertTrue(healthResponse.getIndices().isEmpty()); + assertEquals(1, healthResponse.getActiveShards()); + assertEquals(1, healthResponse.getActivePrimaryShards()); + assertEquals(0, healthResponse.getUnassignedShards()); + assertEquals(0, healthResponse.getInitializingShards()); + assertEquals(0, healthResponse.getRelocatingShards()); + assertEquals(0, healthResponse.getDelayedUnassignedShards()); + } + + public void testHealthWithIndicesLevelAppliedAtTransportLayer() { + createIndex( + "test1", + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build() + ); + ensureGreen(); + ClusterHealthResponse healthResponse = client().admin() + .cluster() + .prepareHealth() + .setLevel("indices") + .setApplyLevelAtTransportLayer(true) + .execute() + .actionGet(); + assertEquals(ClusterHealthStatus.GREEN, healthResponse.getStatus()); + + assertEquals(1, healthResponse.getActiveShards()); + assertEquals(1, healthResponse.getActivePrimaryShards()); + assertEquals(0, healthResponse.getUnassignedShards()); + assertEquals(0, healthResponse.getInitializingShards()); + assertEquals(0, healthResponse.getRelocatingShards()); + assertEquals(0, healthResponse.getDelayedUnassignedShards()); + + Map indices = healthResponse.getIndices(); + assertFalse(indices.isEmpty()); + assertEquals(1, indices.size()); + for (Map.Entry indicesHealth : indices.entrySet()) { + String indexName = indicesHealth.getKey(); + assertEquals("test1", indexName); + ClusterIndexHealth indicesHealthValue = indicesHealth.getValue(); + assertEquals(1, indicesHealthValue.getActiveShards()); + assertEquals(1, indicesHealthValue.getActivePrimaryShards()); + assertEquals(0, indicesHealthValue.getInitializingShards()); + assertEquals(0, indicesHealthValue.getUnassignedShards()); + assertEquals(0, indicesHealthValue.getDelayedUnassignedShards()); + assertEquals(0, indicesHealthValue.getRelocatingShards()); + assertEquals(ClusterHealthStatus.GREEN, indicesHealthValue.getStatus()); + assertTrue(indicesHealthValue.getShards().isEmpty()); + } + } + + public void testHealthWithShardLevelAppliedAtTransportLayer() { + int dataNodes = internalCluster().getDataNodeNames().size(); + int greenClusterReplicaCount = dataNodes - 1; + int yellowClusterReplicaCount = dataNodes; + + createIndex( + "test1", + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, greenClusterReplicaCount) + .build() + ); + ensureGreen(TimeValue.timeValueSeconds(120), "test1"); + createIndex( + "test2", + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, greenClusterReplicaCount) + .build() + ); + ensureGreen(TimeValue.timeValueSeconds(120)); + client().admin() + .indices() + .prepareUpdateSettings() + .setIndices("test2") + .setSettings(Settings.builder().put("index.number_of_replicas", yellowClusterReplicaCount).build()) + .execute() + .actionGet(); + ClusterHealthResponse healthResponse = client().admin() + .cluster() + .prepareHealth() + .setLevel("shards") + .setApplyLevelAtTransportLayer(true) + .execute() + .actionGet(); + assertEquals(ClusterHealthStatus.YELLOW, healthResponse.getStatus()); + + assertEquals(2 * dataNodes, healthResponse.getActiveShards()); + assertEquals(2, healthResponse.getActivePrimaryShards()); + assertEquals(1, healthResponse.getUnassignedShards()); + 
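+ // test2 now has one more shard copy than there are data nodes, so exactly one replica stays unassigned and the cluster reports yellow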
assertEquals(0, healthResponse.getInitializingShards()); + assertEquals(0, healthResponse.getRelocatingShards()); + assertEquals(0, healthResponse.getDelayedUnassignedShards()); + + Map indices = healthResponse.getIndices(); + assertFalse(indices.isEmpty()); + assertEquals(2, indices.size()); + for (Map.Entry indicesHealth : indices.entrySet()) { + String indexName = indicesHealth.getKey(); + boolean indexHasMoreReplicas = indexName.equals("test2"); + ClusterIndexHealth indicesHealthValue = indicesHealth.getValue(); + assertEquals(dataNodes, indicesHealthValue.getActiveShards()); + assertEquals(1, indicesHealthValue.getActivePrimaryShards()); + assertEquals(0, indicesHealthValue.getInitializingShards()); + assertEquals(indexHasMoreReplicas ? 1 : 0, indicesHealthValue.getUnassignedShards()); + assertEquals(0, indicesHealthValue.getDelayedUnassignedShards()); + assertEquals(0, indicesHealthValue.getRelocatingShards()); + assertEquals(indexHasMoreReplicas ? ClusterHealthStatus.YELLOW : ClusterHealthStatus.GREEN, indicesHealthValue.getStatus()); + Map shards = indicesHealthValue.getShards(); + assertFalse(shards.isEmpty()); + assertEquals(1, shards.size()); + for (Map.Entry shardHealth : shards.entrySet()) { + ClusterShardHealth clusterShardHealth = shardHealth.getValue(); + assertEquals(dataNodes, clusterShardHealth.getActiveShards()); + assertEquals(indexHasMoreReplicas ? 1 : 0, clusterShardHealth.getUnassignedShards()); + assertEquals(0, clusterShardHealth.getDelayedUnassignedShards()); + assertEquals(0, clusterShardHealth.getRelocatingShards()); + assertEquals(0, clusterShardHealth.getInitializingShards()); + assertTrue(clusterShardHealth.isPrimaryActive()); + assertEquals(indexHasMoreReplicas ? ClusterHealthStatus.YELLOW : ClusterHealthStatus.GREEN, clusterShardHealth.getStatus()); + } + } + } + + public void testHealthWithAwarenessAttributesLevelAppliedAtTransportLayer() { + createIndex( + "test1", + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build() + ); + ensureGreen(); + ClusterHealthResponse healthResponse = client().admin() + .cluster() + .prepareHealth() + .setLevel("awareness_attributes") + .setApplyLevelAtTransportLayer(true) + .execute() + .actionGet(); + assertEquals(ClusterHealthStatus.GREEN, healthResponse.getStatus()); + assertTrue(healthResponse.getIndices().isEmpty()); + assertNotNull(healthResponse.getClusterAwarenessHealth()); + assertEquals(1, healthResponse.getActiveShards()); + assertEquals(1, healthResponse.getActivePrimaryShards()); + assertEquals(0, healthResponse.getUnassignedShards()); + assertEquals(0, healthResponse.getInitializingShards()); + assertEquals(0, healthResponse.getRelocatingShards()); + assertEquals(0, healthResponse.getDelayedUnassignedShards()); + } } diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/allocation/SearchReplicaFilteringAllocationIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/allocation/SearchReplicaFilteringAllocationIT.java new file mode 100644 index 0000000000000..5f65d6647f26d --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/allocation/SearchReplicaFilteringAllocationIT.java @@ -0,0 +1,125 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.cluster.allocation; + +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.routing.IndexShardRoutingTable; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; +import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.util.List; +import java.util.stream.Collectors; + +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REPLICATION_TYPE; +import static org.opensearch.cluster.routing.allocation.decider.SearchReplicaAllocationDecider.SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_SETTING; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +public class SearchReplicaFilteringAllocationIT extends OpenSearchIntegTestCase { + + @Override + protected Settings featureFlagSettings() { + return Settings.builder().put(super.featureFlagSettings()).put(FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL, Boolean.TRUE).build(); + } + + public void testSearchReplicaDedicatedIncludes() { + List nodesIds = internalCluster().startNodes(3); + final String node_0 = nodesIds.get(0); + final String node_1 = nodesIds.get(1); + final String node_2 = nodesIds.get(2); + assertEquals(3, cluster().size()); + + client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings( + Settings.builder().put(SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_SETTING.getKey() + "_name", node_1 + "," + node_0) + ) + .execute() + .actionGet(); + + createIndex( + "test", + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put(IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS, 1) + .put(SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) + .build() + ); + ensureGreen("test"); + // ensure primary is not on node 0 or 1, + IndexShardRoutingTable routingTable = getRoutingTable(); + assertEquals(node_2, getNodeName(routingTable.primaryShard().currentNodeId())); + + String existingSearchReplicaNode = getNodeName(routingTable.searchOnlyReplicas().get(0).currentNodeId()); + String emptyAllowedNode = existingSearchReplicaNode.equals(node_0) ? node_1 : node_0; + + // set the included nodes to the other open node, search replica should relocate to that node. 
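+ // the include filter only constrains search replicas, so the primary is expected to remain on node_2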
+ client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put(SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_SETTING.getKey() + "_name", emptyAllowedNode)) + .execute() + .actionGet(); + ensureGreen("test"); + + routingTable = getRoutingTable(); + assertEquals(node_2, getNodeName(routingTable.primaryShard().currentNodeId())); + assertEquals(emptyAllowedNode, getNodeName(routingTable.searchOnlyReplicas().get(0).currentNodeId())); + } + + public void testSearchReplicaDedicatedIncludes_DoNotAssignToOtherNodes() { + List nodesIds = internalCluster().startNodes(3); + final String node_0 = nodesIds.get(0); + final String node_1 = nodesIds.get(1); + final String node_2 = nodesIds.get(2); + assertEquals(3, cluster().size()); + + // set filter on 1 node and set search replica count to 2 - should leave 1 unassigned + client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put(SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_SETTING.getKey() + "_name", node_1)) + .execute() + .actionGet(); + + logger.info("--> creating an index with no replicas"); + createIndex( + "test", + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put(IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS, 2) + .put(SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) + .build() + ); + ensureYellowAndNoInitializingShards("test"); + IndexShardRoutingTable routingTable = getRoutingTable(); + assertEquals(2, routingTable.searchOnlyReplicas().size()); + List assignedSearchShards = routingTable.searchOnlyReplicas() + .stream() + .filter(ShardRouting::assignedToNode) + .collect(Collectors.toList()); + assertEquals(1, assignedSearchShards.size()); + assertEquals(node_1, getNodeName(assignedSearchShards.get(0).currentNodeId())); + assertEquals(1, routingTable.searchOnlyReplicas().stream().filter(ShardRouting::unassigned).count()); + } + + private IndexShardRoutingTable getRoutingTable() { + IndexShardRoutingTable routingTable = getClusterState().routingTable().index("test").getShards().get(0); + return routingTable; + } + + private String getNodeName(String id) { + return getClusterState().nodes().get(id).getName(); + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/routing/allocation/decider/DiskThresholdDeciderIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/routing/allocation/decider/DiskThresholdDeciderIT.java index cc8747e5f5666..fc126613ce34e 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/routing/allocation/decider/DiskThresholdDeciderIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/routing/allocation/decider/DiskThresholdDeciderIT.java @@ -364,13 +364,8 @@ public void testRestoreSnapshotAllocationDoesNotExceedWatermark() throws Excepti final String dataNodeName = internalCluster().startDataOnlyNode(); ensureStableCluster(3); - assertAcked( - client().admin() - .cluster() - .preparePutRepository("repo") - .setType(FsRepository.TYPE) - .setSettings(Settings.builder().put("location", randomRepoPath()).put("compress", randomBoolean())) - ); + Settings.Builder settings = Settings.builder().put("location", randomRepoPath()).put("compress", randomBoolean()); + createRepository("repo", FsRepository.TYPE, settings); final InternalClusterInfoService clusterInfoService = (InternalClusterInfoService) internalCluster() .getCurrentClusterManagerNodeInstance(ClusterInfoService.class); diff --git 
a/server/src/internalClusterTest/java/org/opensearch/cluster/shards/ClusterShardLimitIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/shards/ClusterShardLimitIT.java index 5eef7074e1dd6..3718dce538053 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/shards/ClusterShardLimitIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/shards/ClusterShardLimitIT.java @@ -494,8 +494,7 @@ public void testRestoreSnapshotOverLimit() { repoSettings.put("location", randomRepoPath()); repoSettings.put("compress", randomBoolean()); repoSettings.put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES); - - assertAcked(client.admin().cluster().preparePutRepository("test-repo").setType("fs").setSettings(repoSettings.build())); + createRepository("test-repo", "fs", repoSettings); int dataNodes = client().admin().cluster().prepareState().get().getState().getNodes().getDataNodes().size(); ShardCounts counts = ShardCounts.forDataNodeCount(dataNodes); diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerIT.java index 7d2e24c777da3..cf17a58d937de 100644 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerIT.java @@ -31,15 +31,17 @@ import java.util.Base64; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; -import static org.opensearch.common.util.FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL; import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.CLUSTER_STATE_CLEANUP_INTERVAL_DEFAULT; import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING; import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.RETAINED_MANIFESTS; import static org.opensearch.gateway.remote.RemoteClusterStateCleanupManager.SKIP_CLEANUP_STATE_CHANGES; import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING; +import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_PUBLICATION_SETTING_KEY; +import static org.opensearch.gateway.remote.RemoteUploadStats.REMOTE_UPLOAD; import static org.opensearch.gateway.remote.routingtable.RemoteIndexRoutingTable.INDEX_ROUTING_TABLE; import static org.opensearch.indices.IndicesService.CLUSTER_DEFAULT_INDEX_REFRESH_INTERVAL_SETTING; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY; @@ -187,7 +189,7 @@ public void testRemoteCleanupDeleteStaleIndexRoutingFiles() throws Exception { RemoteStoreEnums.PathType.HASHED_PREFIX.toString() ) .put("node.attr." 
+ REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, REMOTE_ROUTING_TABLE_REPO) - .put(REMOTE_PUBLICATION_EXPERIMENTAL, true); + .put(REMOTE_PUBLICATION_SETTING_KEY, true); int shardCount = randomIntBetween(1, 2); int replicaCount = 1; @@ -253,11 +255,13 @@ private void verifyIndexRoutingFilesDeletion( DiscoveryStats discoveryStats = nodesStatsResponse.getNodes().get(0).getDiscoveryStats(); assertNotNull(discoveryStats.getClusterStateStats()); for (PersistedStateStats persistedStateStats : discoveryStats.getClusterStateStats().getPersistenceStats()) { - Map extendedFields = persistedStateStats.getExtendedFields(); - assertTrue(extendedFields.containsKey(RemotePersistenceStats.INDEX_ROUTING_FILES_CLEANUP_ATTEMPT_FAILED_COUNT)); - long cleanupAttemptFailedCount = extendedFields.get(RemotePersistenceStats.INDEX_ROUTING_FILES_CLEANUP_ATTEMPT_FAILED_COUNT) - .get(); - assertEquals(0, cleanupAttemptFailedCount); + if (Objects.equals(persistedStateStats.getStatsName(), REMOTE_UPLOAD)) { + Map extendedFields = persistedStateStats.getExtendedFields(); + assertTrue(extendedFields.containsKey(RemoteUploadStats.INDEX_ROUTING_FILES_CLEANUP_ATTEMPT_FAILED_COUNT)); + long cleanupAttemptFailedCount = extendedFields.get(RemoteUploadStats.INDEX_ROUTING_FILES_CLEANUP_ATTEMPT_FAILED_COUNT) + .get(); + assertEquals(0, cleanupAttemptFailedCount); + } } } diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateTermVersionIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateTermVersionIT.java new file mode 100644 index 0000000000000..256c2ef44b078 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateTermVersionIT.java @@ -0,0 +1,212 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.gateway.remote; + +import org.opensearch.action.admin.cluster.state.ClusterStateAction; +import org.opensearch.action.admin.cluster.state.ClusterStateRequest; +import org.opensearch.action.admin.cluster.state.ClusterStateResponse; +import org.opensearch.action.support.clustermanager.term.GetTermVersionAction; +import org.opensearch.action.support.clustermanager.term.GetTermVersionResponse; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.coordination.ClusterStateTermVersion; +import org.opensearch.cluster.coordination.PublicationTransportHandler; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.settings.Settings; +import org.opensearch.core.transport.TransportResponse; +import org.opensearch.gateway.remote.model.RemoteRoutingTableBlobStore; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.remote.RemoteStoreEnums; +import org.opensearch.plugins.Plugin; +import org.opensearch.remotestore.RemoteStoreBaseIntegTestCase; +import org.opensearch.repositories.RepositoriesService; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.test.OpenSearchIntegTestCase; +import org.opensearch.test.transport.MockTransportService; +import org.opensearch.transport.TransportService; +import org.junit.Before; + +import java.nio.file.Path; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING; +import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_PUBLICATION_SETTING_KEY; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY; +import static org.hamcrest.Matchers.is; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +public class RemoteClusterStateTermVersionIT extends RemoteStoreBaseIntegTestCase { + private static final String INDEX_NAME = "test-index"; + private static final String INDEX_NAME_1 = "test-index-1"; + List indexRoutingPaths; + AtomicInteger indexRoutingFiles = new AtomicInteger(); + private final RemoteStoreEnums.PathType pathType = RemoteStoreEnums.PathType.HASHED_PREFIX; + + @Before + public void setup() { + asyncUploadMockFsRepo = false; + } + + protected Collection> nodePlugins() { + return List.of(MockTransportService.TestPlugin.class); + } + + @Override + protected Settings nodeSettings(int nodeOrdinal) { + return Settings.builder() + .put(super.nodeSettings(nodeOrdinal)) + .put(REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true) + .put( + RemoteRoutingTableBlobStore.REMOTE_ROUTING_TABLE_PATH_TYPE_SETTING.getKey(), + RemoteStoreEnums.PathType.HASHED_PREFIX.toString() + ) + .put("node.attr." 
+ REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, REMOTE_ROUTING_TABLE_REPO) + .put(REMOTE_PUBLICATION_SETTING_KEY, true) + .build(); + } + + public void testRemoteClusterStateFallback() throws Exception { + BlobStoreRepository repository = prepareClusterAndVerifyRepository(); + + RemoteClusterStateService remoteClusterStateService = internalCluster().getClusterManagerNodeInstance( + RemoteClusterStateService.class + ); + + RemoteManifestManager remoteManifestManager = remoteClusterStateService.getRemoteManifestManager(); + Optional latestManifest = remoteManifestManager.getLatestClusterMetadataManifest( + getClusterState().getClusterName().value(), + getClusterState().getMetadata().clusterUUID() + ); + + String[] dataNodes = internalCluster().getDataNodeNames().toArray(String[]::new); + MockTransportService primaryService = (MockTransportService) internalCluster().getInstance(TransportService.class, dataNodes[0]); + + String cm = internalCluster().getClusterManagerName(); + primaryService.addRequestHandlingBehavior( + PublicationTransportHandler.COMMIT_STATE_ACTION_NAME, + (handler, request, channel, task) -> { + // not committing the state + logger.info("ignoring the commit from cluster-manager {}", request); + channel.sendResponse(TransportResponse.Empty.INSTANCE); + } + ); + + String index = "index_1"; + createIndex( + index, + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put(MapperService.INDEX_MAPPING_TOTAL_FIELDS_LIMIT_SETTING.getKey(), Long.MAX_VALUE) + .build() + ); + logger.info("created index {}", index); + Map callCounters = Map.ofEntries( + Map.entry(ClusterStateAction.NAME, new AtomicInteger()), + Map.entry(GetTermVersionAction.NAME, new AtomicInteger()) + ); + + addCallCountInterceptor(cm, callCounters); + + ClusterStateResponse stateResponseM = client(cm).admin().cluster().state(new ClusterStateRequest()).actionGet(); + + ClusterStateResponse stateResponseD = client(dataNodes[0]).admin().cluster().state(new ClusterStateRequest()).actionGet(); + assertEquals(stateResponseM, stateResponseD); + assertThat(callCounters.get(ClusterStateAction.NAME).get(), is(0)); + assertThat(callCounters.get(GetTermVersionAction.NAME).get(), is(1)); + + } + + public void testNoRemoteClusterStateFound() throws Exception { + BlobStoreRepository repository = prepareClusterAndVerifyRepository(); + + RemoteClusterStateService remoteClusterStateService = internalCluster().getClusterManagerNodeInstance( + RemoteClusterStateService.class + ); + + RemoteManifestManager remoteManifestManager = remoteClusterStateService.getRemoteManifestManager(); + Optional latestManifest = remoteManifestManager.getLatestClusterMetadataManifest( + getClusterState().getClusterName().value(), + getClusterState().getMetadata().clusterUUID() + ); + + String[] dataNodes = internalCluster().getDataNodeNames().toArray(String[]::new); + MockTransportService primaryService = (MockTransportService) internalCluster().getInstance(TransportService.class, dataNodes[0]); + primaryService.addRequestHandlingBehavior( + PublicationTransportHandler.COMMIT_STATE_ACTION_NAME, + (handler, request, channel, task) -> { + // not committing the state + logger.info("ignoring the commit from cluster-manager {}", request); + channel.sendResponse(TransportResponse.Empty.INSTANCE); + } + ); + + ClusterState state = internalCluster().clusterService().state(); + String cm = internalCluster().getClusterManagerName(); + MockTransportService cmservice = 
(MockTransportService) internalCluster().getInstance(TransportService.class, cm); + cmservice.addRequestHandlingBehavior(GetTermVersionAction.NAME, (handler, request, channel, task) -> { + channel.sendResponse( + new GetTermVersionResponse(new ClusterStateTermVersion(state.getClusterName(), state.stateUUID(), -1, -1), true) + ); + }); + + Map callCounters = Map.ofEntries( + Map.entry(ClusterStateAction.NAME, new AtomicInteger()), + Map.entry(GetTermVersionAction.NAME, new AtomicInteger()) + ); + + addCallCountInterceptor(cm, callCounters); + + ClusterStateResponse stateResponseM = client(cm).admin().cluster().state(new ClusterStateRequest()).actionGet(); + ClusterStateResponse stateResponseD = client(dataNodes[0]).admin().cluster().state(new ClusterStateRequest()).actionGet(); + assertEquals(stateResponseM, stateResponseD); + assertThat(callCounters.get(ClusterStateAction.NAME).get(), is(1)); + assertThat(callCounters.get(GetTermVersionAction.NAME).get(), is(1)); + + } + + private void addCallCountInterceptor(String nodeName, Map callCounters) { + MockTransportService primaryService = (MockTransportService) internalCluster().getInstance(TransportService.class, nodeName); + for (var ctrEnty : callCounters.entrySet()) { + primaryService.addRequestHandlingBehavior(ctrEnty.getKey(), (handler, request, channel, task) -> { + ctrEnty.getValue().incrementAndGet(); + logger.info("--> {} response redirect", ctrEnty.getKey()); + handler.messageReceived(request, channel, task); + }); + } + } + + private BlobStoreRepository prepareClusterAndVerifyRepository() throws Exception { + clusterSettingsSuppliedByTest = true; + Path segmentRepoPath = randomRepoPath(); + Path translogRepoPath = randomRepoPath(); + Path remoteRoutingTableRepoPath = randomRepoPath(); + Settings settings = buildRemoteStoreNodeAttributes( + REPOSITORY_NAME, + segmentRepoPath, + REPOSITORY_2_NAME, + translogRepoPath, + REMOTE_ROUTING_TABLE_REPO, + remoteRoutingTableRepoPath, + false + ); + prepareCluster(1, 3, INDEX_NAME, 1, 5, settings); + ensureGreen(INDEX_NAME); + + RepositoriesService repositoriesService = internalCluster().getClusterManagerNodeInstance(RepositoriesService.class); + BlobStoreRepository repository = (BlobStoreRepository) repositoriesService.repository(REMOTE_ROUTING_TABLE_REPO); + + return repository; + } + +} diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteRoutingTableServiceIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteRoutingTableServiceIT.java index b0d046cbdf3db..d143cbd7c3450 100644 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteRoutingTableServiceIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteRoutingTableServiceIT.java @@ -24,6 +24,7 @@ import org.opensearch.test.OpenSearchIntegTestCase; import org.junit.Before; +import java.io.IOException; import java.nio.charset.StandardCharsets; import java.nio.file.Path; import java.util.ArrayList; @@ -36,8 +37,8 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; -import static org.opensearch.common.util.FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL; import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING; +import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_PUBLICATION_SETTING_KEY; import static org.opensearch.gateway.remote.routingtable.RemoteIndexRoutingTable.INDEX_ROUTING_TABLE; import static 
org.opensearch.indices.IndicesService.CLUSTER_DEFAULT_INDEX_REFRESH_INTERVAL_SETTING; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY; @@ -46,7 +47,7 @@ public class RemoteRoutingTableServiceIT extends RemoteStoreBaseIntegTestCase { private static final String INDEX_NAME = "test-index"; private static final String INDEX_NAME_1 = "test-index-1"; - BlobPath indexRoutingPath; + List indexRoutingPaths; AtomicInteger indexRoutingFiles = new AtomicInteger(); private final RemoteStoreEnums.PathType pathType = RemoteStoreEnums.PathType.HASHED_PREFIX; @@ -65,7 +66,11 @@ protected Settings nodeSettings(int nodeOrdinal) { RemoteStoreEnums.PathType.HASHED_PREFIX.toString() ) .put("node.attr." + REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, REMOTE_ROUTING_TABLE_REPO) - .put(REMOTE_PUBLICATION_EXPERIMENTAL, true) + .put(REMOTE_PUBLICATION_SETTING_KEY, true) + .put( + RemoteClusterStateService.REMOTE_CLUSTER_STATE_CHECKSUM_VALIDATION_MODE_SETTING.getKey(), + RemoteClusterStateService.RemoteClusterStateValidationMode.FAILURE + ) .build(); } @@ -91,7 +96,7 @@ public void testRemoteRoutingTableIndexLifecycle() throws Exception { updateIndexSettings(INDEX_NAME, IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 2); ensureGreen(INDEX_NAME); assertBusy(() -> { - int indexRoutingFilesAfterUpdate = repository.blobStore().blobContainer(indexRoutingPath).listBlobs().size(); + int indexRoutingFilesAfterUpdate = repository.blobStore().blobContainer(indexRoutingPaths.get(0)).listBlobs().size(); // At-least 3 new index routing files will be created as shards will transition from INIT -> UNASSIGNED -> STARTED state assertTrue(indexRoutingFilesAfterUpdate >= indexRoutingFiles.get() + 3); }); @@ -112,6 +117,47 @@ public void testRemoteRoutingTableIndexLifecycle() throws Exception { assertTrue(areRoutingTablesSame(routingTableVersions)); } + public void testRemoteRoutingTableWithMultipleIndex() throws Exception { + BlobStoreRepository repository = prepareClusterAndVerifyRepository(); + + RemoteClusterStateService remoteClusterStateService = internalCluster().getClusterManagerNodeInstance( + RemoteClusterStateService.class + ); + RemoteManifestManager remoteManifestManager = remoteClusterStateService.getRemoteManifestManager(); + Optional latestManifest = remoteManifestManager.getLatestClusterMetadataManifest( + getClusterState().getClusterName().value(), + getClusterState().getMetadata().clusterUUID() + ); + List expectedIndexNames = new ArrayList<>(); + List deletedIndexNames = new ArrayList<>(); + verifyUpdatesInManifestFile(latestManifest, expectedIndexNames, 1, deletedIndexNames, true); + + List routingTables = getRoutingTableFromAllNodes(); + // Verify indices in routing table + Set expectedIndicesInRoutingTable = Set.of(INDEX_NAME); + assertEquals(routingTables.get(0).getIndicesRouting().keySet(), expectedIndicesInRoutingTable); + // Verify routing table across all nodes is equal + assertTrue(areRoutingTablesSame(routingTables)); + + // Create new index + createIndex(INDEX_NAME_1, remoteStoreIndexSettings(1, 5)); + ensureGreen(INDEX_NAME_1); + + latestManifest = remoteManifestManager.getLatestClusterMetadataManifest( + getClusterState().getClusterName().value(), + getClusterState().getMetadata().clusterUUID() + ); + + updateIndexRoutingPaths(repository); + verifyUpdatesInManifestFile(latestManifest, expectedIndexNames, 2, deletedIndexNames, true); + routingTables = getRoutingTableFromAllNodes(); + // Verify indices in routing table + 
expectedIndicesInRoutingTable = Set.of(INDEX_NAME, INDEX_NAME_1); + assertEquals(routingTables.get(0).getIndicesRouting().keySet(), expectedIndicesInRoutingTable); + // Verify routing table across all nodes is equal + assertTrue(areRoutingTablesSame(routingTables)); + } + public void testRemoteRoutingTableEmptyRoutingTableDiff() throws Exception { prepareClusterAndVerifyRepository(); @@ -166,7 +212,7 @@ public void testRemoteRoutingTableIndexNodeRestart() throws Exception { assertRemoteStoreRepositoryOnAllNodes(REMOTE_ROUTING_TABLE_REPO); assertBusy(() -> { - int indexRoutingFilesAfterNodeDrop = repository.blobStore().blobContainer(indexRoutingPath).listBlobs().size(); + int indexRoutingFilesAfterNodeDrop = repository.blobStore().blobContainer(indexRoutingPaths.get(0)).listBlobs().size(); assertTrue(indexRoutingFilesAfterNodeDrop > indexRoutingFiles.get()); }); @@ -201,7 +247,7 @@ public void testRemoteRoutingTableIndexMasterRestart() throws Exception { assertRemoteStoreRepositoryOnAllNodes(REMOTE_ROUTING_TABLE_REPO); assertBusy(() -> { - int indexRoutingFilesAfterNodeDrop = repository.blobStore().blobContainer(indexRoutingPath).listBlobs().size(); + int indexRoutingFilesAfterNodeDrop = repository.blobStore().blobContainer(indexRoutingPaths.get(0)).listBlobs().size(); assertTrue(indexRoutingFilesAfterNodeDrop > indexRoutingFiles.get()); }); @@ -240,10 +286,14 @@ private BlobStoreRepository prepareClusterAndVerifyRepository() throws Exception BlobPath baseMetadataPath = getBaseMetadataPath(repository); List indexRoutingTables = new ArrayList<>(getClusterState().routingTable().indicesRouting().values()); - indexRoutingPath = getIndexRoutingPath(baseMetadataPath.add(INDEX_ROUTING_TABLE), indexRoutingTables.get(0).getIndex().getUUID()); + indexRoutingPaths = new ArrayList<>(); + for (IndexRoutingTable indexRoutingTable : indexRoutingTables) { + indexRoutingPaths.add(getIndexRoutingPath(baseMetadataPath.add(INDEX_ROUTING_TABLE), indexRoutingTable.getIndex().getUUID())); + } assertBusy(() -> { - indexRoutingFiles.set(repository.blobStore().blobContainer(indexRoutingPath).listBlobs().size()); + int totalRoutingFiles = calculateTotalRoutingFiles(repository); + indexRoutingFiles.set(totalRoutingFiles); // There would be >=3 files as shards will transition from UNASSIGNED -> INIT -> STARTED state assertTrue(indexRoutingFiles.get() >= 3); }); @@ -280,11 +330,19 @@ private void verifyUpdatesInManifestFile( assertTrue(latestManifest.isPresent()); ClusterMetadataManifest manifest = latestManifest.get(); - assertEquals(expectedIndexNames, manifest.getDiffManifest().getIndicesRoutingUpdated()); assertEquals(expectedDeletedIndex, manifest.getDiffManifest().getIndicesDeleted()); assertEquals(expectedIndicesRoutingFilesInManifest, manifest.getIndicesRouting().size()); + + // Check if all paths in manifest.getIndicesRouting() are present in indexRoutingPaths for (ClusterMetadataManifest.UploadedIndexMetadata uploadedFilename : manifest.getIndicesRouting()) { - assertTrue(uploadedFilename.getUploadedFilename().contains(indexRoutingPath.buildAsString())); + boolean pathFound = false; + for (BlobPath indexRoutingPath : indexRoutingPaths) { + if (uploadedFilename.getUploadedFilename().contains(indexRoutingPath.buildAsString())) { + pathFound = true; + break; + } + } + assertTrue("Uploaded file not found in indexRoutingPaths: " + uploadedFilename.getUploadedFilename(), pathFound); } assertEquals(isRoutingTableDiffFileExpected, manifest.getDiffManifest().getIndicesRoutingDiffPath() != null); } @@ -305,6 +363,24 @@ 
private List getRoutingTableFromAllNodes() throws ExecutionExcepti return routingTables; } + private void updateIndexRoutingPaths(BlobStoreRepository repository) { + BlobPath baseMetadataPath = getBaseMetadataPath(repository); + List indexRoutingTables = new ArrayList<>(getClusterState().routingTable().indicesRouting().values()); + + indexRoutingPaths.clear(); // Clear the list to avoid stale data + for (IndexRoutingTable indexRoutingTable : indexRoutingTables) { + indexRoutingPaths.add(getIndexRoutingPath(baseMetadataPath.add(INDEX_ROUTING_TABLE), indexRoutingTable.getIndex().getUUID())); + } + } + + private int calculateTotalRoutingFiles(BlobStoreRepository repository) throws IOException { + int totalRoutingFiles = 0; + for (BlobPath path : indexRoutingPaths) { + totalRoutingFiles += repository.blobStore().blobContainer(path).listBlobs().size(); + } + return totalRoutingFiles; + } + private boolean areRoutingTablesSame(List routingTables) { if (routingTables == null || routingTables.isEmpty()) { return false; @@ -356,7 +432,6 @@ private void deleteIndexAndVerify(RemoteManifestManager remoteManifestManager) { ); assertTrue(latestManifest.isPresent()); ClusterMetadataManifest manifest = latestManifest.get(); - assertTrue(manifest.getDiffManifest().getIndicesRoutingUpdated().isEmpty()); assertTrue(manifest.getDiffManifest().getIndicesDeleted().contains(INDEX_NAME)); assertTrue(manifest.getIndicesRouting().isEmpty()); } diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteStatePublicationIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteStatePublicationIT.java index 6a2e7ce4957ae..faab3645ae894 100644 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteStatePublicationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteStatePublicationIT.java @@ -8,18 +8,30 @@ package org.opensearch.gateway.remote; +import org.opensearch.action.admin.cluster.node.info.NodesInfoResponse; +import org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse; import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; import org.opensearch.action.admin.cluster.state.ClusterStateResponse; import org.opensearch.client.Client; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.coordination.CoordinationState; +import org.opensearch.cluster.coordination.PersistedStateRegistry; +import org.opensearch.cluster.coordination.PublishClusterStateStats; import org.opensearch.common.blobstore.BlobPath; import org.opensearch.common.settings.Settings; -import org.opensearch.common.util.FeatureFlags; +import org.opensearch.discovery.DiscoveryStats; +import org.opensearch.gateway.GatewayMetaState; +import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata; import org.opensearch.gateway.remote.model.RemoteClusterMetadataManifest; +import org.opensearch.gateway.remote.model.RemoteRoutingTableBlobStore; +import org.opensearch.index.remote.RemoteStoreEnums.PathHashAlgorithm; +import org.opensearch.index.remote.RemoteStoreEnums.PathType; import org.opensearch.indices.recovery.RecoverySettings; import org.opensearch.remotestore.RemoteStoreBaseIntegTestCase; import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.blobstore.BlobStoreRepository; import org.opensearch.repositories.fs.ReloadableFsRepository; +import org.opensearch.test.InternalTestCluster; import 
org.opensearch.test.OpenSearchIntegTestCase.ClusterScope; import org.opensearch.test.OpenSearchIntegTestCase.Scope; import org.junit.Before; @@ -27,13 +39,22 @@ import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.Base64; +import java.util.HashSet; import java.util.Locale; import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.ExecutionException; import java.util.function.Function; import java.util.stream.Collectors; +import static org.opensearch.action.admin.cluster.node.info.NodesInfoRequest.Metric.SETTINGS; +import static org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest.Metric.DISCOVERY; +import static org.opensearch.cluster.metadata.Metadata.isGlobalStateEquals; import static org.opensearch.gateway.remote.RemoteClusterStateAttributesManager.DISCOVERY_NODES; import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING; +import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_PUBLICATION_SETTING; +import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_PUBLICATION_SETTING_KEY; import static org.opensearch.gateway.remote.RemoteClusterStateUtils.DELIMITER; import static org.opensearch.gateway.remote.model.RemoteClusterBlocks.CLUSTER_BLOCKS; import static org.opensearch.gateway.remote.model.RemoteCoordinationMetadata.COORDINATION_METADATA; @@ -45,27 +66,27 @@ import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.startsWith; @ClusterScope(scope = Scope.TEST, numDataNodes = 0) public class RemoteStatePublicationIT extends RemoteStoreBaseIntegTestCase { - private static String INDEX_NAME = "test-index"; + private static final String INDEX_NAME = "test-index"; + private static final String REMOTE_STATE_PREFIX = "!"; + private static final String REMOTE_ROUTING_PREFIX = "_"; private boolean isRemoteStateEnabled = true; - private String isRemotePublicationEnabled = "true"; + private boolean isRemotePublicationEnabled = true; + private boolean hasRemoteStateCharPrefix; + private boolean hasRemoteRoutingCharPrefix; @Before public void setup() { asyncUploadMockFsRepo = false; isRemoteStateEnabled = true; - isRemotePublicationEnabled = "true"; - } - - @Override - protected Settings featureFlagSettings() { - return Settings.builder() - .put(super.featureFlagSettings()) - .put(FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL, isRemotePublicationEnabled) - .build(); + isRemotePublicationEnabled = true; + hasRemoteStateCharPrefix = randomBoolean(); + hasRemoteRoutingCharPrefix = randomBoolean(); } @Override @@ -81,12 +102,32 @@ protected Settings nodeSettings(int nodeOrdinal) { "node.attr." + REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX, routingTableRepoName ); + return Settings.builder() .put(super.nodeSettings(nodeOrdinal)) .put(REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), isRemoteStateEnabled) + .put(REMOTE_PUBLICATION_SETTING_KEY, isRemotePublicationEnabled) .put("node.attr." 
+ REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, routingTableRepoName) .put(routingTableRepoTypeAttributeKey, ReloadableFsRepository.TYPE) .put(routingTableRepoSettingsAttributeKeyPrefix + "location", segmentRepoPath) + .put( + RemoteClusterStateService.REMOTE_CLUSTER_STATE_CHECKSUM_VALIDATION_MODE_SETTING.getKey(), + RemoteClusterStateService.RemoteClusterStateValidationMode.FAILURE + ) + .put(REMOTE_PUBLICATION_SETTING_KEY, isRemotePublicationEnabled) + .put( + RemoteClusterStateService.CLUSTER_REMOTE_STORE_STATE_PATH_PREFIX.getKey(), + hasRemoteStateCharPrefix ? REMOTE_STATE_PREFIX : "" + ) + .put( + RemoteRoutingTableBlobStore.CLUSTER_REMOTE_STORE_ROUTING_TABLE_PATH_PREFIX.getKey(), + hasRemoteRoutingCharPrefix ? REMOTE_ROUTING_PREFIX : "" + ) + .put(RemoteIndexMetadataManager.REMOTE_INDEX_METADATA_PATH_TYPE_SETTING.getKey(), PathType.HASHED_PREFIX.toString()) + .put( + RemoteIndexMetadataManager.REMOTE_INDEX_METADATA_PATH_HASH_ALGO_SETTING.getKey(), + PathHashAlgorithm.FNV_1A_COMPOSITE_1.toString() + ) .build(); } @@ -130,6 +171,27 @@ public void testPublication() throws Exception { Map manifestFiles = getMetadataFiles(repository, RemoteClusterMetadataManifest.MANIFEST); assertTrue(manifestFiles.containsKey(RemoteClusterMetadataManifest.MANIFEST)); + RemoteClusterStateService remoteClusterStateService = internalCluster().getInstance( + RemoteClusterStateService.class, + internalCluster().getClusterManagerName() + ); + ClusterMetadataManifest manifest = remoteClusterStateService.getLatestClusterMetadataManifest( + getClusterState().getClusterName().value(), + getClusterState().metadata().clusterUUID() + ).get(); + assertThat(manifest.getIndices().size(), is(1)); + if (hasRemoteStateCharPrefix) { + for (UploadedIndexMetadata md : manifest.getIndices()) { + assertThat(md.getUploadedFilename(), startsWith(REMOTE_STATE_PREFIX)); + } + } + assertThat(manifest.getIndicesRouting().size(), is(1)); + if (hasRemoteRoutingCharPrefix) { + for (UploadedIndexMetadata md : manifest.getIndicesRouting()) { + assertThat(md.getUploadedFilename(), startsWith(REMOTE_ROUTING_PREFIX)); + } + } + // get settings from each node and verify that it is updated Settings settings = clusterService().getSettings(); logger.info("settings : {}", settings); @@ -155,6 +217,200 @@ public void testRemotePublicationDisableIfRemoteStateDisabled() { assertNull(internalCluster().getCurrentClusterManagerNodeInstance(RemoteClusterStateService.class)); } + public void testRemotePublicationDownloadStats() { + int shardCount = randomIntBetween(1, 2); + int replicaCount = 1; + int dataNodeCount = shardCount * (replicaCount + 1); + int clusterManagerNodeCount = 1; + prepareCluster(clusterManagerNodeCount, dataNodeCount, INDEX_NAME, replicaCount, shardCount); + String dataNode = internalCluster().getDataNodeNames().stream().collect(Collectors.toList()).get(0); + + NodesStatsResponse nodesStatsResponseDataNode = client().admin() + .cluster() + .prepareNodesStats(dataNode) + .addMetric(DISCOVERY.metricName()) + .get(); + + assertDataNodeDownloadStats(nodesStatsResponseDataNode); + } + + @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/15767") + public void testRemotePublicationDisabledByRollingRestart() throws Exception { + prepareCluster(3, 2, INDEX_NAME, 1, 2); + ensureStableCluster(5); + ensureGreen(INDEX_NAME); + + Set clusterManagers = internalCluster().getClusterManagerNames(); + Set restartedMasters = new HashSet<>(); + + for (String clusterManager : clusterManagers) { + 
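+ // restart each cluster-manager with remote publication disabled; once the active cluster-manager has been restarted, followers should start receiving cluster states over transport publication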
internalCluster().restartNode(clusterManager, new InternalTestCluster.RestartCallback() { + @Override + public Settings onNodeStopped(String nodeName) { + restartedMasters.add(nodeName); + return Settings.builder().put(REMOTE_PUBLICATION_SETTING_KEY, false).build(); + } + + @Override + public void doAfterNodes(int n, Client client) { + String activeCM = internalCluster().getClusterManagerName(); + Set followingCMs = clusterManagers.stream() + .filter(node -> !Objects.equals(node, activeCM)) + .collect(Collectors.toSet()); + boolean activeCMRestarted = restartedMasters.contains(activeCM); + NodesStatsResponse response = client().admin() + .cluster() + .prepareNodesStats(followingCMs.toArray(new String[0])) + .clear() + .addMetric(DISCOVERY.metricName()) + .get(); + // after master is flipped to restarted master, publication should happen on Transport + response.getNodes().forEach(nodeStats -> { + if (activeCMRestarted) { + PublishClusterStateStats stats = nodeStats.getDiscoveryStats().getPublishStats(); + assertTrue( + stats.getFullClusterStateReceivedCount() > 0 || stats.getCompatibleClusterStateDiffReceivedCount() > 0 + ); + assertEquals(0, stats.getIncompatibleClusterStateDiffReceivedCount()); + } else { + DiscoveryStats stats = nodeStats.getDiscoveryStats(); + assertEquals(0, stats.getPublishStats().getFullClusterStateReceivedCount()); + assertEquals(0, stats.getPublishStats().getCompatibleClusterStateDiffReceivedCount()); + assertEquals(0, stats.getPublishStats().getIncompatibleClusterStateDiffReceivedCount()); + } + }); + + NodesInfoResponse nodesInfoResponse = client().admin() + .cluster() + .prepareNodesInfo(activeCM) + .clear() + .addMetric(SETTINGS.metricName()) + .get(); + // if masterRestarted is true Publication Setting should be false, and vice versa + assertTrue(REMOTE_PUBLICATION_SETTING.get(nodesInfoResponse.getNodes().get(0).getSettings()) != activeCMRestarted); + + followingCMs.forEach(node -> { + PersistedStateRegistry registry = internalCluster().getInstance(PersistedStateRegistry.class, node); + CoordinationState.PersistedState remoteState = registry.getPersistedState( + PersistedStateRegistry.PersistedStateType.REMOTE + ); + if (activeCMRestarted) { + assertNull(remoteState.getLastAcceptedState()); + // assertNull(remoteState.getLastAcceptedManifest()); + } else { + ClusterState localState = registry.getPersistedState(PersistedStateRegistry.PersistedStateType.LOCAL) + .getLastAcceptedState(); + ClusterState remotePersistedState = remoteState.getLastAcceptedState(); + assertTrue(isGlobalStateEquals(localState.metadata(), remotePersistedState.metadata())); + assertEquals(localState.nodes(), remotePersistedState.nodes()); + assertEquals(localState.routingTable(), remotePersistedState.routingTable()); + assertEquals(localState.customs(), remotePersistedState.customs()); + } + }); + } + }); + + } + ensureGreen(INDEX_NAME); + ensureStableCluster(5); + + String activeCM = internalCluster().getClusterManagerName(); + Set followingCMs = clusterManagers.stream().filter(node -> !Objects.equals(node, activeCM)).collect(Collectors.toSet()); + NodesStatsResponse response = client().admin() + .cluster() + .prepareNodesStats(followingCMs.toArray(new String[0])) + .clear() + .addMetric(DISCOVERY.metricName()) + .get(); + response.getNodes().forEach(nodeStats -> { + PublishClusterStateStats stats = nodeStats.getDiscoveryStats().getPublishStats(); + assertTrue(stats.getFullClusterStateReceivedCount() > 0 || stats.getCompatibleClusterStateDiffReceivedCount() > 0); + assertEquals(0, 
stats.getIncompatibleClusterStateDiffReceivedCount()); + }); + NodesInfoResponse nodesInfoResponse = client().admin() + .cluster() + .prepareNodesInfo(activeCM) + .clear() + .addMetric(SETTINGS.metricName()) + .get(); + // every cluster-manager has been restarted by now, so the remote publication setting must be false + assertFalse(REMOTE_PUBLICATION_SETTING.get(nodesInfoResponse.getNodes().get(0).getSettings())); + + followingCMs.forEach(node -> { + PersistedStateRegistry registry = internalCluster().getInstance(PersistedStateRegistry.class, node); + CoordinationState.PersistedState remoteState = registry.getPersistedState(PersistedStateRegistry.PersistedStateType.REMOTE); + assertNull(remoteState.getLastAcceptedState()); + // assertNull(remoteState.getLastAcceptedManifest()); + }); + } + + public void testMasterReElectionUsesIncrementalUpload() throws IOException { + prepareCluster(3, 2, INDEX_NAME, 1, 1); + PersistedStateRegistry persistedStateRegistry = internalCluster().getClusterManagerNodeInstance(PersistedStateRegistry.class); + GatewayMetaState.RemotePersistedState remotePersistedState = (GatewayMetaState.RemotePersistedState) persistedStateRegistry + .getPersistedState(PersistedStateRegistry.PersistedStateType.REMOTE); + ClusterMetadataManifest manifest = remotePersistedState.getLastAcceptedManifest(); + // force elected master to step down + internalCluster().stopCurrentClusterManagerNode(); + ensureStableCluster(4); + + persistedStateRegistry = internalCluster().getClusterManagerNodeInstance(PersistedStateRegistry.class); + CoordinationState.PersistedState persistedStateAfterElection = persistedStateRegistry.getPersistedState( + PersistedStateRegistry.PersistedStateType.REMOTE + ); + ClusterMetadataManifest manifestAfterElection = persistedStateAfterElection.getLastAcceptedManifest(); + + // coordination metadata is updated, it will be unequal + assertNotEquals(manifest.getCoordinationMetadata(), manifestAfterElection.getCoordinationMetadata()); + // all other attributes are not uploaded again and will be pointing to same files in manifest after new master is elected + assertEquals(manifest.getClusterUUID(), manifestAfterElection.getClusterUUID()); + assertEquals(manifest.getIndices(), manifestAfterElection.getIndices()); + assertEquals(manifest.getSettingsMetadata(), manifestAfterElection.getSettingsMetadata()); + assertEquals(manifest.getTemplatesMetadata(), manifestAfterElection.getTemplatesMetadata()); + assertEquals(manifest.getCustomMetadataMap(), manifestAfterElection.getCustomMetadataMap()); + assertEquals(manifest.getRoutingTableVersion(), manifestAfterElection.getRoutingTableVersion()); + assertEquals(manifest.getIndicesRouting(), manifestAfterElection.getIndicesRouting()); + } + + public void testVotingConfigAreCommitted() throws ExecutionException, InterruptedException { + prepareCluster(3, 2, INDEX_NAME, 1, 2); + ensureStableCluster(5); + ensureGreen(INDEX_NAME); + // add two new nodes to the cluster, to update the voting config + internalCluster().startClusterManagerOnlyNodes(2, Settings.EMPTY); + ensureStableCluster(7); + + internalCluster().getInstances(PersistedStateRegistry.class).forEach(persistedStateRegistry -> { + CoordinationState.PersistedState localState = persistedStateRegistry.getPersistedState( + PersistedStateRegistry.PersistedStateType.LOCAL + ); + CoordinationState.PersistedState remoteState = persistedStateRegistry.getPersistedState( + PersistedStateRegistry.PersistedStateType.REMOTE + ); + if (remoteState != null) { + assertEquals(
localState.getLastAcceptedState().getLastCommittedConfiguration(), + remoteState.getLastAcceptedState().getLastCommittedConfiguration() + ); + assertEquals(5, remoteState.getLastAcceptedState().getLastCommittedConfiguration().getNodeIds().size()); + } + }); + } + + private void assertDataNodeDownloadStats(NodesStatsResponse nodesStatsResponse) { + // assert cluster state stats for data node + DiscoveryStats dataNodeDiscoveryStats = nodesStatsResponse.getNodes().get(0).getDiscoveryStats(); + assertNotNull(dataNodeDiscoveryStats.getClusterStateStats()); + assertEquals(0, dataNodeDiscoveryStats.getClusterStateStats().getUpdateSuccess()); + assertTrue(dataNodeDiscoveryStats.getClusterStateStats().getPersistenceStats().get(0).getSuccessCount() > 0); + assertEquals(0, dataNodeDiscoveryStats.getClusterStateStats().getPersistenceStats().get(0).getFailedCount()); + assertTrue(dataNodeDiscoveryStats.getClusterStateStats().getPersistenceStats().get(0).getTotalTimeInMillis() > 0); + + assertTrue(dataNodeDiscoveryStats.getClusterStateStats().getPersistenceStats().get(1).getSuccessCount() > 0); + assertEquals(0, dataNodeDiscoveryStats.getClusterStateStats().getPersistenceStats().get(1).getFailedCount()); + assertTrue(dataNodeDiscoveryStats.getClusterStateStats().getPersistenceStats().get(1).getTotalTimeInMillis() > 0); + } + private Map getMetadataFiles(BlobStoreRepository repository, String subDirectory) throws IOException { BlobPath metadataPath = repository.basePath() .add( diff --git a/server/src/internalClusterTest/java/org/opensearch/index/mapper/StarTreeMapperIT.java b/server/src/internalClusterTest/java/org/opensearch/index/mapper/StarTreeMapperIT.java index 52c6c6801a3c2..0d69b762ab4f2 100644 --- a/server/src/internalClusterTest/java/org/opensearch/index/mapper/StarTreeMapperIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/index/mapper/StarTreeMapperIT.java @@ -8,19 +8,26 @@ package org.opensearch.index.mapper; +import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; +import org.opensearch.action.index.IndexResponse; +import org.opensearch.action.search.SearchResponse; import org.opensearch.action.support.master.AcknowledgedResponse; -import org.opensearch.common.Rounding; import org.opensearch.common.settings.Settings; import org.opensearch.common.util.FeatureFlags; +import org.opensearch.core.common.unit.ByteSizeUnit; +import org.opensearch.core.common.unit.ByteSizeValue; import org.opensearch.core.index.Index; +import org.opensearch.core.rest.RestStatus; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.index.IndexService; +import org.opensearch.index.IndexSettings; import org.opensearch.index.compositeindex.CompositeIndexSettings; -import org.opensearch.index.compositeindex.datacube.DateDimension; import org.opensearch.index.compositeindex.datacube.MetricStat; import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; import org.opensearch.index.compositeindex.datacube.startree.StarTreeIndexSettings; +import org.opensearch.index.query.QueryBuilders; import org.opensearch.indices.IndicesService; +import org.opensearch.search.SearchHit; import org.opensearch.test.OpenSearchIntegTestCase; import org.junit.After; import org.junit.Before; @@ -39,6 +46,10 @@ */ public class StarTreeMapperIT extends OpenSearchIntegTestCase { private static final String TEST_INDEX = "test"; + Settings settings = Settings.builder() + .put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), true) + 
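+ // composite (star-tree) indices require the translog flush threshold to be at most 512mb, so pin it to the maximum allowed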
.put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue(512, ByteSizeUnit.MB)) + .build(); private static XContentBuilder createMinimalTestMapping(boolean invalidDim, boolean invalidMetric, boolean keywordDim) { try { @@ -49,7 +60,10 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool .startObject("config") .startArray("ordered_dimensions") .startObject() - .field("name", "timestamp") + .field("name", "numeric_dv_1") + .endObject() + .startObject() + .field("name", "numeric_dv_2") .endObject() .startObject() .field("name", getDim(invalidDim, keywordDim)) @@ -71,6 +85,14 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool .field("type", "integer") .field("doc_values", true) .endObject() + .startObject("numeric_dv_1") + .field("type", "integer") + .field("doc_values", true) + .endObject() + .startObject("numeric_dv_2") + .field("type", "integer") + .field("doc_values", true) + .endObject() .startObject("numeric") .field("type", "integer") .field("doc_values", false) @@ -99,11 +121,7 @@ private static XContentBuilder createMaxDimTestMapping() { .startObject("config") .startArray("ordered_dimensions") .startObject() - .field("name", "timestamp") - .startArray("calendar_intervals") - .value("day") - .value("month") - .endArray() + .field("name", "dim4") .endObject() .startObject() .field("name", "dim2") @@ -116,6 +134,12 @@ private static XContentBuilder createMaxDimTestMapping() { .startObject() .field("name", "dim2") .endObject() + .startObject() + .field("name", "dim3") + .endObject() + .startObject() + .field("name", "dim4") + .endObject() .endArray() .endObject() .endObject() @@ -132,6 +156,10 @@ private static XContentBuilder createMaxDimTestMapping() { .field("type", "integer") .field("doc_values", true) .endObject() + .startObject("dim4") + .field("type", "integer") + .field("doc_values", true) + .endObject() .endObject() .endObject(); } catch (IOException e) { @@ -178,7 +206,7 @@ private static XContentBuilder createUpdateTestMapping(boolean changeDim, boolea .startObject("config") .startArray("ordered_dimensions") .startObject() - .field("name", "timestamp") + .field("name", "numeric_dv1") .endObject() .startObject() .field("name", changeDim ? "numeric_new" : getDim(false, false)) @@ -200,6 +228,10 @@ private static XContentBuilder createUpdateTestMapping(boolean changeDim, boolea .field("type", "integer") .field("doc_values", true) .endObject() + .startObject("numeric_dv1") + .field("type", "integer") + .field("doc_values", true) + .endObject() .startObject("numeric") .field("type", "integer") .field("doc_values", false) @@ -223,6 +255,60 @@ private static XContentBuilder createUpdateTestMapping(boolean changeDim, boolea } } + private XContentBuilder getMappingWithDuplicateFields(boolean isDuplicateDim, boolean isDuplicateMetric) { + XContentBuilder mapping = null; + try { + mapping = jsonBuilder().startObject() + .startObject("composite") + .startObject("startree-1") + .field("type", "star_tree") + .startObject("config") + .startArray("ordered_dimensions") + .startObject() + .field("name", "numeric_dv2") + .endObject() + .startObject() + .field("name", "numeric_dv") + .endObject() + .startObject() + .field("name", isDuplicateDim ? "numeric_dv" : "numeric_dv1") // Duplicate dimension + .endObject() + .endArray() + .startArray("metrics") + .startObject() + .field("name", "numeric_dv") + .endObject() + .startObject() + .field("name", isDuplicateMetric ? 
"numeric_dv" : "numeric_dv1") // Duplicate metric + .endObject() + .endArray() + .endObject() + .endObject() + .endObject() + .startObject("properties") + .startObject("timestamp") + .field("type", "date") + .endObject() + .startObject("numeric_dv") + .field("type", "integer") + .field("doc_values", true) + .endObject() + .startObject("numeric_dv2") + .field("type", "integer") + .field("doc_values", true) + .endObject() + .startObject("numeric_dv1") + .field("type", "integer") + .field("doc_values", true) + .endObject() + .endObject() + .endObject(); + } catch (IOException e) { + fail("Failed to create mapping: " + e.getMessage()); + } + return mapping; + } + private static String getDim(boolean hasDocValues, boolean isKeyword) { if (hasDocValues) { return "numeric"; @@ -244,7 +330,7 @@ public final void setupNodeSettings() { } public void testValidCompositeIndex() { - prepareCreate(TEST_INDEX).setMapping(createMinimalTestMapping(false, false, false)).get(); + prepareCreate(TEST_INDEX).setSettings(settings).setMapping(createMinimalTestMapping(false, false, false)).get(); Iterable dataNodeInstances = internalCluster().getDataNodeInstances(IndicesService.class); for (IndicesService service : dataNodeInstances) { final Index index = resolveIndex("test"); @@ -255,15 +341,8 @@ public void testValidCompositeIndex() { for (CompositeMappedFieldType ft : fts) { assertTrue(ft instanceof StarTreeMapper.StarTreeFieldType); StarTreeMapper.StarTreeFieldType starTreeFieldType = (StarTreeMapper.StarTreeFieldType) ft; - assertEquals("timestamp", starTreeFieldType.getDimensions().get(0).getField()); - assertTrue(starTreeFieldType.getDimensions().get(0) instanceof DateDimension); - DateDimension dateDim = (DateDimension) starTreeFieldType.getDimensions().get(0); - List expectedTimeUnits = Arrays.asList( - Rounding.DateTimeUnit.MINUTES_OF_HOUR, - Rounding.DateTimeUnit.HOUR_OF_DAY - ); - assertEquals(expectedTimeUnits, dateDim.getIntervals()); - assertEquals("numeric_dv", starTreeFieldType.getDimensions().get(1).getField()); + assertEquals("numeric_dv_1", starTreeFieldType.getDimensions().get(0).getField()); + assertEquals("numeric_dv_2", starTreeFieldType.getDimensions().get(1).getField()); assertEquals(2, starTreeFieldType.getMetrics().size()); assertEquals("numeric_dv", starTreeFieldType.getMetrics().get(0).getField()); @@ -285,8 +364,91 @@ public void testValidCompositeIndex() { } } + public void testCompositeIndexWithIndexNotSpecified() { + Settings settings = Settings.builder() + .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue(512, ByteSizeUnit.MB)) + .build(); + MapperParsingException ex = expectThrows( + MapperParsingException.class, + () -> prepareCreate(TEST_INDEX).setSettings(settings).setMapping(createMinimalTestMapping(false, false, false)).get() + ); + assertEquals( + "Failed to parse mapping [_doc]: Set 'index.composite_index' as true as part of index settings to use star tree index", + ex.getMessage() + ); + } + + public void testCompositeIndexWithHigherTranslogFlushSize() { + Settings settings = Settings.builder() + .put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), true) + .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue(513, ByteSizeUnit.MB)) + .build(); + IllegalArgumentException ex = expectThrows( + IllegalArgumentException.class, + () -> prepareCreate(TEST_INDEX).setSettings(settings).setMapping(createMinimalTestMapping(false, false, false)).get() + ); + assertEquals("You can configure 
'index.translog.flush_threshold_size' with upto '512mb' for composite index", ex.getMessage()); + } + + public void testCompositeIndexWithArraysInCompositeField() throws IOException { + prepareCreate(TEST_INDEX).setSettings(settings).setMapping(createMinimalTestMapping(false, false, false)).get(); + // Attempt to index a document with an array field + XContentBuilder doc = jsonBuilder().startObject() + .field("timestamp", "2023-06-01T12:00:00Z") + .startArray("numeric_dv") + .value(10) + .value(20) + .value(30) + .endArray() + .endObject(); + + // Index the document and refresh + MapperParsingException ex = expectThrows( + MapperParsingException.class, + () -> client().prepareIndex(TEST_INDEX).setSource(doc).get() + ); + assertEquals( + "object mapping for [_doc] with array for [numeric_dv] cannot be accepted as field is also part of composite index mapping which does not accept arrays", + ex.getMessage() + ); + } + + public void testCompositeIndexWithArraysInNonCompositeField() throws IOException { + prepareCreate(TEST_INDEX).setSettings(settings).setMapping(createMinimalTestMapping(false, false, false)).get(); + // Attempt to index a document with an array field + XContentBuilder doc = jsonBuilder().startObject() + .field("timestamp", "2023-06-01T12:00:00Z") + .startArray("numeric") + .value(10) + .value(20) + .value(30) + .endArray() + .endObject(); + + // Index the document and refresh + IndexResponse indexResponse = client().prepareIndex(TEST_INDEX).setSource(doc).get(); + + assertEquals(RestStatus.CREATED, indexResponse.status()); + + client().admin().indices().prepareRefresh(TEST_INDEX).get(); + // Verify the document was indexed + SearchResponse searchResponse = client().prepareSearch(TEST_INDEX).setQuery(QueryBuilders.matchAllQuery()).get(); + + assertEquals(1, searchResponse.getHits().getTotalHits().value); + + // Verify the values in the indexed document + SearchHit hit = searchResponse.getHits().getAt(0); + assertEquals("2023-06-01T12:00:00Z", hit.getSourceAsMap().get("timestamp")); + + List values = (List) hit.getSourceAsMap().get("numeric"); + assertEquals(3, values.size()); + assertTrue(values.contains(10)); + assertTrue(values.contains(20)); + assertTrue(values.contains(30)); + } + public void testUpdateIndexWithAdditionOfStarTree() { - prepareCreate(TEST_INDEX).setMapping(createMinimalTestMapping(false, false, false)).get(); + prepareCreate(TEST_INDEX).setSettings(settings).setMapping(createMinimalTestMapping(false, false, false)).get(); IllegalArgumentException ex = expectThrows( IllegalArgumentException.class, @@ -296,7 +458,7 @@ public void testUpdateIndexWithAdditionOfStarTree() { } public void testUpdateIndexWithNewerStarTree() { - prepareCreate(TEST_INDEX).setMapping(createTestMappingWithoutStarTree(false, false, false)).get(); + prepareCreate(TEST_INDEX).setSettings(settings).setMapping(createTestMappingWithoutStarTree(false, false, false)).get(); IllegalArgumentException ex = expectThrows( IllegalArgumentException.class, @@ -309,7 +471,7 @@ public void testUpdateIndexWithNewerStarTree() { } public void testUpdateIndexWhenMappingIsDifferent() { - prepareCreate(TEST_INDEX).setMapping(createMinimalTestMapping(false, false, false)).get(); + prepareCreate(TEST_INDEX).setSettings(settings).setMapping(createMinimalTestMapping(false, false, false)).get(); // update some field in the mapping IllegalArgumentException ex = expectThrows( @@ -320,7 +482,7 @@ public void testUpdateIndexWhenMappingIsDifferent() { } public void testUpdateIndexWhenMappingIsSame() { - 
prepareCreate(TEST_INDEX).setMapping(createMinimalTestMapping(false, false, false)).get(); + prepareCreate(TEST_INDEX).setSettings(settings).setMapping(createMinimalTestMapping(false, false, false)).get(); // update some field in the mapping AcknowledgedResponse putMappingResponse = client().admin() @@ -340,15 +502,8 @@ public void testUpdateIndexWhenMappingIsSame() { for (CompositeMappedFieldType ft : fts) { assertTrue(ft instanceof StarTreeMapper.StarTreeFieldType); StarTreeMapper.StarTreeFieldType starTreeFieldType = (StarTreeMapper.StarTreeFieldType) ft; - assertEquals("timestamp", starTreeFieldType.getDimensions().get(0).getField()); - assertTrue(starTreeFieldType.getDimensions().get(0) instanceof DateDimension); - DateDimension dateDim = (DateDimension) starTreeFieldType.getDimensions().get(0); - List expectedTimeUnits = Arrays.asList( - Rounding.DateTimeUnit.MINUTES_OF_HOUR, - Rounding.DateTimeUnit.HOUR_OF_DAY - ); - assertEquals(expectedTimeUnits, dateDim.getIntervals()); - assertEquals("numeric_dv", starTreeFieldType.getDimensions().get(1).getField()); + assertEquals("numeric_dv_1", starTreeFieldType.getDimensions().get(0).getField()); + assertEquals("numeric_dv_2", starTreeFieldType.getDimensions().get(1).getField()); assertEquals("numeric_dv", starTreeFieldType.getMetrics().get(0).getField()); // Assert default metrics @@ -368,7 +523,7 @@ public void testUpdateIndexWhenMappingIsSame() { public void testInvalidDimCompositeIndex() { IllegalArgumentException ex = expectThrows( IllegalArgumentException.class, - () -> prepareCreate(TEST_INDEX).setMapping(createMinimalTestMapping(true, false, false)).get() + () -> prepareCreate(TEST_INDEX).setSettings(settings).setMapping(createMinimalTestMapping(true, false, false)).get() ); assertEquals( "Aggregations not supported for the dimension field [numeric] with field type [integer] as part of star tree field", @@ -379,8 +534,14 @@ public void testInvalidDimCompositeIndex() { public void testMaxDimsCompositeIndex() { MapperParsingException ex = expectThrows( MapperParsingException.class, - () -> prepareCreate(TEST_INDEX).setMapping(createMaxDimTestMapping()) - .setSettings(Settings.builder().put(StarTreeIndexSettings.STAR_TREE_MAX_DIMENSIONS_SETTING.getKey(), 2)) + () -> prepareCreate(TEST_INDEX).setSettings(settings) + .setMapping(createMaxDimTestMapping()) + .setSettings( + Settings.builder() + .put(StarTreeIndexSettings.STAR_TREE_MAX_DIMENSIONS_SETTING.getKey(), 2) + .put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), true) + .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue(512, ByteSizeUnit.MB)) + ) .get() ); assertEquals( @@ -389,15 +550,21 @@ public void testMaxDimsCompositeIndex() { ); } - public void testMaxCalendarIntervalsCompositeIndex() { + public void testMaxMetricsCompositeIndex() { MapperParsingException ex = expectThrows( MapperParsingException.class, - () -> prepareCreate(TEST_INDEX).setMapping(createMaxDimTestMapping()) - .setSettings(Settings.builder().put(StarTreeIndexSettings.STAR_TREE_MAX_DATE_INTERVALS_SETTING.getKey(), 1)) + () -> prepareCreate(TEST_INDEX).setSettings(settings) + .setMapping(createMaxDimTestMapping()) + .setSettings( + Settings.builder() + .put(StarTreeIndexSettings.STAR_TREE_MAX_BASE_METRICS_SETTING.getKey(), 4) + .put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), true) + .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue(512, ByteSizeUnit.MB)) + ) .get() ); assertEquals( - "Failed to parse 
mapping [_doc]: At most [1] calendar intervals are allowed in dimension [timestamp]", + "Failed to parse mapping [_doc]: There cannot be more than [4] base metrics for star tree field [startree-1]", ex.getMessage() ); } @@ -405,7 +572,7 @@ public void testMaxCalendarIntervalsCompositeIndex() { public void testUnsupportedDim() { MapperParsingException ex = expectThrows( MapperParsingException.class, - () -> prepareCreate(TEST_INDEX).setMapping(createMinimalTestMapping(false, false, true)).get() + () -> prepareCreate(TEST_INDEX).setSettings(settings).setMapping(createMinimalTestMapping(false, false, true)).get() ); assertEquals( "Failed to parse mapping [_doc]: unsupported field type associated with dimension [keyword] as part of star tree field [startree-1]", @@ -416,7 +583,7 @@ public void testUnsupportedDim() { public void testInvalidMetric() { IllegalArgumentException ex = expectThrows( IllegalArgumentException.class, - () -> prepareCreate(TEST_INDEX).setMapping(createMinimalTestMapping(false, true, false)).get() + () -> prepareCreate(TEST_INDEX).setSettings(settings).setMapping(createMinimalTestMapping(false, true, false)).get() ); assertEquals( "Aggregations not supported for the metrics field [numeric] with field type [integer] as part of star tree field", @@ -424,6 +591,145 @@ public void testInvalidMetric() { ); } + public void testDuplicateDimensions() { + XContentBuilder finalMapping = getMappingWithDuplicateFields(true, false); + MapperParsingException ex = expectThrows( + MapperParsingException.class, + () -> prepareCreate(TEST_INDEX).setSettings(settings).setMapping(finalMapping).setSettings(settings).get() + ); + assertEquals( + "Failed to parse mapping [_doc]: Duplicate dimension [numeric_dv] present as part star tree index field [startree-1]", + ex.getMessage() + ); + } + + public void testDuplicateMetrics() { + XContentBuilder finalMapping = getMappingWithDuplicateFields(false, true); + MapperParsingException ex = expectThrows( + MapperParsingException.class, + () -> prepareCreate(TEST_INDEX).setSettings(settings).setMapping(finalMapping).setSettings(settings).get() + ); + assertEquals( + "Failed to parse mapping [_doc]: Duplicate metrics [numeric_dv] present as part star tree index field [startree-1]", + ex.getMessage() + ); + } + + public void testValidTranslogFlushThresholdSize() { + Settings indexSettings = Settings.builder() + .put(settings) + .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue(256, ByteSizeUnit.MB)) + .build(); + + AcknowledgedResponse response = prepareCreate(TEST_INDEX).setSettings(indexSettings) + .setMapping(createMinimalTestMapping(false, false, false)) + .get(); + + assertTrue(response.isAcknowledged()); + } + + public void testInvalidTranslogFlushThresholdSize() { + Settings indexSettings = Settings.builder() + .put(settings) + .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue(1024, ByteSizeUnit.MB)) + .build(); + + IllegalArgumentException ex = expectThrows( + IllegalArgumentException.class, + () -> prepareCreate(TEST_INDEX).setSettings(indexSettings).setMapping(createMinimalTestMapping(false, false, false)).get() + ); + + assertTrue( + ex.getMessage().contains("You can configure 'index.translog.flush_threshold_size' with upto '512mb' for composite index") + ); + } + + public void testTranslogFlushThresholdSizeWithDefaultCompositeSettingLow() { + Settings updatedSettings = Settings.builder() + 
.put(CompositeIndexSettings.COMPOSITE_INDEX_MAX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), "130m") + .build(); + + ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest().transientSettings(updatedSettings); + + client().admin().cluster().updateSettings(updateSettingsRequest).actionGet(); + IllegalArgumentException ex = expectThrows( + IllegalArgumentException.class, + () -> prepareCreate(TEST_INDEX).setSettings(settings).setMapping(createMinimalTestMapping(false, false, false)).get() + ); + + assertEquals("You can configure 'index.translog.flush_threshold_size' with upto '130mb' for composite index", ex.getMessage()); + } + + public void testUpdateTranslogFlushThresholdSize() { + prepareCreate(TEST_INDEX).setSettings(settings).setMapping(createMinimalTestMapping(false, false, false)).get(); + + // Update to a valid value + AcknowledgedResponse validUpdateResponse = client().admin() + .indices() + .prepareUpdateSettings(TEST_INDEX) + .setSettings(Settings.builder().put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), "256mb")) + .get(); + assertTrue(validUpdateResponse.isAcknowledged()); + + // Try to update to an invalid value + IllegalArgumentException ex = expectThrows( + IllegalArgumentException.class, + () -> client().admin() + .indices() + .prepareUpdateSettings(TEST_INDEX) + .setSettings(Settings.builder().put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), "1024mb")) + .get() + ); + + assertTrue( + ex.getMessage().contains("You can configure 'index.translog.flush_threshold_size' with upto '512mb' for composite index") + ); + + // update cluster settings to higher value + Settings updatedSettings = Settings.builder() + .put(CompositeIndexSettings.COMPOSITE_INDEX_MAX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), "1030m") + .build(); + + ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest().transientSettings(updatedSettings); + + client().admin().cluster().updateSettings(updateSettingsRequest).actionGet(); + + // update index threshold flush to higher value + validUpdateResponse = client().admin() + .indices() + .prepareUpdateSettings(TEST_INDEX) + .setSettings(Settings.builder().put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), "1024mb")) + .get(); + assertTrue(validUpdateResponse.isAcknowledged()); + } + + public void testMinimumTranslogFlushThresholdSize() { + Settings indexSettings = Settings.builder() + .put(settings) + .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue(56, ByteSizeUnit.BYTES)) + .build(); + + AcknowledgedResponse response = prepareCreate(TEST_INDEX).setSettings(indexSettings) + .setMapping(createMinimalTestMapping(false, false, false)) + .get(); + + assertTrue(response.isAcknowledged()); + } + + public void testBelowMinimumTranslogFlushThresholdSize() { + Settings indexSettings = Settings.builder() + .put(settings) + .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue(55, ByteSizeUnit.BYTES)) + .build(); + + IllegalArgumentException ex = expectThrows( + IllegalArgumentException.class, + () -> prepareCreate(TEST_INDEX).setSettings(indexSettings).setMapping(createMinimalTestMapping(false, false, false)).get() + ); + + assertEquals("failed to parse value [55b] for setting [index.translog.flush_threshold_size], must be >= [56b]", ex.getMessage()); + } + @After public final void cleanupNodeSettings() { assertAcked( diff --git 
a/server/src/internalClusterTest/java/org/opensearch/index/store/CorruptedFileIT.java b/server/src/internalClusterTest/java/org/opensearch/index/store/CorruptedFileIT.java index f46f413f4d23f..3ee506f58a9d7 100644 --- a/server/src/internalClusterTest/java/org/opensearch/index/store/CorruptedFileIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/index/store/CorruptedFileIT.java @@ -577,18 +577,12 @@ public void testCorruptFileThenSnapshotAndRestore() throws ExecutionException, I // the other problem here why we can't corrupt segments.X files is that the snapshot flushes again before // it snapshots and that will write a new segments.X+1 file logger.info("--> creating repository"); - assertAcked( - client().admin() - .cluster() - .preparePutRepository("test-repo") - .setType("fs") - .setSettings( - Settings.builder() - .put("location", randomRepoPath().toAbsolutePath()) - .put("compress", randomBoolean()) - .put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES) - ) - ); + Settings.Builder settings = Settings.builder() + .put("location", randomRepoPath().toAbsolutePath()) + .put("compress", randomBoolean()) + .put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES); + createRepository("test-repo", "fs", settings); + logger.info("--> snapshot"); final CreateSnapshotResponse createSnapshotResponse = client().admin() .cluster() @@ -761,18 +755,11 @@ public void testPrimaryCorruptionDuringReplicationDoesNotFailReplicaShard() thro // Create a snapshot repository. This repo is used to take a snapshot after // corrupting a file, which causes the node to notice the corrupt data and // close the shard. - assertAcked( - client().admin() - .cluster() - .preparePutRepository("test-repo") - .setType("fs") - .setSettings( - Settings.builder() - .put("location", randomRepoPath().toAbsolutePath()) - .put("compress", randomBoolean()) - .put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES) - ) - ); + Settings.Builder settings = Settings.builder() + .put("location", randomRepoPath().toAbsolutePath()) + .put("compress", randomBoolean()) + .put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES); + createRepository("test-repo", "fs", settings); client().prepareIndex("test").setSource("field", "value").execute(); indexingInFlight.await(); diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/IndicesOptionsIntegrationIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/IndicesOptionsIntegrationIT.java index 06d2d2a90de87..0d3c8307c060f 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/IndicesOptionsIntegrationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/IndicesOptionsIntegrationIT.java @@ -51,7 +51,6 @@ import org.opensearch.action.search.SearchRequestBuilder; import org.opensearch.action.search.SearchResponse; import org.opensearch.action.support.IndicesOptions; -import org.opensearch.action.support.master.AcknowledgedResponse; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Setting.Property; import org.opensearch.common.settings.Settings; @@ -271,14 +270,8 @@ public void testSpecifiedIndexUnavailableSnapshotRestore() throws Exception { createIndex("test1"); ensureGreen("test1"); waitForRelocation(); + createRepository("dummy-repo", "fs", Settings.builder().put("location", randomRepoPath())); - AcknowledgedResponse putRepositoryResponse = client().admin() - .cluster() - .preparePutRepository("dummy-repo") - .setType("fs") - 
.setSettings(Settings.builder().put("location", randomRepoPath())) - .get(); - assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true)); client().admin().cluster().prepareCreateSnapshot("dummy-repo", "snap1").setWaitForCompletion(true).get(); verify(snapshot("snap2", "test1", "test2"), true); @@ -391,13 +384,8 @@ public void testWildcardBehaviourSnapshotRestore() throws Exception { ensureGreen("foobar"); waitForRelocation(); - AcknowledgedResponse putRepositoryResponse = client().admin() - .cluster() - .preparePutRepository("dummy-repo") - .setType("fs") - .setSettings(Settings.builder().put("location", randomRepoPath())) - .get(); - assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true)); + createRepository("dummy-repo", "fs", Settings.builder().put("location", randomRepoPath())); + client().admin().cluster().prepareCreateSnapshot("dummy-repo", "snap1").setWaitForCompletion(true).get(); IndicesOptions options = IndicesOptions.fromOptions(false, false, true, false); diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/recovery/IndexRecoveryIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/recovery/IndexRecoveryIT.java index cf93a432d0371..68b29851c6c04 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/recovery/IndexRecoveryIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/recovery/IndexRecoveryIT.java @@ -741,14 +741,7 @@ public void testSnapshotRecovery() throws Exception { String nodeA = internalCluster().startNode(); logger.info("--> create repository"); - assertAcked( - client().admin() - .cluster() - .preparePutRepository(REPO_NAME) - .setType("fs") - .setSettings(Settings.builder().put("location", randomRepoPath()).put("compress", false)) - .get() - ); + createRepository(REPO_NAME, "fs", Settings.builder().put("location", randomRepoPath()).put("compress", false)); ensureGreen(); diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SearchReplicaReplicationIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SearchReplicaReplicationIT.java new file mode 100644 index 0000000000000..a1b512c326ac5 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SearchReplicaReplicationIT.java @@ -0,0 +1,85 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.indices.replication; + +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; +import org.opensearch.test.OpenSearchIntegTestCase; +import org.junit.After; +import org.junit.Before; + +import java.nio.file.Path; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +public class SearchReplicaReplicationIT extends SegmentReplicationBaseIT { + + private static final String REPOSITORY_NAME = "test-remote-store-repo"; + protected Path absolutePath; + + private Boolean useRemoteStore; + + @Before + public void randomizeRemoteStoreEnabled() { + useRemoteStore = randomBoolean(); + } + + @Override + protected Settings nodeSettings(int nodeOrdinal) { + if (useRemoteStore) { + if (absolutePath == null) { + absolutePath = randomRepoPath().toAbsolutePath(); + } + return Settings.builder() + .put(super.nodeSettings(nodeOrdinal)) + .put(remoteStoreClusterSettings(REPOSITORY_NAME, absolutePath)) + .build(); + } + return super.nodeSettings(nodeOrdinal); + } + + @After + public void teardown() { + if (useRemoteStore) { + clusterAdmin().prepareCleanupRepository(REPOSITORY_NAME).get(); + } + } + + @Override + public Settings indexSettings() { + return Settings.builder() + .put(super.indexSettings()) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put(IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS, 1) + .build(); + } + + @Override + protected Settings featureFlagSettings() { + return Settings.builder().put(super.featureFlagSettings()).put(FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL, true).build(); + } + + public void testReplication() throws Exception { + internalCluster().startClusterManagerOnlyNode(); + final String primary = internalCluster().startDataOnlyNode(); + createIndex(INDEX_NAME); + ensureYellowAndNoInitializingShards(INDEX_NAME); + final String replica = internalCluster().startDataOnlyNode(); + ensureGreen(INDEX_NAME); + + final int docCount = 10; + for (int i = 0; i < docCount; i++) { + client().prepareIndex(INDEX_NAME).setId(Integer.toString(i)).setSource("field", "value" + i).execute().get(); + } + refresh(INDEX_NAME); + waitForSearchableDocs(docCount, primary, replica); + } + +} diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/settings/SearchOnlyReplicaFeatureFlagIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/settings/SearchOnlyReplicaFeatureFlagIT.java new file mode 100644 index 0000000000000..ef18cff7e5b29 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/indices/settings/SearchOnlyReplicaFeatureFlagIT.java @@ -0,0 +1,68 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.indices.settings; + +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.settings.SettingsException; +import org.opensearch.common.util.FeatureFlags; +import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.test.OpenSearchIntegTestCase; + +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REPLICATION_TYPE; +import static org.opensearch.cluster.routing.allocation.decider.SearchReplicaAllocationDecider.SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_SETTING; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.SUITE, numDataNodes = 1) +public class SearchOnlyReplicaFeatureFlagIT extends OpenSearchIntegTestCase { + + private static final String TEST_INDEX = "test_index"; + + @Override + protected Settings featureFlagSettings() { + return Settings.builder() + .put(super.featureFlagSettings()) + .put(FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL, Boolean.FALSE) + .build(); + } + + public void testCreateFeatureFlagDisabled() { + Settings settings = Settings.builder().put(indexSettings()).put(FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL, false).build(); + SettingsException settingsException = expectThrows(SettingsException.class, () -> createIndex(TEST_INDEX, settings)); + assertTrue(settingsException.getMessage().contains("unknown setting")); + } + + public void testUpdateFeatureFlagDisabled() { + Settings settings = Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) + .build(); + + createIndex(TEST_INDEX, settings); + SettingsException settingsException = expectThrows(SettingsException.class, () -> { + client().admin() + .indices() + .prepareUpdateSettings(TEST_INDEX) + .setSettings(Settings.builder().put(SETTING_NUMBER_OF_SEARCH_REPLICAS, 1)) + .get(); + }); + assertTrue(settingsException.getMessage().contains("unknown setting")); + } + + public void testFilterAllocationSettingNotRegistered() { + expectThrows(SettingsException.class, () -> { + client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put(SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_SETTING.getKey() + "_name", "node")) + .execute() + .actionGet(); + }); + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/settings/SearchOnlyReplicaIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/settings/SearchOnlyReplicaIT.java new file mode 100644 index 0000000000000..6bd91df1de66f --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/indices/settings/SearchOnlyReplicaIT.java @@ -0,0 +1,245 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.indices.settings; + +import org.opensearch.action.search.SearchResponse; +import org.opensearch.action.support.WriteRequest; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.routing.IndexShardRoutingTable; +import org.opensearch.cluster.routing.Preference; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.test.InternalTestCluster; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.io.IOException; + +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REPLICATION_TYPE; +import static org.opensearch.cluster.routing.UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +public class SearchOnlyReplicaIT extends OpenSearchIntegTestCase { + + private static final String TEST_INDEX = "test_index"; + + @Override + protected Settings featureFlagSettings() { + return Settings.builder().put(super.featureFlagSettings()).put(FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL, Boolean.TRUE).build(); + } + + private final String expectedFailureMessage = + "To set index.number_of_search_only_replicas, index.replication.type must be set to SEGMENT"; + + @Override + public Settings indexSettings() { + return Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put(IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS, 1) + .put(INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), "0ms") // so that after we punt a node we can immediately try to + // reallocate after node left. 
+ .put(SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) + .build(); + } + + public void testCreateDocRepFails() { + Settings settings = Settings.builder().put(indexSettings()).put(SETTING_REPLICATION_TYPE, ReplicationType.DOCUMENT).build(); + + IllegalArgumentException illegalArgumentException = expectThrows( + IllegalArgumentException.class, + () -> createIndex(TEST_INDEX, settings) + ); + assertEquals(expectedFailureMessage, illegalArgumentException.getMessage()); + } + + public void testUpdateDocRepFails() { + Settings settings = Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(SETTING_REPLICATION_TYPE, ReplicationType.DOCUMENT) + .build(); + // create succeeds + createIndex(TEST_INDEX, settings); + + // update fails + IllegalArgumentException illegalArgumentException = expectThrows(IllegalArgumentException.class, () -> { + client().admin() + .indices() + .prepareUpdateSettings(TEST_INDEX) + .setSettings(Settings.builder().put(SETTING_NUMBER_OF_SEARCH_REPLICAS, 1)) + .get(); + }); + assertEquals(expectedFailureMessage, illegalArgumentException.getMessage()); + } + + public void testFailoverWithSearchReplica_WithWriterReplicas() throws IOException { + int numSearchReplicas = 1; + int numWriterReplicas = 1; + internalCluster().startClusterManagerOnlyNode(); + String primaryNodeName = internalCluster().startDataOnlyNode(); + createIndex( + TEST_INDEX, + Settings.builder() + .put(indexSettings()) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, numWriterReplicas) + .put(IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS, numSearchReplicas) + .build() + ); + ensureYellow(TEST_INDEX); + // add 2 nodes for the replicas + internalCluster().startDataOnlyNodes(2); + ensureGreen(TEST_INDEX); + + // assert shards are on separate nodes & all active + assertActiveShardCounts(numSearchReplicas, numWriterReplicas); + + // stop the primary and ensure search shard is not promoted: + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(primaryNodeName)); + ensureYellowAndNoInitializingShards(TEST_INDEX); + + assertActiveShardCounts(numSearchReplicas, 0); // 1 repl is inactive that was promoted to primary + // add back a node + internalCluster().startDataOnlyNode(); + ensureGreen(TEST_INDEX); + } + + public void testFailoverWithSearchReplica_WithoutWriterReplicas() throws IOException { + int numSearchReplicas = 1; + int numWriterReplicas = 0; + internalCluster().startClusterManagerOnlyNode(); + String primaryNodeName = internalCluster().startDataOnlyNode(); + createIndex( + TEST_INDEX, + Settings.builder() + .put(indexSettings()) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, numWriterReplicas) + .put(IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS, numSearchReplicas) + .build() + ); + ensureYellow(TEST_INDEX); + client().prepareIndex(TEST_INDEX).setId("1").setSource("foo", "bar").setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE).get(); + // start a node for our search replica + String replica = internalCluster().startDataOnlyNode(); + ensureGreen(TEST_INDEX); + assertActiveSearchShards(numSearchReplicas); + assertHitCount(client(replica).prepareSearch(TEST_INDEX).setSize(0).setPreference("_only_local").get(), 1); + + // stop the primary and ensure search shard is not promoted: + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(primaryNodeName)); + ensureRed(TEST_INDEX); + assertActiveSearchShards(numSearchReplicas); + // while red our search shard is still searchable + 
assertHitCount(client(replica).prepareSearch(TEST_INDEX).setSize(0).setPreference("_only_local").get(), 1); + } + + public void testSearchReplicaScaling() { + internalCluster().startNodes(2); + createIndex(TEST_INDEX); + ensureGreen(TEST_INDEX); + // assert settings + Metadata metadata = client().admin().cluster().prepareState().get().getState().metadata(); + int numSearchReplicas = Integer.parseInt(metadata.index(TEST_INDEX).getSettings().get(SETTING_NUMBER_OF_SEARCH_REPLICAS)); + assertEquals(1, numSearchReplicas); + + // assert cluster state & routing table + assertActiveSearchShards(1); + + // Add another node and search replica + internalCluster().startDataOnlyNode(); + client().admin() + .indices() + .prepareUpdateSettings(TEST_INDEX) + .setSettings(Settings.builder().put(SETTING_NUMBER_OF_SEARCH_REPLICAS, 2)) + .get(); + + ensureGreen(TEST_INDEX); + assertActiveSearchShards(2); + + // remove all search shards + client().admin() + .indices() + .prepareUpdateSettings(TEST_INDEX) + .setSettings(Settings.builder().put(SETTING_NUMBER_OF_SEARCH_REPLICAS, 0)) + .get(); + ensureGreen(TEST_INDEX); + assertActiveSearchShards(0); + } + + public void testSearchReplicaRoutingPreference() throws IOException { + int numSearchReplicas = 1; + int numWriterReplicas = 1; + internalCluster().startClusterManagerOnlyNode(); + String primaryNodeName = internalCluster().startDataOnlyNode(); + createIndex( + TEST_INDEX, + Settings.builder() + .put(indexSettings()) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, numWriterReplicas) + .put(IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS, numSearchReplicas) + .build() + ); + ensureYellow(TEST_INDEX); + client().prepareIndex(TEST_INDEX).setId("1").setSource("foo", "bar").setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE).get(); + // add 2 nodes for the replicas + internalCluster().startDataOnlyNodes(2); + ensureGreen(TEST_INDEX); + + assertActiveShardCounts(numSearchReplicas, numWriterReplicas); + + // set preference to search replica here - we default to this when there are + // search replicas but tests will randomize this value if unset + SearchResponse response = client().prepareSearch(TEST_INDEX) + .setPreference(Preference.SEARCH_REPLICA.type()) + .setQuery(QueryBuilders.matchAllQuery()) + .get(); + + String nodeId = response.getHits().getAt(0).getShard().getNodeId(); + IndexShardRoutingTable indexShardRoutingTable = getIndexShardRoutingTable(); + assertEquals(nodeId, indexShardRoutingTable.searchOnlyReplicas().get(0).currentNodeId()); + } + + /** + * Helper to assert counts of active shards for each type. 
+ */ + private void assertActiveShardCounts(int expectedSearchReplicaCount, int expectedWriteReplicaCount) { + // assert routing table + IndexShardRoutingTable indexShardRoutingTable = getIndexShardRoutingTable(); + // assert search replica count + int activeCount = expectedSearchReplicaCount + expectedWriteReplicaCount; + assertEquals(expectedSearchReplicaCount, indexShardRoutingTable.searchOnlyReplicas().stream().filter(ShardRouting::active).count()); + assertEquals(expectedWriteReplicaCount, indexShardRoutingTable.writerReplicas().stream().filter(ShardRouting::active).count()); + assertEquals( + expectedWriteReplicaCount + expectedSearchReplicaCount, + indexShardRoutingTable.replicaShards().stream().filter(ShardRouting::active).count() + ); + + // assert routing nodes + ClusterState clusterState = getClusterState(); + assertEquals(activeCount, clusterState.getRoutingNodes().shards(r -> r.active() && !r.primary()).size()); + assertEquals(expectedSearchReplicaCount, clusterState.getRoutingNodes().shards(r -> r.active() && r.isSearchOnly()).size()); + assertEquals( + expectedWriteReplicaCount, + clusterState.getRoutingNodes().shards(r -> r.active() && !r.primary() && !r.isSearchOnly()).size() + ); + } + + private void assertActiveSearchShards(int expectedSearchReplicaCount) { + assertActiveShardCounts(expectedSearchReplicaCount, 0); + } + + private IndexShardRoutingTable getIndexShardRoutingTable() { + return getClusterState().routingTable().index(TEST_INDEX).shards().values().stream().findFirst().get(); + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/settings/UpdateSettingsIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/settings/UpdateSettingsIT.java index beb0ea797bbec..475d0a154a98b 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/settings/UpdateSettingsIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/settings/UpdateSettingsIT.java @@ -35,6 +35,11 @@ import org.opensearch.action.admin.cluster.health.ClusterHealthResponse; import org.opensearch.action.admin.indices.settings.get.GetSettingsResponse; import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.applicationtemplates.ClusterStateSystemTemplateLoader; +import org.opensearch.cluster.applicationtemplates.SystemTemplate; +import org.opensearch.cluster.applicationtemplates.SystemTemplateMetadata; +import org.opensearch.cluster.applicationtemplates.TemplateRepositoryMetadata; +import org.opensearch.cluster.metadata.Context; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.Priority; @@ -42,6 +47,7 @@ import org.opensearch.common.settings.Settings; import org.opensearch.common.settings.SettingsException; import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.common.bytes.BytesReference; import org.opensearch.index.IndexModule; import org.opensearch.index.IndexService; import org.opensearch.index.VersionType; @@ -51,10 +57,14 @@ import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.threadpool.ThreadPool; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.List; +import java.util.UUID; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_BLOCKS_METADATA; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_BLOCKS_READ; @@ 
-99,6 +109,58 @@ public void testInvalidDynamicUpdate() { assertNotEquals(indexMetadata.getSettings().get("index.dummy"), "invalid dynamic value"); } + public void testDynamicUpdateWithContextSettingOverlap() throws IOException { + String templateContent = "{\n" + + " \"template\": {\n" + + " \"settings\": {\n" + + " \"index.merge.policy\": \"log_byte_size\"\n" + + " }\n" + + " },\n" + + " \"_meta\": {\n" + + " \"_type\": \"@abc_template\",\n" + + " \"_version\": 1\n" + + " },\n" + + " \"version\": 1\n" + + "}\n"; + + ClusterStateSystemTemplateLoader loader = new ClusterStateSystemTemplateLoader( + internalCluster().clusterManagerClient(), + () -> internalCluster().getInstance(ClusterService.class).state() + ); + loader.loadTemplate( + new SystemTemplate( + BytesReference.fromByteBuffer(ByteBuffer.wrap(templateContent.getBytes(StandardCharsets.UTF_8))), + SystemTemplateMetadata.fromComponentTemplateInfo("testcontext", 1L), + new TemplateRepositoryMetadata(UUID.randomUUID().toString(), 1L) + ) + ); + + createIndex("test", new Context("testcontext")); + + IllegalArgumentException validationException = expectThrows( + IllegalArgumentException.class, + () -> client().admin() + .indices() + .prepareUpdateSettings("test") + .setSettings(Settings.builder().put("index.merge.policy", "tiered")) + .execute() + .actionGet() + ); + assertTrue( + validationException.getMessage() + .contains("Cannot apply context template as user provide settings have overlap with the included context template") + ); + + assertAcked( + client().admin() + .indices() + .prepareUpdateSettings("test") + .setSettings(Settings.builder().put("index.refresh_interval", "60s")) + .execute() + .actionGet() + ); + } + @Override protected Collection> nodePlugins() { return Arrays.asList(DummySettingPlugin.class, FinalSettingPlugin.class); diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/MigrationBaseTestCase.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/MigrationBaseTestCase.java index e4e681a5433b5..17a9c3ddbe317 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotemigration/MigrationBaseTestCase.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/MigrationBaseTestCase.java @@ -108,9 +108,7 @@ protected void setFailRate(String repoName, int value) throws ExecutionException Settings.Builder settings = Settings.builder() .put("location", rmd.settings().get("location")) .put(REPOSITORIES_FAILRATE_SETTING.getKey(), value); - assertAcked( - client().admin().cluster().preparePutRepository(repoName).setType(ReloadableFsRepository.TYPE).setSettings(settings).get() - ); + createRepository(repoName, ReloadableFsRepository.TYPE, settings); } public void initDocRepToRemoteMigration() { diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemotePrimaryLocalRecoveryIT.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemotePrimaryLocalRecoveryIT.java index 024fc68602a19..8ab093b8495cb 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemotePrimaryLocalRecoveryIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemotePrimaryLocalRecoveryIT.java @@ -19,6 +19,7 @@ import org.opensearch.core.util.FileSystemUtils; import org.opensearch.index.remote.RemoteSegmentStats; import org.opensearch.index.translog.RemoteTranslogStats; +import org.opensearch.indices.RemoteStoreSettings; import org.opensearch.test.InternalTestCluster; import 
org.opensearch.test.OpenSearchIntegTestCase; @@ -67,6 +68,7 @@ public void testLocalRecoveryRollingRestartAndNodeFailure() throws Exception { assertTrue(remoteSegmentStats.getUploadBytesSucceeded() > 0); } + String segmentsPathFixedPrefix = RemoteStoreSettings.CLUSTER_REMOTE_STORE_SEGMENTS_PATH_PREFIX.get(getNodeSettings()); assertBusy(() -> { String shardPath = getShardLevelBlobPath( client(), @@ -74,7 +76,8 @@ public void testLocalRecoveryRollingRestartAndNodeFailure() throws Exception { new BlobPath(), String.valueOf(shardRouting.getId()), SEGMENTS, - DATA + DATA, + segmentsPathFixedPrefix ).buildAsString(); Path segmentDataRepoPath = segmentRepoPath.resolve(shardPath); List segmentsNFilesInRepo = Arrays.stream(FileSystemUtils.files(segmentDataRepoPath)) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationSettingsUpdateIT.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationSettingsUpdateIT.java index 377bd9529ca7a..30c597e405f4e 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationSettingsUpdateIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationSettingsUpdateIT.java @@ -11,6 +11,7 @@ import org.opensearch.client.Client; import org.opensearch.common.settings.Settings; import org.opensearch.common.settings.SettingsException; +import org.opensearch.common.unit.TimeValue; import org.opensearch.test.InternalTestCluster; import org.opensearch.test.OpenSearchIntegTestCase; @@ -21,7 +22,6 @@ import static org.opensearch.node.remotestore.RemoteStoreNodeService.CompatibilityMode.MIXED; import static org.opensearch.node.remotestore.RemoteStoreNodeService.CompatibilityMode.STRICT; import static org.opensearch.node.remotestore.RemoteStoreNodeService.Direction.REMOTE_STORE; -import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) public class RemoteStoreMigrationSettingsUpdateIT extends RemoteStoreMigrationShardAllocationBaseTestCase { @@ -68,6 +68,7 @@ public void testNewIndexIsRemoteStoreBackedForRemoteStoreDirectionAndMixedMode() assertRemoteStoreBackedIndex(indexName2); } + @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/15793") public void testNewRestoredIndexIsRemoteStoreBackedForRemoteStoreDirectionAndMixedMode() throws Exception { logger.info("Initialize cluster: gives non remote cluster manager"); initializeCluster(false); @@ -92,11 +93,7 @@ public void testNewRestoredIndexIsRemoteStoreBackedForRemoteStoreDirectionAndMix String snapshotName = "test-snapshot"; String snapshotRepoName = "test-restore-snapshot-repo"; Path snapshotRepoNameAbsolutePath = randomRepoPath().toAbsolutePath(); - assertAcked( - clusterAdmin().preparePutRepository(snapshotRepoName) - .setType("fs") - .setSettings(Settings.builder().put("location", snapshotRepoNameAbsolutePath)) - ); + createRepository(snapshotRepoName, "fs", Settings.builder().put("location", snapshotRepoNameAbsolutePath)); logger.info("Create snapshot of non remote stored backed index"); @@ -114,7 +111,7 @@ public void testNewRestoredIndexIsRemoteStoreBackedForRemoteStoreDirectionAndMix setDirection(REMOTE_STORE.direction); String restoredIndexName2 = TEST_INDEX + "-restored2"; restoreSnapshot(snapshotRepoName, snapshotName, restoredIndexName2); - ensureGreen(restoredIndexName2); + 
ensureGreen(TimeValue.timeValueSeconds(90), restoredIndexName2); logger.info("Verify that restored index is non remote-backed"); assertRemoteStoreBackedIndex(restoredIndexName2); diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationTestCase.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationTestCase.java index 4d37b2a1feb88..7d29e5d328492 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationTestCase.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationTestCase.java @@ -116,15 +116,10 @@ public void testNoShallowSnapshotInMixedMode() throws Exception { logger.info("Create shallow snapshot setting enabled repo"); String shallowSnapshotRepoName = "shallow-snapshot-repo-name"; Path shallowSnapshotRepoPath = randomRepoPath(); - assertAcked( - clusterAdmin().preparePutRepository(shallowSnapshotRepoName) - .setType("fs") - .setSettings( - Settings.builder() - .put("location", shallowSnapshotRepoPath) - .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), Boolean.TRUE) - ) - ); + Settings.Builder settings = Settings.builder() + .put("location", shallowSnapshotRepoPath) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), Boolean.TRUE); + createRepository(shallowSnapshotRepoName, "fs", settings); logger.info("Verify shallow snapshot creation"); final String snapshot1 = "snapshot1"; diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java index 280fd13f0fdcf..e8df2c8686610 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java @@ -76,29 +76,4 @@ protected void verifyRestoredData(Map indexStats, String indexName protected void verifyRestoredData(Map indexStats, String indexName) throws Exception { verifyRestoredData(indexStats, indexName, true); } - - public void prepareCluster(int numClusterManagerNodes, int numDataOnlyNodes, String indices, int replicaCount, int shardCount) { - prepareCluster(numClusterManagerNodes, numDataOnlyNodes, indices, replicaCount, shardCount, Settings.EMPTY); - } - - public void prepareCluster( - int numClusterManagerNodes, - int numDataOnlyNodes, - String indices, - int replicaCount, - int shardCount, - Settings settings - ) { - prepareCluster(numClusterManagerNodes, numDataOnlyNodes, settings); - for (String index : indices.split(",")) { - createIndex(index, remoteStoreIndexSettings(replicaCount, shardCount)); - ensureYellowAndNoInitializingShards(index); - ensureGreen(index); - } - } - - public void prepareCluster(int numClusterManagerNodes, int numDataOnlyNodes, Settings settings) { - internalCluster().startClusterManagerOnlyNodes(numClusterManagerNodes, settings); - internalCluster().startDataOnlyNodes(numDataOnlyNodes, settings); - } } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java index 42e44bd3f37c3..0acb578e2e7bf 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java @@ 
-443,13 +443,15 @@ public void testRestoreInSameRemoteStoreEnabledIndex() throws IOException { void assertRemoteSegmentsAndTranslogUploaded(String idx) throws IOException { Client client = client(); - String path = getShardLevelBlobPath(client, idx, new BlobPath(), "0", TRANSLOG, METADATA).buildAsString(); + String translogPathFixedPrefix = RemoteStoreSettings.CLUSTER_REMOTE_STORE_TRANSLOG_PATH_PREFIX.get(getNodeSettings()); + String segmentsPathFixedPrefix = RemoteStoreSettings.CLUSTER_REMOTE_STORE_SEGMENTS_PATH_PREFIX.get(getNodeSettings()); + String path = getShardLevelBlobPath(client, idx, new BlobPath(), "0", TRANSLOG, METADATA, translogPathFixedPrefix).buildAsString(); Path remoteTranslogMetadataPath = Path.of(remoteRepoPath + "/" + path); - path = getShardLevelBlobPath(client, idx, new BlobPath(), "0", TRANSLOG, DATA).buildAsString(); + path = getShardLevelBlobPath(client, idx, new BlobPath(), "0", TRANSLOG, DATA, translogPathFixedPrefix).buildAsString(); Path remoteTranslogDataPath = Path.of(remoteRepoPath + "/" + path); - path = getShardLevelBlobPath(client, idx, new BlobPath(), "0", SEGMENTS, METADATA).buildAsString(); + path = getShardLevelBlobPath(client, idx, new BlobPath(), "0", SEGMENTS, METADATA, segmentsPathFixedPrefix).buildAsString(); Path segmentMetadataPath = Path.of(remoteRepoPath + "/" + path); - path = getShardLevelBlobPath(client, idx, new BlobPath(), "0", SEGMENTS, DATA).buildAsString(); + path = getShardLevelBlobPath(client, idx, new BlobPath(), "0", SEGMENTS, DATA, segmentsPathFixedPrefix).buildAsString(); Path segmentDataPath = Path.of(remoteRepoPath + "/" + path); try ( @@ -803,20 +805,14 @@ public void testCreateSnapshotV2() throws Exception { Path absolutePath1 = randomRepoPath().toAbsolutePath(); logger.info("Snapshot Path [{}]", absolutePath1); - assertAcked( - client().admin() - .cluster() - .preparePutRepository(snapshotRepoName) - .setType(FsRepository.TYPE) - .setSettings( - Settings.builder() - .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) - .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) - .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) - .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) - .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true) - ) - ); + Settings.Builder settings = Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true); + + createRepository(snapshotRepoName, FsRepository.TYPE, settings); Client client = client(); Settings indexSettings = getIndexSettings(20, 0).build(); @@ -845,18 +841,13 @@ public void testCreateSnapshotV2() throws Exception { String snapshotName2 = "test-create-snapshot2"; - // verify even if waitForCompletion is not true, the request executes in a sync manner - CreateSnapshotResponse createSnapshotResponse2 = client().admin() + // verify response status if waitForCompletion is not true + RestStatus createSnapshotResponseStatus = client().admin() .cluster() .prepareCreateSnapshot(snapshotRepoName, snapshotName2) - .get(); - snapshotInfo = createSnapshotResponse2.getSnapshotInfo(); - assertThat(snapshotInfo.state(), equalTo(SnapshotState.SUCCESS)); - assertThat(snapshotInfo.successfulShards(), 
greaterThan(0)); - assertThat(snapshotInfo.successfulShards(), equalTo(snapshotInfo.totalShards())); - assertThat(snapshotInfo.snapshotId().getName(), equalTo(snapshotName2)); - assertThat(snapshotInfo.getPinnedTimestamp(), greaterThan(0L)); - + .get() + .status(); + assertEquals(RestStatus.ACCEPTED, createSnapshotResponseStatus); } public void testMixedSnapshotCreationWithV2RepositorySetting() throws Exception { @@ -872,20 +863,14 @@ public void testMixedSnapshotCreationWithV2RepositorySetting() throws Exception Path absolutePath1 = randomRepoPath().toAbsolutePath(); logger.info("Snapshot Path [{}]", absolutePath1); - assertAcked( - client().admin() - .cluster() - .preparePutRepository(snapshotRepoName) - .setType(FsRepository.TYPE) - .setSettings( - Settings.builder() - .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) - .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) - .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) - .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) - .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), false) - ) - ); + Settings.Builder settings = Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), false); + createRepository(snapshotRepoName, FsRepository.TYPE, settings); + Client client = client(); Settings indexSettings = getIndexSettings(20, 0).build(); createIndex(indexName1, indexSettings); @@ -906,20 +891,14 @@ public void testMixedSnapshotCreationWithV2RepositorySetting() throws Exception assertThat(snapshotInfo.getPinnedTimestamp(), equalTo(0L)); // enable shallow_snapshot_v2 - assertAcked( - client().admin() - .cluster() - .preparePutRepository(snapshotRepoName) - .setType(FsRepository.TYPE) - .setSettings( - Settings.builder() - .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) - .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) - .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) - .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) - .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true) - ) - ); + settings = Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true); + createRepository(snapshotRepoName, FsRepository.TYPE, settings); + indexDocuments(client, indexName1, 10); indexDocuments(client, indexName2, 20); @@ -932,6 +911,7 @@ public void testMixedSnapshotCreationWithV2RepositorySetting() throws Exception CreateSnapshotResponse createSnapshotResponse2 = client().admin() .cluster() .prepareCreateSnapshot(snapshotRepoName, snapshotName2) + .setWaitForCompletion(true) .get(); snapshotInfo = createSnapshotResponse2.getSnapshotInfo(); assertThat(snapshotInfo.state(), equalTo(SnapshotState.SUCCESS)); @@ -952,20 +932,13 @@ public void testConcurrentSnapshotV2CreateOperation() throws InterruptedExceptio Path absolutePath1 = 
randomRepoPath().toAbsolutePath(); logger.info("Snapshot Path [{}]", absolutePath1); - assertAcked( - client().admin() - .cluster() - .preparePutRepository(snapshotRepoName) - .setType(FsRepository.TYPE) - .setSettings( - Settings.builder() - .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) - .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) - .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) - .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) - .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true) - ) - ); + Settings.Builder settings = Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true); + createRepository(snapshotRepoName, FsRepository.TYPE, settings); Client client = client(); Settings indexSettings = getIndexSettings(20, 0).build(); @@ -993,6 +966,7 @@ public void testConcurrentSnapshotV2CreateOperation() throws InterruptedExceptio CreateSnapshotResponse createSnapshotResponse2 = client().admin() .cluster() .prepareCreateSnapshot(snapshotRepoName, snapshotName) + .setWaitForCompletion(true) .get(); SnapshotInfo snapshotInfo = createSnapshotResponse2.getSnapshotInfo(); assertThat(snapshotInfo.state(), equalTo(SnapshotState.SUCCESS)); @@ -1034,20 +1008,13 @@ public void testCreateSnapshotV2WithRedIndex() throws Exception { Path absolutePath1 = randomRepoPath().toAbsolutePath(); logger.info("Snapshot Path [{}]", absolutePath1); - assertAcked( - client().admin() - .cluster() - .preparePutRepository(snapshotRepoName) - .setType(FsRepository.TYPE) - .setSettings( - Settings.builder() - .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) - .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) - .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) - .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) - .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true) - ) - ); + Settings.Builder settings = Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true); + createRepository(snapshotRepoName, FsRepository.TYPE, settings); Client client = client(); Settings indexSettings = getIndexSettings(20, 0).build(); @@ -1068,6 +1035,7 @@ public void testCreateSnapshotV2WithRedIndex() throws Exception { CreateSnapshotResponse createSnapshotResponse2 = client().admin() .cluster() .prepareCreateSnapshot(snapshotRepoName, snapshotName1) + .setWaitForCompletion(true) .get(); SnapshotInfo snapshotInfo = createSnapshotResponse2.getSnapshotInfo(); assertThat(snapshotInfo.state(), equalTo(SnapshotState.SUCCESS)); @@ -1088,20 +1056,13 @@ public void testCreateSnapshotV2WithIndexingLoad() throws Exception { Path absolutePath1 = randomRepoPath().toAbsolutePath(); logger.info("Snapshot Path [{}]", absolutePath1); - assertAcked( - client().admin() - .cluster() - .preparePutRepository(snapshotRepoName) 
- .setType(FsRepository.TYPE) - .setSettings( - Settings.builder() - .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) - .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) - .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) - .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) - .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true) - ) - ); + Settings.Builder settings = Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true); + createRepository(snapshotRepoName, FsRepository.TYPE, settings); Client client = client(); Settings indexSettings = getIndexSettings(20, 0).build(); @@ -1136,6 +1097,7 @@ public void testCreateSnapshotV2WithIndexingLoad() throws Exception { CreateSnapshotResponse createSnapshotResponse2 = client().admin() .cluster() .prepareCreateSnapshot(snapshotRepoName, snapshotName1) + .setWaitForCompletion(true) .get(); SnapshotInfo snapshotInfo = createSnapshotResponse2.getSnapshotInfo(); @@ -1162,20 +1124,13 @@ public void testCreateSnapshotV2WithShallowCopySettingDisabled() throws Exceptio Path absolutePath1 = randomRepoPath().toAbsolutePath(); logger.info("Snapshot Path [{}]", absolutePath1); - assertAcked( - client().admin() - .cluster() - .preparePutRepository(snapshotRepoName) - .setType(FsRepository.TYPE) - .setSettings( - Settings.builder() - .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) - .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) - .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) - .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), false) - .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true) - ) - ); + Settings.Builder settings = Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), false) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true); + createRepository(snapshotRepoName, FsRepository.TYPE, settings); Client client = client(); Settings indexSettings = getIndexSettings(20, 0).build(); @@ -1217,20 +1172,13 @@ public void testClusterManagerFailoverDuringSnapshotCreation() throws Exception Path absolutePath1 = randomRepoPath().toAbsolutePath(); logger.info("Snapshot Path [{}]", absolutePath1); - assertAcked( - client().admin() - .cluster() - .preparePutRepository(snapshotRepoName) - .setType(FsRepository.TYPE) - .setSettings( - Settings.builder() - .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) - .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) - .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) - .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) - .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true) - ) - ); + Settings.Builder settings = Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + 
.put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true); + createRepository(snapshotRepoName, FsRepository.TYPE, settings); Client client = client(); Settings indexSettings = getIndexSettings(20, 0).build(); @@ -1256,6 +1204,7 @@ public void testClusterManagerFailoverDuringSnapshotCreation() throws Exception CreateSnapshotResponse createSnapshotResponse = client().admin() .cluster() .prepareCreateSnapshot(snapshotRepoName, snapshotName1) + .setWaitForCompletion(true) .get(); snapshotInfo[0] = createSnapshotResponse.getSnapshotInfo(); @@ -1295,21 +1244,14 @@ public void testConcurrentV1SnapshotAndV2RepoSettingUpdate() throws Exception { String snapshotName1 = "test-create-snapshot-v1"; Path absolutePath1 = randomRepoPath().toAbsolutePath(); logger.info("Snapshot Path [{}]", absolutePath1); + Settings.Builder settings = Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), false); + createRepository(snapshotRepoName, FsRepository.TYPE, settings); - assertAcked( - client().admin() - .cluster() - .preparePutRepository(snapshotRepoName) - .setType(FsRepository.TYPE) - .setSettings( - Settings.builder() - .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) - .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) - .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) - .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) - .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), false) - ) - ); Client client = client(); Settings indexSettings = getIndexSettings(20, 0).build(); @@ -1346,19 +1288,16 @@ public void testConcurrentV1SnapshotAndV2RepoSettingUpdate() throws Exception { assertThrows( IllegalStateException.class, - () -> client().admin() - .cluster() - .preparePutRepository(snapshotRepoName) - .setType(FsRepository.TYPE) - .setSettings( - Settings.builder() - .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) - .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) - .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) - .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) - .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true) - ) - .get() + () -> createRepository( + snapshotRepoName, + FsRepository.TYPE, + Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true) + ) ); } catch (Exception e) { diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java index f83ae3e0ca820..bcb0d54c0a25c 100644 --- 
a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java @@ -190,9 +190,7 @@ protected void setFailRate(String repoName, int value) throws ExecutionException Settings.Builder settings = Settings.builder() .put("location", rmd.settings().get("location")) .put(REPOSITORIES_FAILRATE_SETTING.getKey(), value); - assertAcked( - client().admin().cluster().preparePutRepository(repoName).setType(ReloadableFsRepository.TYPE).setSettings(settings).get() - ); + createRepository(repoName, ReloadableFsRepository.TYPE, settings); } public Settings indexSettings() { @@ -353,13 +351,7 @@ protected void restore(boolean restoreAllShards, String... indices) { } protected void prepareCluster(int numClusterManagerNodes, int numDataOnlyNodes, String indices, int replicaCount, int shardCount) { - internalCluster().startClusterManagerOnlyNodes(numClusterManagerNodes); - internalCluster().startDataOnlyNodes(numDataOnlyNodes); - for (String index : indices.split(",")) { - createIndex(index, remoteStoreIndexSettings(replicaCount, shardCount)); - ensureYellowAndNoInitializingShards(index); - ensureGreen(index); - } + prepareCluster(numClusterManagerNodes, numDataOnlyNodes, indices, replicaCount, shardCount, Settings.EMPTY); } protected void prepareCluster( @@ -370,11 +362,16 @@ protected void prepareCluster( int shardCount, Settings settings ) { - internalCluster().startClusterManagerOnlyNodes(numClusterManagerNodes, settings); - internalCluster().startDataOnlyNodes(numDataOnlyNodes, settings); + prepareCluster(numClusterManagerNodes, numDataOnlyNodes, settings); for (String index : indices.split(",")) { createIndex(index, remoteStoreIndexSettings(replicaCount, shardCount)); + ensureYellowAndNoInitializingShards(index); ensureGreen(index); } } + + protected void prepareCluster(int numClusterManagerNodes, int numDataOnlyNodes, Settings settings) { + internalCluster().startClusterManagerOnlyNodes(numClusterManagerNodes, settings); + internalCluster().startDataOnlyNodes(numDataOnlyNodes, settings); + } } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java index 11260e0914dc5..d078ba05faa12 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java @@ -36,6 +36,8 @@ import org.opensearch.gateway.remote.ClusterMetadataManifest; import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata; import org.opensearch.gateway.remote.RemoteClusterStateService; +import org.opensearch.index.remote.RemoteStoreEnums.PathType; +import org.opensearch.repositories.blobstore.BlobStoreRepository; import org.opensearch.test.InternalTestCluster; import org.opensearch.test.OpenSearchIntegTestCase; import org.junit.Before; @@ -326,10 +328,17 @@ public void testFullClusterRestoreManifestFilePointsToInvalidIndexMetadataPathTh internalCluster().stopAllNodes(); // Step - 3 Delete index metadata file in remote try { - Files.move( - segmentRepoPath.resolve(encodeString(clusterName) + "/cluster-state/" + prevClusterUUID + "/index"), - segmentRepoPath.resolve("cluster-state/") + RemoteClusterStateService remoteClusterStateService = internalCluster().getInstance( + 
RemoteClusterStateService.class, + internalCluster().getClusterManagerName() ); + ClusterMetadataManifest manifest = remoteClusterStateService.getLatestClusterMetadataManifest( + getClusterState().getClusterName().value(), + getClusterState().metadata().clusterUUID() + ).get(); + for (UploadedIndexMetadata md : manifest.getIndices()) { + Files.move(segmentRepoPath.resolve(md.getUploadedFilename()), segmentRepoPath.resolve("cluster-state/")); + } } catch (IOException e) { throw new RuntimeException(e); } @@ -476,14 +485,7 @@ public void testFullClusterRestoreGlobalMetadata() throws Exception { private Path registerCustomRepository() { Path path = randomRepoPath(); - assertAcked( - client().admin() - .cluster() - .preparePutRepository("custom-repo") - .setType("fs") - .setSettings(Settings.builder().put("location", path).put("compress", false)) - .get() - ); + createRepository("custom-repo", "fs", Settings.builder().put("location", path).put("compress", false)); return path; } @@ -493,9 +495,15 @@ private void verifyRestoredRepositories(Path repoPath) { assertTrue(SYSTEM_REPOSITORY_SETTING.get(repositoriesMetadata.repository(REPOSITORY_NAME).settings())); assertTrue(SYSTEM_REPOSITORY_SETTING.get(repositoriesMetadata.repository(REPOSITORY_2_NAME).settings())); assertEquals("fs", repositoriesMetadata.repository("custom-repo").type()); + Settings settings = repositoriesMetadata.repository("custom-repo").settings(); + PathType pathType = BlobStoreRepository.SHARD_PATH_TYPE.get(settings); assertEquals( - Settings.builder().put("location", repoPath).put("compress", false).build(), - repositoriesMetadata.repository("custom-repo").settings() + Settings.builder() + .put("location", repoPath) + .put("compress", false) + .put(BlobStoreRepository.SHARD_PATH_TYPE.getKey(), pathType) + .build(), + settings ); // repo cleanup post verification diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java index a327b683874f6..692727357a88a 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java @@ -47,6 +47,7 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.Arrays; import java.util.Collection; import java.util.List; import java.util.Map; @@ -204,7 +205,16 @@ public void testStaleCommitDeletionWithInvokeFlush() throws Exception { createIndex(INDEX_NAME, remoteStoreIndexSettings(1, 10000l, -1)); int numberOfIterations = randomIntBetween(5, 15); indexData(numberOfIterations, true, INDEX_NAME); - String shardPath = getShardLevelBlobPath(client(), INDEX_NAME, BlobPath.cleanPath(), "0", SEGMENTS, METADATA).buildAsString(); + String segmentsPathFixedPrefix = RemoteStoreSettings.CLUSTER_REMOTE_STORE_SEGMENTS_PATH_PREFIX.get(getNodeSettings()); + String shardPath = getShardLevelBlobPath( + client(), + INDEX_NAME, + BlobPath.cleanPath(), + "0", + SEGMENTS, + METADATA, + segmentsPathFixedPrefix + ).buildAsString(); Path indexPath = Path.of(segmentRepoPath + "/" + shardPath); ; IndexShard indexShard = getIndexShard(dataNode, INDEX_NAME); @@ -235,7 +245,16 @@ public void testStaleCommitDeletionWithoutInvokeFlush() throws Exception { createIndex(INDEX_NAME, remoteStoreIndexSettings(1, 10000l, -1)); int numberOfIterations = randomIntBetween(5, 15); indexData(numberOfIterations, false, INDEX_NAME); - String shardPath = 
getShardLevelBlobPath(client(), INDEX_NAME, BlobPath.cleanPath(), "0", SEGMENTS, METADATA).buildAsString(); + String segmentsPathFixedPrefix = RemoteStoreSettings.CLUSTER_REMOTE_STORE_SEGMENTS_PATH_PREFIX.get(getNodeSettings()); + String shardPath = getShardLevelBlobPath( + client(), + INDEX_NAME, + BlobPath.cleanPath(), + "0", + SEGMENTS, + METADATA, + segmentsPathFixedPrefix + ).buildAsString(); Path indexPath = Path.of(segmentRepoPath + "/" + shardPath); int actualFileCount = getFileCount(indexPath); // We also allow (numberOfIterations + 1) as index creation also triggers refresh. @@ -246,11 +265,19 @@ public void testStaleCommitDeletionWithMinSegmentFiles_3() throws Exception { Settings.Builder settings = Settings.builder() .put(RemoteStoreSettings.CLUSTER_REMOTE_INDEX_SEGMENT_METADATA_RETENTION_MAX_COUNT_SETTING.getKey(), "3"); internalCluster().startNode(settings); - + String segmentsPathFixedPrefix = RemoteStoreSettings.CLUSTER_REMOTE_STORE_SEGMENTS_PATH_PREFIX.get(getNodeSettings()); createIndex(INDEX_NAME, remoteStoreIndexSettings(1, 10000l, -1)); int numberOfIterations = randomIntBetween(5, 15); indexData(numberOfIterations, true, INDEX_NAME); - String shardPath = getShardLevelBlobPath(client(), INDEX_NAME, BlobPath.cleanPath(), "0", SEGMENTS, METADATA).buildAsString(); + String shardPath = getShardLevelBlobPath( + client(), + INDEX_NAME, + BlobPath.cleanPath(), + "0", + SEGMENTS, + METADATA, + segmentsPathFixedPrefix + ).buildAsString(); Path indexPath = Path.of(segmentRepoPath + "/" + shardPath); int actualFileCount = getFileCount(indexPath); // We also allow (numberOfIterations + 1) as index creation also triggers refresh. @@ -270,7 +297,16 @@ public void testStaleCommitDeletionWithMinSegmentFiles_Disabled() throws Excepti createIndex(INDEX_NAME, remoteStoreIndexSettings(1, 10000l, -1)); int numberOfIterations = randomIntBetween(12, 18); indexData(numberOfIterations, true, INDEX_NAME); - String shardPath = getShardLevelBlobPath(client(), INDEX_NAME, BlobPath.cleanPath(), "0", SEGMENTS, METADATA).buildAsString(); + String segmentsPathFixedPrefix = RemoteStoreSettings.CLUSTER_REMOTE_STORE_SEGMENTS_PATH_PREFIX.get(getNodeSettings()); + String shardPath = getShardLevelBlobPath( + client(), + INDEX_NAME, + BlobPath.cleanPath(), + "0", + SEGMENTS, + METADATA, + segmentsPathFixedPrefix + ).buildAsString(); Path indexPath = Path.of(segmentRepoPath + "/" + shardPath); ; int actualFileCount = getFileCount(indexPath); @@ -510,9 +546,7 @@ public void testRestoreSnapshotToIndexWithSameNameDifferentUUID() throws Excepti List dataNodes = internalCluster().startDataOnlyNodes(2); Path absolutePath = randomRepoPath().toAbsolutePath(); - assertAcked( - clusterAdmin().preparePutRepository("test-repo").setType("fs").setSettings(Settings.builder().put("location", absolutePath)) - ); + createRepository("test-repo", "fs", Settings.builder().put("location", absolutePath)); logger.info("--> Create index and ingest 50 docs"); createIndex(INDEX_NAME, remoteStoreIndexSettings(1)); @@ -605,8 +639,10 @@ public void testFallbackToNodeToNodeSegmentCopy() throws Exception { indexBulk(INDEX_NAME, 50); flushAndRefresh(INDEX_NAME); + String segmentsPathFixedPrefix = RemoteStoreSettings.CLUSTER_REMOTE_STORE_SEGMENTS_PATH_PREFIX.get(getNodeSettings()); // 3. 
Delete data from remote segment store - String shardPath = getShardLevelBlobPath(client(), INDEX_NAME, BlobPath.cleanPath(), "0", SEGMENTS, DATA).buildAsString(); + String shardPath = getShardLevelBlobPath(client(), INDEX_NAME, BlobPath.cleanPath(), "0", SEGMENTS, DATA, segmentsPathFixedPrefix) + .buildAsString(); Path segmentDataPath = Path.of(segmentRepoPath + "/" + shardPath); try (Stream files = Files.list(segmentDataPath)) { @@ -845,7 +881,16 @@ public void testLocalOnlyTranslogCleanupOnNodeRestart() throws Exception { .get() .getSetting(INDEX_NAME, IndexMetadata.SETTING_INDEX_UUID); - String shardPath = getShardLevelBlobPath(client(), INDEX_NAME, BlobPath.cleanPath(), "0", TRANSLOG, METADATA).buildAsString(); + String translogPathFixedPrefix = RemoteStoreSettings.CLUSTER_REMOTE_STORE_TRANSLOG_PATH_PREFIX.get(getNodeSettings()); + String shardPath = getShardLevelBlobPath( + client(), + INDEX_NAME, + BlobPath.cleanPath(), + "0", + TRANSLOG, + METADATA, + translogPathFixedPrefix + ).buildAsString(); Path translogMetaDataPath = Path.of(translogRepoPath + "/" + shardPath); try (Stream files = Files.list(translogMetaDataPath)) { @@ -932,4 +977,38 @@ public void testFlushOnTooManyRemoteTranslogFiles() throws Exception { assertEquals(totalFiles, 501L); } } + + public void testAsyncTranslogDurabilityRestrictionsThroughIdxTemplates() throws Exception { + logger.info("Starting up cluster manager with cluster.remote_store.index.restrict.async-durability set to true"); + String cm1 = internalCluster().startClusterManagerOnlyNode( + Settings.builder().put(IndicesService.CLUSTER_REMOTE_INDEX_RESTRICT_ASYNC_DURABILITY_SETTING.getKey(), true).build() + ); + internalCluster().startDataOnlyNode(); + ensureStableCluster(2); + assertThrows( + IllegalArgumentException.class, + () -> internalCluster().client() + .admin() + .indices() + .preparePutTemplate("test") + .setPatterns(Arrays.asList("test*")) + .setSettings(Settings.builder().put(IndexSettings.INDEX_TRANSLOG_DURABILITY_SETTING.getKey(), "async")) + .get() + ); + logger.info("Starting up another cluster manager with cluster.remote_store.index.restrict.async-durability set to false"); + internalCluster().startClusterManagerOnlyNode( + Settings.builder().put(IndicesService.CLUSTER_REMOTE_INDEX_RESTRICT_ASYNC_DURABILITY_SETTING.getKey(), false).build() + ); + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(cm1)); + ensureStableCluster(2); + assertAcked( + internalCluster().client() + .admin() + .indices() + .preparePutTemplate("test") + .setPatterns(Arrays.asList("test*")) + .setSettings(Settings.builder().put(IndexSettings.INDEX_TRANSLOG_DURABILITY_SETTING.getKey(), "async")) + .get() + ); + } } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStorePinnedTimestampsIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStorePinnedTimestampsIT.java index cb91c63e17245..2fcda8c2d2f27 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStorePinnedTimestampsIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStorePinnedTimestampsIT.java @@ -8,6 +8,7 @@ package org.opensearch.remotestore; +import org.opensearch.action.LatchedActionListener; import org.opensearch.common.collect.Tuple; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; @@ -17,6 +18,7 @@ import org.opensearch.test.OpenSearchIntegTestCase; import java.util.Set; +import java.util.concurrent.CountDownLatch; 
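The recurring change in the hunks above threads a fixed path prefix into the shard-level blob path lookup: tests now read the node-level prefix setting back before computing where segment or translog files land in the remote repository. Below is a minimal sketch of that pattern, not part of the patch itself; it assumes it sits inside one of the remote-store integration tests shown here, so client(), getNodeSettings(), segmentRepoPath and the inherited getShardLevelBlobPath helper are in scope, and the method name resolveSegmentMetadataPath is hypothetical.

    import java.nio.file.Path;

    import org.opensearch.client.Client;
    import org.opensearch.common.blobstore.BlobPath;
    import org.opensearch.indices.RemoteStoreSettings;

    import static org.opensearch.index.remote.RemoteStoreEnums.DataCategory.SEGMENTS;
    import static org.opensearch.index.remote.RemoteStoreEnums.DataType.METADATA;

    // Sketch only: mirrors the seven-argument getShardLevelBlobPath calls in the hunks above.
    private Path resolveSegmentMetadataPath(String indexName) {
        Client client = client();
        // The fixed prefix is a node-level setting; reading it back keeps the expected
        // path in sync with whatever the cluster was started with.
        String segmentsPathFixedPrefix = RemoteStoreSettings.CLUSTER_REMOTE_STORE_SEGMENTS_PATH_PREFIX.get(getNodeSettings());
        String shardPath = getShardLevelBlobPath(
            client,
            indexName,
            BlobPath.cleanPath(),
            "0",                     // shard id
            SEGMENTS,                // data category
            METADATA,                // data type
            segmentsPathFixedPrefix  // trailing argument introduced by this change
        ).buildAsString();
        return Path.of(segmentRepoPath + "/" + shardPath);
    }

The same shape applies to translog paths, using CLUSTER_REMOTE_STORE_TRANSLOG_PATH_PREFIX and the TRANSLOG data category instead.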
@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) public class RemoteStorePinnedTimestampsIT extends RemoteStoreBaseIntegTestCase { @@ -75,10 +77,25 @@ public void testTimestampPinUnpin() throws Exception { remoteStorePinnedTimestampService.rescheduleAsyncUpdatePinnedTimestampTask(TimeValue.timeValueMinutes(3)); - // This should be a no-op as pinning entity is different - remoteStorePinnedTimestampService.unpinTimestamp(timestamp1, "no-snapshot", noOpActionListener); // Unpinning already pinned entity remoteStorePinnedTimestampService.unpinTimestamp(timestamp2, "ss3", noOpActionListener); + + // This should fail as timestamp is not pinned by pinning entity + CountDownLatch latch = new CountDownLatch(1); + remoteStorePinnedTimestampService.unpinTimestamp(timestamp1, "no-snapshot", new LatchedActionListener<>(new ActionListener() { + @Override + public void onResponse(Void unused) { + // onResponse should not get called. + fail(); + } + + @Override + public void onFailure(Exception e) { + assertTrue(e instanceof IllegalArgumentException); + } + }, latch)); + latch.await(); + // Adding different entity to already pinned timestamp remoteStorePinnedTimestampService.pinTimestamp(timestamp3, "ss5", noOpActionListener); @@ -93,4 +110,74 @@ public void testTimestampPinUnpin() throws Exception { remoteStorePinnedTimestampService.rescheduleAsyncUpdatePinnedTimestampTask(TimeValue.timeValueMinutes(3)); } + + public void testPinnedTimestampClone() throws Exception { + prepareCluster(1, 1, INDEX_NAME, 0, 2); + ensureGreen(INDEX_NAME); + + RemoteStorePinnedTimestampService remoteStorePinnedTimestampService = internalCluster().getInstance( + RemoteStorePinnedTimestampService.class, + primaryNodeName(INDEX_NAME) + ); + + long timestamp1 = System.currentTimeMillis() + 30000L; + long timestamp2 = System.currentTimeMillis() + 60000L; + long timestamp3 = System.currentTimeMillis() + 900000L; + remoteStorePinnedTimestampService.pinTimestamp(timestamp1, "ss2", noOpActionListener); + remoteStorePinnedTimestampService.pinTimestamp(timestamp2, "ss3", noOpActionListener); + remoteStorePinnedTimestampService.pinTimestamp(timestamp3, "ss4", noOpActionListener); + + // Clone timestamp1 + remoteStorePinnedTimestampService.cloneTimestamp(timestamp1, "ss2", "ss2-2", noOpActionListener); + + // With clone, set of pinned timestamp will not change + remoteStorePinnedTimestampService.rescheduleAsyncUpdatePinnedTimestampTask(TimeValue.timeValueSeconds(1)); + assertBusy( + () -> assertEquals(Set.of(timestamp1, timestamp2, timestamp3), RemoteStorePinnedTimestampService.getPinnedTimestamps().v2()) + ); + remoteStorePinnedTimestampService.rescheduleAsyncUpdatePinnedTimestampTask(TimeValue.timeValueMinutes(3)); + + // Clone timestamp1 but provide invalid existing entity + CountDownLatch latch = new CountDownLatch(1); + remoteStorePinnedTimestampService.cloneTimestamp( + timestamp1, + "ss3", + "ss2-3", + new LatchedActionListener<>(new ActionListener() { + @Override + public void onResponse(Void unused) { + // onResponse should not get called. 
+ fail(); + } + + @Override + public void onFailure(Exception e) { + assertTrue(e instanceof IllegalArgumentException); + } + }, latch) + ); + latch.await(); + + remoteStorePinnedTimestampService.rescheduleAsyncUpdatePinnedTimestampTask(TimeValue.timeValueSeconds(1)); + assertBusy( + () -> assertEquals(Set.of(timestamp1, timestamp2, timestamp3), RemoteStorePinnedTimestampService.getPinnedTimestamps().v2()) + ); + remoteStorePinnedTimestampService.rescheduleAsyncUpdatePinnedTimestampTask(TimeValue.timeValueMinutes(3)); + + // Now we have timestamp1 pinned by 2 entities, unpin 1, this should not change set of pinned timestamps + remoteStorePinnedTimestampService.unpinTimestamp(timestamp1, "ss2", noOpActionListener); + + remoteStorePinnedTimestampService.rescheduleAsyncUpdatePinnedTimestampTask(TimeValue.timeValueSeconds(1)); + assertBusy( + () -> assertEquals(Set.of(timestamp1, timestamp2, timestamp3), RemoteStorePinnedTimestampService.getPinnedTimestamps().v2()) + ); + remoteStorePinnedTimestampService.rescheduleAsyncUpdatePinnedTimestampTask(TimeValue.timeValueMinutes(3)); + + // Now unpin second entity as well, set of pinned timestamp should be reduced by 1 + remoteStorePinnedTimestampService.unpinTimestamp(timestamp1, "ss2-2", noOpActionListener); + + remoteStorePinnedTimestampService.rescheduleAsyncUpdatePinnedTimestampTask(TimeValue.timeValueSeconds(1)); + assertBusy(() -> assertEquals(Set.of(timestamp2, timestamp3), RemoteStorePinnedTimestampService.getPinnedTimestamps().v2())); + remoteStorePinnedTimestampService.rescheduleAsyncUpdatePinnedTimestampTask(TimeValue.timeValueMinutes(3)); + } } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRefreshListenerIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRefreshListenerIT.java index 7ae08bf968ade..a7292482100e1 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRefreshListenerIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRefreshListenerIT.java @@ -13,6 +13,7 @@ import org.opensearch.action.admin.indices.stats.IndicesStatsResponse; import org.opensearch.common.blobstore.BlobPath; import org.opensearch.common.settings.Settings; +import org.opensearch.indices.RemoteStoreSettings; import org.opensearch.test.OpenSearchIntegTestCase; import java.nio.file.Path; @@ -50,7 +51,10 @@ public void testRemoteRefreshRetryOnFailure() throws Exception { String indexName = response.getShards()[0].getShardRouting().index().getName(); String indexUuid = response.getShards()[0].getShardRouting().index().getUUID(); - String shardPath = getShardLevelBlobPath(client(), indexName, new BlobPath(), "0", SEGMENTS, DATA).buildAsString(); + + String segmentsPathFixedPrefix = RemoteStoreSettings.CLUSTER_REMOTE_STORE_SEGMENTS_PATH_PREFIX.get(getNodeSettings()); + String shardPath = getShardLevelBlobPath(client(), indexName, new BlobPath(), "0", SEGMENTS, DATA, segmentsPathFixedPrefix) + .buildAsString(); Path segmentDataRepoPath = location.resolve(shardPath); String segmentDataLocalPath = String.format(Locale.ROOT, "%s/indices/%s/0/index", response.getShards()[0].getDataPath(), indexUuid); diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRepositoryRegistrationIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRepositoryRegistrationIT.java index b0827dcfe4892..4cbafde6417af 100644 --- 
a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRepositoryRegistrationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRepositoryRegistrationIT.java @@ -131,13 +131,15 @@ public void testMultiNodeClusterRandomNodeRecoverNetworkIsolationPostNonRestrict .get(0); Settings.Builder updatedSettings = Settings.builder().put(repositoryMetadata.settings()).put("chunk_size", new ByteSizeValue(20)); updatedSettings.remove("system_repository"); - - client.admin() - .cluster() - .preparePutRepository(repositoryMetadata.name()) - .setType(repositoryMetadata.type()) - .setSettings(updatedSettings) - .get(); + OpenSearchIntegTestCase.putRepositoryRequestBuilder( + client.admin().cluster(), + repositoryMetadata.name(), + repositoryMetadata.type(), + true, + updatedSettings, + null, + false + ).get(); ensureStableCluster(3, nodesInOneSide.stream().findAny().get()); networkDisruption.stopDisrupting(); @@ -161,12 +163,7 @@ public void testNodeRestartPostNonRestrictedSettingsUpdate() throws Exception { Settings.Builder updatedSettings = Settings.builder().put(repositoryMetadata.settings()).put("chunk_size", new ByteSizeValue(20)); updatedSettings.remove("system_repository"); - client.admin() - .cluster() - .preparePutRepository(repositoryMetadata.name()) - .setType(repositoryMetadata.type()) - .setSettings(updatedSettings) - .get(); + createRepository(repositoryMetadata.name(), repositoryMetadata.type(), updatedSettings); internalCluster().restartRandomDataNode(); diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRestoreIT.java index bd84ab026dcea..1a1c7c0cd0e1a 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRestoreIT.java @@ -480,15 +480,7 @@ public void testRateLimitedRemoteDownloads() throws Exception { Settings.Builder settings = Settings.builder(); settingsMap.entrySet().forEach(entry -> settings.put(entry.getKey(), entry.getValue())); settings.put("location", segmentRepoPath).put("max_remote_download_bytes_per_sec", 4, ByteSizeUnit.KB); - - assertAcked( - client().admin() - .cluster() - .preparePutRepository(REPOSITORY_NAME) - .setType(ReloadableFsRepository.TYPE) - .setSettings(settings) - .get() - ); + createRepository(REPOSITORY_NAME, ReloadableFsRepository.TYPE, settings); for (RepositoriesService repositoriesService : internalCluster().getDataNodeInstances(RepositoriesService.class)) { Repository segmentRepo = repositoriesService.repository(REPOSITORY_NAME); @@ -517,14 +509,7 @@ public void testRateLimitedRemoteDownloads() throws Exception { // revert repo metadata to pass asserts on repo metadata vs. 
node attrs during teardown // https://github.com/opensearch-project/OpenSearch/pull/9569#discussion_r1345668700 settings.remove("max_remote_download_bytes_per_sec"); - assertAcked( - client().admin() - .cluster() - .preparePutRepository(REPOSITORY_NAME) - .setType(ReloadableFsRepository.TYPE) - .setSettings(settings) - .get() - ); + createRepository(REPOSITORY_NAME, ReloadableFsRepository.TYPE, settings); for (RepositoriesService repositoriesService : internalCluster().getDataNodeInstances(RepositoriesService.class)) { Repository segmentRepo = repositoriesService.repository(REPOSITORY_NAME); assertNull(segmentRepo.getMetadata().settings().get("max_remote_download_bytes_per_sec")); diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RestoreShallowSnapshotV2IT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RestoreShallowSnapshotV2IT.java new file mode 100644 index 0000000000000..c5a55f16cab2b --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RestoreShallowSnapshotV2IT.java @@ -0,0 +1,805 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.remotestore; + +import org.opensearch.action.DocWriteResponse; +import org.opensearch.action.admin.cluster.remotestore.restore.RestoreRemoteStoreRequest; +import org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse; +import org.opensearch.action.admin.indices.delete.DeleteIndexRequest; +import org.opensearch.action.admin.indices.recovery.RecoveryResponse; +import org.opensearch.action.delete.DeleteResponse; +import org.opensearch.action.support.PlainActionFuture; +import org.opensearch.client.Client; +import org.opensearch.client.Requests; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.routing.RecoverySource; +import org.opensearch.common.Nullable; +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.io.PathUtils; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.io.IOUtils; +import org.opensearch.core.index.Index; +import org.opensearch.core.rest.RestStatus; +import org.opensearch.index.IndexService; +import org.opensearch.index.IndexSettings; +import org.opensearch.index.remote.RemoteStoreEnums; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.indices.IndicesService; +import org.opensearch.indices.RemoteStoreSettings; +import org.opensearch.indices.recovery.RecoveryState; +import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.snapshots.AbstractSnapshotIntegTestCase; +import org.opensearch.snapshots.SnapshotInfo; +import org.opensearch.snapshots.SnapshotRestoreException; +import org.opensearch.snapshots.SnapshotState; +import org.opensearch.test.InternalTestCluster; +import org.opensearch.test.OpenSearchIntegTestCase; +import org.junit.After; +import org.junit.Before; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.concurrent.ExecutionException; +import java.util.stream.Collectors; +import 
java.util.stream.Stream; + +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_STORE_ENABLED; +import static org.opensearch.index.remote.RemoteStoreEnums.DataCategory.SEGMENTS; +import static org.opensearch.index.remote.RemoteStoreEnums.DataCategory.TRANSLOG; +import static org.opensearch.index.remote.RemoteStoreEnums.DataType.DATA; +import static org.opensearch.index.remote.RemoteStoreEnums.DataType.METADATA; +import static org.opensearch.indices.RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.lessThanOrEqualTo; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +public class RestoreShallowSnapshotV2IT extends AbstractSnapshotIntegTestCase { + + private static final String BASE_REMOTE_REPO = "test-rs-repo" + TEST_REMOTE_STORE_REPO_SUFFIX; + private Path remoteRepoPath; + + @Before + public void setup() { + remoteRepoPath = randomRepoPath().toAbsolutePath(); + } + + @After + public void teardown() { + clusterAdmin().prepareCleanupRepository(BASE_REMOTE_REPO).get(); + } + + @Override + protected Settings nodeSettings(int nodeOrdinal) { + return Settings.builder() + .put(super.nodeSettings(nodeOrdinal)) + .put(remoteStoreClusterSettings(BASE_REMOTE_REPO, remoteRepoPath)) + .put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED.getKey(), true) + .build(); + } + + @Override + protected Settings.Builder getRepositorySettings(Path location, boolean shallowCopyEnabled) { + Settings.Builder settingsBuilder = randomRepositorySettings(); + settingsBuilder.put("location", location); + if (shallowCopyEnabled) { + settingsBuilder.put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true); + } + return settingsBuilder; + } + + private Settings.Builder getIndexSettings(int numOfShards, int numOfReplicas) { + Settings.Builder settingsBuilder = Settings.builder() + .put(super.indexSettings()) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numOfShards) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, numOfReplicas) + .put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), "300s"); + return settingsBuilder; + } + + private void indexDocuments(Client client, String indexName, int numOfDocs) { + indexDocuments(client, indexName, 0, numOfDocs); + } + + protected void indexDocuments(Client client, String indexName, int fromId, int toId) { + for (int i = fromId; i < toId; i++) { + String id = Integer.toString(i); + client.prepareIndex(indexName).setId(id).setSource("text", "sometext").get(); + } + } + + private void assertDocsPresentInIndex(Client client, String indexName, int numOfDocs) { + for (int i = 0; i < numOfDocs; i++) { + String id = Integer.toString(i); + logger.info("checking for index " + indexName + " with docId" + id); + assertTrue("doc with id" + id + " is not present for index " + indexName, client.prepareGet(indexName, id).get().isExists()); + } + } + + public void testRestoreOperationsShallowCopyEnabled() throws Exception { + String clusterManagerNode = internalCluster().startClusterManagerOnlyNode(); + String primary = internalCluster().startDataOnlyNode(); + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String 
snapshotRepoName = "test-restore-snapshot-repo"; + String snapshotName1 = "test-restore-snapshot1"; + String snapshotName2 = "test-restore-snapshot2"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + String restoredIndexName1 = indexName1 + "-restored"; + String restoredIndexName2 = indexName2 + "-restored"; + + createRepository(snapshotRepoName, "fs", getRepositorySettings(absolutePath1, true)); + + Client client = client(); + Settings indexSettings = getIndexSettings(1, 0).build(); + createIndex(indexName1, indexSettings); + + Settings indexSettings2 = getIndexSettings(1, 0).build(); + createIndex(indexName2, indexSettings2); + + final int numDocsInIndex1 = 5; + final int numDocsInIndex2 = 6; + indexDocuments(client, indexName1, numDocsInIndex1); + indexDocuments(client, indexName2, numDocsInIndex2); + ensureGreen(indexName1, indexName2); + + internalCluster().startDataOnlyNode(); + logger.info("--> snapshot"); + + SnapshotInfo snapshotInfo = createSnapshot(snapshotRepoName, snapshotName1, new ArrayList<>()); + assertThat(snapshotInfo.state(), equalTo(SnapshotState.SUCCESS)); + assertThat(snapshotInfo.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo.successfulShards(), equalTo(snapshotInfo.totalShards())); + + updateRepository(snapshotRepoName, "fs", getRepositorySettings(absolutePath1, false)); + SnapshotInfo snapshotInfo2 = createSnapshot( + snapshotRepoName, + snapshotName2, + new ArrayList<>(Arrays.asList(indexName1, indexName2)) + ); + assertThat(snapshotInfo2.state(), equalTo(SnapshotState.SUCCESS)); + assertThat(snapshotInfo2.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo2.successfulShards(), equalTo(snapshotInfo2.totalShards())); + + DeleteResponse deleteResponse = client().prepareDelete(indexName1, "0").execute().actionGet(); + assertEquals(deleteResponse.getResult(), DocWriteResponse.Result.DELETED); + indexDocuments(client, indexName1, numDocsInIndex1, numDocsInIndex1 + randomIntBetween(2, 5)); + ensureGreen(indexName1); + + RestoreSnapshotResponse restoreSnapshotResponse1 = client.admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepoName, snapshotName1) + .setWaitForCompletion(false) + .setIndices(indexName1) + .setRenamePattern(indexName1) + .setRenameReplacement(restoredIndexName1) + .get(); + RestoreSnapshotResponse restoreSnapshotResponse2 = client.admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepoName, snapshotName2) + .setWaitForCompletion(false) + .setIndices(indexName2) + .setRenamePattern(indexName2) + .setRenameReplacement(restoredIndexName2) + .get(); + assertEquals(restoreSnapshotResponse1.status(), RestStatus.ACCEPTED); + assertEquals(restoreSnapshotResponse2.status(), RestStatus.ACCEPTED); + ensureGreen(restoredIndexName1, restoredIndexName2); + assertDocsPresentInIndex(client, restoredIndexName1, numDocsInIndex1); + assertDocsPresentInIndex(client, restoredIndexName2, numDocsInIndex2); + + // deleting data for restoredIndexName1 and restoring from remote store. + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(primary)); + ensureRed(restoredIndexName1); + // Re-initialize client to make sure we are not using client from stopped node. 
+ client = client(clusterManagerNode); + assertAcked(client.admin().indices().prepareClose(restoredIndexName1)); + client.admin() + .cluster() + .restoreRemoteStore( + new RestoreRemoteStoreRequest().indices(restoredIndexName1).restoreAllShards(true), + PlainActionFuture.newFuture() + ); + ensureYellowAndNoInitializingShards(restoredIndexName1); + ensureGreen(restoredIndexName1); + assertDocsPresentInIndex(client(), restoredIndexName1, numDocsInIndex1); + // indexing some new docs and validating + indexDocuments(client, restoredIndexName1, numDocsInIndex1, numDocsInIndex1 + 2); + ensureGreen(restoredIndexName1); + assertDocsPresentInIndex(client, restoredIndexName1, numDocsInIndex1 + 2); + } + + public void testRemoteStoreCustomDataOnIndexCreationAndRestore() { + String clusterManagerNode = internalCluster().startClusterManagerOnlyNode(); + internalCluster().startDataOnlyNode(); + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String snapshotRepoName = "test-restore-snapshot-repo"; + String snapshotName1 = "test-restore-snapshot1"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + String restoredIndexName1version1 = indexName1 + "-restored-1"; + String restoredIndexName1version2 = indexName1 + "-restored-2"; + + client(clusterManagerNode).admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put(CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.getKey(), RemoteStoreEnums.PathType.FIXED)) + .get(); + createRepository(snapshotRepoName, "fs", getRepositorySettings(absolutePath1, true)); + Client client = client(); + Settings indexSettings = getIndexSettings(1, 0).build(); + createIndex(indexName1, indexSettings); + + indexDocuments(client, indexName1, randomIntBetween(5, 10)); + ensureGreen(indexName1); + validatePathType(indexName1, RemoteStoreEnums.PathType.FIXED); + + logger.info("--> snapshot"); + SnapshotInfo snapshotInfo = createSnapshot(snapshotRepoName, snapshotName1, new ArrayList<>(Arrays.asList(indexName1))); + assertEquals(SnapshotState.SUCCESS, snapshotInfo.state()); + assertTrue(snapshotInfo.successfulShards() > 0); + assertEquals(snapshotInfo.totalShards(), snapshotInfo.successfulShards()); + + RestoreSnapshotResponse restoreSnapshotResponse = client.admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepoName, snapshotName1) + .setWaitForCompletion(false) + .setRenamePattern(indexName1) + .setRenameReplacement(restoredIndexName1version1) + .get(); + assertEquals(RestStatus.ACCEPTED, restoreSnapshotResponse.status()); + ensureGreen(restoredIndexName1version1); + validatePathType(restoredIndexName1version1, RemoteStoreEnums.PathType.FIXED); + + client(clusterManagerNode).admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings( + Settings.builder().put(CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.getKey(), RemoteStoreEnums.PathType.HASHED_PREFIX) + ) + .get(); + + restoreSnapshotResponse = client.admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepoName, snapshotName1) + .setWaitForCompletion(false) + .setRenamePattern(indexName1) + .setRenameReplacement(restoredIndexName1version2) + .get(); + assertEquals(RestStatus.ACCEPTED, restoreSnapshotResponse.status()); + ensureGreen(restoredIndexName1version2); + validatePathType( + restoredIndexName1version2, + RemoteStoreEnums.PathType.HASHED_PREFIX, + RemoteStoreEnums.PathHashAlgorithm.FNV_1A_COMPOSITE_1 + ); + + // Create index with cluster setting cluster.remote_store.index.path.type as hashed_prefix. 
+ indexSettings = getIndexSettings(1, 0).build(); + createIndex(indexName2, indexSettings); + ensureGreen(indexName2); + validatePathType(indexName2, RemoteStoreEnums.PathType.HASHED_PREFIX, RemoteStoreEnums.PathHashAlgorithm.FNV_1A_COMPOSITE_1); + + // Validating that custom data has not changed for indexes which were created before the cluster setting got updated + validatePathType(indexName1, RemoteStoreEnums.PathType.FIXED); + + // Create Snapshot of index 2 + String snapshotName2 = "test-restore-snapshot2"; + snapshotInfo = createSnapshot(snapshotRepoName, snapshotName2, new ArrayList<>(List.of(indexName2))); + assertEquals(SnapshotState.SUCCESS, snapshotInfo.state()); + assertTrue(snapshotInfo.successfulShards() > 0); + assertEquals(snapshotInfo.totalShards(), snapshotInfo.successfulShards()); + + // Update cluster settings to FIXED + client(clusterManagerNode).admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put(CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.getKey(), RemoteStoreEnums.PathType.FIXED)) + .get(); + + // Close index 2 + assertAcked(client().admin().indices().prepareClose(indexName2)); + restoreSnapshotResponse = client.admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepoName, snapshotName2) + .setWaitForCompletion(false) + .setIndices(indexName2) + .get(); + assertEquals(RestStatus.ACCEPTED, restoreSnapshotResponse.status()); + ensureGreen(indexName2); + + // Validating that custom data has not changed for testindex2 which was created before the cluster setting got updated + validatePathType(indexName2, RemoteStoreEnums.PathType.HASHED_PREFIX, RemoteStoreEnums.PathHashAlgorithm.FNV_1A_COMPOSITE_1); + } + + private void validatePathType(String index, RemoteStoreEnums.PathType pathType) { + validatePathType(index, pathType, null); + } + + private void validatePathType( + String index, + RemoteStoreEnums.PathType pathType, + @Nullable RemoteStoreEnums.PathHashAlgorithm pathHashAlgorithm + ) { + ClusterState state = client().admin().cluster().prepareState().execute().actionGet().getState(); + // Validate that the remote_store custom data is present in index metadata for the created index. 
+ Map remoteCustomData = state.metadata().index(index).getCustomData(IndexMetadata.REMOTE_STORE_CUSTOM_KEY); + assertNotNull(remoteCustomData); + assertEquals(pathType.name(), remoteCustomData.get(RemoteStoreEnums.PathType.NAME)); + if (Objects.nonNull(pathHashAlgorithm)) { + assertEquals(pathHashAlgorithm.name(), remoteCustomData.get(RemoteStoreEnums.PathHashAlgorithm.NAME)); + } + } + + public void testRestoreInSameRemoteStoreEnabledIndex() throws IOException { + String clusterManagerNode = internalCluster().startClusterManagerOnlyNode(); + String primary = internalCluster().startDataOnlyNode(); + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String snapshotRepoName = "test-restore-snapshot-repo"; + String snapshotName1 = "test-restore-snapshot1"; + String snapshotName2 = "test-restore-snapshot2"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + String restoredIndexName2 = indexName2 + "-restored"; + + boolean enableShallowCopy = randomBoolean(); + createRepository(snapshotRepoName, "fs", getRepositorySettings(absolutePath1, enableShallowCopy)); + + Client client = client(); + Settings indexSettings = getIndexSettings(1, 0).build(); + createIndex(indexName1, indexSettings); + + Settings indexSettings2 = getIndexSettings(1, 0).build(); + createIndex(indexName2, indexSettings2); + + final int numDocsInIndex1 = 5; + final int numDocsInIndex2 = 6; + indexDocuments(client, indexName1, numDocsInIndex1); + indexDocuments(client, indexName2, numDocsInIndex2); + ensureGreen(indexName1, indexName2); + + internalCluster().startDataOnlyNode(); + logger.info("--> snapshot"); + SnapshotInfo snapshotInfo1 = createSnapshot( + snapshotRepoName, + snapshotName1, + new ArrayList<>(Arrays.asList(indexName1, indexName2)) + ); + assertThat(snapshotInfo1.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo1.successfulShards(), equalTo(snapshotInfo1.totalShards())); + assertThat(snapshotInfo1.state(), equalTo(SnapshotState.SUCCESS)); + + updateRepository(snapshotRepoName, "fs", getRepositorySettings(absolutePath1, false)); + SnapshotInfo snapshotInfo2 = createSnapshot( + snapshotRepoName, + snapshotName2, + new ArrayList<>(Arrays.asList(indexName1, indexName2)) + ); + assertThat(snapshotInfo2.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo2.successfulShards(), equalTo(snapshotInfo2.totalShards())); + assertThat(snapshotInfo2.state(), equalTo(SnapshotState.SUCCESS)); + + DeleteResponse deleteResponse = client().prepareDelete(indexName1, "0").execute().actionGet(); + assertEquals(deleteResponse.getResult(), DocWriteResponse.Result.DELETED); + indexDocuments(client, indexName1, numDocsInIndex1, numDocsInIndex1 + randomIntBetween(2, 5)); + ensureGreen(indexName1); + + assertAcked(client().admin().indices().prepareClose(indexName1)); + + RestoreSnapshotResponse restoreSnapshotResponse1 = client.admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepoName, snapshotName1) + .setWaitForCompletion(false) + .setIndices(indexName1) + .get(); + RestoreSnapshotResponse restoreSnapshotResponse2 = client.admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepoName, snapshotName2) + .setWaitForCompletion(false) + .setIndices(indexName2) + .setRenamePattern(indexName2) + .setRenameReplacement(restoredIndexName2) + .get(); + assertEquals(restoreSnapshotResponse1.status(), RestStatus.ACCEPTED); + assertEquals(restoreSnapshotResponse2.status(), RestStatus.ACCEPTED); + ensureGreen(indexName1, restoredIndexName2); 
+ + assertRemoteSegmentsAndTranslogUploaded(restoredIndexName2); + assertDocsPresentInIndex(client, indexName1, numDocsInIndex1); + assertDocsPresentInIndex(client, restoredIndexName2, numDocsInIndex2); + // indexing some new docs and validating + indexDocuments(client, indexName1, numDocsInIndex1, numDocsInIndex1 + 2); + ensureGreen(indexName1); + assertDocsPresentInIndex(client, indexName1, numDocsInIndex1 + 2); + + // deleting data for restoredIndexName1 and restoring from remote store. + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(primary)); + ensureRed(indexName1); + // Re-initialize client to make sure we are not using client from stopped node. + client = client(clusterManagerNode); + assertAcked(client.admin().indices().prepareClose(indexName1)); + client.admin() + .cluster() + .restoreRemoteStore(new RestoreRemoteStoreRequest().indices(indexName1).restoreAllShards(true), PlainActionFuture.newFuture()); + ensureYellowAndNoInitializingShards(indexName1); + ensureGreen(indexName1); + assertDocsPresentInIndex(client(), indexName1, numDocsInIndex1); + // indexing some new docs and validating + indexDocuments(client, indexName1, numDocsInIndex1 + 2, numDocsInIndex1 + 4); + ensureGreen(indexName1); + assertDocsPresentInIndex(client, indexName1, numDocsInIndex1 + 4); + } + + void assertRemoteSegmentsAndTranslogUploaded(String idx) throws IOException { + Client client = client(); + String translogPathFixedPrefix = RemoteStoreSettings.CLUSTER_REMOTE_STORE_TRANSLOG_PATH_PREFIX.get(getNodeSettings()); + String segmentsPathFixedPrefix = RemoteStoreSettings.CLUSTER_REMOTE_STORE_SEGMENTS_PATH_PREFIX.get(getNodeSettings()); + String path = getShardLevelBlobPath(client, idx, new BlobPath(), "0", TRANSLOG, METADATA, translogPathFixedPrefix).buildAsString(); + Path remoteTranslogMetadataPath = Path.of(remoteRepoPath + "/" + path); + path = getShardLevelBlobPath(client, idx, new BlobPath(), "0", TRANSLOG, DATA, translogPathFixedPrefix).buildAsString(); + Path remoteTranslogDataPath = Path.of(remoteRepoPath + "/" + path); + path = getShardLevelBlobPath(client, idx, new BlobPath(), "0", SEGMENTS, METADATA, segmentsPathFixedPrefix).buildAsString(); + Path segmentMetadataPath = Path.of(remoteRepoPath + "/" + path); + path = getShardLevelBlobPath(client, idx, new BlobPath(), "0", SEGMENTS, DATA, segmentsPathFixedPrefix).buildAsString(); + Path segmentDataPath = Path.of(remoteRepoPath + "/" + path); + + try ( + Stream translogMetadata = Files.list(remoteTranslogMetadataPath); + Stream translogData = Files.list(remoteTranslogDataPath); + Stream segmentMetadata = Files.list(segmentMetadataPath); + Stream segmentData = Files.list(segmentDataPath); + + ) { + assertTrue(translogData.count() > 0); + assertTrue(translogMetadata.count() > 0); + assertTrue(segmentMetadata.count() > 0); + assertTrue(segmentData.count() > 0); + } + + } + + public void testRemoteRestoreIndexRestoredFromSnapshot() throws IOException, ExecutionException, InterruptedException { + internalCluster().startClusterManagerOnlyNode(); + internalCluster().startDataOnlyNodes(2); + + String indexName1 = "testindex1"; + String snapshotRepoName = "test-restore-snapshot-repo"; + String snapshotName1 = "test-restore-snapshot1"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + createRepository(snapshotRepoName, "fs", getRepositorySettings(absolutePath1, true)); + + Settings indexSettings = getIndexSettings(1, 0).build(); + createIndex(indexName1, indexSettings); + + 
final int numDocsInIndex1 = randomIntBetween(20, 30); + indexDocuments(client(), indexName1, numDocsInIndex1); + flushAndRefresh(indexName1); + ensureGreen(indexName1); + + logger.info("--> snapshot"); + SnapshotInfo snapshotInfo1 = createSnapshot(snapshotRepoName, snapshotName1, new ArrayList<>(Arrays.asList(indexName1))); + assertThat(snapshotInfo1.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo1.successfulShards(), equalTo(snapshotInfo1.totalShards())); + assertThat(snapshotInfo1.state(), equalTo(SnapshotState.SUCCESS)); + + assertAcked(client().admin().indices().delete(new DeleteIndexRequest(indexName1)).get()); + assertFalse(indexExists(indexName1)); + + RestoreSnapshotResponse restoreSnapshotResponse1 = client().admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepoName, snapshotName1) + .setWaitForCompletion(false) + .setIndices(indexName1) + .get(); + + assertEquals(restoreSnapshotResponse1.status(), RestStatus.ACCEPTED); + ensureGreen(indexName1); + assertDocsPresentInIndex(client(), indexName1, numDocsInIndex1); + + assertRemoteSegmentsAndTranslogUploaded(indexName1); + + // Clear the local data before stopping the node. This will make sure that remote translog is empty. + IndexShard indexShard = getIndexShard(primaryNodeName(indexName1), indexName1); + try (Stream<Path> files = Files.list(indexShard.shardPath().resolveTranslog())) { + IOUtils.deleteFilesIgnoringExceptions(files.collect(Collectors.toList())); + } + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(primaryNodeName(indexName1))); + + ensureRed(indexName1); + + client().admin() + .cluster() + .restoreRemoteStore(new RestoreRemoteStoreRequest().indices(indexName1).restoreAllShards(false), PlainActionFuture.newFuture()); + + ensureGreen(indexName1); + assertDocsPresentInIndex(client(), indexName1, numDocsInIndex1); + } + + private IndexShard getIndexShard(String node, String indexName) { + final Index index = resolveIndex(indexName); + IndicesService indicesService = internalCluster().getInstance(IndicesService.class, node); + IndexService indexService = indicesService.indexService(index); + assertNotNull(indexService); + final Optional<Integer> shardId = indexService.shardIds().stream().findFirst(); + return shardId.map(indexService::getShard).orElse(null); + } + + public void testRestoreShallowSnapshotRepository() throws ExecutionException, InterruptedException { + String indexName1 = "testindex1"; + String snapshotRepoName = "test-restore-snapshot-repo"; + String remoteStoreRepoNameUpdated = "test-rs-repo-updated" + TEST_REMOTE_STORE_REPO_SUFFIX; + String snapshotName1 = "test-restore-snapshot1"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + Path absolutePath2 = randomRepoPath().toAbsolutePath(); + String[] pathTokens = absolutePath1.toString().split("/"); + String basePath = pathTokens[pathTokens.length - 1]; + Arrays.copyOf(pathTokens, pathTokens.length - 1); + Path location = PathUtils.get(String.join("/", pathTokens)); + pathTokens = absolutePath2.toString().split("/"); + String basePath2 = pathTokens[pathTokens.length - 1]; + Arrays.copyOf(pathTokens, pathTokens.length - 1); + Path location2 = PathUtils.get(String.join("/", pathTokens)); + logger.info("Path 1 [{}]", absolutePath1); + logger.info("Path 2 [{}]", absolutePath2); + String restoredIndexName1 = indexName1 + "-restored"; + + createRepository(snapshotRepoName, "fs", getRepositorySettings(location, basePath, true)); + + Client client = client(); + Settings indexSettings = Settings.builder() + .put(super.indexSettings()) 
+ .put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), "300s") + .put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .build(); + createIndex(indexName1, indexSettings); + + int numDocsInIndex1 = randomIntBetween(2, 5); + indexDocuments(client, indexName1, numDocsInIndex1); + + ensureGreen(indexName1); + + logger.info("--> snapshot"); + SnapshotInfo snapshotInfo1 = createSnapshot(snapshotRepoName, snapshotName1, new ArrayList<>(List.of(indexName1))); + assertThat(snapshotInfo1.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo1.successfulShards(), equalTo(snapshotInfo1.totalShards())); + assertThat(snapshotInfo1.state(), equalTo(SnapshotState.SUCCESS)); + + client().admin().indices().close(Requests.closeIndexRequest(indexName1)).get(); + createRepository(remoteStoreRepoNameUpdated, "fs", remoteRepoPath); + RestoreSnapshotResponse restoreSnapshotResponse2 = client.admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepoName, snapshotName1) + .setWaitForCompletion(true) + .setIndices(indexName1) + .setRenamePattern(indexName1) + .setRenameReplacement(restoredIndexName1) + .setSourceRemoteStoreRepository(remoteStoreRepoNameUpdated) + .get(); + + assertTrue(restoreSnapshotResponse2.getRestoreInfo().failedShards() == 0); + ensureGreen(restoredIndexName1); + assertDocsPresentInIndex(client, restoredIndexName1, numDocsInIndex1); + + // ensure recovery details are non-zero + RecoveryResponse recoveryResponse = client().admin().indices().prepareRecoveries(restoredIndexName1).execute().actionGet(); + assertEquals(1, recoveryResponse.getTotalShards()); + assertEquals(1, recoveryResponse.getSuccessfulShards()); + assertEquals(0, recoveryResponse.getFailedShards()); + assertEquals(1, recoveryResponse.shardRecoveryStates().size()); + assertTrue(recoveryResponse.shardRecoveryStates().containsKey(restoredIndexName1)); + assertEquals(1, recoveryResponse.shardRecoveryStates().get(restoredIndexName1).size()); + + RecoveryState recoveryState = recoveryResponse.shardRecoveryStates().get(restoredIndexName1).get(0); + assertEquals(RecoveryState.Stage.DONE, recoveryState.getStage()); + assertEquals(0, recoveryState.getShardId().getId()); + assertTrue(recoveryState.getPrimary()); + assertEquals(RecoverySource.Type.SNAPSHOT, recoveryState.getRecoverySource().getType()); + assertThat(recoveryState.getIndex().time(), greaterThanOrEqualTo(0L)); + + // ensure populated file details + assertTrue(recoveryState.getIndex().totalFileCount() > 0); + assertTrue(recoveryState.getIndex().totalRecoverFiles() > 0); + assertTrue(recoveryState.getIndex().recoveredFileCount() > 0); + assertThat(recoveryState.getIndex().recoveredFilesPercent(), greaterThanOrEqualTo(0.0f)); + assertThat(recoveryState.getIndex().recoveredFilesPercent(), lessThanOrEqualTo(100.0f)); + assertFalse(recoveryState.getIndex().fileDetails().isEmpty()); + + // ensure populated bytes details + assertTrue(recoveryState.getIndex().recoveredBytes() > 0L); + assertTrue(recoveryState.getIndex().totalBytes() > 0L); + assertTrue(recoveryState.getIndex().totalRecoverBytes() > 0L); + assertThat(recoveryState.getIndex().recoveredBytesPercent(), greaterThanOrEqualTo(0.0f)); + assertThat(recoveryState.getIndex().recoveredBytesPercent(), lessThanOrEqualTo(100.0f)); + + // indexing some new docs and validating + indexDocuments(client, restoredIndexName1, numDocsInIndex1, numDocsInIndex1 + 2); + ensureGreen(restoredIndexName1); + 
assertDocsPresentInIndex(client, restoredIndexName1, numDocsInIndex1 + 2); + } + + public void testRestoreShallowSnapshotIndexAfterSnapshot() throws ExecutionException, InterruptedException { + String indexName1 = "testindex1"; + String snapshotRepoName = "test-restore-snapshot-repo"; + String remoteStoreRepoNameUpdated = "test-rs-repo-updated" + TEST_REMOTE_STORE_REPO_SUFFIX; + String snapshotName1 = "test-restore-snapshot1"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + Path absolutePath2 = randomRepoPath().toAbsolutePath(); + String[] pathTokens = absolutePath1.toString().split("/"); + String basePath = pathTokens[pathTokens.length - 1]; + Arrays.copyOf(pathTokens, pathTokens.length - 1); + Path location = PathUtils.get(String.join("/", pathTokens)); + pathTokens = absolutePath2.toString().split("/"); + String basePath2 = pathTokens[pathTokens.length - 1]; + Arrays.copyOf(pathTokens, pathTokens.length - 1); + Path location2 = PathUtils.get(String.join("/", pathTokens)); + logger.info("Path 1 [{}]", absolutePath1); + logger.info("Path 2 [{}]", absolutePath2); + String restoredIndexName1 = indexName1 + "-restored"; + + createRepository(snapshotRepoName, "fs", getRepositorySettings(location, basePath, true)); + + Client client = client(); + Settings indexSettings = Settings.builder() + .put(super.indexSettings()) + .put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .build(); + createIndex(indexName1, indexSettings); + + int numDocsInIndex1 = randomIntBetween(2, 5); + indexDocuments(client, indexName1, numDocsInIndex1); + + ensureGreen(indexName1); + + logger.info("--> snapshot"); + SnapshotInfo snapshotInfo1 = createSnapshot(snapshotRepoName, snapshotName1, new ArrayList<>(List.of(indexName1))); + assertThat(snapshotInfo1.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo1.successfulShards(), equalTo(snapshotInfo1.totalShards())); + assertThat(snapshotInfo1.state(), equalTo(SnapshotState.SUCCESS)); + + int extraNumDocsInIndex1 = randomIntBetween(20, 50); + indexDocuments(client, indexName1, extraNumDocsInIndex1); + refresh(indexName1); + + client().admin().indices().close(Requests.closeIndexRequest(indexName1)).get(); + createRepository(remoteStoreRepoNameUpdated, "fs", remoteRepoPath); + RestoreSnapshotResponse restoreSnapshotResponse2 = client.admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepoName, snapshotName1) + .setWaitForCompletion(true) + .setIndices(indexName1) + .setRenamePattern(indexName1) + .setRenameReplacement(restoredIndexName1) + .setSourceRemoteStoreRepository(remoteStoreRepoNameUpdated) + .get(); + + assertTrue(restoreSnapshotResponse2.getRestoreInfo().failedShards() == 0); + ensureGreen(restoredIndexName1); + assertDocsPresentInIndex(client, restoredIndexName1, numDocsInIndex1); + + // indexing some new docs and validating + indexDocuments(client, restoredIndexName1, numDocsInIndex1, numDocsInIndex1 + 2); + ensureGreen(restoredIndexName1); + assertDocsPresentInIndex(client, restoredIndexName1, numDocsInIndex1 + 2); + } + + public void testInvalidRestoreRequestScenarios() throws Exception { + internalCluster().startClusterManagerOnlyNode(); + internalCluster().startDataOnlyNode(); + String index = "test-index"; + String snapshotRepo = "test-restore-snapshot-repo"; + String newRemoteStoreRepo = "test-new-rs-repo"; + String snapshotName1 = "test-restore-snapshot1"; + String snapshotName2 = "test-restore-snapshot2"; + 
Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + String restoredIndex = index + "-restored"; + + createRepository(snapshotRepo, "fs", getRepositorySettings(absolutePath1, true)); + + Client client = client(); + Settings indexSettings = getIndexSettings(1, 0).build(); + createIndex(index, indexSettings); + + final int numDocsInIndex = 5; + indexDocuments(client, index, numDocsInIndex); + ensureGreen(index); + + internalCluster().startDataOnlyNode(); + logger.info("--> snapshot"); + + SnapshotInfo snapshotInfo = createSnapshot(snapshotRepo, snapshotName1, new ArrayList<>(List.of(index))); + assertThat(snapshotInfo.state(), equalTo(SnapshotState.SUCCESS)); + assertThat(snapshotInfo.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo.successfulShards(), equalTo(snapshotInfo.totalShards())); + + updateRepository(snapshotRepo, "fs", getRepositorySettings(absolutePath1, false)); + SnapshotInfo snapshotInfo2 = createSnapshot(snapshotRepo, snapshotName2, new ArrayList<>(List.of(index))); + assertThat(snapshotInfo2.state(), equalTo(SnapshotState.SUCCESS)); + assertThat(snapshotInfo2.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo2.successfulShards(), equalTo(snapshotInfo2.totalShards())); + + DeleteResponse deleteResponse = client().prepareDelete(index, "0").execute().actionGet(); + assertEquals(deleteResponse.getResult(), DocWriteResponse.Result.DELETED); + indexDocuments(client, index, numDocsInIndex, numDocsInIndex + randomIntBetween(2, 5)); + ensureGreen(index); + + // try index restore with remote store disabled + SnapshotRestoreException exception = expectThrows( + SnapshotRestoreException.class, + () -> client().admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepo, snapshotName1) + .setWaitForCompletion(false) + .setIgnoreIndexSettings(SETTING_REMOTE_STORE_ENABLED) + .setIndices(index) + .setRenamePattern(index) + .setRenameReplacement(restoredIndex) + .get() + ); + assertTrue(exception.getMessage().contains("cannot remove setting [index.remote_store.enabled] on restore")); + + // try index restore with remote store repository modified + Settings remoteStoreIndexSettings = Settings.builder() + .put(IndexMetadata.SETTING_REMOTE_SEGMENT_STORE_REPOSITORY, newRemoteStoreRepo) + .build(); + + exception = expectThrows( + SnapshotRestoreException.class, + () -> client().admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepo, snapshotName1) + .setWaitForCompletion(false) + .setIndexSettings(remoteStoreIndexSettings) + .setIndices(index) + .setRenamePattern(index) + .setRenameReplacement(restoredIndex) + .get() + ); + assertTrue(exception.getMessage().contains("cannot modify setting [index.remote_store.segment.repository]" + " on restore")); + + // try index restore with remote store repository and translog store repository disabled + exception = expectThrows( + SnapshotRestoreException.class, + () -> client().admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepo, snapshotName1) + .setWaitForCompletion(false) + .setIgnoreIndexSettings( + IndexMetadata.SETTING_REMOTE_SEGMENT_STORE_REPOSITORY, + IndexMetadata.SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY + ) + .setIndices(index) + .setRenamePattern(index) + .setRenameReplacement(restoredIndex) + .get() + ); + assertTrue(exception.getMessage().contains("cannot remove setting [index.remote_store.segment.repository]" + " on restore")); + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/RemoteStoreMultipartIT.java 
b/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/RemoteStoreMultipartIT.java index 3dfde6f472525..99cc58848a04a 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/RemoteStoreMultipartIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/RemoteStoreMultipartIT.java @@ -31,7 +31,6 @@ import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT; -import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; @@ -119,19 +118,12 @@ public void testRateLimitedRemoteUploads() throws Exception { internalCluster().startNode(clusterSettings.build()); Client client = client(); logger.info("--> updating repository"); - assertAcked( - client.admin() - .cluster() - .preparePutRepository(REPOSITORY_NAME) - .setType(MockFsRepositoryPlugin.TYPE) - .setSettings( - Settings.builder() - .put("location", repositoryLocation) - .put("compress", compress) - .put("max_remote_upload_bytes_per_sec", "1kb") - .put("chunk_size", 100, ByteSizeUnit.BYTES) - ) - ); + Settings.Builder settings = Settings.builder() + .put("location", repositoryLocation) + .put("compress", compress) + .put("max_remote_upload_bytes_per_sec", "1kb") + .put("chunk_size", 100, ByteSizeUnit.BYTES); + createRepository(REPOSITORY_NAME, MockFsRepositoryPlugin.TYPE, settings); createIndex(INDEX_NAME, remoteStoreIndexSettings(0)); ensureGreen(); diff --git a/server/src/internalClusterTest/java/org/opensearch/repositories/RepositoriesServiceIT.java b/server/src/internalClusterTest/java/org/opensearch/repositories/RepositoriesServiceIT.java index b8415f4b41815..e4347fef744ab 100644 --- a/server/src/internalClusterTest/java/org/opensearch/repositories/RepositoriesServiceIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/repositories/RepositoriesServiceIT.java @@ -45,7 +45,6 @@ import java.util.Collection; import java.util.Collections; -import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.instanceOf; @@ -70,9 +69,12 @@ public void testUpdateRepository() { .next(); final Settings.Builder repoSettings = Settings.builder().put("location", randomRepoPath()); - - assertAcked( - client.admin().cluster().preparePutRepository(repositoryName).setType(FsRepository.TYPE).setSettings(repoSettings).get() + OpenSearchIntegTestCase.putRepositoryWithNoSettingOverrides( + client().admin().cluster(), + repositoryName, + FsRepository.TYPE, + true, + repoSettings ); final GetRepositoriesResponse originalGetRepositoriesResponse = client.admin() @@ -91,8 +93,12 @@ public void testUpdateRepository() { final boolean updated = randomBoolean(); final String updatedRepositoryType = updated ? 
"mock" : FsRepository.TYPE; - assertAcked( - client.admin().cluster().preparePutRepository(repositoryName).setType(updatedRepositoryType).setSettings(repoSettings).get() + OpenSearchIntegTestCase.putRepositoryWithNoSettingOverrides( + client().admin().cluster(), + repositoryName, + updatedRepositoryType, + true, + repoSettings ); final GetRepositoriesResponse updatedGetRepositoriesResponse = client.admin() @@ -112,12 +118,8 @@ public void testUpdateRepository() { public void testSystemRepositoryCantBeCreated() { internalCluster(); final String repositoryName = "test-repo"; - final Client client = client(); final Settings.Builder repoSettings = Settings.builder().put("system_repository", true).put("location", randomRepoPath()); - assertThrows( - RepositoryException.class, - () -> client.admin().cluster().preparePutRepository(repositoryName).setType(FsRepository.TYPE).setSettings(repoSettings).get() - ); + assertThrows(RepositoryException.class, () -> createRepository(repositoryName, FsRepository.TYPE, repoSettings)); } } diff --git a/server/src/internalClusterTest/java/org/opensearch/repositories/fs/FsBlobStoreRepositoryIT.java b/server/src/internalClusterTest/java/org/opensearch/repositories/fs/FsBlobStoreRepositoryIT.java index 9057ef900efbd..34075b78e98af 100644 --- a/server/src/internalClusterTest/java/org/opensearch/repositories/fs/FsBlobStoreRepositoryIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/repositories/fs/FsBlobStoreRepositoryIT.java @@ -76,19 +76,11 @@ public void testMissingDirectoriesNotCreatedInReadonlyRepository() throws IOExce final Path repoPath = randomRepoPath(); logger.info("--> creating repository {} at {}", repoName, repoPath); - - assertAcked( - client().admin() - .cluster() - .preparePutRepository(repoName) - .setType("fs") - .setSettings( - Settings.builder() - .put("location", repoPath) - .put("compress", randomBoolean()) - .put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES) - ) - ); + Settings.Builder settings = Settings.builder() + .put("location", repoPath) + .put("compress", randomBoolean()) + .put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES); + createRepository(repoName, "fs", settings); final String indexName = randomName(); int docCount = iterations(10, 1000); @@ -112,14 +104,7 @@ public void testMissingDirectoriesNotCreatedInReadonlyRepository() throws IOExce IOUtils.rm(deletedPath); } assertFalse(Files.exists(deletedPath)); - - assertAcked( - client().admin() - .cluster() - .preparePutRepository(repoName) - .setType("fs") - .setSettings(Settings.builder().put("location", repoPath).put("readonly", true)) - ); + createRepository(repoName, "fs", Settings.builder().put("location", repoPath).put("readonly", true)); final OpenSearchException exception = expectThrows( OpenSearchException.class, diff --git a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/AggregationsIntegrationIT.java b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/AggregationsIntegrationIT.java index 4051bee3e4e5c..01f3b434bc4dc 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/AggregationsIntegrationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/AggregationsIntegrationIT.java @@ -57,7 +57,10 @@ import java.util.Collection; import java.util.List; -import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING; +import static 
org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_ALL; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_AUTO; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_NONE; import static org.opensearch.search.aggregations.AggregationBuilders.global; import static org.opensearch.search.aggregations.AggregationBuilders.stats; import static org.opensearch.search.aggregations.AggregationBuilders.terms; @@ -81,8 +84,12 @@ public AggregationsIntegrationIT(Settings staticSettings) { @ParametersFactory public static Collection parameters() { return Arrays.asList( - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), false).build() }, - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), true).build() } + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_ALL).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_AUTO).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_NONE).build() } ); } diff --git a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/CombiIT.java b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/CombiIT.java index 1826dd69cd804..4ce74f8195da9 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/CombiIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/CombiIT.java @@ -51,7 +51,10 @@ import java.util.Map; import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; -import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING; +import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_ALL; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_AUTO; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_NONE; import static org.opensearch.search.aggregations.AggregationBuilders.histogram; import static org.opensearch.search.aggregations.AggregationBuilders.missing; import static org.opensearch.search.aggregations.AggregationBuilders.terms; @@ -69,8 +72,12 @@ public CombiIT(Settings staticSettings) { @ParametersFactory public static Collection parameters() { return Arrays.asList( - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), false).build() }, - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), true).build() } + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_ALL).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_AUTO).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_NONE).build() } ); } diff --git a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/EquivalenceIT.java b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/EquivalenceIT.java index 
302ec3116d187..2fcf09d6ebf50 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/EquivalenceIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/EquivalenceIT.java @@ -72,7 +72,10 @@ import java.util.function.Function; import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; -import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING; +import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_ALL; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_AUTO; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_NONE; import static org.opensearch.search.aggregations.AggregationBuilders.extendedStats; import static org.opensearch.search.aggregations.AggregationBuilders.filter; import static org.opensearch.search.aggregations.AggregationBuilders.histogram; @@ -102,8 +105,12 @@ public EquivalenceIT(Settings staticSettings) { @ParametersFactory public static Collection parameters() { return Arrays.asList( - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), false).build() }, - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), true).build() } + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_ALL).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_AUTO).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_NONE).build() } ); } diff --git a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/MetadataIT.java b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/MetadataIT.java index b650855083eed..ca13b4de00156 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/MetadataIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/MetadataIT.java @@ -49,7 +49,10 @@ import java.util.Map; import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; -import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING; +import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_ALL; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_AUTO; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_NONE; import static org.opensearch.search.aggregations.AggregationBuilders.sum; import static org.opensearch.search.aggregations.AggregationBuilders.terms; import static org.opensearch.search.aggregations.PipelineAggregatorBuilders.maxBucket; @@ -65,8 +68,12 @@ public MetadataIT(Settings staticSettings) { @ParametersFactory public static Collection parameters() { return Arrays.asList( - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), false).build() }, - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), true).build() } + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_ALL).build() }, + 
new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_AUTO).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_NONE).build() } ); } diff --git a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/MissingValueIT.java b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/MissingValueIT.java index bdd16c7e74dc0..1310792e2f2e4 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/MissingValueIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/MissingValueIT.java @@ -51,7 +51,10 @@ import java.util.Arrays; import java.util.Collection; -import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING; +import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_ALL; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_AUTO; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_NONE; import static org.opensearch.search.aggregations.AggregationBuilders.cardinality; import static org.opensearch.search.aggregations.AggregationBuilders.dateHistogram; import static org.opensearch.search.aggregations.AggregationBuilders.geoCentroid; @@ -73,8 +76,12 @@ public MissingValueIT(Settings staticSettings) { @ParametersFactory public static Collection parameters() { return Arrays.asList( - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), false).build() }, - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), true).build() } + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_ALL).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_AUTO).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_NONE).build() } ); } diff --git a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/AvgBucketIT.java b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/AvgBucketIT.java index 48fd06bac285b..7e1cae8e5b628 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/AvgBucketIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/AvgBucketIT.java @@ -53,7 +53,10 @@ import java.util.List; import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; -import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING; +import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_ALL; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_AUTO; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_NONE; import static org.opensearch.search.aggregations.AggregationBuilders.histogram; import static org.opensearch.search.aggregations.AggregationBuilders.sum; import static org.opensearch.search.aggregations.AggregationBuilders.terms; @@ -83,8 +86,12 @@ public 
AvgBucketIT(Settings staticSettings) { @ParametersFactory public static Collection parameters() { return Arrays.asList( - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), false).build() }, - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), true).build() } + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_ALL).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_AUTO).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_NONE).build() } ); } diff --git a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/BucketScriptIT.java b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/BucketScriptIT.java index 1b22cf2018d96..204aaa764849f 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/BucketScriptIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/BucketScriptIT.java @@ -63,7 +63,10 @@ import java.util.function.Function; import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; -import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING; +import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_ALL; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_AUTO; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_NONE; import static org.opensearch.search.aggregations.AggregationBuilders.dateRange; import static org.opensearch.search.aggregations.AggregationBuilders.histogram; import static org.opensearch.search.aggregations.AggregationBuilders.sum; @@ -96,8 +99,12 @@ public BucketScriptIT(Settings staticSettings) { @ParametersFactory public static Collection parameters() { return Arrays.asList( - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), false).build() }, - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), true).build() } + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_ALL).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_AUTO).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_NONE).build() } ); } diff --git a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/BucketSelectorIT.java b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/BucketSelectorIT.java index 7dca1d0d79b1e..7677cfdac3e29 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/BucketSelectorIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/BucketSelectorIT.java @@ -62,7 +62,10 @@ import java.util.function.Function; import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; -import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING; +import static 
org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_ALL; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_AUTO; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_NONE; import static org.opensearch.search.aggregations.AggregationBuilders.histogram; import static org.opensearch.search.aggregations.AggregationBuilders.sum; import static org.opensearch.search.aggregations.PipelineAggregatorBuilders.bucketSelector; @@ -95,8 +98,12 @@ public BucketSelectorIT(Settings staticSettings) { @ParametersFactory public static Collection parameters() { return Arrays.asList( - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), false).build() }, - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), true).build() } + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_ALL).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_AUTO).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_NONE).build() } ); } diff --git a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/BucketSortIT.java b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/BucketSortIT.java index ffb607866935b..cd489e5b982e6 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/BucketSortIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/BucketSortIT.java @@ -58,7 +58,10 @@ import java.util.List; import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; -import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING; +import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_ALL; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_AUTO; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_NONE; import static org.opensearch.search.aggregations.AggregationBuilders.avg; import static org.opensearch.search.aggregations.AggregationBuilders.dateHistogram; import static org.opensearch.search.aggregations.AggregationBuilders.histogram; @@ -91,8 +94,12 @@ public BucketSortIT(Settings staticSettings) { @ParametersFactory public static Collection parameters() { return Arrays.asList( - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), false).build() }, - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), true).build() } + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_ALL).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_AUTO).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_NONE).build() } ); } diff --git a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/DateDerivativeIT.java 
b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/DateDerivativeIT.java index 8c89c1232ebb3..fc4c76f513f1d 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/DateDerivativeIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/DateDerivativeIT.java @@ -63,7 +63,10 @@ import java.util.List; import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; -import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING; +import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_ALL; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_AUTO; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_NONE; import static org.opensearch.search.aggregations.AggregationBuilders.dateHistogram; import static org.opensearch.search.aggregations.AggregationBuilders.sum; import static org.opensearch.search.aggregations.PipelineAggregatorBuilders.derivative; @@ -89,8 +92,12 @@ public DateDerivativeIT(Settings staticSettings) { @ParametersFactory public static Collection parameters() { return Arrays.asList( - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), false).build() }, - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), true).build() } + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_ALL).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_AUTO).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_NONE).build() } ); } diff --git a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/DerivativeIT.java b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/DerivativeIT.java index f8def40ec003a..6fabbb32a4d15 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/DerivativeIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/DerivativeIT.java @@ -61,7 +61,10 @@ import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; import static org.opensearch.index.query.QueryBuilders.matchAllQuery; -import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING; +import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_ALL; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_AUTO; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_NONE; import static org.opensearch.search.aggregations.AggregationBuilders.avg; import static org.opensearch.search.aggregations.AggregationBuilders.filters; import static org.opensearch.search.aggregations.AggregationBuilders.histogram; @@ -106,8 +109,12 @@ public DerivativeIT(Settings dynamicSettings) { @ParametersFactory public static Collection parameters() { return Arrays.asList( - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), false).build() }, - new Object[] { 
Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), true).build() } + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_ALL).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_AUTO).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_NONE).build() } ); } diff --git a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/ExtendedStatsBucketIT.java b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/ExtendedStatsBucketIT.java index 1bd04cc13268f..06d46e80f9710 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/ExtendedStatsBucketIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/ExtendedStatsBucketIT.java @@ -56,7 +56,10 @@ import java.util.List; import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; -import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING; +import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_ALL; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_AUTO; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_NONE; import static org.opensearch.search.aggregations.AggregationBuilders.histogram; import static org.opensearch.search.aggregations.AggregationBuilders.sum; import static org.opensearch.search.aggregations.AggregationBuilders.terms; @@ -86,8 +89,12 @@ public ExtendedStatsBucketIT(Settings staticSettings) { @ParametersFactory public static Collection parameters() { return Arrays.asList( - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), false).build() }, - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), true).build() } + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_ALL).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_AUTO).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_NONE).build() } ); } diff --git a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/MaxBucketIT.java b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/MaxBucketIT.java index ea6fcbd6a1560..a8bd9bc4eadcd 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/MaxBucketIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/MaxBucketIT.java @@ -66,7 +66,10 @@ import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; import static org.opensearch.index.query.QueryBuilders.termQuery; -import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING; +import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_ALL; +import static 
org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_AUTO; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_NONE; import static org.opensearch.search.aggregations.AggregationBuilders.filter; import static org.opensearch.search.aggregations.AggregationBuilders.histogram; import static org.opensearch.search.aggregations.AggregationBuilders.sum; @@ -97,8 +100,12 @@ public MaxBucketIT(Settings staticSettings) { @ParametersFactory public static Collection parameters() { return Arrays.asList( - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), false).build() }, - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), true).build() } + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_ALL).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_AUTO).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_NONE).build() } ); } diff --git a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/MinBucketIT.java b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/MinBucketIT.java index 44d12436382f6..290583ce49a5e 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/MinBucketIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/MinBucketIT.java @@ -53,7 +53,10 @@ import java.util.List; import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; -import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING; +import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_ALL; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_AUTO; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_NONE; import static org.opensearch.search.aggregations.AggregationBuilders.histogram; import static org.opensearch.search.aggregations.AggregationBuilders.sum; import static org.opensearch.search.aggregations.AggregationBuilders.terms; @@ -83,8 +86,12 @@ public MinBucketIT(Settings staticSettings) { @ParametersFactory public static Collection parameters() { return Arrays.asList( - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), false).build() }, - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), true).build() } + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_ALL).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_AUTO).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_NONE).build() } ); } diff --git a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/MovAvgIT.java b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/MovAvgIT.java index d35b80b7918fe..e481e48cf6188 100644 --- 
a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/MovAvgIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/MovAvgIT.java @@ -61,7 +61,10 @@ import java.util.Map; import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; -import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING; +import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_ALL; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_AUTO; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_NONE; import static org.opensearch.search.aggregations.AggregationBuilders.avg; import static org.opensearch.search.aggregations.AggregationBuilders.histogram; import static org.opensearch.search.aggregations.AggregationBuilders.max; @@ -139,8 +142,12 @@ public MovAvgIT(Settings staticSettings) { @ParametersFactory public static Collection parameters() { return Arrays.asList( - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), false).build() }, - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), true).build() } + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_ALL).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_AUTO).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_NONE).build() } ); } diff --git a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/PercentilesBucketIT.java b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/PercentilesBucketIT.java index 29cb334bfcd00..592a151781048 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/PercentilesBucketIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/PercentilesBucketIT.java @@ -56,7 +56,10 @@ import java.util.List; import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; -import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING; +import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_ALL; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_AUTO; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_NONE; import static org.opensearch.search.aggregations.AggregationBuilders.histogram; import static org.opensearch.search.aggregations.AggregationBuilders.sum; import static org.opensearch.search.aggregations.AggregationBuilders.terms; @@ -86,8 +89,12 @@ public PercentilesBucketIT(Settings staticSettings) { @ParametersFactory public static Collection parameters() { return Arrays.asList( - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), false).build() }, - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), true).build() } + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), 
CONCURRENT_SEGMENT_SEARCH_MODE_ALL).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_AUTO).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_NONE).build() } ); } diff --git a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/SerialDiffIT.java b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/SerialDiffIT.java index 507bff51f0e39..2a412bc0f7720 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/SerialDiffIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/SerialDiffIT.java @@ -54,7 +54,10 @@ import java.util.Map; import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; -import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING; +import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_ALL; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_AUTO; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_NONE; import static org.opensearch.search.aggregations.AggregationBuilders.avg; import static org.opensearch.search.aggregations.AggregationBuilders.histogram; import static org.opensearch.search.aggregations.AggregationBuilders.max; @@ -104,8 +107,12 @@ public SerialDiffIT(Settings staticSettings) { @ParametersFactory public static Collection parameters() { return Arrays.asList( - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), false).build() }, - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), true).build() } + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_ALL).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_AUTO).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_NONE).build() } ); } diff --git a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/StatsBucketIT.java b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/StatsBucketIT.java index fbaf799871c8a..4e52e6e706324 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/StatsBucketIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/StatsBucketIT.java @@ -53,7 +53,10 @@ import java.util.List; import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; -import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING; +import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_ALL; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_AUTO; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_NONE; import static org.opensearch.search.aggregations.AggregationBuilders.histogram; import static org.opensearch.search.aggregations.AggregationBuilders.sum; 
import static org.opensearch.search.aggregations.AggregationBuilders.terms; @@ -82,8 +85,12 @@ public StatsBucketIT(Settings staticSettings) { @ParametersFactory public static Collection parameters() { return Arrays.asList( - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), false).build() }, - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), true).build() } + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_ALL).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_AUTO).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_NONE).build() } ); } diff --git a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/SumBucketIT.java b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/SumBucketIT.java index a5967124ff921..55652877d3f4b 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/SumBucketIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/aggregations/pipeline/SumBucketIT.java @@ -53,7 +53,10 @@ import java.util.List; import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; -import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING; +import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_ALL; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_AUTO; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_NONE; import static org.opensearch.search.aggregations.AggregationBuilders.histogram; import static org.opensearch.search.aggregations.AggregationBuilders.sum; import static org.opensearch.search.aggregations.AggregationBuilders.terms; @@ -83,8 +86,12 @@ public SumBucketIT(Settings staticSettings) { @ParametersFactory public static Collection parameters() { return Arrays.asList( - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), false).build() }, - new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), true).build() } + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_ALL).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_AUTO).build() }, + new Object[] { + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_NONE).build() } ); } diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/CloneSnapshotIT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/CloneSnapshotIT.java index 83f93ab9ff6b5..ddbe0520f0c1f 100644 --- a/server/src/internalClusterTest/java/org/opensearch/snapshots/CloneSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/CloneSnapshotIT.java @@ -64,7 +64,6 @@ import java.util.List; import java.util.concurrent.ExecutionException; -import static org.opensearch.remotestore.RemoteStoreBaseIntegTestCase.remoteStoreClusterSettings; import static 
org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.hasSize; @@ -127,7 +126,7 @@ public void testShardClone() throws Exception { } public void testCloneSnapshotIndex() throws Exception { - internalCluster().startClusterManagerOnlyNode(); + internalCluster().startClusterManagerOnlyNode(LARGE_SNAPSHOT_POOL_SETTINGS); internalCluster().startDataOnlyNode(); final String repoName = "repo-name"; createRepository(repoName, "fs"); @@ -318,7 +317,7 @@ public void testClonePreventsSnapshotDelete() throws Exception { indexRandomDocs(indexName, randomIntBetween(20, 100)); final String targetSnapshot = "target-snapshot"; - blockNodeOnAnyFiles(repoName, clusterManagerName); + blockClusterManagerOnWriteIndexFile(repoName); final ActionFuture cloneFuture = startClone(repoName, sourceSnapshot, targetSnapshot, indexName); waitForBlock(clusterManagerName, repoName, TimeValue.timeValueSeconds(30L)); assertFalse(cloneFuture.isDone()); @@ -426,7 +425,7 @@ public void testLongRunningSnapshotAllowsConcurrentClone() throws Exception { } public void testDeletePreventsClone() throws Exception { - final String clusterManagerName = internalCluster().startClusterManagerOnlyNode(); + final String clusterManagerName = internalCluster().startClusterManagerOnlyNode(LARGE_SNAPSHOT_POOL_SETTINGS); internalCluster().startDataOnlyNode(); final String repoName = "repo-name"; createRepository(repoName, "mock"); @@ -439,7 +438,7 @@ public void testDeletePreventsClone() throws Exception { indexRandomDocs(indexName, randomIntBetween(20, 100)); final String targetSnapshot = "target-snapshot"; - blockNodeOnAnyFiles(repoName, clusterManagerName); + blockClusterManagerOnWriteIndexFile(repoName); final ActionFuture deleteFuture = startDeleteSnapshot(repoName, sourceSnapshot); waitForBlock(clusterManagerName, repoName, TimeValue.timeValueSeconds(30L)); assertFalse(deleteFuture.isDone()); @@ -591,7 +590,7 @@ public void testClusterManagerFailoverDuringCloneStep2() throws Exception { public void testExceptionDuringShardClone() throws Exception { // large snapshot pool so blocked snapshot threads from cloning don't prevent concurrent snapshot finalizations - internalCluster().startClusterManagerOnlyNodes(3, LARGE_SNAPSHOT_POOL_SETTINGS); + internalCluster().startClusterManagerOnlyNode(LARGE_SNAPSHOT_POOL_SETTINGS); internalCluster().startDataOnlyNode(); final String repoName = "test-repo"; createRepository(repoName, "mock"); @@ -602,7 +601,7 @@ public void testExceptionDuringShardClone() throws Exception { createFullSnapshot(repoName, sourceSnapshot); final String targetSnapshot = "target-snapshot"; - blockClusterManagerFromFinalizingSnapshotOnSnapFile(repoName); + blockClusterManagerFromFinalizingSnapshotOnIndexFile(repoName); final ActionFuture cloneFuture = startCloneFromDataNode(repoName, sourceSnapshot, targetSnapshot, testIndex); awaitNumberOfSnapshotsInProgress(1); final String clusterManagerNode = internalCluster().getClusterManagerName(); @@ -680,7 +679,7 @@ public void testStartSnapshotWithSuccessfulShardClonePendingFinalization() throw } public void testStartCloneWithSuccessfulShardClonePendingFinalization() throws Exception { - final String clusterManagerName = internalCluster().startClusterManagerOnlyNode(); + final String clusterManagerName = internalCluster().startClusterManagerOnlyNode(LARGE_SNAPSHOT_POOL_SETTINGS); internalCluster().startDataOnlyNode(); final String repoName = "test-repo"; createRepository(repoName, "mock"); 
diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/CloneSnapshotV2IT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/CloneSnapshotV2IT.java new file mode 100644 index 0000000000000..c6744ae62db60 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/CloneSnapshotV2IT.java @@ -0,0 +1,438 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +package org.opensearch.snapshots; + +import org.opensearch.action.ActionRunnable; +import org.opensearch.action.DocWriteResponse; +import org.opensearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; +import org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse; +import org.opensearch.action.delete.DeleteResponse; +import org.opensearch.action.support.PlainActionFuture; +import org.opensearch.action.support.master.AcknowledgedResponse; +import org.opensearch.client.Client; +import org.opensearch.common.settings.Settings; +import org.opensearch.core.common.unit.ByteSizeUnit; +import org.opensearch.core.rest.RestStatus; +import org.opensearch.indices.RemoteStoreSettings; +import org.opensearch.repositories.RepositoriesService; +import org.opensearch.repositories.RepositoryData; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.repositories.fs.FsRepository; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.nio.file.Path; + +import static org.opensearch.remotestore.RemoteStoreBaseIntegTestCase.remoteStoreClusterSettings; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +public class CloneSnapshotV2IT extends AbstractSnapshotIntegTestCase { + + public void testCloneShallowCopyV2() throws Exception { + disableRepoConsistencyCheck("Remote store repository is being used in the test"); + final Path remoteStoreRepoPath = randomRepoPath(); + internalCluster().startClusterManagerOnlyNode(snapshotV2Settings(remoteStoreRepoPath)); + internalCluster().startDataOnlyNode(snapshotV2Settings(remoteStoreRepoPath)); + internalCluster().startDataOnlyNode(snapshotV2Settings(remoteStoreRepoPath)); + + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String indexName3 = "testindex3"; + String snapshotRepoName = 
"test-clone-snapshot-repo"; + String snapshotName1 = "test-create-snapshot1"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + Client client = client(); + + assertAcked( + client.admin() + .cluster() + .preparePutRepository(snapshotRepoName) + .setType(FsRepository.TYPE) + .setSettings( + Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true) + ) + ); + + createIndex(indexName1, getRemoteStoreBackedIndexSettings()); + createIndex(indexName2, getRemoteStoreBackedIndexSettings()); + + final int numDocsInIndex1 = 10; + final int numDocsInIndex2 = 20; + indexRandomDocs(indexName1, numDocsInIndex1); + indexRandomDocs(indexName2, numDocsInIndex2); + ensureGreen(indexName1, indexName2); + + CreateSnapshotResponse createSnapshotResponse = client().admin() + .cluster() + .prepareCreateSnapshot(snapshotRepoName, snapshotName1) + .setWaitForCompletion(true) + .get(); + SnapshotInfo sourceSnapshotInfo = createSnapshotResponse.getSnapshotInfo(); + assertThat(sourceSnapshotInfo.state(), equalTo(SnapshotState.SUCCESS)); + assertThat(sourceSnapshotInfo.successfulShards(), greaterThan(0)); + assertThat(sourceSnapshotInfo.successfulShards(), equalTo(sourceSnapshotInfo.totalShards())); + assertThat(sourceSnapshotInfo.snapshotId().getName(), equalTo(snapshotName1)); + + // Validate that the snapshot was created + final BlobStoreRepository repository = (BlobStoreRepository) internalCluster().getCurrentClusterManagerNodeInstance( + RepositoriesService.class + ).repository(snapshotRepoName); + PlainActionFuture repositoryDataPlainActionFuture = new PlainActionFuture<>(); + repository.getRepositoryData(repositoryDataPlainActionFuture); + + RepositoryData repositoryData = repositoryDataPlainActionFuture.get(); + + assertTrue(repositoryData.getSnapshotIds().contains(sourceSnapshotInfo.snapshotId())); + + createIndex(indexName3, getRemoteStoreBackedIndexSettings()); + indexRandomDocs(indexName3, 10); + ensureGreen(indexName3); + + AcknowledgedResponse response = client().admin() + .cluster() + .prepareCloneSnapshot(snapshotRepoName, snapshotName1, "test_clone_snapshot1") + .setIndices("*") + .get(); + assertTrue(response.isAcknowledged()); + awaitClusterManagerFinishRepoOperations(); + + // Validate that snapshot is present in repository data + PlainActionFuture repositoryDataPlainActionFutureClone = new PlainActionFuture<>(); + repository.getRepositoryData(repositoryDataPlainActionFutureClone); + + repositoryData = repositoryDataPlainActionFutureClone.get(); + assertEquals(repositoryData.getSnapshotIds().size(), 2); + boolean foundCloneInRepoData = false; + SnapshotId cloneSnapshotId = null; + for (SnapshotId snapshotId : repositoryData.getSnapshotIds()) { + if (snapshotId.getName().equals("test_clone_snapshot1")) { + foundCloneInRepoData = true; + cloneSnapshotId = snapshotId; + } + } + final SnapshotId cloneSnapshotIdFinal = cloneSnapshotId; + SnapshotInfo cloneSnapshotInfo = PlainActionFuture.get( + f -> repository.threadPool().generic().execute(ActionRunnable.supply(f, () -> repository.getSnapshotInfo(cloneSnapshotIdFinal))) + ); + + assertTrue(foundCloneInRepoData); + + assertThat(cloneSnapshotInfo.getPinnedTimestamp(), 
equalTo(sourceSnapshotInfo.getPinnedTimestamp())); + for (String index : sourceSnapshotInfo.indices()) { + assertTrue(cloneSnapshotInfo.indices().contains(index)); + + } + assertThat(cloneSnapshotInfo.totalShards(), equalTo(sourceSnapshotInfo.totalShards())); + } + + public void testCloneShallowCopyAfterDisablingV2() throws Exception { + disableRepoConsistencyCheck("Remote store repository is being used in the test"); + final Path remoteStoreRepoPath = randomRepoPath(); + internalCluster().startClusterManagerOnlyNode(snapshotV2Settings(remoteStoreRepoPath)); + internalCluster().startDataOnlyNode(snapshotV2Settings(remoteStoreRepoPath)); + internalCluster().startDataOnlyNode(snapshotV2Settings(remoteStoreRepoPath)); + + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String indexName3 = "testindex3"; + String snapshotRepoName = "test-clone-snapshot-repo"; + String sourceSnapshotV2 = "test-source-snapshot-v2"; + String sourceSnapshotV1 = "test-source-snapshot-v1"; + String cloneSnapshotV2 = "test-clone-snapshot-v2"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + Client client = client(); + + assertAcked( + client.admin() + .cluster() + .preparePutRepository(snapshotRepoName) + .setType(FsRepository.TYPE) + .setSettings( + Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true) + ) + ); + + createIndex(indexName1, getRemoteStoreBackedIndexSettings()); + createIndex(indexName2, getRemoteStoreBackedIndexSettings()); + + final int numDocsInIndex1 = 10; + final int numDocsInIndex2 = 20; + indexRandomDocs(indexName1, numDocsInIndex1); + indexRandomDocs(indexName2, numDocsInIndex2); + ensureGreen(indexName1, indexName2); + + // create source snapshot which is v2 + CreateSnapshotResponse createSnapshotResponse = client().admin() + .cluster() + .prepareCreateSnapshot(snapshotRepoName, sourceSnapshotV2) + .setWaitForCompletion(true) + .get(); + SnapshotInfo sourceSnapshotInfo = createSnapshotResponse.getSnapshotInfo(); + assertThat(sourceSnapshotInfo.state(), equalTo(SnapshotState.SUCCESS)); + assertThat(sourceSnapshotInfo.successfulShards(), greaterThan(0)); + assertThat(sourceSnapshotInfo.successfulShards(), equalTo(sourceSnapshotInfo.totalShards())); + assertThat(sourceSnapshotInfo.snapshotId().getName(), equalTo(sourceSnapshotV2)); + + // Validate that the snapshot was created + final BlobStoreRepository repository = (BlobStoreRepository) internalCluster().getCurrentClusterManagerNodeInstance( + RepositoriesService.class + ).repository(snapshotRepoName); + PlainActionFuture repositoryDataPlainActionFuture = new PlainActionFuture<>(); + repository.getRepositoryData(repositoryDataPlainActionFuture); + + RepositoryData repositoryData = repositoryDataPlainActionFuture.get(); + + assertTrue(repositoryData.getSnapshotIds().contains(sourceSnapshotInfo.snapshotId())); + + createIndex(indexName3, getRemoteStoreBackedIndexSettings()); + indexRandomDocs(indexName3, 10); + ensureGreen(indexName3); + + // disable snapshot v2 in repo + assertAcked( + client.admin() + .cluster() + .preparePutRepository(snapshotRepoName) + .setType(FsRepository.TYPE) + .setSettings( + Settings.builder() + 
.put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), false) + ) + ); + + // validate that the created snapshot is v1 + CreateSnapshotResponse createSnapshotResponseV1 = client().admin() + .cluster() + .prepareCreateSnapshot(snapshotRepoName, sourceSnapshotV1) + .setWaitForCompletion(true) + .get(); + SnapshotInfo sourceSnapshotInfoV1 = createSnapshotResponseV1.getSnapshotInfo(); + assertThat(sourceSnapshotInfoV1.state(), equalTo(SnapshotState.SUCCESS)); + assertThat(sourceSnapshotInfoV1.successfulShards(), greaterThan(0)); + assertThat(sourceSnapshotInfoV1.successfulShards(), equalTo(sourceSnapshotInfoV1.totalShards())); + assertThat(sourceSnapshotInfoV1.getPinnedTimestamp(), equalTo(0L)); + + // Validate that snapshot is present in repository data + PlainActionFuture repositoryDataV1PlainActionFuture = new PlainActionFuture<>(); + BlobStoreRepository repositoryV1 = (BlobStoreRepository) internalCluster().getCurrentClusterManagerNodeInstance( + RepositoriesService.class + ).repository(snapshotRepoName); + repositoryV1.getRepositoryData(repositoryDataV1PlainActionFuture); + + repositoryData = repositoryDataV1PlainActionFuture.get(); + + assertTrue(repositoryData.getSnapshotIds().contains(sourceSnapshotInfoV1.snapshotId())); + assertEquals(repositoryData.getSnapshotIds().size(), 2); + + // clone should get created for v2 snapshot + AcknowledgedResponse response = client().admin() + .cluster() + .prepareCloneSnapshot(snapshotRepoName, sourceSnapshotV2, cloneSnapshotV2) + .setIndices("*") + .get(); + assertTrue(response.isAcknowledged()); + awaitClusterManagerFinishRepoOperations(); + + // Validate that snapshot is present in repository data + PlainActionFuture repositoryDataCloneV2PlainActionFuture = new PlainActionFuture<>(); + BlobStoreRepository repositoryCloneV2 = (BlobStoreRepository) internalCluster().getCurrentClusterManagerNodeInstance( + RepositoriesService.class + ).repository(snapshotRepoName); + repositoryCloneV2.getRepositoryData(repositoryDataCloneV2PlainActionFuture); + + repositoryData = repositoryDataCloneV2PlainActionFuture.get(); + + assertEquals(repositoryData.getSnapshotIds().size(), 3); + boolean foundCloneInRepoData = false; + SnapshotId cloneSnapshotId = null; + for (SnapshotId snapshotId : repositoryData.getSnapshotIds()) { + if (snapshotId.getName().equals(cloneSnapshotV2)) { + foundCloneInRepoData = true; + cloneSnapshotId = snapshotId; + } + } + final SnapshotId cloneSnapshotIdFinal = cloneSnapshotId; + SnapshotInfo cloneSnapshotInfo = PlainActionFuture.get( + f -> repository.threadPool().generic().execute(ActionRunnable.supply(f, () -> repository.getSnapshotInfo(cloneSnapshotIdFinal))) + ); + + assertTrue(foundCloneInRepoData); + // pinned timestamp value in clone snapshot v2 matches source snapshot v2 + assertThat(cloneSnapshotInfo.getPinnedTimestamp(), equalTo(sourceSnapshotInfo.getPinnedTimestamp())); + for (String index : sourceSnapshotInfo.indices()) { + assertTrue(cloneSnapshotInfo.indices().contains(index)); + + } + assertThat(cloneSnapshotInfo.totalShards(), equalTo(sourceSnapshotInfo.totalShards())); + + } + + public void testRestoreFromClone() throws Exception { + disableRepoConsistencyCheck("Remote store repository is being used in the test"); + final Path 
remoteStoreRepoPath = randomRepoPath(); + + internalCluster().startClusterManagerOnlyNode(snapshotV2Settings(remoteStoreRepoPath)); + internalCluster().startDataOnlyNode(snapshotV2Settings(remoteStoreRepoPath)); + internalCluster().startDataOnlyNode(snapshotV2Settings(remoteStoreRepoPath)); + + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + + String snapshotRepoName = "test-clone-snapshot-repo"; + String sourceSnapshot = "test-source-snapshot"; + String cloneSnapshot = "test-clone-snapshot"; + + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + String restoredIndexName1 = indexName1 + "-restored"; + + Client client = client(); + + assertAcked( + client.admin() + .cluster() + .preparePutRepository(snapshotRepoName) + .setType(FsRepository.TYPE) + .setSettings( + Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true) + ) + ); + + createIndex(indexName1, getRemoteStoreBackedIndexSettings()); + createIndex(indexName2, getRemoteStoreBackedIndexSettings()); + + final int numDocsInIndex1 = 10; + final int numDocsInIndex2 = 20; + indexRandomDocs(indexName1, numDocsInIndex1); + indexRandomDocs(indexName2, numDocsInIndex2); + ensureGreen(indexName1, indexName2); + + logger.info("--> create source snapshot"); + + CreateSnapshotResponse createSnapshotResponse = client().admin() + .cluster() + .prepareCreateSnapshot(snapshotRepoName, sourceSnapshot) + .setWaitForCompletion(true) + .get(); + SnapshotInfo sourceSnapshotInfo = createSnapshotResponse.getSnapshotInfo(); + assertThat(sourceSnapshotInfo.state(), equalTo(SnapshotState.SUCCESS)); + assertThat(sourceSnapshotInfo.successfulShards(), greaterThan(0)); + assertThat(sourceSnapshotInfo.successfulShards(), equalTo(sourceSnapshotInfo.totalShards())); + assertThat(sourceSnapshotInfo.snapshotId().getName(), equalTo(sourceSnapshot)); + + AcknowledgedResponse response = client().admin() + .cluster() + .prepareCloneSnapshot(snapshotRepoName, sourceSnapshot, cloneSnapshot) + .setIndices("*") + .get(); + assertTrue(response.isAcknowledged()); + + DeleteResponse deleteResponse = client().prepareDelete(indexName1, "0").execute().actionGet(); + assertEquals(deleteResponse.getResult(), DocWriteResponse.Result.DELETED); + ensureGreen(indexName1); + + deleteResponse = client().prepareDelete(indexName1, "1").execute().actionGet(); + assertEquals(deleteResponse.getResult(), DocWriteResponse.Result.DELETED); + ensureGreen(indexName1); + + // delete the source snapshot + AcknowledgedResponse deleteSnapshotResponse = internalCluster().clusterManagerClient() + .admin() + .cluster() + .prepareDeleteSnapshot(snapshotRepoName, sourceSnapshot) + .get(); + assertAcked(deleteSnapshotResponse); + + deleteResponse = client().prepareDelete(indexName1, "2").execute().actionGet(); + assertEquals(deleteResponse.getResult(), DocWriteResponse.Result.DELETED); + ensureGreen(indexName1); + ensureGreen(indexName1); + + // restore from clone + RestoreSnapshotResponse restoreSnapshotResponse1 = client.admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepoName, cloneSnapshot) + .setWaitForCompletion(true) + .setIndices(indexName1) + .setRenamePattern(indexName1) + 
.setRenameReplacement(restoredIndexName1) + .get(); + + assertEquals(restoreSnapshotResponse1.status(), RestStatus.OK); + ensureGreen(restoredIndexName1, indexName2); + assertDocsPresentInIndex(client, restoredIndexName1, numDocsInIndex1); + assertDocsPresentInIndex(client, indexName2, numDocsInIndex2); + } + + private void assertDocsPresentInIndex(Client client, String indexName, int numOfDocs) { + for (int i = 0; i < numOfDocs; i++) { + String id = Integer.toString(i); + logger.info("checking for index " + indexName + " with docId" + id); + assertTrue("doc with id" + id + " is not present for index " + indexName, client.prepareGet(indexName, id).get().isExists()); + } + } + + private Settings snapshotV2Settings(Path remoteStoreRepoPath) { + String REMOTE_REPO_NAME = "remote-store-repo-name"; + Settings settings = Settings.builder() + .put(remoteStoreClusterSettings(REMOTE_REPO_NAME, remoteStoreRepoPath)) + .put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED.getKey(), true) + .build(); + return settings; + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/CorruptedBlobStoreRepositoryIT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/CorruptedBlobStoreRepositoryIT.java index e685aaa52df00..aeca4bb82e40c 100644 --- a/server/src/internalClusterTest/java/org/opensearch/snapshots/CorruptedBlobStoreRepositoryIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/CorruptedBlobStoreRepositoryIT.java @@ -58,6 +58,7 @@ import java.util.Map; import java.util.stream.Stream; +import static org.opensearch.test.OpenSearchIntegTestCase.resolvePath; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertRequestBuilderThrows; import static org.hamcrest.Matchers.containsString; @@ -125,18 +126,11 @@ public void testConcurrentlyChangeRepositoryContents() throws Exception { assertAcked(client.admin().cluster().prepareDeleteRepository(repoName)); logger.info("--> recreate repository"); - assertAcked( - client.admin() - .cluster() - .preparePutRepository(repoName) - .setType("fs") - .setSettings( - Settings.builder() - .put("location", repo) - .put("compress", false) - .put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES) - ) - ); + Settings.Builder settings = Settings.builder() + .put("location", repo) + .put("compress", false) + .put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES); + createRepository(repoName, "fs", settings); startDeleteSnapshot(repoName, snapshot).get(); @@ -153,20 +147,12 @@ public void testConcurrentlyChangeRepositoryContentsInBwCMode() throws Exception Path repo = randomRepoPath(); final String repoName = "test-repo"; logger.info("--> creating repository at {}", repo.toAbsolutePath()); - assertAcked( - client.admin() - .cluster() - .preparePutRepository(repoName) - .setType("fs") - .setSettings( - Settings.builder() - .put("location", repo) - .put("compress", false) - .put(BlobStoreRepository.ALLOW_CONCURRENT_MODIFICATION.getKey(), true) - .put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES) - ) - ); - + Settings.Builder settings = Settings.builder() + .put("location", repo) + .put("compress", false) + .put(BlobStoreRepository.ALLOW_CONCURRENT_MODIFICATION.getKey(), true) + .put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES); + createRepository(repoName, "fs", settings); createIndex("test-idx-1", "test-idx-2"); logger.info("--> indexing some data"); 
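The CloneSnapshotV2IT tests added above all register their snapshot repository with the same group of settings: an fs location plus remote-store shallow copy and shallow snapshot V2 enabled. A minimal, self-contained sketch of that settings builder follows for reference; the class name, method name, and concrete chunk size are illustrative, while the setting constants are the ones the diff itself uses.

    // Sketch: repository settings for shallow V2 snapshots, mirroring the builders
    // used in CloneSnapshotV2IT. Names and literal values are illustrative.
    import java.nio.file.Path;

    import org.opensearch.common.settings.Settings;
    import org.opensearch.core.common.unit.ByteSizeUnit;
    import org.opensearch.repositories.blobstore.BlobStoreRepository;
    import org.opensearch.repositories.fs.FsRepository;

    final class ShallowV2RepoSettingsSketch {
        static Settings.Builder shallowV2RepoSettings(Path location) {
            return Settings.builder()
                .put(FsRepository.LOCATION_SETTING.getKey(), location)                   // blob store location on disk
                .put(FsRepository.COMPRESS_SETTING.getKey(), true)                       // the tests randomize this flag
                .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), 500, ByteSizeUnit.BYTES)  // the tests pick 100-1000 bytes
                .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) // shallow copies of remote-store backed indices
                .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true);            // opt in to snapshot V2 (pinned timestamps)
        }
    }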
indexRandom( @@ -373,9 +359,7 @@ public void testSnapshotWithCorruptedShardIndexFile() throws Exception { assertThat(indexIds.size(), equalTo(1)); final IndexId corruptedIndex = indexIds.get(indexName); - final Path shardIndexFile = repo.resolve("indices") - .resolve(corruptedIndex.getId()) - .resolve("0") + final Path shardIndexFile = repo.resolve(resolvePath(corruptedIndex, "0")) .resolve("index-" + repositoryData.shardGenerations().getShardGen(corruptedIndex, 0)); logger.info("--> truncating shard index file [{}]", shardIndexFile); @@ -450,7 +434,7 @@ public void testDeleteSnapshotWithMissingIndexAndShardMetadata() throws Exceptio logger.info("--> delete index metadata and shard metadata"); for (String index : indices) { - Path shardZero = indicesPath.resolve(indexIds.get(index).getId()).resolve("0"); + Path shardZero = repo.resolve(resolvePath(indexIds.get(index), "0")); if (randomBoolean()) { Files.delete( shardZero.resolve("index-" + getRepositoryData("test-repo").shardGenerations().getShardGen(indexIds.get(index), 0)) @@ -643,10 +627,9 @@ public void testSnapshotWithMissingShardLevelIndexFile() throws Exception { clusterAdmin().prepareCreateSnapshot("test-repo", "test-snap-1").setWaitForCompletion(true).setIndices("test-idx-*").get(); logger.info("--> deleting shard level index file"); - final Path indicesPath = repo.resolve("indices"); for (IndexId indexId : getRepositoryData("test-repo").getIndices().values()) { final Path shardGen; - try (Stream shardFiles = Files.list(indicesPath.resolve(indexId.getId()).resolve("0"))) { + try (Stream shardFiles = Files.list(repo.resolve(resolvePath(indexId, "0")))) { shardGen = shardFiles.filter(file -> file.getFileName().toString().startsWith(BlobStoreRepository.INDEX_FILE_PREFIX)) .findFirst() .orElseThrow(() -> new AssertionError("Failed to find shard index blob")); diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/DedicatedClusterSnapshotRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/DedicatedClusterSnapshotRestoreIT.java index 54db951eb41c2..686853c42aa03 100644 --- a/server/src/internalClusterTest/java/org/opensearch/snapshots/DedicatedClusterSnapshotRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/DedicatedClusterSnapshotRestoreIT.java @@ -86,6 +86,7 @@ import org.opensearch.rest.action.admin.cluster.RestGetRepositoriesAction; import org.opensearch.snapshots.mockstore.MockRepository; import org.opensearch.test.InternalTestCluster; +import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.test.OpenSearchIntegTestCase.ClusterScope; import org.opensearch.test.OpenSearchIntegTestCase.Scope; import org.opensearch.test.TestCustomMetadata; @@ -759,18 +760,26 @@ public void testRegistrationFailure() { internalCluster().startNode(nonClusterManagerNode()); // Register mock repositories for (int i = 0; i < 5; i++) { - clusterAdmin().preparePutRepository("test-repo" + i) - .setType("mock") - .setSettings(Settings.builder().put("location", randomRepoPath())) - .setVerify(false) - .get(); + OpenSearchIntegTestCase.putRepositoryRequestBuilder( + clusterAdmin(), + "test-repo" + i, + "mock", + false, + Settings.builder().put("location", randomRepoPath()), + null, + false + ).get(); } logger.info("--> make sure that properly setup repository can be registered on all nodes"); - clusterAdmin().preparePutRepository("test-repo-0") - .setType("fs") - .setSettings(Settings.builder().put("location", randomRepoPath())) - .get(); - + 
OpenSearchIntegTestCase.putRepositoryRequestBuilder( + clusterAdmin(), + "test-repo-0", + "fs", + true, + Settings.builder().put("location", randomRepoPath()), + null, + false + ).get(); } public void testThatSensitiveRepositorySettingsAreNotExposed() throws Exception { @@ -979,11 +988,7 @@ public void testSnapshotWithDateMath() { final String snapshotName = ""; logger.info("--> creating repository"); - assertAcked( - clusterAdmin().preparePutRepository(repo) - .setType("fs") - .setSettings(Settings.builder().put("location", randomRepoPath()).put("compress", randomBoolean())) - ); + createRepository(repo, "fs", Settings.builder().put("location", randomRepoPath()).put("compress", randomBoolean())); final String expression1 = nameExpressionResolver.resolveDateMathExpression(snapshotName); logger.info("--> creating date math snapshot"); diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotIT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotIT.java index 2331d52c3a1bc..26b30af4c2c50 100644 --- a/server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotIT.java @@ -285,6 +285,7 @@ public void testDeleteMultipleShallowCopySnapshotsCase3() throws Exception { assert (getLockFilesInRemoteStore(remoteStoreEnabledIndexName, REMOTE_REPO_NAME).length == 0); } + @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9208") public void testRemoteStoreCleanupForDeletedIndex() throws Exception { disableRepoConsistencyCheck("Remote store repository is being used in the test"); final Path remoteStoreRepoPath = randomRepoPath(); @@ -323,13 +324,15 @@ public void testRemoteStoreCleanupForDeletedIndex() throws Exception { final RepositoriesService repositoriesService = internalCluster().getCurrentClusterManagerNodeInstance(RepositoriesService.class); final BlobStoreRepository remoteStoreRepository = (BlobStoreRepository) repositoriesService.repository(REMOTE_REPO_NAME); + String segmentsPathFixedPrefix = RemoteStoreSettings.CLUSTER_REMOTE_STORE_SEGMENTS_PATH_PREFIX.get(getNodeSettings()); BlobPath shardLevelBlobPath = getShardLevelBlobPath( client(), remoteStoreEnabledIndexName, remoteStoreRepository.basePath(), "0", SEGMENTS, - LOCK_FILES + LOCK_FILES, + segmentsPathFixedPrefix ); BlobContainer blobContainer = remoteStoreRepository.blobStore().blobContainer(shardLevelBlobPath); String[] lockFiles; diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotV2IT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotV2IT.java new file mode 100644 index 0000000000000..1d7a58384c0be --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotV2IT.java @@ -0,0 +1,342 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.snapshots; + +import org.opensearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; +import org.opensearch.action.support.master.AcknowledgedResponse; +import org.opensearch.client.Client; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.common.unit.ByteSizeUnit; +import org.opensearch.index.remote.RemoteStoreEnums; +import org.opensearch.indices.RemoteStoreSettings; +import org.opensearch.node.remotestore.RemoteStorePinnedTimestampService; +import org.opensearch.remotestore.RemoteStoreBaseIntegTestCase; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.repositories.fs.FsRepository; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.nio.file.Path; +import java.util.concurrent.TimeUnit; + +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.lessThan; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +public class DeleteSnapshotV2IT extends AbstractSnapshotIntegTestCase { + + private static final String REMOTE_REPO_NAME = "remote-store-repo-name"; + + public void testDeleteShallowCopyV2() throws Exception { + disableRepoConsistencyCheck("Remote store repository is being used in the test"); + + final Path remoteStoreRepoPath = randomRepoPath(); + internalCluster().startClusterManagerOnlyNode(snapshotV2Settings(remoteStoreRepoPath)); + + internalCluster().startDataOnlyNode(snapshotV2Settings(remoteStoreRepoPath)); + internalCluster().startDataOnlyNode(snapshotV2Settings(remoteStoreRepoPath)); + + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String indexName3 = "testindex3"; + String snapshotRepoName = "test-create-snapshot-repo"; + String snapshotName1 = "test-create-snapshot1"; + String snapshotName2 = "test-create-snapshot2"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + Client client = client(); + + assertAcked( + client.admin() + .cluster() + .preparePutRepository(snapshotRepoName) + .setType(FsRepository.TYPE) + .setSettings( + Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true) + ) + ); + + createIndex(indexName1, getRemoteStoreBackedIndexSettings()); + createIndex(indexName2, getRemoteStoreBackedIndexSettings()); + + final int numDocsInIndex1 = 10; + final int numDocsInIndex2 = 20; + indexRandomDocs(indexName1, numDocsInIndex1); + indexRandomDocs(indexName2, numDocsInIndex2); + ensureGreen(indexName1, indexName2); + + CreateSnapshotResponse createSnapshotResponse = client().admin() + .cluster() + .prepareCreateSnapshot(snapshotRepoName, snapshotName1) + .setWaitForCompletion(true) + .get(); + SnapshotInfo snapshotInfo = createSnapshotResponse.getSnapshotInfo(); + assertThat(snapshotInfo.state(), equalTo(SnapshotState.SUCCESS)); + assertThat(snapshotInfo.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo.successfulShards(), 
equalTo(snapshotInfo.totalShards())); + assertThat(snapshotInfo.snapshotId().getName(), equalTo(snapshotName1)); + + createIndex(indexName3, getRemoteStoreBackedIndexSettings()); + indexRandomDocs(indexName3, 10); + CreateSnapshotResponse createSnapshotResponse2 = client().admin() + .cluster() + .prepareCreateSnapshot(snapshotRepoName, snapshotName2) + .setWaitForCompletion(true) + .get(); + snapshotInfo = createSnapshotResponse2.getSnapshotInfo(); + assertThat(snapshotInfo.state(), equalTo(SnapshotState.SUCCESS)); + assertThat(snapshotInfo.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo.successfulShards(), equalTo(snapshotInfo.totalShards())); + assertThat(snapshotInfo.snapshotId().getName(), equalTo(snapshotName2)); + + assertAcked(client().admin().indices().prepareDelete(indexName1)); + Thread.sleep(100); + + AcknowledgedResponse deleteResponse = client().admin() + .cluster() + .prepareDeleteSnapshot(snapshotRepoName, snapshotName2) + .setSnapshots(snapshotName2) + .get(); + assertTrue(deleteResponse.isAcknowledged()); + + // test delete non-existent snapshot + assertThrows( + SnapshotMissingException.class, + () -> client().admin().cluster().prepareDeleteSnapshot(snapshotRepoName, "random-snapshot").setSnapshots(snapshotName2).get() + ); + + } + + public void testDeleteShallowCopyV2MultipleSnapshots() throws Exception { + disableRepoConsistencyCheck("Remote store repository is being used in the test"); + final Path remoteStoreRepoPath = randomRepoPath(); + + internalCluster().startClusterManagerOnlyNode(snapshotV2Settings(remoteStoreRepoPath)); + internalCluster().startDataOnlyNode(snapshotV2Settings(remoteStoreRepoPath)); + internalCluster().startDataOnlyNode(snapshotV2Settings(remoteStoreRepoPath)); + + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String indexName3 = "testindex3"; + String snapshotRepoName = "test-create-snapshot-repo"; + String snapshotName1 = "test-create-snapshot1"; + String snapshotName2 = "test-create-snapshot2"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + Client client = client(); + + assertAcked( + client.admin() + .cluster() + .preparePutRepository(snapshotRepoName) + .setType(FsRepository.TYPE) + .setSettings( + Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true) + ) + ); + + createIndex(indexName1, getRemoteStoreBackedIndexSettings()); + + createIndex(indexName2, getRemoteStoreBackedIndexSettings()); + + final int numDocsInIndex1 = 10; + final int numDocsInIndex2 = 20; + indexRandomDocs(indexName1, numDocsInIndex1); + indexRandomDocs(indexName2, numDocsInIndex2); + ensureGreen(indexName1, indexName2); + + CreateSnapshotResponse createSnapshotResponse = client().admin() + .cluster() + .prepareCreateSnapshot(snapshotRepoName, snapshotName1) + .setWaitForCompletion(true) + .get(); + SnapshotInfo snapshotInfo = createSnapshotResponse.getSnapshotInfo(); + assertThat(snapshotInfo.state(), equalTo(SnapshotState.SUCCESS)); + assertThat(snapshotInfo.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo.successfulShards(), equalTo(snapshotInfo.totalShards())); + assertThat(snapshotInfo.snapshotId().getName(), 
equalTo(snapshotName1)); + + createIndex(indexName3, getRemoteStoreBackedIndexSettings()); + indexRandomDocs(indexName3, 10); + + CreateSnapshotResponse createSnapshotResponse2 = client().admin() + .cluster() + .prepareCreateSnapshot(snapshotRepoName, snapshotName2) + .setWaitForCompletion(true) + .get(); + snapshotInfo = createSnapshotResponse2.getSnapshotInfo(); + assertThat(snapshotInfo.state(), equalTo(SnapshotState.SUCCESS)); + assertThat(snapshotInfo.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo.successfulShards(), equalTo(snapshotInfo.totalShards())); + assertThat(snapshotInfo.snapshotId().getName(), equalTo(snapshotName2)); + + AcknowledgedResponse deleteResponse = client().admin() + .cluster() + .prepareDeleteSnapshot(snapshotRepoName, snapshotName1, snapshotName2) + .setSnapshots(snapshotName2) + .get(); + assertTrue(deleteResponse.isAcknowledged()); + + // test delete non-existent snapshot + assertThrows( + SnapshotMissingException.class, + () -> client().admin().cluster().prepareDeleteSnapshot(snapshotRepoName, "random-snapshot").setSnapshots(snapshotName2).get() + ); + + } + + @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/15692") + public void testRemoteStoreCleanupForDeletedIndexForSnapshotV2() throws Exception { + disableRepoConsistencyCheck("Remote store repository is being used in the test"); + final Path remoteStoreRepoPath = randomRepoPath(); + Settings settings = remoteStoreClusterSettings(REMOTE_REPO_NAME, remoteStoreRepoPath); + settings = Settings.builder() + .put(settings) + .put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED.getKey(), true) + .put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.getKey(), RemoteStoreEnums.PathType.FIXED.toString()) + .build(); + String clusterManagerName = internalCluster().startClusterManagerOnlyNode(settings); + internalCluster().startDataOnlyNode(settings); + final Client clusterManagerClient = internalCluster().clusterManagerClient(); + ensureStableCluster(2); + + RemoteStorePinnedTimestampService remoteStorePinnedTimestampService = internalCluster().getInstance( + RemoteStorePinnedTimestampService.class, + clusterManagerName + ); + remoteStorePinnedTimestampService.rescheduleAsyncUpdatePinnedTimestampTask(TimeValue.timeValueSeconds(1)); + + final String snapshotRepoName = "snapshot-repo-name"; + final Path snapshotRepoPath = randomRepoPath(); + createRepository(snapshotRepoName, "mock", snapshotRepoSettingsForShallowV2(snapshotRepoPath)); + + final String remoteStoreEnabledIndexName = "remote-index-1"; + final Settings remoteStoreEnabledIndexSettings = getRemoteStoreBackedIndexSettings(); + createIndex(remoteStoreEnabledIndexName, remoteStoreEnabledIndexSettings); + indexRandomDocs(remoteStoreEnabledIndexName, 5); + + String indexUUID = client().admin() + .indices() + .prepareGetSettings(remoteStoreEnabledIndexName) + .get() + .getSetting(remoteStoreEnabledIndexName, IndexMetadata.SETTING_INDEX_UUID); + + String numShards = client().admin() + .indices() + .prepareGetSettings(remoteStoreEnabledIndexName) + .get() + .getSetting(remoteStoreEnabledIndexName, IndexMetadata.SETTING_NUMBER_OF_SHARDS); + + logger.info("--> create two remote index shallow snapshots"); + CreateSnapshotResponse createSnapshotResponse = client().admin() + .cluster() + .prepareCreateSnapshot(snapshotRepoName, "snap1") + .setWaitForCompletion(true) + .get(); + SnapshotInfo snapshotInfo1 = createSnapshotResponse.getSnapshotInfo(); + + indexRandomDocs(remoteStoreEnabledIndexName, 
25); + + CreateSnapshotResponse createSnapshotResponse2 = client().admin() + .cluster() + .prepareCreateSnapshot(snapshotRepoName, "snap2") + .setWaitForCompletion(true) + .get(); + SnapshotInfo snapshotInfo2 = createSnapshotResponse2.getSnapshotInfo(); + assertThat(snapshotInfo2.state(), equalTo(SnapshotState.SUCCESS)); + assertThat(snapshotInfo2.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo2.successfulShards(), equalTo(snapshotInfo2.totalShards())); + assertThat(snapshotInfo2.snapshotId().getName(), equalTo("snap2")); + + // delete remote store index + assertAcked(client().admin().indices().prepareDelete(remoteStoreEnabledIndexName)); + + logger.info("--> delete snapshot 2"); + + Path indexPath = Path.of(String.valueOf(remoteStoreRepoPath), indexUUID); + Path shardPath = Path.of(String.valueOf(indexPath), "0"); + Path segmentsPath = Path.of(String.valueOf(shardPath), "segments"); + Path translogPath = Path.of(String.valueOf(shardPath), "translog"); + + // Get total segments remote store directory file count for deleted index and shard 0 + int segmentFilesCountBeforeDeletingSnapshot1 = RemoteStoreBaseIntegTestCase.getFileCount(segmentsPath); + int translogFilesCountBeforeDeletingSnapshot1 = RemoteStoreBaseIntegTestCase.getFileCount(translogPath); + + RemoteStoreSettings.setPinnedTimestampsLookbackInterval(TimeValue.ZERO); + + AcknowledgedResponse deleteSnapshotResponse = clusterManagerClient.admin() + .cluster() + .prepareDeleteSnapshot(snapshotRepoName, snapshotInfo2.snapshotId().getName()) + .get(); + assertAcked(deleteSnapshotResponse); + + Thread.sleep(5000); + + assertBusy(() -> { + try { + assertThat(RemoteStoreBaseIntegTestCase.getFileCount(segmentsPath), lessThan(segmentFilesCountBeforeDeletingSnapshot1)); + } catch (Exception e) {} + }, 30, TimeUnit.SECONDS); + int segmentFilesCountAfterDeletingSnapshot1 = RemoteStoreBaseIntegTestCase.getFileCount(segmentsPath); + + logger.info("--> delete snapshot 1"); + RemoteStoreSettings.setPinnedTimestampsLookbackInterval(TimeValue.ZERO); + // on snapshot deletion, remote store segment files should get cleaned up for deleted index - `remote-index-1` + deleteSnapshotResponse = clusterManagerClient.admin() + .cluster() + .prepareDeleteSnapshot(snapshotRepoName, snapshotInfo1.snapshotId().getName()) + .get(); + assertAcked(deleteSnapshotResponse); + + // Delete is async. 
Give time for it + assertBusy(() -> { + try { + assertThat(RemoteStoreBaseIntegTestCase.getFileCount(segmentsPath), lessThan(segmentFilesCountAfterDeletingSnapshot1)); + } catch (Exception e) {} + }, 60, TimeUnit.SECONDS); + + assertBusy(() -> { + try { + assertThat(RemoteStoreBaseIntegTestCase.getFileCount(translogPath), lessThan(translogFilesCountBeforeDeletingSnapshot1)); + } catch (Exception e) {} + }, 60, TimeUnit.SECONDS); + + } + + private Settings snapshotV2Settings(Path remoteStoreRepoPath) { + Settings settings = Settings.builder() + .put(remoteStoreClusterSettings(REMOTE_REPO_NAME, remoteStoreRepoPath)) + .put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED.getKey(), true) + .build(); + return settings; + } + + protected Settings.Builder snapshotRepoSettingsForShallowV2(Path path) { + final Settings.Builder settings = Settings.builder(); + settings.put("location", path); + settings.put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), Boolean.TRUE); + settings.put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true); + return settings; + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/MultiClusterRepoAccessIT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/MultiClusterRepoAccessIT.java index 1c46e37dea93a..c96d4a2f079ee 100644 --- a/server/src/internalClusterTest/java/org/opensearch/snapshots/MultiClusterRepoAccessIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/MultiClusterRepoAccessIT.java @@ -115,13 +115,15 @@ public void testConcurrentDeleteFromOtherCluster() throws InterruptedException { secondCluster.startClusterManagerOnlyNode(); secondCluster.startDataOnlyNode(); - secondCluster.client() - .admin() - .cluster() - .preparePutRepository(repoNameOnSecondCluster) - .setType("fs") - .setSettings(Settings.builder().put("location", repoPath)) - .get(); + OpenSearchIntegTestCase.putRepositoryRequestBuilder( + secondCluster.client().admin().cluster(), + repoNameOnSecondCluster, + "fs", + true, + Settings.builder().put("location", repoPath), + null, + false + ).get(); createIndexWithRandomDocs("test-idx-1", randomIntBetween(1, 100)); createFullSnapshot(repoNameOnFirstCluster, "snap-1"); diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/RemoteIndexSnapshotStatusApiIT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/RemoteIndexSnapshotStatusApiIT.java index 8e2580aba1745..e84de36df2fca 100644 --- a/server/src/internalClusterTest/java/org/opensearch/snapshots/RemoteIndexSnapshotStatusApiIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/RemoteIndexSnapshotStatusApiIT.java @@ -32,20 +32,28 @@ package org.opensearch.snapshots; +import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; import org.opensearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; import org.opensearch.action.admin.cluster.snapshots.status.SnapshotIndexShardStage; import org.opensearch.action.admin.cluster.snapshots.status.SnapshotIndexShardStatus; +import org.opensearch.action.admin.cluster.snapshots.status.SnapshotIndexStatus; import org.opensearch.action.admin.cluster.snapshots.status.SnapshotStatus; import org.opensearch.cluster.SnapshotsInProgress; import org.opensearch.common.action.ActionFuture; import org.opensearch.common.settings.Settings; +import org.opensearch.indices.RemoteStoreSettings; +import org.opensearch.repositories.blobstore.BlobStoreRepository; import 
org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.threadpool.ThreadPool; import org.junit.Before; import java.nio.file.Path; +import java.util.Map; +import java.util.concurrent.TimeUnit; import static org.opensearch.remotestore.RemoteStoreBaseIntegTestCase.remoteStoreClusterSettings; +import static org.opensearch.snapshots.SnapshotsService.MAX_SHARDS_ALLOWED_IN_STATUS_API; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.is; @@ -192,6 +200,110 @@ public void testStatusAPICallInProgressShallowSnapshot() throws Exception { createSnapshotResponseActionFuture.actionGet(); } + public void testStatusAPICallForShallowV2Snapshot() throws Exception { + disableRepoConsistencyCheck("Remote store repository is being used for the test"); + Settings pinnedTimestampSettings = Settings.builder() + .put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED.getKey(), true) + .build(); + internalCluster().startClusterManagerOnlyNode(pinnedTimestampSettings); + internalCluster().startDataOnlyNodes(2, pinnedTimestampSettings); + + final String index1 = "remote-index-1"; + final String index2 = "remote-index-2"; + final String index3 = "remote-index-3"; + final String snapshotRepoName = "snapshot-repo-name"; + final String snapshot = "snapshot"; + + logger.info("Create repository for shallow V2 snapshots"); + Settings.Builder snapshotV2RepoSettings = snapshotRepoSettingsForShallowCopy().put( + BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), + Boolean.TRUE + ); + createRepository(snapshotRepoName, "fs", snapshotV2RepoSettings); + + final Settings remoteStoreEnabledIndexSettings = getRemoteStoreBackedIndexSettings(); + createIndex(index1, remoteStoreEnabledIndexSettings); + createIndex(index2, remoteStoreEnabledIndexSettings); + createIndex(index3, remoteStoreEnabledIndexSettings); + ensureGreen(); + + logger.info("Indexing some data"); + for (int i = 0; i < 50; i++) { + index(index1, "_doc", Integer.toString(i), "foo", "bar" + i); + index(index2, "_doc", Integer.toString(i), "foo", "bar" + i); + index(index3, "_doc", Integer.toString(i), "foo", "bar" + i); + } + refresh(); + + SnapshotInfo snapshotInfo = createFullSnapshot(snapshotRepoName, snapshot); + assertTrue(snapshotInfo.getPinnedTimestamp() > 0); // to assert creation of a shallow v2 snapshot + + logger.info("Set MAX_SHARDS_ALLOWED_IN_STATUS_API to a low value"); + ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest(); + updateSettingsRequest.persistentSettings(Settings.builder().put(MAX_SHARDS_ALLOWED_IN_STATUS_API.getKey(), 2)); + assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + + assertBusy(() -> { + // without index filter + // although no. 
of shards in snapshot (3) is greater than the max value allowed in a status api call, the request does not fail + SnapshotStatus snapshotStatusWithoutIndexFilter = client().admin() + .cluster() + .prepareSnapshotStatus(snapshotRepoName) + .setSnapshots(snapshot) + .execute() + .actionGet() + .getSnapshots() + .get(0); + + assertShallowV2SnapshotStatus(snapshotStatusWithoutIndexFilter, false); + + // with index filter + SnapshotStatus snapshotStatusWithIndexFilter = client().admin() + .cluster() + .prepareSnapshotStatus(snapshotRepoName) + .setSnapshots(snapshot) + .setIndices(index1, index2) + .execute() + .actionGet() + .getSnapshots() + .get(0); + + assertShallowV2SnapshotStatus(snapshotStatusWithIndexFilter, true); + + }, 1, TimeUnit.MINUTES); + + } + + private void assertShallowV2SnapshotStatus(SnapshotStatus snapshotStatus, boolean hasIndexFilter) { + if (hasIndexFilter) { + assertEquals(0, snapshotStatus.getStats().getTotalSize()); + } else { + // TODO: after adding primary store size at the snapshot level, total size here should be > 0 + } + // assert that total and incremental values of file count and size_in_bytes are 0 at index and shard levels + assertEquals(0, snapshotStatus.getStats().getTotalFileCount()); + assertEquals(0, snapshotStatus.getStats().getIncrementalSize()); + assertEquals(0, snapshotStatus.getStats().getIncrementalFileCount()); + + for (Map.Entry entry : snapshotStatus.getIndices().entrySet()) { + // index level + SnapshotIndexStatus snapshotIndexStatus = entry.getValue(); + assertEquals(0, snapshotIndexStatus.getStats().getTotalSize()); + assertEquals(0, snapshotIndexStatus.getStats().getTotalFileCount()); + assertEquals(0, snapshotIndexStatus.getStats().getIncrementalSize()); + assertEquals(0, snapshotIndexStatus.getStats().getIncrementalFileCount()); + + for (SnapshotIndexShardStatus snapshotIndexShardStatus : snapshotStatus.getShards()) { + // shard level + assertEquals(0, snapshotIndexShardStatus.getStats().getTotalSize()); + assertEquals(0, snapshotIndexShardStatus.getStats().getTotalFileCount()); + assertEquals(0, snapshotIndexShardStatus.getStats().getIncrementalSize()); + assertEquals(0, snapshotIndexShardStatus.getStats().getIncrementalFileCount()); + assertEquals(SnapshotIndexShardStage.DONE, snapshotIndexShardStatus.getStage()); + } + } + } + private static SnapshotIndexShardStatus stateFirstShard(SnapshotStatus snapshotStatus, String indexName) { return snapshotStatus.getIndices().get(indexName).getShards().get(0); } diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/RepositoriesIT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/RepositoriesIT.java index dd40c77ba918d..271fcf166139f 100644 --- a/server/src/internalClusterTest/java/org/opensearch/snapshots/RepositoriesIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/RepositoriesIT.java @@ -33,6 +33,7 @@ package org.opensearch.snapshots; import org.opensearch.action.admin.cluster.repositories.get.GetRepositoriesResponse; +import org.opensearch.action.admin.cluster.repositories.put.PutRepositoryRequestBuilder; import org.opensearch.action.admin.cluster.repositories.verify.VerifyRepositoryResponse; import org.opensearch.action.admin.cluster.state.ClusterStateResponse; import org.opensearch.action.bulk.BulkRequest; @@ -56,7 +57,6 @@ import java.nio.file.Path; import java.util.List; -import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; import static 
org.opensearch.test.hamcrest.OpenSearchAssertions.assertRequestBuilderThrows; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; @@ -110,19 +110,17 @@ public void testRepositoryCreation() throws Exception { assertThat(findRepository(repositoriesResponse.repositories(), "test-repo-1"), notNullValue()); assertThat(findRepository(repositoriesResponse.repositories(), "test-repo-2"), notNullValue()); + RepositoryMetadata testRepo1Md = findRepository(repositoriesResponse.repositories(), "test-repo-1"); + logger.info("--> check that trying to create a repository with the same settings repeatedly does not update cluster state"); String beforeStateUuid = clusterStateResponse.getState().stateUUID(); - assertThat( - client.admin() - .cluster() - .preparePutRepository("test-repo-1") - .setType("fs") - .setSettings(Settings.builder().put("location", location)) - .get() - .isAcknowledged(), - equalTo(true) - ); - assertEquals(beforeStateUuid, client.admin().cluster().prepareState().clear().get().getState().stateUUID()); + createRepository("test-repo-1", "fs", Settings.builder().put("location", location)); + repositoriesResponse = client.admin().cluster().prepareGetRepositories(randomFrom("_all", "*", "test-repo-*")).get(); + RepositoryMetadata testRepo1MdAfterUpdate = findRepository(repositoriesResponse.repositories(), "test-repo-1"); + + if (testRepo1Md.settings().equals(testRepo1MdAfterUpdate.settings())) { + assertEquals(beforeStateUuid, client.admin().cluster().prepareState().clear().get().getState().stateUUID()); + } logger.info("--> delete repository test-repo-1"); client.admin().cluster().prepareDeleteRepository("test-repo-1").get(); @@ -225,12 +223,7 @@ public void testMisconfiguredRepository() throws Exception { Path invalidRepoPath = createTempDir().toAbsolutePath(); String location = invalidRepoPath.toString(); try { - client().admin() - .cluster() - .preparePutRepository("test-repo") - .setType("fs") - .setSettings(Settings.builder().put("location", location)) - .get(); + createRepository("test-repo", "fs", Settings.builder().put("location", location)); fail("Shouldn't be here"); } catch (RepositoryException ex) { assertThat( @@ -242,33 +235,28 @@ public void testMisconfiguredRepository() throws Exception { public void testRepositoryAckTimeout() throws Exception { logger.info("--> creating repository test-repo-1 with 0s timeout - shouldn't ack"); - AcknowledgedResponse putRepositoryResponse = client().admin() - .cluster() - .preparePutRepository("test-repo-1") - .setType("fs") - .setSettings( - Settings.builder() - .put("location", randomRepoPath()) - .put("compress", randomBoolean()) - .put("chunk_size", randomIntBetween(5, 100), ByteSizeUnit.BYTES) - ) - .setTimeout("0s") - .get(); - assertThat(putRepositoryResponse.isAcknowledged(), equalTo(false)); + + Settings.Builder settings = Settings.builder() + .put("location", randomRepoPath()) + .put("compress", randomBoolean()) + .put("chunk_size", randomIntBetween(5, 100), ByteSizeUnit.BYTES); + PutRepositoryRequestBuilder requestBuilder = OpenSearchIntegTestCase.putRepositoryRequestBuilder( + client().admin().cluster(), + "test-repo-1", + "fs", + true, + settings, + "0s", + false + ); + assertFalse(requestBuilder.get().isAcknowledged()); logger.info("--> creating repository test-repo-2 with standard timeout - should ack"); - putRepositoryResponse = client().admin() - .cluster() - .preparePutRepository("test-repo-2") - .setType("fs") - .setSettings( - Settings.builder() - .put("location", 
randomRepoPath()) - .put("compress", randomBoolean()) - .put("chunk_size", randomIntBetween(5, 100), ByteSizeUnit.BYTES) - ) - .get(); - assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true)); + settings = Settings.builder() + .put("location", randomRepoPath()) + .put("compress", randomBoolean()) + .put("chunk_size", randomIntBetween(5, 100), ByteSizeUnit.BYTES); + createRepository("test-repo-2", "fs", settings); logger.info("--> deleting repository test-repo-2 with 0s timeout - shouldn't ack"); AcknowledgedResponse deleteRepositoryResponse = client().admin() @@ -292,25 +280,45 @@ public void testRepositoryVerification() throws Exception { Settings readonlySettings = Settings.builder().put(settings).put("readonly", true).build(); logger.info("--> creating repository that cannot write any files - should fail"); assertRequestBuilderThrows( - client.admin().cluster().preparePutRepository("test-repo-1").setType("mock").setSettings(settings), + OpenSearchIntegTestCase.putRepositoryRequestBuilder( + client.admin().cluster(), + "test-repo-1", + "mock", + true, + Settings.builder().put(settings), + null, + false + ), RepositoryVerificationException.class ); logger.info("--> creating read-only repository that cannot read any files - should fail"); assertRequestBuilderThrows( - client.admin().cluster().preparePutRepository("test-repo-2").setType("mock").setSettings(readonlySettings), + OpenSearchIntegTestCase.putRepositoryRequestBuilder( + client.admin().cluster(), + "test-repo-2", + "mock", + true, + Settings.builder().put(readonlySettings), + null, + false + ), RepositoryVerificationException.class ); logger.info("--> creating repository that cannot write any files, but suppress verification - should be acked"); - assertAcked(client.admin().cluster().preparePutRepository("test-repo-1").setType("mock").setSettings(settings).setVerify(false)); + OpenSearchIntegTestCase.putRepository(client.admin().cluster(), "test-repo-1", "mock", false, Settings.builder().put(settings)); logger.info("--> verifying repository"); assertRequestBuilderThrows(client.admin().cluster().prepareVerifyRepository("test-repo-1"), RepositoryVerificationException.class); logger.info("--> creating read-only repository that cannot read any files, but suppress verification - should be acked"); - assertAcked( - client.admin().cluster().preparePutRepository("test-repo-2").setType("mock").setSettings(readonlySettings).setVerify(false) + OpenSearchIntegTestCase.putRepository( + client.admin().cluster(), + "test-repo-2", + "mock", + false, + Settings.builder().put(readonlySettings) ); logger.info("--> verifying repository"); @@ -320,12 +328,8 @@ public void testRepositoryVerification() throws Exception { logger.info("--> creating repository"); try { - client.admin() - .cluster() - .preparePutRepository("test-repo-1") - .setType("mock") - .setSettings(Settings.builder().put("location", location).put("localize_location", true)) - .get(); + Settings.Builder settingsBuilder = Settings.builder().put("location", location).put("localize_location", true); + createRepository("test-repo-1", "mock", settingsBuilder); fail("RepositoryVerificationException wasn't generated"); } catch (RepositoryVerificationException ex) { assertThat(ex.getMessage(), containsString("is not shared")); diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/RepositoryFilterUserMetadataIT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/RepositoryFilterUserMetadataIT.java index 0bebe969b3f3e..4187ecdf5f283 100644 --- 
a/server/src/internalClusterTest/java/org/opensearch/snapshots/RepositoryFilterUserMetadataIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/RepositoryFilterUserMetadataIT.java @@ -59,7 +59,6 @@ import java.util.Map; import java.util.function.Function; -import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; import static org.hamcrest.Matchers.is; public class RepositoryFilterUserMetadataIT extends OpenSearchIntegTestCase { @@ -72,17 +71,10 @@ protected Collection> nodePlugins() { public void testFilteredRepoMetadataIsUsed() { final String clusterManagerName = internalCluster().getClusterManagerName(); final String repoName = "test-repo"; - assertAcked( - client().admin() - .cluster() - .preparePutRepository(repoName) - .setType(MetadataFilteringPlugin.TYPE) - .setSettings( - Settings.builder() - .put("location", randomRepoPath()) - .put(MetadataFilteringPlugin.CLUSTER_MANAGER_SETTING_VALUE, clusterManagerName) - ) - ); + Settings.Builder settings = Settings.builder() + .put("location", randomRepoPath()) + .put(MetadataFilteringPlugin.CLUSTER_MANAGER_SETTING_VALUE, clusterManagerName); + createRepository(repoName, MetadataFilteringPlugin.TYPE, settings); createIndex("test-idx"); final SnapshotInfo snapshotInfo = client().admin() .cluster() diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/SharedClusterSnapshotRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/SharedClusterSnapshotRestoreIT.java index 91b0aa6438753..d2003f5dca0e2 100644 --- a/server/src/internalClusterTest/java/org/opensearch/snapshots/SharedClusterSnapshotRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/SharedClusterSnapshotRestoreIT.java @@ -82,6 +82,7 @@ import org.opensearch.repositories.RepositoryException; import org.opensearch.repositories.blobstore.BlobStoreRepository; import org.opensearch.snapshots.mockstore.MockRepository; +import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.threadpool.ThreadPool; import java.nio.channels.SeekableByteChannel; @@ -389,17 +390,11 @@ public void testSnapshotFileFailureDuringSnapshot() throws InterruptedException disableRepoConsistencyCheck("This test uses a purposely broken repository so it would fail consistency checks"); logger.info("--> creating repository"); - assertAcked( - clusterAdmin().preparePutRepository("test-repo") - .setType("mock") - .setSettings( - Settings.builder() - .put("location", randomRepoPath()) - .put("random", randomAlphaOfLength(10)) - .put("random_control_io_exception_rate", 0.2) - ) - .setVerify(false) - ); + Settings.Builder settings = Settings.builder() + .put("location", randomRepoPath()) + .put("random", randomAlphaOfLength(10)) + .put("random_control_io_exception_rate", 0.2); + OpenSearchIntegTestCase.putRepository(clusterAdmin(), "test-repo", "mock", false, settings); createIndexWithRandomDocs("test-idx", 100); @@ -690,11 +685,8 @@ private void unrestorableUseCase( assertAcked(client().admin().indices().prepareDelete(indexName)); // update the test repository - assertAcked( - clusterAdmin().preparePutRepository("test-repo") - .setType("mock") - .setSettings(Settings.builder().put("location", repositoryLocation).put(repositorySettings).build()) - ); + Settings.Builder settings = Settings.builder().put("location", repositoryLocation).put(repositorySettings); + OpenSearchIntegTestCase.putRepository(clusterAdmin(), "test-repo", "mock", settings); // attempt to restore the snapshot with the given settings 
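Illustrative sketch, not part of the patch: the hunks above replace hand-built preparePutRepository(...) chains with shared OpenSearchIntegTestCase helpers such as createRepository(...) and putRepository(...). A minimal sketch of what such a helper presumably wraps, assuming the usual integration-test imports are in scope (the real signature in the test framework may differ):

    import org.opensearch.client.ClusterAdminClient;
    import org.opensearch.common.settings.Settings;
    import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked;

    static void putRepositorySketch(ClusterAdminClient admin, String name, String type, boolean verify, Settings.Builder settings) {
        // delegate to the regular put-repository builder and assert the acknowledgement
        assertAcked(admin.preparePutRepository(name).setType(type).setVerify(verify).setSettings(settings));
    }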
RestoreSnapshotResponse restoreResponse = clusterAdmin().prepareRestoreSnapshot("test-repo", "test-snap") @@ -1015,27 +1007,17 @@ public void testDeleteRepositoryWhileSnapshotting() throws Exception { } logger.info("--> trying to move repository to another location"); + Settings.Builder settings = Settings.builder().put("location", repositoryLocation.resolve("test")); try { - client.admin() - .cluster() - .preparePutRepository("test-repo") - .setType("fs") - .setSettings(Settings.builder().put("location", repositoryLocation.resolve("test"))) - .get(); + OpenSearchIntegTestCase.putRepository(client.admin().cluster(), "test-repo", "fs", settings); fail("shouldn't be able to replace in-use repository"); } catch (Exception ex) { logger.info("--> in-use repository replacement failed"); } logger.info("--> trying to create a repository with different name"); - assertAcked( - client.admin() - .cluster() - .preparePutRepository("test-repo-2") - .setVerify(false) // do not do verification itself as snapshot threads could be fully blocked - .setType("fs") - .setSettings(Settings.builder().put("location", repositoryLocation.resolve("test"))) - ); + Settings.Builder settingsBuilder = Settings.builder().put("location", repositoryLocation.resolve("test")); + OpenSearchIntegTestCase.putRepository(client.admin().cluster(), "test-repo-2", "fs", false, settingsBuilder); logger.info("--> unblocking blocked node"); unblockNode("test-repo", blockedNode); @@ -1941,20 +1923,12 @@ public void testSnapshotSucceedsAfterSnapshotFailure() throws Exception { logger.info("--> creating repository"); final Path repoPath = randomRepoPath(); final Client client = client(); - assertAcked( - client.admin() - .cluster() - .preparePutRepository("test-repo") - .setType("mock") - .setVerify(false) - .setSettings( - Settings.builder() - .put("location", repoPath) - .put("random_control_io_exception_rate", randomIntBetween(5, 20) / 100f) - // test that we can take a snapshot after a failed one, even if a partial index-N was written - .put("random", randomAlphaOfLength(10)) - ) - ); + Settings.Builder settings = Settings.builder() + .put("location", repoPath) + .put("random_control_io_exception_rate", randomIntBetween(5, 20) / 100f) + // test that we can take a snapshot after a failed one, even if a partial index-N was written + .put("random", randomAlphaOfLength(10)); + OpenSearchIntegTestCase.putRepository(client.admin().cluster(), "test-repo", "mock", false, settings); assertAcked( prepareCreate("test-idx").setSettings( @@ -2004,14 +1978,8 @@ public void testGetSnapshotsFromIndexBlobOnly() throws Exception { logger.info("--> creating repository"); final Path repoPath = randomRepoPath(); final Client client = client(); - assertAcked( - client.admin() - .cluster() - .preparePutRepository("test-repo") - .setType("fs") - .setVerify(false) - .setSettings(Settings.builder().put("location", repoPath)) - ); + Settings.Builder settings = Settings.builder().put("location", repoPath); + OpenSearchIntegTestCase.putRepository(client.admin().cluster(), "test-repo", "fs", false, settings); logger.info("--> creating random number of indices"); final int numIndices = randomIntBetween(1, 10); diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/SnapshotStatusApisIT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/SnapshotStatusApisIT.java index fb69209f7adda..c3214022df663 100644 --- a/server/src/internalClusterTest/java/org/opensearch/snapshots/SnapshotStatusApisIT.java +++ 
b/server/src/internalClusterTest/java/org/opensearch/snapshots/SnapshotStatusApisIT.java @@ -33,11 +33,14 @@ package org.opensearch.snapshots; import org.opensearch.Version; +import org.opensearch.action.ActionRequestValidationException; +import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; import org.opensearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; import org.opensearch.action.admin.cluster.snapshots.get.GetSnapshotsRequest; import org.opensearch.action.admin.cluster.snapshots.get.GetSnapshotsResponse; import org.opensearch.action.admin.cluster.snapshots.status.SnapshotIndexShardStage; import org.opensearch.action.admin.cluster.snapshots.status.SnapshotIndexShardStatus; +import org.opensearch.action.admin.cluster.snapshots.status.SnapshotIndexStatus; import org.opensearch.action.admin.cluster.snapshots.status.SnapshotStats; import org.opensearch.action.admin.cluster.snapshots.status.SnapshotStatus; import org.opensearch.action.admin.cluster.snapshots.status.SnapshotsStatusResponse; @@ -48,7 +51,11 @@ import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.io.IOUtils; import org.opensearch.core.common.Strings; +import org.opensearch.core.common.breaker.CircuitBreakingException; import org.opensearch.core.common.unit.ByteSizeUnit; +import org.opensearch.core.rest.RestStatus; +import org.opensearch.index.IndexNotFoundException; +import org.opensearch.repositories.IndexId; import org.opensearch.repositories.blobstore.BlobStoreRepository; import org.opensearch.threadpool.ThreadPool; @@ -59,9 +66,13 @@ import java.util.Collections; import java.util.List; import java.util.Locale; +import java.util.Map; +import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; +import static org.opensearch.snapshots.SnapshotsService.MAX_SHARDS_ALLOWED_IN_STATUS_API; +import static org.opensearch.test.OpenSearchIntegTestCase.resolvePath; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; @@ -200,11 +211,9 @@ public void testExceptionOnMissingShardLevelSnapBlob() throws IOException { final SnapshotInfo snapshotInfo = createFullSnapshot("test-repo", "test-snap"); logger.info("--> delete shard-level snap-${uuid}.dat file for one shard in this snapshot to simulate concurrent delete"); - final String indexRepoId = getRepositoryData("test-repo").resolveIndexId(snapshotInfo.indices().get(0)).getId(); + IndexId indexId = getRepositoryData("test-repo").resolveIndexId(snapshotInfo.indices().get(0)); IOUtils.rm( - repoPath.resolve("indices") - .resolve(indexRepoId) - .resolve("0") + repoPath.resolve(resolvePath(indexId, "0")) .resolve(BlobStoreRepository.SNAPSHOT_PREFIX + snapshotInfo.snapshotId().getUUID() + ".dat") ); @@ -564,6 +573,194 @@ public void testGetSnapshotsRequest() throws Exception { waitForCompletion(repositoryName, inProgressSnapshot, TimeValue.timeValueSeconds(60)); } + public void testSnapshotStatusApiFailureForTooManyShardsAcrossSnapshots() throws Exception { + String repositoryName = "test-repo"; + String index1 = "test-idx-1"; + String index2 = "test-idx-2"; + String index3 = "test-idx-3"; + createRepository(repositoryName, "fs"); + + logger.info("Create indices"); + createIndex(index1, index2, index3); + ensureGreen(); + + logger.info("Indexing some data"); + for (int i = 0; i < 10; i++) { + index(index1, "_doc", Integer.toString(i), "foo", "bar" + i); + index(index2, 
"_doc", Integer.toString(i), "foo", "baz" + i); + index(index3, "_doc", Integer.toString(i), "foo", "baz" + i); + } + refresh(); + String snapshot1 = "test-snap-1"; + String snapshot2 = "test-snap-2"; + createSnapshot(repositoryName, snapshot1, List.of(index1, index2, index3)); + createSnapshot(repositoryName, snapshot2, List.of(index1, index2)); + + logger.info("Set MAX_SHARDS_ALLOWED_IN_STATUS_API to a low value"); + ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest(); + updateSettingsRequest.persistentSettings(Settings.builder().put(MAX_SHARDS_ALLOWED_IN_STATUS_API.getKey(), 2)); + assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + + // across a single snapshot + assertBusy(() -> { + CircuitBreakingException exception = expectThrows( + CircuitBreakingException.class, + () -> client().admin().cluster().prepareSnapshotStatus(repositoryName).setSnapshots(snapshot1).execute().actionGet() + ); + assertEquals(exception.status(), RestStatus.TOO_MANY_REQUESTS); + assertTrue( + exception.getMessage().endsWith(" is more than the maximum allowed value of shard count [2] for snapshot status request") + ); + }, 1, TimeUnit.MINUTES); + + // across multiple snapshots + assertBusy(() -> { + CircuitBreakingException exception = expectThrows( + CircuitBreakingException.class, + () -> client().admin() + .cluster() + .prepareSnapshotStatus(repositoryName) + .setSnapshots(snapshot1, snapshot2) + .execute() + .actionGet() + ); + assertEquals(exception.status(), RestStatus.TOO_MANY_REQUESTS); + assertTrue( + exception.getMessage().endsWith(" is more than the maximum allowed value of shard count [2] for snapshot status request") + ); + }, 1, TimeUnit.MINUTES); + + logger.info("Reset MAX_SHARDS_ALLOWED_IN_STATUS_API to default value"); + updateSettingsRequest.persistentSettings(Settings.builder().putNull(MAX_SHARDS_ALLOWED_IN_STATUS_API.getKey())); + assertAcked(internalCluster().client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + } + + public void testSnapshotStatusForIndexFilter() throws Exception { + String repositoryName = "test-repo"; + String index1 = "test-idx-1"; + String index2 = "test-idx-2"; + String index3 = "test-idx-3"; + createRepository(repositoryName, "fs"); + + logger.info("Create indices"); + createIndex(index1, index2, index3); + ensureGreen(); + + logger.info("Indexing some data"); + for (int i = 0; i < 10; i++) { + index(index1, "_doc", Integer.toString(i), "foo", "bar" + i); + index(index2, "_doc", Integer.toString(i), "foo", "baz" + i); + index(index3, "_doc", Integer.toString(i), "foo", "baz" + i); + } + refresh(); + String snapshot = "test-snap-1"; + createSnapshot(repositoryName, snapshot, List.of(index1, index2, index3)); + + assertBusy(() -> { + SnapshotStatus snapshotsStatus = client().admin() + .cluster() + .prepareSnapshotStatus(repositoryName) + .setSnapshots(snapshot) + .setIndices(index1, index2) + .get() + .getSnapshots() + .get(0); + Map snapshotIndexStatusMap = snapshotsStatus.getIndices(); + // Although the snapshot contains 3 indices, the response of status api call only contains results for 2 + assertEquals(snapshotIndexStatusMap.size(), 2); + assertEquals(snapshotIndexStatusMap.keySet(), Set.of(index1, index2)); + }, 1, TimeUnit.MINUTES); + } + + public void testSnapshotStatusFailuresWithIndexFilter() throws Exception { + String repositoryName = "test-repo"; + String index1 = "test-idx-1"; + String index2 = "test-idx-2"; + String index3 = "test-idx-3"; + 
createRepository(repositoryName, "fs"); + + logger.info("Create indices"); + createIndex(index1, index2, index3); + ensureGreen(); + + logger.info("Indexing some data"); + for (int i = 0; i < 10; i++) { + index(index1, "_doc", Integer.toString(i), "foo", "bar" + i); + index(index2, "_doc", Integer.toString(i), "foo", "baz" + i); + index(index3, "_doc", Integer.toString(i), "foo", "baz" + i); + } + refresh(); + String snapshot1 = "test-snap-1"; + String snapshot2 = "test-snap-2"; + createSnapshot(repositoryName, snapshot1, List.of(index1, index2, index3)); + createSnapshot(repositoryName, snapshot2, List.of(index1)); + + assertBusy(() -> { + // failure due to passing index filter for multiple snapshots + ActionRequestValidationException ex = expectThrows( + ActionRequestValidationException.class, + () -> client().admin() + .cluster() + .prepareSnapshotStatus(repositoryName) + .setSnapshots(snapshot1, snapshot2) + .setIndices(index1, index2, index3) + .execute() + .actionGet() + ); + String cause = "index list filter is supported only for a single snapshot"; + assertTrue(ex.getMessage().contains(cause)); + }, 1, TimeUnit.MINUTES); + + assertBusy(() -> { + // failure due to index not found in snapshot + IndexNotFoundException ex = expectThrows( + IndexNotFoundException.class, + () -> client().admin() + .cluster() + .prepareSnapshotStatus(repositoryName) + .setSnapshots(snapshot2) + .setIndices(index1, index2, index3) + .execute() + .actionGet() + ); + assertEquals(ex.status(), RestStatus.NOT_FOUND); + String cause = String.format( + Locale.ROOT, + "indices [%s] missing in snapshot [%s] of repository [%s]", + String.join(", ", List.of(index2, index3)), + snapshot2, + repositoryName + ); + assertEquals(cause, ex.getCause().getMessage()); + + }, 1, TimeUnit.MINUTES); + + assertBusy(() -> { + // failure due to too many shards requested + logger.info("Set MAX_SHARDS_ALLOWED_IN_STATUS_API to a low value"); + ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest(); + updateSettingsRequest.persistentSettings(Settings.builder().put(MAX_SHARDS_ALLOWED_IN_STATUS_API.getKey(), 2)); + assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + + CircuitBreakingException ex = expectThrows( + CircuitBreakingException.class, + () -> client().admin() + .cluster() + .prepareSnapshotStatus(repositoryName) + .setSnapshots(snapshot1) + .setIndices(index1, index2, index3) + .execute() + .actionGet() + ); + assertEquals(ex.status(), RestStatus.TOO_MANY_REQUESTS); + assertTrue(ex.getMessage().endsWith(" is more than the maximum allowed value of shard count [2] for snapshot status request")); + + logger.info("Reset MAX_SHARDS_ALLOWED_IN_STATUS_API to default value"); + updateSettingsRequest.persistentSettings(Settings.builder().putNull(MAX_SHARDS_ALLOWED_IN_STATUS_API.getKey())); + assertAcked(internalCluster().client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + }, 2, TimeUnit.MINUTES); + } + private static SnapshotIndexShardStatus stateFirstShard(SnapshotStatus snapshotStatus, String indexName) { return snapshotStatus.getIndices().get(indexName).getShards().get(0); } diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/SystemRepositoryIT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/SystemRepositoryIT.java index 28b84655a2cc7..bb5cc89d4e1d5 100644 --- a/server/src/internalClusterTest/java/org/opensearch/snapshots/SystemRepositoryIT.java +++ 
b/server/src/internalClusterTest/java/org/opensearch/snapshots/SystemRepositoryIT.java @@ -8,7 +8,6 @@ package org.opensearch.snapshots; -import org.opensearch.client.Client; import org.opensearch.common.settings.Settings; import org.opensearch.core.common.unit.ByteSizeValue; import org.opensearch.repositories.RepositoryException; @@ -19,7 +18,6 @@ import java.nio.file.Path; import static org.opensearch.remotestore.RemoteStoreBaseIntegTestCase.remoteStoreClusterSettings; -import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) public class SystemRepositoryIT extends AbstractSnapshotIntegTestCase { @@ -43,13 +41,8 @@ public void testRestrictedSettingsCantBeUpdated() { disableRepoConsistencyCheck("System repository is being used for the test"); internalCluster().startNode(); - final Client client = client(); final Settings.Builder repoSettings = Settings.builder().put("location", randomRepoPath()); - - RepositoryException e = expectThrows( - RepositoryException.class, - () -> client.admin().cluster().preparePutRepository(systemRepoName).setType("mock").setSettings(repoSettings).get() - ); + RepositoryException e = expectThrows(RepositoryException.class, () -> createRepository(systemRepoName, "mock", repoSettings)); assertEquals( e.getMessage(), "[system-repo-name] trying to modify an unmodifiable attribute type of system " @@ -59,18 +52,8 @@ public void testRestrictedSettingsCantBeUpdated() { public void testSystemRepositoryNonRestrictedSettingsCanBeUpdated() { disableRepoConsistencyCheck("System repository is being used for the test"); - internalCluster().startNode(); - final Client client = client(); final Settings.Builder repoSettings = Settings.builder().put("location", absolutePath).put("chunk_size", new ByteSizeValue(20)); - - assertAcked( - client.admin() - .cluster() - .preparePutRepository(systemRepoName) - .setType(ReloadableFsRepository.TYPE) - .setSettings(repoSettings) - .get() - ); + createRepository(systemRepoName, ReloadableFsRepository.TYPE, repoSettings); } } diff --git a/server/src/main/java/org/opensearch/OpenSearchServerException.java b/server/src/main/java/org/opensearch/OpenSearchServerException.java index c5a5ce12b238c..a1fc61834f69b 100644 --- a/server/src/main/java/org/opensearch/OpenSearchServerException.java +++ b/server/src/main/java/org/opensearch/OpenSearchServerException.java @@ -13,6 +13,7 @@ import static org.opensearch.OpenSearchException.UNKNOWN_VERSION_ADDED; import static org.opensearch.Version.V_2_10_0; import static org.opensearch.Version.V_2_13_0; +import static org.opensearch.Version.V_2_17_0; import static org.opensearch.Version.V_2_1_0; import static org.opensearch.Version.V_2_4_0; import static org.opensearch.Version.V_2_5_0; @@ -1201,6 +1202,22 @@ public static void registerExceptions() { V_2_13_0 ) ); + registerExceptionHandle( + new OpenSearchExceptionHandle( + org.opensearch.indices.InvalidIndexContextException.class, + org.opensearch.indices.InvalidIndexContextException::new, + 174, + V_2_17_0 + ) + ); + registerExceptionHandle( + new OpenSearchExceptionHandle( + org.opensearch.snapshots.TooManyShardsInSnapshotsStatusException.class, + org.opensearch.snapshots.TooManyShardsInSnapshotsStatusException::new, + 175, + V_2_17_0 + ) + ); registerExceptionHandle( new OpenSearchExceptionHandle( org.opensearch.cluster.block.IndexCreateBlockException.class, diff --git a/server/src/main/java/org/opensearch/action/ActionModule.java 
b/server/src/main/java/org/opensearch/action/ActionModule.java index c86e6580122d5..fbf90b97d1e8f 100644 --- a/server/src/main/java/org/opensearch/action/ActionModule.java +++ b/server/src/main/java/org/opensearch/action/ActionModule.java @@ -85,7 +85,9 @@ import org.opensearch.action.admin.cluster.reroute.TransportClusterRerouteAction; import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsAction; import org.opensearch.action.admin.cluster.settings.TransportClusterUpdateSettingsAction; +import org.opensearch.action.admin.cluster.shards.CatShardsAction; import org.opensearch.action.admin.cluster.shards.ClusterSearchShardsAction; +import org.opensearch.action.admin.cluster.shards.TransportCatShardsAction; import org.opensearch.action.admin.cluster.shards.TransportClusterSearchShardsAction; import org.opensearch.action.admin.cluster.shards.routing.weighted.delete.ClusterDeleteWeightedRoutingAction; import org.opensearch.action.admin.cluster.shards.routing.weighted.delete.TransportDeleteWeightedRoutingAction; @@ -646,6 +648,7 @@ public void reg actions.register(ClusterGetWeightedRoutingAction.INSTANCE, TransportGetWeightedRoutingAction.class); actions.register(ClusterDeleteWeightedRoutingAction.INSTANCE, TransportDeleteWeightedRoutingAction.class); actions.register(IndicesStatsAction.INSTANCE, TransportIndicesStatsAction.class); + actions.register(CatShardsAction.INSTANCE, TransportCatShardsAction.class); actions.register(IndicesSegmentsAction.INSTANCE, TransportIndicesSegmentsAction.class); actions.register(IndicesShardStoresAction.INSTANCE, TransportIndicesShardStoresAction.class); actions.register(CreateIndexAction.INSTANCE, TransportCreateIndexAction.class); diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/allocation/ClusterAllocationExplanation.java b/server/src/main/java/org/opensearch/action/admin/cluster/allocation/ClusterAllocationExplanation.java index 3c8f07613561d..70a223d60069a 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/allocation/ClusterAllocationExplanation.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/allocation/ClusterAllocationExplanation.java @@ -95,7 +95,7 @@ public ClusterAllocationExplanation(StreamInput in) throws IOException { @Override public void writeTo(StreamOutput out) throws IOException { shardRouting.writeTo(out); - out.writeOptionalWriteable(currentNode); + out.writeOptionalWriteable((stream, node) -> node.writeToWithAttribute(stream), currentNode); out.writeOptionalWriteable(relocationTargetNode); out.writeOptionalWriteable(clusterInfo); shardAllocationDecision.writeTo(out); diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/health/ClusterHealthRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/health/ClusterHealthRequest.java index ec8b01d853da6..65fa32fecda2e 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/health/ClusterHealthRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/health/ClusterHealthRequest.java @@ -76,6 +76,17 @@ public class ClusterHealthRequest extends ClusterManagerNodeReadRequest 0) { diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/health/ClusterHealthRequestBuilder.java b/server/src/main/java/org/opensearch/action/admin/cluster/health/ClusterHealthRequestBuilder.java index a9a3756755265..87dd77aca20c6 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/health/ClusterHealthRequestBuilder.java +++ 
b/server/src/main/java/org/opensearch/action/admin/cluster/health/ClusterHealthRequestBuilder.java @@ -171,4 +171,9 @@ public final ClusterHealthRequestBuilder setEnsureNodeWeighedIn(boolean ensureNo request.ensureNodeWeighedIn(ensureNodeCommissioned); return this; } + + public ClusterHealthRequestBuilder setApplyLevelAtTransportLayer(boolean applyLevelAtTransportLayer) { + request.setApplyLevelAtTransportLayer(applyLevelAtTransportLayer); + return this; + } } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/health/ClusterHealthResponse.java b/server/src/main/java/org/opensearch/action/admin/cluster/health/ClusterHealthResponse.java index 1a27f161343e8..2dcfb58c3d7b8 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/health/ClusterHealthResponse.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/health/ClusterHealthResponse.java @@ -237,6 +237,26 @@ public ClusterHealthResponse( this.clusterHealthStatus = clusterStateHealth.getStatus(); } + public ClusterHealthResponse( + String clusterName, + String[] concreteIndices, + ClusterHealthRequest clusterHealthRequest, + ClusterState clusterState, + int numberOfPendingTasks, + int numberOfInFlightFetch, + TimeValue taskMaxWaitingTime + ) { + this.clusterName = clusterName; + this.numberOfPendingTasks = numberOfPendingTasks; + this.numberOfInFlightFetch = numberOfInFlightFetch; + this.taskMaxWaitingTime = taskMaxWaitingTime; + this.clusterStateHealth = clusterHealthRequest.isApplyLevelAtTransportLayer() + ? new ClusterStateHealth(clusterState, concreteIndices, clusterHealthRequest.level()) + : new ClusterStateHealth(clusterState, concreteIndices); + this.clusterHealthStatus = clusterStateHealth.getStatus(); + this.delayedUnassignedShards = clusterStateHealth.getDelayedUnassignedShards(); + } + // Awareness Attribute health public ClusterHealthResponse( String clusterName, @@ -261,6 +281,29 @@ public ClusterHealthResponse( this.clusterAwarenessHealth = new ClusterAwarenessHealth(clusterState, clusterSettings, awarenessAttributeName); } + public ClusterHealthResponse( + String clusterName, + ClusterHealthRequest clusterHealthRequest, + ClusterState clusterState, + ClusterSettings clusterSettings, + String[] concreteIndices, + String awarenessAttributeName, + int numberOfPendingTasks, + int numberOfInFlightFetch, + TimeValue taskMaxWaitingTime + ) { + this( + clusterName, + concreteIndices, + clusterHealthRequest, + clusterState, + numberOfPendingTasks, + numberOfInFlightFetch, + taskMaxWaitingTime + ); + this.clusterAwarenessHealth = new ClusterAwarenessHealth(clusterState, clusterSettings, awarenessAttributeName); + } + /** * For XContent Parser and serialization tests */ diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/health/TransportClusterHealthAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/health/TransportClusterHealthAction.java index f69f462372888..8f0202e6d1ed0 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/health/TransportClusterHealthAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/health/TransportClusterHealthAction.java @@ -52,7 +52,6 @@ import org.opensearch.cluster.metadata.ProcessClusterEventTimeoutException; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.routing.NodeWeighedAwayException; -import org.opensearch.cluster.routing.UnassignedInfo; import org.opensearch.cluster.routing.WeightedRoutingUtils; import 
org.opensearch.cluster.routing.allocation.AllocationService; import org.opensearch.cluster.service.ClusterService; @@ -487,7 +486,12 @@ private ClusterHealthResponse clusterHealth( TimeValue pendingTaskTimeInQueue ) { if (logger.isTraceEnabled()) { - logger.trace("Calculating health based on state version [{}]", clusterState.version()); + logger.trace( + "Calculating health based on state version [{}] for health level [{}] and applyLevelAtTransportLayer set to [{}]", + clusterState.version(), + request.level(), + request.isApplyLevelAtTransportLayer() + ); } String[] concreteIndices; @@ -496,13 +500,13 @@ private ClusterHealthResponse clusterHealth( concreteIndices = clusterState.getMetadata().getConcreteAllIndices(); return new ClusterHealthResponse( clusterState.getClusterName().value(), + request, clusterState, clusterService.getClusterSettings(), concreteIndices, awarenessAttribute, numberOfPendingTasks, numberOfInFlightFetch, - UnassignedInfo.getNumberOfDelayedUnassigned(clusterState), pendingTaskTimeInQueue ); } @@ -514,10 +518,10 @@ private ClusterHealthResponse clusterHealth( ClusterHealthResponse response = new ClusterHealthResponse( clusterState.getClusterName().value(), Strings.EMPTY_ARRAY, + request, clusterState, numberOfPendingTasks, numberOfInFlightFetch, - UnassignedInfo.getNumberOfDelayedUnassigned(clusterState), pendingTaskTimeInQueue ); response.setStatus(ClusterHealthStatus.RED); @@ -527,10 +531,10 @@ private ClusterHealthResponse clusterHealth( return new ClusterHealthResponse( clusterState.getClusterName().value(), concreteIndices, + request, clusterState, numberOfPendingTasks, numberOfInFlightFetch, - UnassignedInfo.getNumberOfDelayedUnassigned(clusterState), pendingTaskTimeInQueue ); } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/hotthreads/NodesHotThreadsRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/hotthreads/NodesHotThreadsRequest.java index 9e52b90f7bd38..b7b60732fcbfc 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/hotthreads/NodesHotThreadsRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/hotthreads/NodesHotThreadsRequest.java @@ -70,7 +70,7 @@ public NodesHotThreadsRequest(StreamInput in) throws IOException { * threads for all nodes is used. */ public NodesHotThreadsRequest(String... nodesIds) { - super(nodesIds); + super(false, nodesIds); } public int threads() { diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/info/NodesInfoRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/info/NodesInfoRequest.java index 17b633c533218..26b53e8db642f 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/info/NodesInfoRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/info/NodesInfoRequest.java @@ -72,7 +72,7 @@ public NodesInfoRequest(StreamInput in) throws IOException { * for all nodes will be returned. */ public NodesInfoRequest(String... 
nodesIds) { - super(nodesIds); + super(false, nodesIds); defaultMetrics(); } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java index 379836cf442e3..f1f9f93afdad2 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java @@ -58,7 +58,7 @@ public class NodesStatsRequest extends BaseNodesRequest { private final Set requestedMetrics = new HashSet<>(); public NodesStatsRequest() { - super((String[]) null); + super(false, (String[]) null); } public NodesStatsRequest(StreamInput in) throws IOException { @@ -74,7 +74,7 @@ public NodesStatsRequest(StreamInput in) throws IOException { * for all nodes will be returned. */ public NodesStatsRequest(String... nodesIds) { - super(nodesIds); + super(false, nodesIds); } /** diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/usage/NodesUsageRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/usage/NodesUsageRequest.java index 385f48d5690c1..955f77921009f 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/usage/NodesUsageRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/usage/NodesUsageRequest.java @@ -61,7 +61,7 @@ public NodesUsageRequest(StreamInput in) throws IOException { * passed, usage for all nodes will be returned. */ public NodesUsageRequest(String... nodesIds) { - super(nodesIds); + super(false, nodesIds); } /** diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/repositories/cleanup/TransportCleanupRepositoryAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/repositories/cleanup/TransportCleanupRepositoryAction.java index 774bffa10da4f..54b6d7a914a9b 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/repositories/cleanup/TransportCleanupRepositoryAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/repositories/cleanup/TransportCleanupRepositoryAction.java @@ -52,7 +52,9 @@ import org.opensearch.common.inject.Inject; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.index.store.RemoteSegmentStoreDirectoryFactory; import org.opensearch.index.store.lockmanager.RemoteStoreLockManagerFactory; +import org.opensearch.indices.RemoteStoreSettings; import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.Repository; import org.opensearch.repositories.RepositoryCleanupResult; @@ -96,6 +98,8 @@ public final class TransportCleanupRepositoryAction extends TransportClusterMana private final RemoteStoreLockManagerFactory remoteStoreLockManagerFactory; + private final RemoteSegmentStoreDirectoryFactory remoteSegmentStoreDirectoryFactory; + @Override protected String executor() { return ThreadPool.Names.SAME; @@ -109,7 +113,8 @@ public TransportCleanupRepositoryAction( SnapshotsService snapshotsService, ThreadPool threadPool, ActionFilters actionFilters, - IndexNameExpressionResolver indexNameExpressionResolver + IndexNameExpressionResolver indexNameExpressionResolver, + RemoteStoreSettings remoteStoreSettings ) { super( CleanupRepositoryAction.NAME, @@ -122,7 +127,15 @@ public TransportCleanupRepositoryAction( ); this.repositoriesService = repositoriesService; this.snapshotsService = 
snapshotsService; - this.remoteStoreLockManagerFactory = new RemoteStoreLockManagerFactory(() -> repositoriesService); + this.remoteSegmentStoreDirectoryFactory = new RemoteSegmentStoreDirectoryFactory( + () -> repositoriesService, + threadPool, + remoteStoreSettings.getSegmentsPathFixedPrefix() + ); + this.remoteStoreLockManagerFactory = new RemoteStoreLockManagerFactory( + () -> repositoriesService, + remoteStoreSettings.getSegmentsPathFixedPrefix() + ); // We add a state applier that will remove any dangling repository cleanup actions on cluster-manager failover. // This is safe to do since cleanups will increment the repository state id before executing any operations to prevent concurrent // operations from corrupting the repository. This is the same safety mechanism used by snapshot deletes. @@ -272,6 +285,7 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS repositoryStateId, snapshotsService.minCompatibleVersion(newState.nodes().getMinNodeVersion(), repositoryData, null), remoteStoreLockManagerFactory, + remoteSegmentStoreDirectoryFactory, ActionListener.wrap(result -> after(null, result), e -> after(e, null)) ) ) diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/shards/CatShardsAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/shards/CatShardsAction.java new file mode 100644 index 0000000000000..381a950fe9a79 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/shards/CatShardsAction.java @@ -0,0 +1,25 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.shards; + +import org.opensearch.action.ActionType; + +/** + * Transport action for cat shards + * + * @opensearch.internal + */ +public class CatShardsAction extends ActionType { + public static final CatShardsAction INSTANCE = new CatShardsAction(); + public static final String NAME = "cluster:monitor/shards"; + + private CatShardsAction() { + super(NAME, CatShardsResponse::new); + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/shards/CatShardsRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/shards/CatShardsRequest.java new file mode 100644 index 0000000000000..49299777db8ae --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/shards/CatShardsRequest.java @@ -0,0 +1,62 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.shards; + +import org.opensearch.action.ActionRequestValidationException; +import org.opensearch.action.support.clustermanager.ClusterManagerNodeReadRequest; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.tasks.TaskId; +import org.opensearch.rest.action.admin.cluster.ClusterAdminTask; + +import java.io.IOException; +import java.util.Map; + +/** + * A request of _cat/shards. 
+ * + * @opensearch.api + */ +public class CatShardsRequest extends ClusterManagerNodeReadRequest { + + private String[] indices; + private TimeValue cancelAfterTimeInterval; + + public CatShardsRequest() {} + + public CatShardsRequest(StreamInput in) throws IOException { + super(in); + } + + @Override + public ActionRequestValidationException validate() { + return null; + } + + public void setIndices(String[] indices) { + this.indices = indices; + } + + public String[] getIndices() { + return this.indices; + } + + public void setCancelAfterTimeInterval(TimeValue timeout) { + this.cancelAfterTimeInterval = timeout; + } + + public TimeValue getCancelAfterTimeInterval() { + return this.cancelAfterTimeInterval; + } + + @Override + public ClusterAdminTask createTask(long id, String type, String action, TaskId parentTaskId, Map headers) { + return new ClusterAdminTask(id, type, action, parentTaskId, headers, this.cancelAfterTimeInterval); + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/shards/CatShardsResponse.java b/server/src/main/java/org/opensearch/action/admin/cluster/shards/CatShardsResponse.java new file mode 100644 index 0000000000000..3dd88a2cda037 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/shards/CatShardsResponse.java @@ -0,0 +1,57 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.shards; + +import org.opensearch.action.admin.cluster.state.ClusterStateResponse; +import org.opensearch.action.admin.indices.stats.IndicesStatsResponse; +import org.opensearch.core.action.ActionResponse; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; + +import java.io.IOException; + +/** + * A response of a cat shards request. + * + * @opensearch.api + */ +public class CatShardsResponse extends ActionResponse { + + private ClusterStateResponse clusterStateResponse = null; + + private IndicesStatsResponse indicesStatsResponse = null; + + public CatShardsResponse() {} + + public CatShardsResponse(StreamInput in) throws IOException { + super(in); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + clusterStateResponse.writeTo(out); + indicesStatsResponse.writeTo(out); + } + + public void setClusterStateResponse(ClusterStateResponse clusterStateResponse) { + this.clusterStateResponse = clusterStateResponse; + } + + public ClusterStateResponse getClusterStateResponse() { + return this.clusterStateResponse; + } + + public void setIndicesStatsResponse(IndicesStatsResponse indicesStatsResponse) { + this.indicesStatsResponse = indicesStatsResponse; + } + + public IndicesStatsResponse getIndicesStatsResponse() { + return this.indicesStatsResponse; + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/shards/TransportCatShardsAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/shards/TransportCatShardsAction.java new file mode 100644 index 0000000000000..224d3cbc5f10a --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/shards/TransportCatShardsAction.java @@ -0,0 +1,110 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
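Illustrative sketch, not part of the patch: CatShardsRequest and CatShardsResponse above form the request/response pair for the new cluster:monitor/shards action. A hedged example of how a caller holding a client might invoke it (the listener body is purely illustrative):

    CatShardsRequest catShardsRequest = new CatShardsRequest();
    catShardsRequest.setIndices(new String[] { "my-index" });                    // optional index filter
    catShardsRequest.setCancelAfterTimeInterval(TimeValue.timeValueSeconds(30)); // cancel if it runs too long
    client.execute(CatShardsAction.INSTANCE, catShardsRequest, new ActionListener<CatShardsResponse>() {
        @Override
        public void onResponse(CatShardsResponse catShardsResponse) {
            ClusterStateResponse state = catShardsResponse.getClusterStateResponse();
            IndicesStatsResponse stats = catShardsResponse.getIndicesStatsResponse();
            // combine the routing table from the state with per-shard stats to build _cat/shards rows
        }

        @Override
        public void onFailure(Exception e) {
            // surfaced on timeout-driven cancellation or any transport failure
        }
    });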
+ */ + +package org.opensearch.action.admin.cluster.shards; + +import org.opensearch.action.admin.cluster.state.ClusterStateRequest; +import org.opensearch.action.admin.cluster.state.ClusterStateResponse; +import org.opensearch.action.admin.indices.stats.IndicesStatsRequest; +import org.opensearch.action.admin.indices.stats.IndicesStatsResponse; +import org.opensearch.action.support.ActionFilters; +import org.opensearch.action.support.HandledTransportAction; +import org.opensearch.action.support.TimeoutTaskCancellationUtility; +import org.opensearch.client.node.NodeClient; +import org.opensearch.common.inject.Inject; +import org.opensearch.core.action.ActionListener; +import org.opensearch.core.action.NotifyOnceListener; +import org.opensearch.tasks.CancellableTask; +import org.opensearch.tasks.Task; +import org.opensearch.transport.TransportService; + +/** + * Perform cat shards action + * + * @opensearch.internal + */ +public class TransportCatShardsAction extends HandledTransportAction { + + private final NodeClient client; + + @Inject + public TransportCatShardsAction(NodeClient client, TransportService transportService, ActionFilters actionFilters) { + super(CatShardsAction.NAME, transportService, actionFilters, CatShardsRequest::new); + this.client = client; + } + + @Override + public void doExecute(Task parentTask, CatShardsRequest shardsRequest, ActionListener listener) { + final ClusterStateRequest clusterStateRequest = new ClusterStateRequest(); + clusterStateRequest.setShouldCancelOnTimeout(true); + clusterStateRequest.local(shardsRequest.local()); + clusterStateRequest.clusterManagerNodeTimeout(shardsRequest.clusterManagerNodeTimeout()); + clusterStateRequest.clear().nodes(true).routingTable(true).indices(shardsRequest.getIndices()); + assert parentTask instanceof CancellableTask; + clusterStateRequest.setParentTask(client.getLocalNodeId(), parentTask.getId()); + + ActionListener originalListener = new NotifyOnceListener() { + @Override + protected void innerOnResponse(CatShardsResponse catShardsResponse) { + listener.onResponse(catShardsResponse); + } + + @Override + protected void innerOnFailure(Exception e) { + listener.onFailure(e); + } + }; + ActionListener cancellableListener = TimeoutTaskCancellationUtility.wrapWithCancellationListener( + client, + (CancellableTask) parentTask, + ((CancellableTask) parentTask).getCancellationTimeout(), + originalListener, + e -> { + originalListener.onFailure(e); + } + ); + CatShardsResponse catShardsResponse = new CatShardsResponse(); + try { + client.admin().cluster().state(clusterStateRequest, new ActionListener() { + @Override + public void onResponse(ClusterStateResponse clusterStateResponse) { + catShardsResponse.setClusterStateResponse(clusterStateResponse); + IndicesStatsRequest indicesStatsRequest = new IndicesStatsRequest(); + indicesStatsRequest.setShouldCancelOnTimeout(true); + indicesStatsRequest.all(); + indicesStatsRequest.indices(shardsRequest.getIndices()); + indicesStatsRequest.setParentTask(client.getLocalNodeId(), parentTask.getId()); + try { + client.admin().indices().stats(indicesStatsRequest, new ActionListener() { + @Override + public void onResponse(IndicesStatsResponse indicesStatsResponse) { + catShardsResponse.setIndicesStatsResponse(indicesStatsResponse); + cancellableListener.onResponse(catShardsResponse); + } + + @Override + public void onFailure(Exception e) { + cancellableListener.onFailure(e); + } + }); + } catch (Exception e) { + cancellableListener.onFailure(e); + } + } + + @Override + public void 
onFailure(Exception e) { + cancellableListener.onFailure(e); + } + }); + } catch (Exception e) { + cancellableListener.onFailure(e); + } + + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/clone/TransportCloneSnapshotAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/clone/TransportCloneSnapshotAction.java index 54ef372609390..6da26d9a2da45 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/clone/TransportCloneSnapshotAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/clone/TransportCloneSnapshotAction.java @@ -95,7 +95,7 @@ protected void clusterManagerOperation( ClusterState state, ActionListener listener ) { - snapshotsService.cloneSnapshot(request, ActionListener.map(listener, v -> new AcknowledgedResponse(true))); + snapshotsService.executeClone(request, ActionListener.map(listener, v -> new AcknowledgedResponse(true))); } @Override diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/create/TransportCreateSnapshotAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/create/TransportCreateSnapshotAction.java index 25e71d5598a98..6b582396c2733 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/create/TransportCreateSnapshotAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/create/TransportCreateSnapshotAction.java @@ -113,8 +113,10 @@ protected void clusterManagerOperation( ) { Repository repository = repositoriesService.repository(request.repository()); boolean isSnapshotV2 = SHALLOW_SNAPSHOT_V2.get(repository.getMetadata().settings()); - if (request.waitForCompletion() || isSnapshotV2) { + if (request.waitForCompletion()) { snapshotsService.executeSnapshot(request, ActionListener.map(listener, CreateSnapshotResponse::new)); + } else if (isSnapshotV2) { + snapshotsService.executeSnapshot(request, ActionListener.map(listener, snapshot -> new CreateSnapshotResponse())); } else { snapshotsService.createSnapshot(request, ActionListener.map(listener, snapshot -> new CreateSnapshotResponse())); } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/restore/RestoreSnapshotRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/restore/RestoreSnapshotRequest.java index 492ef86bb7843..469a31407dc5d 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/restore/RestoreSnapshotRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/restore/RestoreSnapshotRequest.java @@ -121,6 +121,8 @@ private static StorageType fromString(String string) { private StorageType storageType = StorageType.LOCAL; @Nullable private String sourceRemoteStoreRepository = null; + @Nullable + private String sourceRemoteTranslogRepository = null; @Nullable // if any snapshot UUID will do private String snapshotUuid; @@ -159,6 +161,9 @@ public RestoreSnapshotRequest(StreamInput in) throws IOException { if (in.getVersion().onOrAfter(Version.V_2_10_0)) { sourceRemoteStoreRepository = in.readOptionalString(); } + if (in.getVersion().onOrAfter(Version.V_2_17_0)) { + sourceRemoteTranslogRepository = in.readOptionalString(); + } } @Override @@ -183,6 +188,9 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(Version.V_2_10_0)) { out.writeOptionalString(sourceRemoteStoreRepository); } + if (out.getVersion().onOrAfter(Version.V_2_17_0)) { + 
out.writeOptionalString(sourceRemoteTranslogRepository); + } } @Override @@ -545,6 +553,16 @@ public RestoreSnapshotRequest setSourceRemoteStoreRepository(String sourceRemote return this; } + /** + * Sets Source Remote Translog Repository for all the restored indices + * + * @param sourceRemoteTranslogRepository name of the remote translog repository that should be used for all restored indices. + */ + public RestoreSnapshotRequest setSourceRemoteTranslogRepository(String sourceRemoteTranslogRepository) { + this.sourceRemoteTranslogRepository = sourceRemoteTranslogRepository; + return this; + } + /** * Returns Source Remote Store Repository for all the restored indices * @@ -554,6 +572,15 @@ public String getSourceRemoteStoreRepository() { return sourceRemoteStoreRepository; } + /** + * Returns Source Remote Translog Repository for all the restored indices + * + * @return source Remote Translog Repository + */ + public String getSourceRemoteTranslogRepository() { + return sourceRemoteTranslogRepository; + } + /** * Parses restore definition * @@ -673,6 +700,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (sourceRemoteStoreRepository != null) { builder.field("source_remote_store_repository", sourceRemoteStoreRepository); } + if (sourceRemoteTranslogRepository != null) { + builder.field("source_remote_translog_repository", sourceRemoteTranslogRepository); + } builder.endObject(); return builder; } @@ -701,7 +731,8 @@ public boolean equals(Object o) { && Arrays.equals(ignoreIndexSettings, that.ignoreIndexSettings) && Objects.equals(snapshotUuid, that.snapshotUuid) && Objects.equals(storageType, that.storageType) - && Objects.equals(sourceRemoteStoreRepository, that.sourceRemoteStoreRepository); + && Objects.equals(sourceRemoteStoreRepository, that.sourceRemoteStoreRepository) + && Objects.equals(sourceRemoteTranslogRepository, that.sourceRemoteTranslogRepository); return equals; } @@ -721,7 +752,8 @@ public int hashCode() { indexSettings, snapshotUuid, storageType, - sourceRemoteStoreRepository + sourceRemoteStoreRepository, + sourceRemoteTranslogRepository ); result = 31 * result + Arrays.hashCode(indices); result = 31 * result + Arrays.hashCode(ignoreIndexSettings); diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/status/SnapshotsStatusRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/status/SnapshotsStatusRequest.java index 061e73f1094b5..3d7fb5b6beb56 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/status/SnapshotsStatusRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/status/SnapshotsStatusRequest.java @@ -32,6 +32,7 @@ package org.opensearch.action.admin.cluster.snapshots.status; +import org.opensearch.Version; import org.opensearch.action.ActionRequestValidationException; import org.opensearch.action.support.clustermanager.ClusterManagerNodeRequest; import org.opensearch.common.annotation.PublicApi; @@ -54,6 +55,7 @@ public class SnapshotsStatusRequest extends ClusterManagerNodeRequesttrue to ignore unavailable snapshots, instead of throwing an exception. - * Defaults to false, which means unavailable snapshots cause an exception to be thrown. + * Returns the names of the indices. 
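Illustrative sketch, not part of the patch: with the sourceRemoteTranslogRepository field added to RestoreSnapshotRequest above, a restore can point restored indices at a separate remote translog repository. Repository and snapshot names below are placeholders:

    RestoreSnapshotRequest restoreRequest = new RestoreSnapshotRequest("snapshot-repo", "snap-1")
        .indices("test-idx-1")
        .waitForCompletion(true)
        // existing field: repository used for restored remote-store segments
        .setSourceRemoteStoreRepository("remote-segment-repo")
        // new field, serialized from V_2_17_0 onwards: repository used for restored remote translogs
        .setSourceRemoteTranslogRepository("remote-translog-repo");
    // the request would then be submitted via client().admin().cluster().restoreSnapshot(restoreRequest, listener)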
+ * + * @return the names of indices + */ + public String[] indices() { + return this.indices; + } + + /** + * Sets the list of indices to be returned + * + * @return this request + */ + public SnapshotsStatusRequest indices(String[] indices) { + this.indices = indices; + return this; + } + + /** + * Set to true to ignore unavailable snapshots and indices, instead of throwing an exception. + * Defaults to false, which means unavailable snapshots and indices cause an exception to be thrown. * - * @param ignoreUnavailable whether to ignore unavailable snapshots + * @param ignoreUnavailable whether to ignore unavailable snapshots and indices * @return this request */ public SnapshotsStatusRequest ignoreUnavailable(boolean ignoreUnavailable) { @@ -158,9 +201,9 @@ public SnapshotsStatusRequest ignoreUnavailable(boolean ignoreUnavailable) { } /** - * Returns whether the request permits unavailable snapshots to be ignored. + * Returns whether the request permits unavailable snapshots and indices to be ignored. * - * @return true if the request will ignore unavailable snapshots, false if it will throw an exception on unavailable snapshots + * @return true if the request will ignore unavailable snapshots and indices, false if it will throw an exception on unavailable snapshots and indices */ public boolean ignoreUnavailable() { return ignoreUnavailable; diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/status/SnapshotsStatusRequestBuilder.java b/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/status/SnapshotsStatusRequestBuilder.java index 9377eca60e353..6f0ac278d01c4 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/status/SnapshotsStatusRequestBuilder.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/status/SnapshotsStatusRequestBuilder.java @@ -96,10 +96,32 @@ public SnapshotsStatusRequestBuilder addSnapshots(String... snapshots) { } /** - * Set to true to ignore unavailable snapshots, instead of throwing an exception. - * Defaults to false, which means unavailable snapshots cause an exception to be thrown. + * Sets list of indices to return * - * @param ignoreUnavailable whether to ignore unavailable snapshots. + * @param indices list of indices + * @return this builder + */ + public SnapshotsStatusRequestBuilder setIndices(String... indices) { + request.indices(indices); + return this; + } + + /** + * Adds additional indices to the list of indices to return + * + * @param indices additional indices + * @return this builder + */ + public SnapshotsStatusRequestBuilder addIndices(String... indices) { + request.indices(ArrayUtils.concat(request.indices(), indices)); + return this; + } + + /** + * Set to true to ignore unavailable snapshots and indices, instead of throwing an exception. + * Defaults to false, which means unavailable snapshots and indices cause an exception to be thrown. + * + * @param ignoreUnavailable whether to ignore unavailable snapshots and indices. 
* @return this builder */ public SnapshotsStatusRequestBuilder setIgnoreUnavailable(boolean ignoreUnavailable) { diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/status/TransportNodesSnapshotsStatus.java b/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/status/TransportNodesSnapshotsStatus.java index 8f76dbce048cd..3654cab3c5a47 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/status/TransportNodesSnapshotsStatus.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/status/TransportNodesSnapshotsStatus.java @@ -161,7 +161,7 @@ public Request(StreamInput in) throws IOException { } public Request(String[] nodesIds) { - super(nodesIds); + super(false, nodesIds); } public Request snapshots(Snapshot[] snapshots) { diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusAction.java index 4fc2acb2caa51..8228cb6301c8c 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusAction.java @@ -49,9 +49,12 @@ import org.opensearch.common.util.set.Sets; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.Strings; +import org.opensearch.core.common.breaker.CircuitBreaker; +import org.opensearch.core.common.breaker.CircuitBreakingException; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.util.CollectionUtils; import org.opensearch.core.index.shard.ShardId; +import org.opensearch.index.IndexNotFoundException; import org.opensearch.index.snapshots.IndexShardSnapshotStatus; import org.opensearch.repositories.IndexId; import org.opensearch.repositories.RepositoriesService; @@ -81,6 +84,7 @@ import java.util.stream.Collectors; import static java.util.Collections.unmodifiableMap; +import static org.opensearch.snapshots.SnapshotsService.MAX_SHARDS_ALLOWED_IN_STATUS_API; /** * Transport action for accessing snapshot status @@ -95,6 +99,8 @@ public class TransportSnapshotsStatusAction extends TransportClusterManagerNodeA private final TransportNodesSnapshotsStatus transportNodesSnapshotsStatus; + private long maximumAllowedShardCount; + @Inject public TransportSnapshotsStatusAction( TransportService transportService, @@ -314,38 +320,34 @@ private void loadRepositoryData( String repositoryName, ActionListener listener ) { - final Set requestedSnapshotNames = Sets.newHashSet(request.snapshots()); + maximumAllowedShardCount = clusterService.getClusterSettings().get(MAX_SHARDS_ALLOWED_IN_STATUS_API); final StepListener repositoryDataListener = new StepListener<>(); repositoriesService.getRepositoryData(repositoryName, repositoryDataListener); repositoryDataListener.whenComplete(repositoryData -> { - final Map matchedSnapshotIds = repositoryData.getSnapshotIds() - .stream() - .filter(s -> requestedSnapshotNames.contains(s.getName())) - .collect(Collectors.toMap(SnapshotId::getName, Function.identity())); - for (final String snapshotName : request.snapshots()) { - if (currentSnapshotNames.contains(snapshotName)) { - // we've already found this snapshot in the current snapshot entries, so skip over - continue; - } - SnapshotId snapshotId = matchedSnapshotIds.get(snapshotName); - if (snapshotId == null) { - // 
neither in the current snapshot entries nor found in the repository - if (request.ignoreUnavailable()) { - // ignoring unavailable snapshots, so skip over - logger.debug( - "snapshot status request ignoring snapshot [{}], not found in repository [{}]", - snapshotName, - repositoryName - ); - continue; - } else { - throw new SnapshotMissingException(repositoryName, snapshotName); - } - } - SnapshotInfo snapshotInfo = snapshot(snapshotsInProgress, repositoryName, snapshotId); + Map snapshotsInfoMap = snapshotsInfo( + request, + repositoryName, + repositoryData, + snapshotsInProgress, + currentSnapshotNames + ); + for (Map.Entry entry : snapshotsInfoMap.entrySet()) { + SnapshotId snapshotId = entry.getKey(); + SnapshotInfo snapshotInfo = entry.getValue(); List shardStatusBuilder = new ArrayList<>(); if (snapshotInfo.state().completed()) { - Map shardStatuses = snapshotShards(repositoryName, repositoryData, snapshotInfo); + Map shardStatuses = snapshotShards( + request, + repositoryName, + repositoryData, + snapshotInfo + ); + boolean isShallowV2Snapshot = snapshotInfo.getPinnedTimestamp() > 0; + long initialSnapshotTotalSize = 0; + if (isShallowV2Snapshot && request.indices().length == 0) { + // TODO: add primary store size in bytes at the snapshot level + } + for (Map.Entry shardStatus : shardStatuses.entrySet()) { IndexShardSnapshotStatus.Copy lastSnapshotStatus = shardStatus.getValue().asCopy(); shardStatusBuilder.add(new SnapshotIndexShardStatus(shardStatus.getKey(), lastSnapshotStatus)); @@ -406,6 +408,72 @@ private SnapshotInfo snapshot(SnapshotsInProgress snapshotsInProgress, String re return repositoriesService.repository(repositoryName).getSnapshotInfo(snapshotId); } + /** + * Returns snapshot info for finished snapshots + * @param request snapshot status request + * @param repositoryName repository name + * @param repositoryData repository data + * @param snapshotsInProgress currently running snapshots + * @param currentSnapshotNames list of names of currently running snapshots + * @return map of snapshot id to snapshot info + */ + private Map snapshotsInfo( + SnapshotsStatusRequest request, + String repositoryName, + RepositoryData repositoryData, + SnapshotsInProgress snapshotsInProgress, + Set currentSnapshotNames + ) { + final Set requestedSnapshotNames = Sets.newHashSet(request.snapshots()); + final Map snapshotsInfoMap = new HashMap<>(); + final Map matchedSnapshotIds = repositoryData.getSnapshotIds() + .stream() + .filter(s -> requestedSnapshotNames.contains(s.getName())) + .collect(Collectors.toMap(SnapshotId::getName, Function.identity())); + int totalShardsAcrossSnapshots = 0; + for (final String snapshotName : request.snapshots()) { + if (currentSnapshotNames.contains(snapshotName)) { + // we've already found this snapshot in the current snapshot entries, so skip over + continue; + } + SnapshotId snapshotId = matchedSnapshotIds.get(snapshotName); + if (snapshotId == null) { + // neither in the current snapshot entries nor found in the repository + if (request.ignoreUnavailable()) { + // ignoring unavailable snapshots, so skip over + logger.debug( + "snapshot status request ignoring snapshot [{}], not found in repository [{}]", + snapshotName, + repositoryName + ); + continue; + } else { + throw new SnapshotMissingException(repositoryName, snapshotName); + } + } + SnapshotInfo snapshotInfo = snapshot(snapshotsInProgress, repositoryName, snapshotId); + boolean isV2Snapshot = snapshotInfo.getPinnedTimestamp() > 0; + if (isV2Snapshot == false && request.indices().length == 0) 
{ + totalShardsAcrossSnapshots += snapshotInfo.totalShards(); + } + snapshotsInfoMap.put(snapshotId, snapshotInfo); + } + if (totalShardsAcrossSnapshots > maximumAllowedShardCount && request.indices().length == 0) { + String message = "[" + + repositoryName + + ":" + + String.join(", ", request.snapshots()) + + "]" + + " Total shard count [" + + totalShardsAcrossSnapshots + + "] is more than the maximum allowed value of shard count [" + + maximumAllowedShardCount + + "] for snapshot status request"; + throw new CircuitBreakingException(message, CircuitBreaker.Durability.PERMANENT); + } + return unmodifiableMap(snapshotsInfoMap); + } + /** * Returns status of shards currently finished snapshots *

@@ -413,21 +481,69 @@ private SnapshotInfo snapshot(SnapshotsInProgress snapshotsInProgress, String re * This method is executed on cluster manager node and it's complimentary to the * {@link SnapshotShardsService#currentSnapshotShards(Snapshot)} because it * returns similar information but for already finished snapshots. *

- * + * @param request snapshot status request * @param repositoryName repository name * @param snapshotInfo snapshot info * @return map of shard id to snapshot status */ private Map snapshotShards( + final SnapshotsStatusRequest request, final String repositoryName, final RepositoryData repositoryData, final SnapshotInfo snapshotInfo ) throws IOException { + final Set requestedIndexNames = Sets.newHashSet(request.indices()); + String snapshotName = snapshotInfo.snapshotId().getName(); + Set indices = Sets.newHashSet(snapshotInfo.indices()); + if (requestedIndexNames.isEmpty() == false) { + Set finalIndices = indices; + List indicesNotFound = requestedIndexNames.stream() + .filter(i -> finalIndices.contains(i) == false) + .collect(Collectors.toList()); + if (indicesNotFound.isEmpty() == false) { + handleIndexNotFound(String.join(", ", indicesNotFound), request, snapshotName, repositoryName); + } + indices = requestedIndexNames; + } + final Repository repository = repositoriesService.repository(repositoryName); - final Map shardStatus = new HashMap<>(); - for (String index : snapshotInfo.indices()) { + boolean isV2Snapshot = snapshotInfo.getPinnedTimestamp() > 0; + int totalShardsAcrossIndices = 0; + final Map indexMetadataMap = new HashMap<>(); + + for (String index : indices) { IndexId indexId = repositoryData.resolveIndexId(index); IndexMetadata indexMetadata = repository.getSnapshotIndexMetaData(repositoryData, snapshotInfo.snapshotId(), indexId); + if (indexMetadata != null) { + if (requestedIndexNames.isEmpty() == false && isV2Snapshot == false) { + totalShardsAcrossIndices += indexMetadata.getNumberOfShards(); + } + indexMetadataMap.put(indexId, indexMetadata); + } else if (requestedIndexNames.isEmpty() == false) { + handleIndexNotFound(index, request, snapshotName, repositoryName); + } + } + + if (totalShardsAcrossIndices > maximumAllowedShardCount && requestedIndexNames.isEmpty() == false && isV2Snapshot == false) { + String message = "[" + + repositoryName + + ":" + + String.join(", ", request.snapshots()) + + "]" + + " Total shard count [" + + totalShardsAcrossIndices + + "] across the requested indices [" + + requestedIndexNames.stream().collect(Collectors.joining(", ")) + + "] is more than the maximum allowed value of shard count [" + + maximumAllowedShardCount + + "] for snapshot status request"; + throw new CircuitBreakingException(message, CircuitBreaker.Durability.PERMANENT); + } + + final Map shardStatus = new HashMap<>(); + for (Map.Entry entry : indexMetadataMap.entrySet()) { + IndexId indexId = entry.getKey(); + IndexMetadata indexMetadata = entry.getValue(); if (indexMetadata != null) { int numberOfShards = indexMetadata.getNumberOfShards(); for (int i = 0; i < numberOfShards; i++) { @@ -447,7 +563,12 @@ private Map snapshotShards( // could not be taken due to partial being set to false. 
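// Minimal sketch of the guard pattern introduced above, extracted only for illustration (the helper
// class and method names are assumptions): compare an accumulated shard count against the configured
// ceiling and trip a permanent circuit breaker instead of building an oversized status response.
import org.opensearch.core.common.breaker.CircuitBreaker;
import org.opensearch.core.common.breaker.CircuitBreakingException;

final class ShardCountGuard {
    static void ensureWithinLimit(long totalShards, long maximumAllowedShardCount, String scope) {
        if (totalShards > maximumAllowedShardCount) {
            throw new CircuitBreakingException(
                "["
                    + scope
                    + "] Total shard count ["
                    + totalShards
                    + "] is more than the maximum allowed value of shard count ["
                    + maximumAllowedShardCount
                    + "] for snapshot status request",
                CircuitBreaker.Durability.PERMANENT
            );
        }
    }
}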
shardSnapshotStatus = IndexShardSnapshotStatus.newFailed("skipped"); } else { - shardSnapshotStatus = repository.getShardSnapshotStatus(snapshotInfo.snapshotId(), indexId, shardId); + // TODO: to be refactored later + if (isV2Snapshot) { + shardSnapshotStatus = IndexShardSnapshotStatus.newDone(0, 0, 0, 0, 0, 0, null); + } else { + shardSnapshotStatus = repository.getShardSnapshotStatus(snapshotInfo.snapshotId(), indexId, shardId); + } } shardStatus.put(shardId, shardSnapshotStatus); } @@ -457,6 +578,21 @@ private Map snapshotShards( return unmodifiableMap(shardStatus); } + private void handleIndexNotFound(String index, SnapshotsStatusRequest request, String snapshotName, String repositoryName) { + if (request.ignoreUnavailable()) { + // ignoring unavailable index + logger.debug( + "snapshot status request ignoring indices [{}], not found in snapshot[{}] in repository [{}]", + index, + snapshotName, + repositoryName + ); + } else { + String cause = "indices [" + index + "] missing in snapshot [" + snapshotName + "] of repository [" + repositoryName + "]"; + throw new IndexNotFoundException(index, new IllegalArgumentException(cause)); + } + } + private static SnapshotShardFailure findShardFailure(List shardFailures, ShardId shardId) { for (SnapshotShardFailure shardFailure : shardFailures) { if (shardId.getIndexName().equals(shardFailure.index()) && shardId.getId() == shardFailure.shardId()) { diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/state/ClusterStateRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/state/ClusterStateRequest.java index 90a52f7406d57..66bb64ca5580b 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/state/ClusterStateRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/state/ClusterStateRequest.java @@ -41,8 +41,12 @@ import org.opensearch.core.common.Strings; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.tasks.TaskId; +import org.opensearch.rest.action.admin.cluster.ClusterAdminTask; +import org.opensearch.tasks.Task; import java.io.IOException; +import java.util.Map; /** * Transport request for obtaining cluster state @@ -211,4 +215,13 @@ public ClusterStateRequest waitForMetadataVersion(long waitForMetadataVersion) { this.waitForMetadataVersion = waitForMetadataVersion; return this; } + + @Override + public Task createTask(long id, String type, String action, TaskId parentTaskId, Map headers) { + if (this.getShouldCancelOnTimeout()) { + return new ClusterAdminTask(id, type, action, parentTaskId, headers); + } else { + return super.createTask(id, type, action, parentTaskId, headers); + } + } } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/state/TransportClusterStateAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/state/TransportClusterStateAction.java index 13ea7eaa43bf8..da91d273f3bce 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/state/TransportClusterStateAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/state/TransportClusterStateAction.java @@ -46,10 +46,12 @@ import org.opensearch.cluster.metadata.Metadata.Custom; import org.opensearch.cluster.routing.RoutingTable; import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.Nullable; import org.opensearch.common.inject.Inject; import org.opensearch.common.unit.TimeValue; import 
org.opensearch.core.action.ActionListener; import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.node.NodeClosedException; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportService; @@ -80,7 +82,8 @@ public TransportClusterStateAction( ClusterService clusterService, ThreadPool threadPool, ActionFilters actionFilters, - IndexNameExpressionResolver indexNameExpressionResolver + IndexNameExpressionResolver indexNameExpressionResolver, + @Nullable RemoteClusterStateService remoteClusterStateService ) { super( ClusterStateAction.NAME, @@ -93,6 +96,7 @@ public TransportClusterStateAction( indexNameExpressionResolver ); this.localExecuteSupported = true; + this.remoteClusterStateService = remoteClusterStateService; } @Override diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/stats/ClusterStatsRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/stats/ClusterStatsRequest.java index bd75b2210e474..b82a9d256a134 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/stats/ClusterStatsRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/stats/ClusterStatsRequest.java @@ -62,7 +62,7 @@ public ClusterStatsRequest(StreamInput in) throws IOException { * based on all nodes will be returned. */ public ClusterStatsRequest(String... nodesIds) { - super(nodesIds); + super(false, nodesIds); } public boolean useAggregatedNodeLevelResponses() { diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java index be7d41a7ba75e..a49ca2035783c 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -34,6 +34,7 @@ import org.apache.lucene.store.AlreadyClosedException; import org.opensearch.action.FailedNodeException; +import org.opensearch.action.admin.cluster.health.ClusterHealthRequest; import org.opensearch.action.admin.cluster.node.info.NodeInfo; import org.opensearch.action.admin.cluster.node.stats.NodeStats; import org.opensearch.action.admin.indices.stats.CommonStats; @@ -209,7 +210,7 @@ protected ClusterStatsNodeResponse nodeOperation(ClusterStatsNodeRequest nodeReq ClusterHealthStatus clusterStatus = null; if (clusterService.state().nodes().isLocalNodeElectedClusterManager()) { - clusterStatus = new ClusterStateHealth(clusterService.state()).getStatus(); + clusterStatus = new ClusterStateHealth(clusterService.state(), ClusterHealthRequest.Level.CLUSTER).getStatus(); } return new ClusterStatsNodeResponse( diff --git a/server/src/main/java/org/opensearch/action/admin/indices/create/CreateIndexClusterStateUpdateRequest.java b/server/src/main/java/org/opensearch/action/admin/indices/create/CreateIndexClusterStateUpdateRequest.java index ad45e5346f9fa..d7e86744ad528 100644 --- a/server/src/main/java/org/opensearch/action/admin/indices/create/CreateIndexClusterStateUpdateRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/indices/create/CreateIndexClusterStateUpdateRequest.java @@ -37,6 +37,7 @@ import org.opensearch.action.support.ActiveShardCount; import org.opensearch.cluster.ack.ClusterStateUpdateRequest; import org.opensearch.cluster.block.ClusterBlock; +import 
org.opensearch.cluster.metadata.Context; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.settings.Settings; import org.opensearch.core.index.Index; @@ -65,6 +66,8 @@ public class CreateIndexClusterStateUpdateRequest extends ClusterStateUpdateRequ private final Set aliases = new HashSet<>(); + private Context context; + private final Set blocks = new HashSet<>(); private ActiveShardCount waitForActiveShards = ActiveShardCount.DEFAULT; @@ -90,6 +93,11 @@ public CreateIndexClusterStateUpdateRequest aliases(Set aliases) { return this; } + public CreateIndexClusterStateUpdateRequest context(Context context) { + this.context = context; + return this; + } + public CreateIndexClusterStateUpdateRequest recoverFrom(Index recoverFrom) { this.recoverFrom = recoverFrom; return this; @@ -130,6 +138,10 @@ public Set aliases() { return aliases; } + public Context context() { + return context; + } + public Set blocks() { return blocks; } @@ -199,6 +211,8 @@ public String toString() { + settings + ", aliases=" + aliases + + ", context=" + + context + ", blocks=" + blocks + ", waitForActiveShards=" diff --git a/server/src/main/java/org/opensearch/action/admin/indices/create/CreateIndexRequest.java b/server/src/main/java/org/opensearch/action/admin/indices/create/CreateIndexRequest.java index 01b4cd779c261..6bb1bf0a3b97b 100644 --- a/server/src/main/java/org/opensearch/action/admin/indices/create/CreateIndexRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/indices/create/CreateIndexRequest.java @@ -42,6 +42,7 @@ import org.opensearch.action.support.ActiveShardCount; import org.opensearch.action.support.IndicesOptions; import org.opensearch.action.support.master.AcknowledgedRequest; +import org.opensearch.cluster.metadata.Context; import org.opensearch.common.annotation.PublicApi; import org.opensearch.common.settings.Settings; import org.opensearch.common.xcontent.LoggingDeprecationHandler; @@ -89,6 +90,7 @@ public class CreateIndexRequest extends AcknowledgedRequest public static final ParseField MAPPINGS = new ParseField("mappings"); public static final ParseField SETTINGS = new ParseField("settings"); public static final ParseField ALIASES = new ParseField("aliases"); + public static final ParseField CONTEXT = new ParseField("context"); private String cause = ""; @@ -100,6 +102,8 @@ public class CreateIndexRequest extends AcknowledgedRequest private final Set aliases = new HashSet<>(); + private Context context; + private ActiveShardCount waitForActiveShards = ActiveShardCount.DEFAULT; public CreateIndexRequest(StreamInput in) throws IOException { @@ -128,6 +132,9 @@ public CreateIndexRequest(StreamInput in) throws IOException { aliases.add(new Alias(in)); } waitForActiveShards = ActiveShardCount.readFrom(in); + if (in.getVersion().onOrAfter(Version.V_2_17_0)) { + context = in.readOptionalWriteable(Context::new); + } } public CreateIndexRequest() {} @@ -524,6 +531,8 @@ public CreateIndexRequest source(Map source, DeprecationHandler depre } } else if (ALIASES.match(name, deprecationHandler)) { aliases((Map) entry.getValue()); + } else if (CONTEXT.match(name, deprecationHandler)) { + context((Map) entry.getValue()); } else { throw new OpenSearchParseException("unknown key [{}] for create index", name); } @@ -571,6 +580,36 @@ public CreateIndexRequest waitForActiveShards(final int waitForActiveShards) { return waitForActiveShards(ActiveShardCount.from(waitForActiveShards)); } + public CreateIndexRequest context(Map source) { + try { + XContentBuilder 
builder = XContentFactory.jsonBuilder(); + builder.map(source); + return context(BytesReference.bytes(builder)); + } catch (IOException e) { + throw new OpenSearchGenerationException("Failed to generate [" + source + "]", e); + } + } + + public CreateIndexRequest context(BytesReference source) { + // EMPTY is safe here because we never call namedObject + try (XContentParser parser = XContentHelper.createParser(NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, source)) { + // move to the first alias + context(Context.fromXContent(parser)); + return this; + } catch (IOException e) { + throw new OpenSearchParseException("Failed to parse context", e); + } + } + + public CreateIndexRequest context(Context context) { + this.context = context; + return this; + } + + public Context context() { + return context; + } + @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); @@ -593,6 +632,9 @@ public void writeTo(StreamOutput out) throws IOException { alias.writeTo(out); } waitForActiveShards.writeTo(out); + if (out.getVersion().onOrAfter(Version.V_2_17_0)) { + out.writeOptionalWriteable(context); + } } @Override @@ -611,6 +653,9 @@ public String toString() { + '\'' + ", aliases=" + aliases + + '\'' + + ", context=" + + context + ", waitForActiveShards=" + waitForActiveShards + '}'; diff --git a/server/src/main/java/org/opensearch/action/admin/indices/create/CreateIndexRequestBuilder.java b/server/src/main/java/org/opensearch/action/admin/indices/create/CreateIndexRequestBuilder.java index b233f45422967..27a580434333a 100644 --- a/server/src/main/java/org/opensearch/action/admin/indices/create/CreateIndexRequestBuilder.java +++ b/server/src/main/java/org/opensearch/action/admin/indices/create/CreateIndexRequestBuilder.java @@ -36,6 +36,7 @@ import org.opensearch.action.support.ActiveShardCount; import org.opensearch.action.support.master.AcknowledgedRequestBuilder; import org.opensearch.client.OpenSearchClient; +import org.opensearch.cluster.metadata.Context; import org.opensearch.common.annotation.PublicApi; import org.opensearch.common.settings.Settings; import org.opensearch.common.xcontent.LoggingDeprecationHandler; @@ -275,4 +276,24 @@ public CreateIndexRequestBuilder setWaitForActiveShards(ActiveShardCount waitFor public CreateIndexRequestBuilder setWaitForActiveShards(final int waitForActiveShards) { return setWaitForActiveShards(ActiveShardCount.from(waitForActiveShards)); } + + /** + * Adds context that will be added when the index gets created. + * + * @param source The mapping source + */ + public CreateIndexRequestBuilder setContext(Map source) { + request.context(source); + return this; + } + + /** + * Adds context that will be added when the index gets created. 
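// Illustrative sketch, not part of this diff: creating an index with the new context support through
// the builder method added below. The index name, the context map contents, and the expectation that
// a "name" key is accepted by Context.fromXContent are assumptions for illustration only.
import java.util.Map;
import org.opensearch.action.admin.indices.create.CreateIndexResponse;
import org.opensearch.client.Client;

final class CreateIndexWithContextExample {
    static CreateIndexResponse createWithContext(Client client) {
        Map<String, Object> context = Map.of("name", "logs-template");   // hypothetical context definition
        return client.admin()
            .indices()
            .prepareCreate("logs-2024")   // hypothetical index name
            .setContext(context)
            .get();
    }
}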
+ * + * @param source The context source + */ + public CreateIndexRequestBuilder setContext(Context source) { + request.context(source); + return this; + } } diff --git a/server/src/main/java/org/opensearch/action/admin/indices/create/TransportCreateIndexAction.java b/server/src/main/java/org/opensearch/action/admin/indices/create/TransportCreateIndexAction.java index b5f822bd45b7e..250693c130c85 100644 --- a/server/src/main/java/org/opensearch/action/admin/indices/create/TransportCreateIndexAction.java +++ b/server/src/main/java/org/opensearch/action/admin/indices/create/TransportCreateIndexAction.java @@ -121,6 +121,7 @@ protected void clusterManagerOperation( .settings(request.settings()) .mappings(request.mappings()) .aliases(request.aliases()) + .context(request.context()) .waitForActiveShards(request.waitForActiveShards()); createIndexService.createIndex( diff --git a/server/src/main/java/org/opensearch/action/admin/indices/dangling/find/FindDanglingIndexRequest.java b/server/src/main/java/org/opensearch/action/admin/indices/dangling/find/FindDanglingIndexRequest.java index f853a47b3c2bf..b38cb46b23ace 100644 --- a/server/src/main/java/org/opensearch/action/admin/indices/dangling/find/FindDanglingIndexRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/indices/dangling/find/FindDanglingIndexRequest.java @@ -53,7 +53,7 @@ public FindDanglingIndexRequest(StreamInput in) throws IOException { } public FindDanglingIndexRequest(String indexUUID) { - super(Strings.EMPTY_ARRAY); + super(false, Strings.EMPTY_ARRAY); this.indexUUID = indexUUID; } diff --git a/server/src/main/java/org/opensearch/action/admin/indices/dangling/list/ListDanglingIndicesRequest.java b/server/src/main/java/org/opensearch/action/admin/indices/dangling/list/ListDanglingIndicesRequest.java index 119c4acbf4160..0076c9e294b69 100644 --- a/server/src/main/java/org/opensearch/action/admin/indices/dangling/list/ListDanglingIndicesRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/indices/dangling/list/ListDanglingIndicesRequest.java @@ -58,12 +58,12 @@ public ListDanglingIndicesRequest(StreamInput in) throws IOException { } public ListDanglingIndicesRequest() { - super(Strings.EMPTY_ARRAY); + super(false, Strings.EMPTY_ARRAY); this.indexUUID = null; } public ListDanglingIndicesRequest(String indexUUID) { - super(Strings.EMPTY_ARRAY); + super(false, Strings.EMPTY_ARRAY); this.indexUUID = indexUUID; } diff --git a/server/src/main/java/org/opensearch/action/admin/indices/get/GetIndexRequest.java b/server/src/main/java/org/opensearch/action/admin/indices/get/GetIndexRequest.java index 47c59791edf04..601b53f88baa3 100644 --- a/server/src/main/java/org/opensearch/action/admin/indices/get/GetIndexRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/indices/get/GetIndexRequest.java @@ -57,7 +57,8 @@ public class GetIndexRequest extends ClusterInfoRequest { public enum Feature { ALIASES((byte) 0), MAPPINGS((byte) 1), - SETTINGS((byte) 2); + SETTINGS((byte) 2), + CONTEXT((byte) 3); private static final Feature[] FEATURES = new Feature[Feature.values().length]; @@ -86,7 +87,11 @@ public static Feature fromId(byte id) { } } - private static final Feature[] DEFAULT_FEATURES = new Feature[] { Feature.ALIASES, Feature.MAPPINGS, Feature.SETTINGS }; + private static final Feature[] DEFAULT_FEATURES = new Feature[] { + Feature.ALIASES, + Feature.MAPPINGS, + Feature.SETTINGS, + Feature.CONTEXT }; private Feature[] features = DEFAULT_FEATURES; private boolean humanReadable = false; private 
transient boolean includeDefaults = false; diff --git a/server/src/main/java/org/opensearch/action/admin/indices/get/GetIndexResponse.java b/server/src/main/java/org/opensearch/action/admin/indices/get/GetIndexResponse.java index 5a237b8d3470f..a9ba991401bef 100644 --- a/server/src/main/java/org/opensearch/action/admin/indices/get/GetIndexResponse.java +++ b/server/src/main/java/org/opensearch/action/admin/indices/get/GetIndexResponse.java @@ -34,6 +34,7 @@ import org.opensearch.Version; import org.opensearch.cluster.metadata.AliasMetadata; +import org.opensearch.cluster.metadata.Context; import org.opensearch.cluster.metadata.MappingMetadata; import org.opensearch.common.annotation.PublicApi; import org.opensearch.common.settings.Settings; @@ -68,6 +69,7 @@ public class GetIndexResponse extends ActionResponse implements ToXContentObject private Map settings = Map.of(); private Map defaultSettings = Map.of(); private Map dataStreams = Map.of(); + private Map contexts = Map.of(); private final String[] indices; public GetIndexResponse( @@ -76,7 +78,8 @@ public GetIndexResponse( final Map> aliases, final Map settings, final Map defaultSettings, - final Map dataStreams + final Map dataStreams, + final Map contexts ) { this.indices = indices; // to have deterministic order @@ -96,6 +99,9 @@ public GetIndexResponse( if (dataStreams != null) { this.dataStreams = Collections.unmodifiableMap(dataStreams); } + if (contexts != null) { + this.contexts = Collections.unmodifiableMap(contexts); + } } GetIndexResponse(StreamInput in) throws IOException { @@ -160,6 +166,15 @@ public GetIndexResponse( dataStreamsMapBuilder.put(in.readString(), in.readOptionalString()); } dataStreams = Collections.unmodifiableMap(dataStreamsMapBuilder); + + if (in.getVersion().onOrAfter(Version.V_2_17_0)) { + final Map contextMapBuilder = new HashMap<>(); + int contextSize = in.readVInt(); + for (int i = 0; i < contextSize; i++) { + contextMapBuilder.put(in.readString(), in.readOptionalWriteable(Context::new)); + } + contexts = Collections.unmodifiableMap(contextMapBuilder); + } } public String[] indices() { @@ -214,6 +229,10 @@ public Map getSettings() { return settings(); } + public Map contexts() { + return contexts; + } + /** * Returns the string value for the specified index and setting. 
If the includeDefaults flag was not set or set to * false on the {@link GetIndexRequest}, this method will only return a value where the setting was explicitly set @@ -277,6 +296,14 @@ public void writeTo(StreamOutput out) throws IOException { out.writeString(indexEntry.getKey()); out.writeOptionalString(indexEntry.getValue()); } + + if (out.getVersion().onOrAfter(Version.V_2_17_0)) { + out.writeVInt(contexts.size()); + for (final Map.Entry indexEntry : contexts.entrySet()) { + out.writeString(indexEntry.getKey()); + out.writeOptionalWriteable(indexEntry.getValue()); + } + } } @Override @@ -320,6 +347,11 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (dataStream != null) { builder.field("data_stream", dataStream); } + + Context context = contexts.get(index); + if (context != null) { + builder.field("context", context); + } } builder.endObject(); } @@ -343,11 +375,12 @@ public boolean equals(Object o) { && Objects.equals(mappings, that.mappings) && Objects.equals(settings, that.settings) && Objects.equals(defaultSettings, that.defaultSettings) - && Objects.equals(dataStreams, that.dataStreams); + && Objects.equals(dataStreams, that.dataStreams) + && Objects.equals(contexts, that.contexts); } @Override public int hashCode() { - return Objects.hash(Arrays.hashCode(indices), aliases, mappings, settings, defaultSettings, dataStreams); + return Objects.hash(Arrays.hashCode(indices), aliases, mappings, settings, defaultSettings, dataStreams, contexts); } } diff --git a/server/src/main/java/org/opensearch/action/admin/indices/get/TransportGetIndexAction.java b/server/src/main/java/org/opensearch/action/admin/indices/get/TransportGetIndexAction.java index 755119401c6b5..c6f4a8cd49ae9 100644 --- a/server/src/main/java/org/opensearch/action/admin/indices/get/TransportGetIndexAction.java +++ b/server/src/main/java/org/opensearch/action/admin/indices/get/TransportGetIndexAction.java @@ -36,6 +36,7 @@ import org.opensearch.action.support.clustermanager.info.TransportClusterInfoAction; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.metadata.AliasMetadata; +import org.opensearch.cluster.metadata.Context; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.IndexNameExpressionResolver; import org.opensearch.cluster.metadata.MappingMetadata; @@ -110,6 +111,7 @@ protected void doClusterManagerOperation( Map> aliasesResult = Map.of(); Map settings = Map.of(); Map defaultSettings = Map.of(); + Map contexts = Map.of(); final Map dataStreams = new HashMap<>( StreamSupport.stream(Spliterators.spliterator(state.metadata().findDataStreams(concreteIndices).entrySet(), 0), false) .collect(Collectors.toMap(k -> k.getKey(), v -> v.getValue().getName())) @@ -118,6 +120,7 @@ protected void doClusterManagerOperation( boolean doneAliases = false; boolean doneMappings = false; boolean doneSettings = false; + boolean doneContext = false; for (GetIndexRequest.Feature feature : features) { switch (feature) { case MAPPINGS: @@ -159,11 +162,25 @@ protected void doClusterManagerOperation( doneSettings = true; } break; - + case CONTEXT: + if (!doneContext) { + final Map contextBuilder = new HashMap<>(); + for (String index : concreteIndices) { + Context indexContext = state.metadata().index(index).context(); + if (indexContext != null) { + contextBuilder.put(index, indexContext); + } + } + contexts = contextBuilder; + doneContext = true; + } + break; default: throw new IllegalStateException("feature [" + feature + "] is not 
valid"); } } - listener.onResponse(new GetIndexResponse(concreteIndices, mappingsResult, aliasesResult, settings, defaultSettings, dataStreams)); + listener.onResponse( + new GetIndexResponse(concreteIndices, mappingsResult, aliasesResult, settings, defaultSettings, dataStreams, contexts) + ); } } diff --git a/server/src/main/java/org/opensearch/action/admin/indices/segments/IndicesSegmentResponse.java b/server/src/main/java/org/opensearch/action/admin/indices/segments/IndicesSegmentResponse.java index 648f58dada4f9..b69d4b6f624b8 100644 --- a/server/src/main/java/org/opensearch/action/admin/indices/segments/IndicesSegmentResponse.java +++ b/server/src/main/java/org/opensearch/action/admin/indices/segments/IndicesSegmentResponse.java @@ -45,13 +45,12 @@ import org.opensearch.index.engine.Segment; import java.io.IOException; -import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Locale; import java.util.Map; -import java.util.Set; /** * Transport response for retrieving indices segment information @@ -86,21 +85,24 @@ public Map getIndices() { return indicesSegments; } Map indicesSegments = new HashMap<>(); - - Set indices = new HashSet<>(); - for (ShardSegments shard : shards) { - indices.add(shard.getShardRouting().getIndexName()); + if (shards.length == 0) { + this.indicesSegments = indicesSegments; + return indicesSegments; } - for (String indexName : indices) { - List shards = new ArrayList<>(); - for (ShardSegments shard : this.shards) { - if (shard.getShardRouting().getIndexName().equals(indexName)) { - shards.add(shard); - } + Arrays.sort(shards, Comparator.comparing(shardSegment -> shardSegment.getShardRouting().getIndexName())); + int startIndexPos = 0; + String startIndexName = shards[startIndexPos].getShardRouting().getIndexName(); + for (int i = 0; i < shards.length; i++) { + if (!shards[i].getShardRouting().getIndexName().equals(startIndexName)) { + indicesSegments.put(startIndexName, new IndexSegments(startIndexName, Arrays.copyOfRange(shards, startIndexPos, i))); + startIndexPos = i; + startIndexName = shards[startIndexPos].getShardRouting().getIndexName(); } - indicesSegments.put(indexName, new IndexSegments(indexName, shards.toArray(new ShardSegments[0]))); } + // Add the last shardSegment from shards list which would have got missed in the loop above + indicesSegments.put(startIndexName, new IndexSegments(startIndexName, Arrays.copyOfRange(shards, startIndexPos, shards.length))); + this.indicesSegments = indicesSegments; return indicesSegments; } diff --git a/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java b/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java index 04f39d77ce6c8..03fb55323feec 100644 --- a/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java +++ b/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java @@ -101,7 +101,7 @@ public CommonStatsFlags(StreamInput in) throws IOException { includeCaches = in.readEnumSet(CacheType.class); levels = in.readStringArray(); } - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + if (in.getVersion().onOrAfter(Version.V_2_17_0)) { includeIndicesStatsByLevel = in.readBoolean(); } } @@ -128,7 +128,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeEnumSet(includeCaches); out.writeStringArrayNullable(levels); } - if 
(out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (out.getVersion().onOrAfter(Version.V_2_17_0)) { out.writeBoolean(includeIndicesStatsByLevel); } } diff --git a/server/src/main/java/org/opensearch/action/admin/indices/stats/IndicesStatsRequest.java b/server/src/main/java/org/opensearch/action/admin/indices/stats/IndicesStatsRequest.java index 2b64464a76899..c36e53098d166 100644 --- a/server/src/main/java/org/opensearch/action/admin/indices/stats/IndicesStatsRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/indices/stats/IndicesStatsRequest.java @@ -36,8 +36,12 @@ import org.opensearch.common.annotation.PublicApi; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.tasks.TaskId; +import org.opensearch.rest.action.admin.cluster.ClusterAdminTask; +import org.opensearch.tasks.Task; import java.io.IOException; +import java.util.Map; /** * A request to get indices level stats. Allow to enable different stats to be returned. @@ -103,6 +107,15 @@ public IndicesStatsRequest groups(String... groups) { return this; } + @Override + public Task createTask(long id, String type, String action, TaskId parentTaskId, Map headers) { + if (this.getShouldCancelOnTimeout()) { + return new ClusterAdminTask(id, type, action, parentTaskId, headers); + } else { + return super.createTask(id, type, action, parentTaskId, headers); + } + } + public String[] groups() { return this.flags.groups(); } diff --git a/server/src/main/java/org/opensearch/action/admin/indices/stats/IndicesStatsResponse.java b/server/src/main/java/org/opensearch/action/admin/indices/stats/IndicesStatsResponse.java index 6242081cd2371..900a886481fe6 100644 --- a/server/src/main/java/org/opensearch/action/admin/indices/stats/IndicesStatsResponse.java +++ b/server/src/main/java/org/opensearch/action/admin/indices/stats/IndicesStatsResponse.java @@ -69,7 +69,7 @@ public class IndicesStatsResponse extends BroadcastResponse { shards = in.readArray(ShardStats::new, (size) -> new ShardStats[size]); } - IndicesStatsResponse( + public IndicesStatsResponse( ShardStats[] shards, int totalShards, int successfulShards, diff --git a/server/src/main/java/org/opensearch/action/admin/indices/tiering/TieringUtils.java b/server/src/main/java/org/opensearch/action/admin/indices/tiering/TieringUtils.java new file mode 100644 index 0000000000000..46912de17f213 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/indices/tiering/TieringUtils.java @@ -0,0 +1,47 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.indices.tiering; + +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.allocation.RoutingAllocation; +import org.opensearch.index.IndexModule; + +/** + * Utility class for tiering operations + * + * @opensearch.internal + */ +public class TieringUtils { + + /** + * Checks if the specified shard is a partial shard by + * checking the INDEX_STORE_LOCALITY_SETTING for its index. 
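// Hypothetical usage sketch of the new utility (the index name and settings below are made up):
// an index is treated as "partial" when its store locality setting is PARTIAL.
import org.opensearch.Version;
import org.opensearch.action.admin.indices.tiering.TieringUtils;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.common.settings.Settings;
import org.opensearch.index.IndexModule;

final class PartialIndexCheckExample {
    static boolean isPartial() {
        IndexMetadata metadata = IndexMetadata.builder("warm-index")
            .settings(
                Settings.builder()
                    .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
                    .put(IndexModule.INDEX_STORE_LOCALITY_SETTING.getKey(), IndexModule.DataLocalityType.PARTIAL.name())
                    .build()
            )
            .numberOfShards(1)
            .numberOfReplicas(0)
            .build();
        return TieringUtils.isPartialIndex(metadata);
    }
}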
+ * see {@link #isPartialIndex(IndexMetadata)} + * @param shard ShardRouting object representing the shard + * @param allocation RoutingAllocation object representing the allocation + * @return true if the shard is a partial shard, false otherwise + */ + public static boolean isPartialShard(ShardRouting shard, RoutingAllocation allocation) { + IndexMetadata indexMetadata = allocation.metadata().getIndexSafe(shard.index()); + return isPartialIndex(indexMetadata); + } + + /** + * Checks if the specified index is a partial index by + * checking the INDEX_STORE_LOCALITY_SETTING for the index. + * + * @param indexMetadata the metadata of the index + * @return true if the index is a partial index, false otherwise + */ + public static boolean isPartialIndex(final IndexMetadata indexMetadata) { + return IndexModule.DataLocalityType.PARTIAL.name() + .equals(indexMetadata.getSettings().get(IndexModule.INDEX_STORE_LOCALITY_SETTING.getKey())); + } +} diff --git a/server/src/main/java/org/opensearch/action/ingest/SimulateExecutionService.java b/server/src/main/java/org/opensearch/action/ingest/SimulateExecutionService.java index c7c0f21eb0876..459466f8c8ab6 100644 --- a/server/src/main/java/org/opensearch/action/ingest/SimulateExecutionService.java +++ b/server/src/main/java/org/opensearch/action/ingest/SimulateExecutionService.java @@ -36,6 +36,7 @@ import org.opensearch.core.action.ActionListener; import org.opensearch.ingest.CompoundProcessor; import org.opensearch.ingest.IngestDocument; +import org.opensearch.ingest.IngestService; import org.opensearch.ingest.Pipeline; import org.opensearch.threadpool.ThreadPool; @@ -56,9 +57,11 @@ class SimulateExecutionService { private static final String THREAD_POOL_NAME = ThreadPool.Names.MANAGEMENT; private final ThreadPool threadPool; + private final IngestService ingestService; - SimulateExecutionService(ThreadPool threadPool) { + SimulateExecutionService(ThreadPool threadPool, IngestService ingestService) { this.threadPool = threadPool; + this.ingestService = ingestService; } void executeDocument( @@ -91,6 +94,9 @@ void executeDocument( } public void execute(SimulatePipelineRequest.Parsed request, ActionListener listener) { + + ingestService.validateProcessorCountForIngestPipeline(request.getPipeline()); + threadPool.executor(THREAD_POOL_NAME).execute(ActionRunnable.wrap(listener, l -> { final AtomicInteger counter = new AtomicInteger(); final List responses = new CopyOnWriteArrayList<>( diff --git a/server/src/main/java/org/opensearch/action/ingest/SimulatePipelineTransportAction.java b/server/src/main/java/org/opensearch/action/ingest/SimulatePipelineTransportAction.java index 4753679d370af..5eeb09c4d50c0 100644 --- a/server/src/main/java/org/opensearch/action/ingest/SimulatePipelineTransportAction.java +++ b/server/src/main/java/org/opensearch/action/ingest/SimulatePipelineTransportAction.java @@ -69,7 +69,7 @@ public SimulatePipelineTransportAction( (Writeable.Reader) SimulatePipelineRequest::new ); this.ingestService = ingestService; - this.executionService = new SimulateExecutionService(threadPool); + this.executionService = new SimulateExecutionService(threadPool, ingestService); } @Override diff --git a/server/src/main/java/org/opensearch/action/search/GetAllPitNodesRequest.java b/server/src/main/java/org/opensearch/action/search/GetAllPitNodesRequest.java index 336c8139561e9..97c0d66f4b8e1 100644 --- a/server/src/main/java/org/opensearch/action/search/GetAllPitNodesRequest.java +++ 
b/server/src/main/java/org/opensearch/action/search/GetAllPitNodesRequest.java @@ -27,7 +27,7 @@ public class GetAllPitNodesRequest extends BaseNodesRequest {} ); } executeRequest(task, searchRequest, this::searchAsyncAction, listener); diff --git a/server/src/main/java/org/opensearch/action/support/TimeoutTaskCancellationUtility.java b/server/src/main/java/org/opensearch/action/support/TimeoutTaskCancellationUtility.java index a317a45eab31f..d2e7121a592cb 100644 --- a/server/src/main/java/org/opensearch/action/support/TimeoutTaskCancellationUtility.java +++ b/server/src/main/java/org/opensearch/action/support/TimeoutTaskCancellationUtility.java @@ -12,11 +12,12 @@ import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.action.admin.cluster.node.tasks.cancel.CancelTasksRequest; +import org.opensearch.action.admin.cluster.node.tasks.cancel.CancelTasksResponse; import org.opensearch.client.OriginSettingClient; import org.opensearch.client.node.NodeClient; -import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.unit.TimeValue; import org.opensearch.core.action.ActionListener; +import org.opensearch.core.tasks.TaskCancelledException; import org.opensearch.core.tasks.TaskId; import org.opensearch.search.SearchService; import org.opensearch.tasks.CancellableTask; @@ -27,7 +28,6 @@ import java.util.concurrent.atomic.AtomicBoolean; import static org.opensearch.action.admin.cluster.node.tasks.get.GetTaskAction.TASKS_ORIGIN; -import static org.opensearch.action.search.TransportSearchAction.SEARCH_CANCEL_AFTER_TIME_INTERVAL_SETTING; /** * Utility to cancel a timeout task @@ -38,25 +38,25 @@ public class TimeoutTaskCancellationUtility { private static final Logger logger = LogManager.getLogger(TimeoutTaskCancellationUtility.class); + private static final AtomicBoolean executeResponseOrFailureOnce = new AtomicBoolean(true); + /** * Wraps a listener with a timeout listener {@link TimeoutRunnableListener} to schedule the task cancellation for provided tasks on * generic thread pool * @param client - {@link NodeClient} * @param taskToCancel - task to schedule cancellation for - * @param clusterSettings - {@link ClusterSettings} + * @param timeout - {@link TimeValue} * @param listener - original listener associated with the task * @return wrapped listener */ public static ActionListener wrapWithCancellationListener( NodeClient client, CancellableTask taskToCancel, - ClusterSettings clusterSettings, - ActionListener listener + TimeValue timeout, + ActionListener listener, + TimeoutHandler timeoutHandler ) { - final TimeValue globalTimeout = clusterSettings.get(SEARCH_CANCEL_AFTER_TIME_INTERVAL_SETTING); - final TimeValue timeoutInterval = (taskToCancel.getCancellationTimeout() == null) - ? globalTimeout - : taskToCancel.getCancellationTimeout(); + final TimeValue timeoutInterval = (taskToCancel.getCancellationTimeout() == null) ? timeout : taskToCancel.getCancellationTimeout(); // Note: -1 (or no timeout) will help to turn off cancellation. The combinations will be request level set at -1 or request level // set to null and cluster level set to -1. 
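// Illustrative sketch, not part of this diff: wiring the revised wrapWithCancellationListener signature
// from a transport action. The helper class, the 30 second timeout and the decision to forward the
// TaskCancelledException to the original listener are assumptions for illustration only.
import org.opensearch.action.support.TimeoutTaskCancellationUtility;
import org.opensearch.client.node.NodeClient;
import org.opensearch.common.unit.TimeValue;
import org.opensearch.core.action.ActionListener;
import org.opensearch.core.action.ActionResponse;
import org.opensearch.tasks.CancellableTask;

final class CancellationWiringExample {
    static <Response extends ActionResponse> ActionListener<Response> wire(
        NodeClient client,
        CancellableTask task,
        ActionListener<Response> listener
    ) {
        // The new TimeoutHandler argument lets the caller decide what happens when the scheduled
        // cancellation fires; here the cancellation is simply surfaced as a failure.
        return TimeoutTaskCancellationUtility.wrapWithCancellationListener(
            client,
            task,
            TimeValue.timeValueSeconds(30),   // hypothetical per-request timeout
            listener,
            e -> listener.onFailure(e)
        );
    }
}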
ActionListener listenerToReturn = listener; @@ -72,24 +72,30 @@ public static ActionListener wrapWithCancellationListener( // force the origin to execute the cancellation as a system user new OriginSettingClient(client, TASKS_ORIGIN).admin() .cluster() - .cancelTasks( - cancelTasksRequest, - ActionListener.wrap( - r -> logger.debug( + .cancelTasks(cancelTasksRequest, new ActionListener() { + @Override + public void onResponse(CancelTasksResponse cancelTasksResponse) { + logger.debug( "Scheduled cancel task with timeout: {} for original task: {} is successfully completed", timeoutInterval, cancelTasksRequest.getTaskId() - ), - e -> logger.error( + ); + // Notify the timeoutHandler that the task was canceled due to timeout + timeoutHandler.onTimeout(new TaskCancelledException(cancelTasksRequest.getReason())); + } + + @Override + public void onFailure(Exception e) { + logger.error( new ParameterizedMessage( "Scheduled cancel task with timeout: {} for original task: {} is failed", timeoutInterval, cancelTasksRequest.getTaskId() ), e - ) - ) - ); + ); + } + }); }); wrappedListener.cancellable = client.threadPool().schedule(wrappedListener, timeoutInterval, ThreadPool.Names.GENERIC); listenerToReturn = wrappedListener; @@ -100,6 +106,22 @@ public static ActionListener wrapWithCancellationListener( return listenerToReturn; } + /** + * A functional interface used to handle the timeout of a cancellable task. + * Implementations of this interface provide a callback method that is invoked + * when a task is cancelled due to a timeout. + */ + public interface TimeoutHandler { + + /** + * Called when a cancellable task is cancelled due to a timeout. + * + * @param e the exception that contains details about the task cancellation, + * including the reason for cancellation. + */ + void onTimeout(TaskCancelledException e); + } + /** * Timeout listener which executes the provided runnable after timeout is expired and if a response/failure is not yet received. 
* If either a response/failure is received before timeout then the scheduled task is cancelled and response/failure is sent back to @@ -144,6 +166,7 @@ public void run() { if (executeRunnable.compareAndSet(true, false)) { timeoutRunnable.run(); } // else do nothing since either response/failure is already sent to client + } catch (Exception ex) { // ignore the exception logger.error( diff --git a/server/src/main/java/org/opensearch/action/support/broadcast/BroadcastRequest.java b/server/src/main/java/org/opensearch/action/support/broadcast/BroadcastRequest.java index 8a27e032cec5e..784c43c938f2c 100644 --- a/server/src/main/java/org/opensearch/action/support/broadcast/BroadcastRequest.java +++ b/server/src/main/java/org/opensearch/action/support/broadcast/BroadcastRequest.java @@ -52,6 +52,8 @@ public class BroadcastRequest> extends protected String[] indices; private IndicesOptions indicesOptions = IndicesOptions.strictExpandOpenAndForbidClosed(); + protected boolean shouldCancelOnTimeout = false; + public BroadcastRequest(StreamInput in) throws IOException { super(in); indices = in.readStringArray(); @@ -125,4 +127,12 @@ public void writeTo(StreamOutput out) throws IOException { out.writeStringArrayNullable(indices); indicesOptions.writeIndicesOptions(out); } + + public void setShouldCancelOnTimeout(boolean shouldCancelOnTimeout) { + this.shouldCancelOnTimeout = shouldCancelOnTimeout; + } + + public boolean getShouldCancelOnTimeout() { + return this.shouldCancelOnTimeout; + } } diff --git a/server/src/main/java/org/opensearch/action/support/clustermanager/ClusterManagerNodeReadRequest.java b/server/src/main/java/org/opensearch/action/support/clustermanager/ClusterManagerNodeReadRequest.java index 6dcc6ed1b098e..c373107163082 100644 --- a/server/src/main/java/org/opensearch/action/support/clustermanager/ClusterManagerNodeReadRequest.java +++ b/server/src/main/java/org/opensearch/action/support/clustermanager/ClusterManagerNodeReadRequest.java @@ -47,6 +47,8 @@ public abstract class ClusterManagerNodeReadRequest clusterMetadataManifest = remoteClusterStateService + .getClusterMetadataManifestByTermVersion( + clusterStateTermVersion.getClusterName().value(), + clusterStateTermVersion.getClusterUUID(), + clusterStateTermVersion.getTerm(), + clusterStateTermVersion.getVersion() + ); + if (clusterMetadataManifest.isEmpty()) { + logger.trace("could not find manifest in remote-store for ClusterStateTermVersion {}", termVersion); + return null; + } + ClusterState clusterStateFromRemote = remoteClusterStateService.getClusterStateForManifest( + appliedState.getClusterName().value(), + clusterMetadataManifest.get(), + appliedState.nodes().getLocalNode().getId(), + true + ); + + if (clusterStateFromRemote != null) { + logger.trace("Using the remote cluster-state fetched from local node, ClusterStateTermVersion {}", termVersion); + return clusterStateFromRemote; + } + } catch (Exception e) { + logger.trace("Error while fetching from remote cluster state", e); + } + } + return null; + } + private boolean checkForBlock(Request request, ClusterState localClusterState) { final ClusterBlockException blockException = checkBlock(request, localClusterState); if (blockException != null) { diff --git a/server/src/main/java/org/opensearch/action/support/clustermanager/term/GetTermVersionResponse.java b/server/src/main/java/org/opensearch/action/support/clustermanager/term/GetTermVersionResponse.java index 0906abe57d547..cf8e6085d4b32 100644 --- 
a/server/src/main/java/org/opensearch/action/support/clustermanager/term/GetTermVersionResponse.java +++ b/server/src/main/java/org/opensearch/action/support/clustermanager/term/GetTermVersionResponse.java @@ -8,6 +8,7 @@ package org.opensearch.action.support.clustermanager.term; +import org.opensearch.Version; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.coordination.ClusterStateTermVersion; import org.opensearch.core.action.ActionResponse; @@ -25,18 +26,34 @@ public class GetTermVersionResponse extends ActionResponse { private final ClusterStateTermVersion clusterStateTermVersion; + private final boolean isStatePresentInRemote; + public GetTermVersionResponse(ClusterStateTermVersion clusterStateTermVersion) { this.clusterStateTermVersion = clusterStateTermVersion; + this.isStatePresentInRemote = false; + } + + public GetTermVersionResponse(ClusterStateTermVersion clusterStateTermVersion, boolean canDownloadFromRemote) { + this.clusterStateTermVersion = clusterStateTermVersion; + this.isStatePresentInRemote = canDownloadFromRemote; } public GetTermVersionResponse(StreamInput in) throws IOException { super(in); this.clusterStateTermVersion = new ClusterStateTermVersion(in); + if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + this.isStatePresentInRemote = in.readOptionalBoolean(); + } else { + this.isStatePresentInRemote = false; + } } @Override public void writeTo(StreamOutput out) throws IOException { clusterStateTermVersion.writeTo(out); + if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + out.writeOptionalBoolean(isStatePresentInRemote); + } } public ClusterStateTermVersion getClusterStateTermVersion() { @@ -47,4 +64,7 @@ public boolean matches(ClusterState clusterState) { return clusterStateTermVersion != null && clusterStateTermVersion.equals(new ClusterStateTermVersion(clusterState)); } + public boolean isStatePresentInRemote() { + return isStatePresentInRemote; + } } diff --git a/server/src/main/java/org/opensearch/action/support/clustermanager/term/TransportGetTermVersionAction.java b/server/src/main/java/org/opensearch/action/support/clustermanager/term/TransportGetTermVersionAction.java index 4752a99c910e4..1cab739a20838 100644 --- a/server/src/main/java/org/opensearch/action/support/clustermanager/term/TransportGetTermVersionAction.java +++ b/server/src/main/java/org/opensearch/action/support/clustermanager/term/TransportGetTermVersionAction.java @@ -15,11 +15,14 @@ import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.block.ClusterBlockException; import org.opensearch.cluster.coordination.ClusterStateTermVersion; +import org.opensearch.cluster.coordination.Coordinator; import org.opensearch.cluster.metadata.IndexNameExpressionResolver; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.inject.Inject; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.discovery.Discovery; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportService; @@ -34,13 +37,18 @@ public class TransportGetTermVersionAction extends TransportClusterManagerNodeRe private final Logger logger = LogManager.getLogger(getClass()); + private final Discovery discovery; + + private boolean usePreCommitState = false; + @Inject public TransportGetTermVersionAction( TransportService transportService, ClusterService clusterService, ThreadPool threadPool, ActionFilters 
actionFilters, - IndexNameExpressionResolver indexNameExpressionResolver + IndexNameExpressionResolver indexNameExpressionResolver, + Discovery discovery ) { super( GetTermVersionAction.NAME, @@ -52,6 +60,8 @@ public TransportGetTermVersionAction( GetTermVersionRequest::new, indexNameExpressionResolver ); + this.usePreCommitState = FeatureFlags.isEnabled(FeatureFlags.TERM_VERSION_PRECOMMIT_ENABLE_SETTING); + this.discovery = discovery; } @Override @@ -76,10 +86,22 @@ protected void clusterManagerOperation( ClusterState state, ActionListener listener ) throws Exception { - ActionListener.completeWith(listener, () -> buildResponse(request, state)); + if (usePreCommitState) { + ActionListener.completeWith(listener, () -> buildResponse(request, clusterService.preCommitState())); + } else { + ActionListener.completeWith(listener, () -> buildResponse(request, state)); + } + } private GetTermVersionResponse buildResponse(GetTermVersionRequest request, ClusterState state) { - return new GetTermVersionResponse(new ClusterStateTermVersion(state)); + ClusterStateTermVersion termVersion = new ClusterStateTermVersion(state); + if (discovery instanceof Coordinator) { + Coordinator coordinator = (Coordinator) discovery; + if (coordinator.isRemotePublicationEnabled()) { + return new GetTermVersionResponse(termVersion, coordinator.isRemotePublicationEnabled()); + } + } + return new GetTermVersionResponse(termVersion); } } diff --git a/server/src/main/java/org/opensearch/action/support/nodes/BaseNodeResponse.java b/server/src/main/java/org/opensearch/action/support/nodes/BaseNodeResponse.java index 8a4e12567b515..7e10b583ef21a 100644 --- a/server/src/main/java/org/opensearch/action/support/nodes/BaseNodeResponse.java +++ b/server/src/main/java/org/opensearch/action/support/nodes/BaseNodeResponse.java @@ -67,6 +67,6 @@ public DiscoveryNode getNode() { @Override public void writeTo(StreamOutput out) throws IOException { - node.writeTo(out); + node.writeToWithAttribute(out); } } diff --git a/server/src/main/java/org/opensearch/action/support/nodes/BaseNodesRequest.java b/server/src/main/java/org/opensearch/action/support/nodes/BaseNodesRequest.java index a4f6d8afeaf38..06f2eed5d228d 100644 --- a/server/src/main/java/org/opensearch/action/support/nodes/BaseNodesRequest.java +++ b/server/src/main/java/org/opensearch/action/support/nodes/BaseNodesRequest.java @@ -73,6 +73,7 @@ public abstract class BaseNodesRequest * Setting default behavior as `true` but can be explicitly changed in requests that do not require. */ private boolean includeDiscoveryNodes = true; + private final TimeValue DEFAULT_TIMEOUT_SECS = TimeValue.timeValueSeconds(30); private TimeValue timeout; @@ -88,11 +89,22 @@ protected BaseNodesRequest(String... nodesIds) { this.nodesIds = nodesIds; } + protected BaseNodesRequest(boolean includeDiscoveryNodes, String... nodesIds) { + this.nodesIds = nodesIds; + this.includeDiscoveryNodes = includeDiscoveryNodes; + } + protected BaseNodesRequest(DiscoveryNode... concreteNodes) { this.nodesIds = null; this.concreteNodes = concreteNodes; } + protected BaseNodesRequest(boolean includeDiscoveryNodes, DiscoveryNode... 
concreteNodes) { + this.nodesIds = null; + this.concreteNodes = concreteNodes; + this.includeDiscoveryNodes = includeDiscoveryNodes; + } + public final String[] nodesIds() { return nodesIds; } @@ -127,10 +139,6 @@ public void setConcreteNodes(DiscoveryNode[] concreteNodes) { this.concreteNodes = concreteNodes; } - public void setIncludeDiscoveryNodes(boolean value) { - includeDiscoveryNodes = value; - } - public boolean getIncludeDiscoveryNodes() { return includeDiscoveryNodes; } diff --git a/server/src/main/java/org/opensearch/action/support/nodes/TransportNodesAction.java b/server/src/main/java/org/opensearch/action/support/nodes/TransportNodesAction.java index 3acd12f632e0f..dccd5059dd52d 100644 --- a/server/src/main/java/org/opensearch/action/support/nodes/TransportNodesAction.java +++ b/server/src/main/java/org/opensearch/action/support/nodes/TransportNodesAction.java @@ -240,18 +240,16 @@ class AsyncAction { } this.responses = new AtomicReferenceArray<>(request.concreteNodes().length); this.concreteNodes = request.concreteNodes(); - if (request.getIncludeDiscoveryNodes() == false) { - // As we transfer the ownership of discovery nodes to route the request to into the AsyncAction class, we - // remove the list of DiscoveryNodes from the request. This reduces the payload of the request and improves + // As we transfer the ownership of discovery nodes to route the request to into the AsyncAction class, + // we remove the list of DiscoveryNodes from the request. This reduces the payload of the request and improves // the number of concrete nodes in the memory. request.setConcreteNodes(null); } } void start() { - final DiscoveryNode[] nodes = this.concreteNodes; - if (nodes.length == 0) { + if (this.concreteNodes.length == 0) { // nothing to notify threadPool.generic().execute(() -> listener.onResponse(newResponse(request, responses))); return; @@ -260,9 +258,9 @@ void start() { if (request.timeout() != null) { builder.withTimeout(request.timeout()); } - for (int i = 0; i < nodes.length; i++) { + for (int i = 0; i < this.concreteNodes.length; i++) { final int idx = i; - final DiscoveryNode node = nodes[i]; + final DiscoveryNode node = this.concreteNodes[i]; final String nodeId = node.getId(); try { TransportRequest nodeRequest = newNodeRequest(request); diff --git a/server/src/main/java/org/opensearch/cluster/AbstractDiffable.java b/server/src/main/java/org/opensearch/cluster/AbstractDiffable.java index 74af3472433ba..770a0c171e084 100644 --- a/server/src/main/java/org/opensearch/cluster/AbstractDiffable.java +++ b/server/src/main/java/org/opensearch/cluster/AbstractDiffable.java @@ -83,6 +83,11 @@ private static class CompleteDiff> implements Diff { this.part = part; } + @Override + public String toString() { + return "CompleteDiff{" + "part=" + part + '}'; + } + /** * Creates simple diff without changes */ diff --git a/server/src/main/java/org/opensearch/cluster/ClusterModule.java b/server/src/main/java/org/opensearch/cluster/ClusterModule.java index bb51c42252448..3924e8e0589a7 100644 --- a/server/src/main/java/org/opensearch/cluster/ClusterModule.java +++ b/server/src/main/java/org/opensearch/cluster/ClusterModule.java @@ -53,6 +53,7 @@ import org.opensearch.cluster.metadata.ViewMetadata; import org.opensearch.cluster.metadata.WeightedRoutingMetadata; import org.opensearch.cluster.routing.DelayedAllocationService; +import org.opensearch.cluster.routing.RerouteService; import org.opensearch.cluster.routing.allocation.AllocationService; import 
org.opensearch.cluster.routing.allocation.ExistingShardsAllocator; import org.opensearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; @@ -75,6 +76,7 @@ import org.opensearch.cluster.routing.allocation.decider.ResizeAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.RestoreInProgressAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.SameShardAllocationDecider; +import org.opensearch.cluster.routing.allocation.decider.SearchReplicaAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.SnapshotInProgressAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.TargetPoolAllocationDecider; @@ -85,6 +87,7 @@ import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Setting.Property; import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.util.concurrent.ThreadContext; import org.opensearch.common.util.set.Sets; import org.opensearch.core.ParseField; @@ -379,6 +382,9 @@ public static Collection createAllocationDeciders( addAllocationDecider(deciders, new SnapshotInProgressAllocationDecider()); addAllocationDecider(deciders, new RestoreInProgressAllocationDecider()); addAllocationDecider(deciders, new FilterAllocationDecider(settings, clusterSettings)); + if (FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL_SETTING.get(settings)) { + addAllocationDecider(deciders, new SearchReplicaAllocationDecider(settings, clusterSettings)); + } addAllocationDecider(deciders, new SameShardAllocationDecider(settings, clusterSettings)); addAllocationDecider(deciders, new DiskThresholdDecider(settings, clusterSettings)); addAllocationDecider(deciders, new ThrottlingAllocationDecider(settings, clusterSettings)); @@ -474,4 +480,7 @@ public void setExistingShardsAllocators(GatewayAllocator gatewayAllocator, Shard allocationService.setExistingShardsAllocators(existingShardsAllocators); } + public void setRerouteServiceForAllocator(RerouteService rerouteService) { + shardsAllocator.setRerouteService(rerouteService); + } } diff --git a/server/src/main/java/org/opensearch/cluster/ClusterState.java b/server/src/main/java/org/opensearch/cluster/ClusterState.java index 9e63f961d241d..1e4fd2dfffe0f 100644 --- a/server/src/main/java/org/opensearch/cluster/ClusterState.java +++ b/server/src/main/java/org/opensearch/cluster/ClusterState.java @@ -156,7 +156,7 @@ default boolean isPrivate() { } - private static final NamedDiffableValueSerializer CUSTOM_VALUE_SERIALIZER = new NamedDiffableValueSerializer<>(Custom.class); + public static final NamedDiffableValueSerializer CUSTOM_VALUE_SERIALIZER = new NamedDiffableValueSerializer<>(Custom.class); public static final String UNKNOWN_UUID = "_na_"; @@ -781,7 +781,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeString(stateUUID); metadata.writeTo(out); routingTable.writeTo(out); - nodes.writeTo(out); + nodes.writeToWithAttribute(out); blocks.writeTo(out); // filter out custom states not supported by the other node int numberOfCustoms = 0; @@ -839,6 +839,34 @@ private static class ClusterStateDiff implements Diff { minimumClusterManagerNodesOnPublishingClusterManager = after.minimumClusterManagerNodesOnPublishingClusterManager; } + @Override + public String toString() { + return new StringBuilder().append("ClusterStateDiff{toVersion=") + .append(toVersion) + .append(", 
fromUuid='") + .append(fromUuid) + .append('\'') + .append(", toUuid='") + .append(toUuid) + .append('\'') + .append(", clusterName=") + .append(clusterName) + .append(", routingTable=") + .append(routingTable) + .append(", nodes=") + .append(nodes) + .append(", metadata=") + .append(metadata) + .append(", blocks=") + .append(blocks) + .append(", customs=") + .append(customs) + .append(", minimumClusterManagerNodesOnPublishingClusterManager=") + .append(minimumClusterManagerNodesOnPublishingClusterManager) + .append("}") + .toString(); + } + ClusterStateDiff(StreamInput in, DiscoveryNode localNode) throws IOException { clusterName = new ClusterName(in); fromUuid = in.readString(); @@ -859,13 +887,23 @@ public void writeTo(StreamOutput out) throws IOException { out.writeString(toUuid); out.writeLong(toVersion); routingTable.writeTo(out); - nodes.writeTo(out); + nodesWriteToWithAttributes(nodes, out); metadata.writeTo(out); blocks.writeTo(out); customs.writeTo(out); out.writeVInt(minimumClusterManagerNodesOnPublishingClusterManager); } + private void nodesWriteToWithAttributes(Diff nodes, StreamOutput out) throws IOException { + DiscoveryNodes part = nodes.apply(null); + if (part != null) { + out.writeBoolean(true); + part.writeToWithAttribute(out); + } else { + out.writeBoolean(false); + } + } + @Override public ClusterState apply(ClusterState state) { Builder builder = new Builder(clusterName); diff --git a/server/src/main/java/org/opensearch/cluster/DiffableUtils.java b/server/src/main/java/org/opensearch/cluster/DiffableUtils.java index d21cd354bf659..4b5dcaa52cc50 100644 --- a/server/src/main/java/org/opensearch/cluster/DiffableUtils.java +++ b/server/src/main/java/org/opensearch/cluster/DiffableUtils.java @@ -271,6 +271,18 @@ public Map getUpserts() { return upserts; } + @Override + public String toString() { + return new StringBuilder().append("MapDiff{deletes=") + .append(deletes) + .append(", diffs=") + .append(diffs) + .append(", upserts=") + .append(upserts) + .append("}") + .toString(); + } + @Override public void writeTo(StreamOutput out) throws IOException { out.writeCollection(deletes, (o, v) -> keySerializer.writeKey(v, o)); diff --git a/server/src/main/java/org/opensearch/cluster/block/ClusterBlock.java b/server/src/main/java/org/opensearch/cluster/block/ClusterBlock.java index 5fa897c0b1185..7c0a7a2a6b837 100644 --- a/server/src/main/java/org/opensearch/cluster/block/ClusterBlock.java +++ b/server/src/main/java/org/opensearch/cluster/block/ClusterBlock.java @@ -52,7 +52,7 @@ * @opensearch.api */ @PublicApi(since = "1.0.0") -public class ClusterBlock implements Writeable, ToXContentFragment { +public class ClusterBlock implements Writeable, ToXContentFragment, Comparable { private final int id; @Nullable @@ -217,7 +217,13 @@ public int hashCode() { return Objects.hash(id, uuid); } + @Override + public int compareTo(ClusterBlock block) { + return Integer.compare(block.id(), this.id()); + } + public boolean isAllowReleaseResources() { return allowReleaseResources; } + } diff --git a/server/src/main/java/org/opensearch/cluster/block/ClusterBlocks.java b/server/src/main/java/org/opensearch/cluster/block/ClusterBlocks.java index 02a20b7681ba7..615ea18315cd1 100644 --- a/server/src/main/java/org/opensearch/cluster/block/ClusterBlocks.java +++ b/server/src/main/java/org/opensearch/cluster/block/ClusterBlocks.java @@ -39,8 +39,10 @@ import org.opensearch.common.Nullable; import org.opensearch.common.annotation.PublicApi; import org.opensearch.common.util.set.Sets; +import 
org.opensearch.core.common.io.stream.BufferedChecksumStreamOutput; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.VerifiableWriteable; import org.opensearch.core.rest.RestStatus; import java.io.IOException; @@ -62,7 +64,7 @@ * @opensearch.api */ @PublicApi(since = "1.0.0") -public class ClusterBlocks extends AbstractDiffable { +public class ClusterBlocks extends AbstractDiffable implements VerifiableWriteable { public static final ClusterBlocks EMPTY_CLUSTER_BLOCK = new ClusterBlocks(emptySet(), Map.of()); private final Set global; @@ -303,6 +305,11 @@ public void writeTo(StreamOutput out) throws IOException { out.writeMap(indicesBlocks, StreamOutput::writeString, (o, s) -> writeBlockSet(s, o)); } + @Override + public void writeVerifiableTo(BufferedChecksumStreamOutput out) throws IOException { + writeTo(out); + } + private static void writeBlockSet(Set blocks, StreamOutput out) throws IOException { out.writeCollection(blocks); } diff --git a/server/src/main/java/org/opensearch/cluster/coordination/CoordinationMetadata.java b/server/src/main/java/org/opensearch/cluster/coordination/CoordinationMetadata.java index 53398d6f3f98f..d7291a3689192 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/CoordinationMetadata.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/CoordinationMetadata.java @@ -35,8 +35,10 @@ import org.opensearch.common.annotation.PublicApi; import org.opensearch.common.util.set.Sets; import org.opensearch.core.ParseField; +import org.opensearch.core.common.io.stream.BufferedChecksumStreamOutput; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.VerifiableWriteable; import org.opensearch.core.common.io.stream.Writeable; import org.opensearch.core.xcontent.ConstructingObjectParser; import org.opensearch.core.xcontent.ToXContentFragment; @@ -59,7 +61,7 @@ * @opensearch.api */ @PublicApi(since = "1.0.0") -public class CoordinationMetadata implements Writeable, ToXContentFragment { +public class CoordinationMetadata implements VerifiableWriteable, ToXContentFragment { public static final CoordinationMetadata EMPTY_METADATA = builder().build(); @@ -149,6 +151,11 @@ public void writeTo(StreamOutput out) throws IOException { out.writeCollection(votingConfigExclusions); } + @Override + public void writeVerifiableTo(BufferedChecksumStreamOutput out) throws IOException { + writeTo(out); + } + @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { return builder.field(TERM_PARSE_FIELD.getPreferredName(), term) @@ -272,7 +279,7 @@ public CoordinationMetadata build() { * @opensearch.api */ @PublicApi(since = "1.0.0") - public static class VotingConfigExclusion implements Writeable, ToXContentFragment { + public static class VotingConfigExclusion implements Writeable, ToXContentFragment, Comparable { public static final String MISSING_VALUE_MARKER = "_absent_"; private final String nodeId; private final String nodeName; @@ -361,6 +368,10 @@ public String toString() { return sb.toString(); } + @Override + public int compareTo(VotingConfigExclusion votingConfigExclusion) { + return votingConfigExclusion.getNodeId().compareTo(this.getNodeId()); + } } /** diff --git a/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java 
b/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java index c7820c2c9a365..9cffc7051d756 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java @@ -39,8 +39,8 @@ import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.common.settings.Settings; -import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.util.io.IOUtils; +import org.opensearch.gateway.remote.ClusterMetadataManifest; import java.io.Closeable; import java.io.IOException; @@ -53,7 +53,7 @@ import java.util.Set; import static org.opensearch.cluster.coordination.Coordinator.ZEN1_BWC_TERM; -import static org.opensearch.common.util.FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL; +import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_PUBLICATION_SETTING; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.isRemoteStoreClusterStateEnabled; /** @@ -81,7 +81,7 @@ public class CoordinationState { private VotingConfiguration lastPublishedConfiguration; private VoteCollection publishVotes; private final boolean isRemoteStateEnabled; - private final boolean isRemotePublicationEnabled; + private boolean isRemotePublicationEnabled; public CoordinationState( DiscoveryNode localNode, @@ -105,8 +105,9 @@ public CoordinationState( .getLastAcceptedConfiguration(); this.publishVotes = new VoteCollection(); this.isRemoteStateEnabled = isRemoteStoreClusterStateEnabled(settings); + // ToDo: revisit this check while making the setting dynamic this.isRemotePublicationEnabled = isRemoteStateEnabled - && FeatureFlags.isEnabled(REMOTE_PUBLICATION_EXPERIMENTAL) + && REMOTE_PUBLICATION_SETTING.get(settings) && localNode.isRemoteStatePublicationEnabled(); } @@ -460,6 +461,9 @@ public PublishResponse handlePublishRequest(PublishRequest publishRequest) { clusterState.term() ); persistedStateRegistry.getPersistedState(PersistedStateType.LOCAL).setLastAcceptedState(clusterState); + if (shouldUpdateRemotePersistedState(publishRequest)) { + updateRemotePersistedStateOnPublishRequest(publishRequest); + } assert getLastAcceptedState() == clusterState; return new PublishResponse(clusterState.term(), clusterState.version()); @@ -572,6 +576,9 @@ public void handleCommit(ApplyCommitRequest applyCommit) { ); persistedStateRegistry.getPersistedState(PersistedStateType.LOCAL).markLastAcceptedStateAsCommitted(); + if (shouldCommitRemotePersistedState()) { + persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).markLastAcceptedStateAsCommitted(); + } assert getLastCommittedConfiguration().equals(getLastAcceptedConfiguration()); } @@ -617,6 +624,33 @@ public void close() throws IOException { IOUtils.close(persistedStateRegistry); } + private boolean shouldUpdateRemotePersistedState(PublishRequest publishRequest) { + return persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE) != null + && publishRequest.getAcceptedState().getNodes().isLocalNodeElectedClusterManager() == false; + } + + private void updateRemotePersistedStateOnPublishRequest(PublishRequest publishRequest) { + if (publishRequest instanceof RemoteStatePublishRequest) { + persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).setLastAcceptedState(publishRequest.getAcceptedState()); + persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE) + 
.setLastAcceptedManifest(((RemoteStatePublishRequest) publishRequest).getAcceptedManifest()); + } else { + // We will end up here if PublishRequest was sent not using Remote Store even with remote persisted state on this node + persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).setLastAcceptedState(null); + persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).setLastAcceptedManifest(null); + } + } + + private boolean shouldCommitRemotePersistedState() { + return persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE) != null + && persistedStateRegistry.getPersistedState(PersistedStateType.LOCAL) + .getLastAcceptedState() + .getNodes() + .isLocalNodeElectedClusterManager() == false + && persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).getLastAcceptedState() != null + && persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).getLastAcceptedManifest() != null; + } + /** * Pluggable persistence layer for {@link CoordinationState}. * @@ -654,6 +688,22 @@ public interface PersistedState extends Closeable { */ PersistedStateStats getStats(); + /** + * Returns the last accepted {@link ClusterMetadataManifest}. + * + * @return The last accepted {@link ClusterMetadataManifest}, or null if no manifest + * has been accepted yet. + */ + default ClusterMetadataManifest getLastAcceptedManifest() { + // return null by default, this method needs to be overridden wherever required + return null; + } + + /** + * Sets the last accepted {@link ClusterMetadataManifest}. + */ + default void setLastAcceptedManifest(ClusterMetadataManifest manifest) {} + /** * Marks the last accepted cluster state as committed. * After a successful call to this method, {@link #getLastAcceptedState()} should return the last cluster state that was set, @@ -662,14 +712,7 @@ public interface PersistedState extends Closeable { */ default void markLastAcceptedStateAsCommitted() { final ClusterState lastAcceptedState = getLastAcceptedState(); - Metadata.Builder metadataBuilder = null; - if (lastAcceptedState.getLastAcceptedConfiguration().equals(lastAcceptedState.getLastCommittedConfiguration()) == false) { - final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder(lastAcceptedState.coordinationMetadata()) - .lastCommittedConfiguration(lastAcceptedState.getLastAcceptedConfiguration()) - .build(); - metadataBuilder = Metadata.builder(lastAcceptedState.metadata()); - metadataBuilder.coordinationMetadata(coordinationMetadata); - } + Metadata.Builder metadataBuilder = commitVotingConfiguration(lastAcceptedState); // if we receive a commit from a Zen1 cluster-manager that has not recovered its state yet, // the cluster uuid might not been known yet. 
assert lastAcceptedState.metadata().clusterUUID().equals(Metadata.UNKNOWN_CLUSTER_UUID) == false @@ -694,6 +737,18 @@ default void markLastAcceptedStateAsCommitted() { } } + default Metadata.Builder commitVotingConfiguration(ClusterState lastAcceptedState) { + Metadata.Builder metadataBuilder = null; + if (lastAcceptedState.getLastAcceptedConfiguration().equals(lastAcceptedState.getLastCommittedConfiguration()) == false) { + final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder(lastAcceptedState.coordinationMetadata()) + .lastCommittedConfiguration(lastAcceptedState.getLastAcceptedConfiguration()) + .build(); + metadataBuilder = Metadata.builder(lastAcceptedState.metadata()); + metadataBuilder.coordinationMetadata(coordinationMetadata); + } + return metadataBuilder; + } + default void close() throws IOException {} } diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index 87f02c6891be6..13a57d93f03f0 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -386,6 +386,8 @@ private void handleApplyCommit(ApplyCommitRequest applyCommitRequest, ActionList coordinationState.get().handleCommit(applyCommitRequest); final ClusterState committedState = hideStateIfNotRecovered(coordinationState.get().getLastAcceptedState()); applierState = mode == Mode.CANDIDATE ? clusterStateWithNoClusterManagerBlock(committedState) : committedState; + clusterApplier.setPreCommitState(applierState); + if (applyCommitRequest.getSourceNode().equals(getLocalNode())) { // cluster-manager node applies the committed state at the end of the publication process, not here. 
applyListener.onResponse(null); @@ -901,6 +903,10 @@ public DiscoveryStats stats() { stats.add(persistedStateRegistry.getPersistedState(stateType).getStats()); } }); + if (coordinationState.get().isRemotePublicationEnabled()) { + stats.add(publicationHandler.getFullDownloadStats()); + stats.add(publicationHandler.getDiffDownloadStats()); + } clusterStateStats.setPersistenceStats(stats); return new DiscoveryStats(new PendingClusterStateStats(0, 0, 0), publicationHandler.stats(), clusterStateStats); } @@ -1337,6 +1343,7 @@ assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId()) coordinationState.get().isRemotePublicationEnabled(), persistedStateRegistry ); + logger.debug("initialized PublicationContext using class: {}", publicationContext.getClass().toString()); final PublishRequest publishRequest = coordinationState.get().handleClientValue(clusterState); final CoordinatorPublication publication = new CoordinatorPublication( @@ -1857,4 +1864,11 @@ protected void sendApplyCommit( public static boolean isZen1Node(DiscoveryNode discoveryNode) { return Booleans.isTrue(discoveryNode.getAttributes().getOrDefault("zen1", "false")); } + + public boolean isRemotePublicationEnabled() { + if (coordinationState.get() != null) { + return coordinationState.get().isRemotePublicationEnabled(); + } + return false; + } } diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Join.java b/server/src/main/java/org/opensearch/cluster/coordination/Join.java index 58fa85992ebc8..ce1a234998690 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Join.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Join.java @@ -78,8 +78,8 @@ public Join(StreamInput in) throws IOException { @Override public void writeTo(StreamOutput out) throws IOException { - sourceNode.writeTo(out); - targetNode.writeTo(out); + sourceNode.writeToWithAttribute(out); + targetNode.writeToWithAttribute(out); out.writeLong(term); out.writeLong(lastAcceptedTerm); out.writeLong(lastAcceptedVersion); diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinRequest.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinRequest.java index 04f87d16ee400..1447838a41502 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinRequest.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinRequest.java @@ -84,7 +84,7 @@ public JoinRequest(StreamInput in) throws IOException { @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); - sourceNode.writeTo(out); + sourceNode.writeToWithAttribute(out); out.writeLong(minimumTerm); out.writeOptionalWriteable(optionalJoin.orElse(null)); } diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java index f77a7ffc8ce8e..838b5723b217b 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java @@ -67,6 +67,7 @@ import static org.opensearch.cluster.decommission.DecommissionHelper.nodeCommissioned; import static org.opensearch.gateway.GatewayService.STATE_NOT_RECOVERED_BLOCK; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_CLUSTER_PUBLICATION_REPO_NAME_ATTRIBUTES; import static org.opensearch.node.remotestore.RemoteStoreNodeService.CompatibilityMode; import static 
org.opensearch.node.remotestore.RemoteStoreNodeService.CompatibilityMode.MIXED; import static org.opensearch.node.remotestore.RemoteStoreNodeService.CompatibilityMode.STRICT; @@ -458,7 +459,7 @@ public static void ensureNodesCompatibility( ); } - ensureRemoteStoreNodesCompatibility(joiningNode, currentNodes, metadata); + ensureRemoteRepositoryCompatibility(joiningNode, currentNodes, metadata); } /** @@ -491,6 +492,30 @@ public static void ensureNodeCommissioned(DiscoveryNode node, Metadata metadata) } } + public static void ensureRemoteRepositoryCompatibility(DiscoveryNode joiningNode, DiscoveryNodes currentNodes, Metadata metadata) { + List existingNodes = new ArrayList<>(currentNodes.getNodes().values()); + + boolean isClusterRemoteStoreEnabled = existingNodes.stream().anyMatch(DiscoveryNode::isRemoteStoreNode); + if (isClusterRemoteStoreEnabled || joiningNode.isRemoteStoreNode()) { + ensureRemoteStoreNodesCompatibility(joiningNode, currentNodes, metadata); + } else { + ensureRemoteClusterStateNodesCompatibility(joiningNode, currentNodes); + } + } + + private static void ensureRemoteClusterStateNodesCompatibility(DiscoveryNode joiningNode, DiscoveryNodes currentNodes) { + List existingNodes = new ArrayList<>(currentNodes.getNodes().values()); + + assert existingNodes.isEmpty() == false; + Optional remotePublicationNode = existingNodes.stream() + .filter(DiscoveryNode::isRemoteStatePublicationEnabled) + .findFirst(); + + if (remotePublicationNode.isPresent() && joiningNode.isRemoteStatePublicationEnabled()) { + ensureRepositoryCompatibility(joiningNode, remotePublicationNode.get(), REMOTE_CLUSTER_PUBLICATION_REPO_NAME_ATTRIBUTES); + } + } + /** * The method ensures homogeneity - * 1. The joining node has to be a remote store backed if it's joining a remote store backed cluster. Validates @@ -506,6 +531,7 @@ public static void ensureNodeCommissioned(DiscoveryNode node, Metadata metadata) * needs to be modified. 
*/ private static void ensureRemoteStoreNodesCompatibility(DiscoveryNode joiningNode, DiscoveryNodes currentNodes, Metadata metadata) { + List existingNodes = new ArrayList<>(currentNodes.getNodes().values()); assert existingNodes.isEmpty() == false; @@ -587,6 +613,23 @@ private static void ensureRemoteStoreNodesCompatibility( } } + private static void ensureRepositoryCompatibility(DiscoveryNode joiningNode, DiscoveryNode existingNode, List reposToValidate) { + + RemoteStoreNodeAttribute joiningRemoteStoreNodeAttribute = new RemoteStoreNodeAttribute(joiningNode); + RemoteStoreNodeAttribute existingRemoteStoreNodeAttribute = new RemoteStoreNodeAttribute(existingNode); + + if (existingRemoteStoreNodeAttribute.equalsForRepositories(joiningRemoteStoreNodeAttribute, reposToValidate) == false) { + throw new IllegalStateException( + "a remote store node [" + + joiningNode + + "] is trying to join a remote store cluster with incompatible node attributes in " + + "comparison with existing node [" + + existingNode + + "]" + ); + } + } + public static Collection> addBuiltInJoinValidators( Collection> onJoinValidators ) { diff --git a/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java b/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java index 62885a12222be..cdf331b7bb577 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java @@ -62,6 +62,7 @@ import java.io.IOException; import java.util.HashMap; import java.util.Map; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; import java.util.function.BiConsumer; @@ -97,6 +98,7 @@ public class PublicationTransportHandler { private final AtomicLong fullClusterStateReceivedCount = new AtomicLong(); private final AtomicLong incompatibleClusterStateDiffReceivedCount = new AtomicLong(); private final AtomicLong compatibleClusterStateDiffReceivedCount = new AtomicLong(); + private final AtomicBoolean allNodesRemotePublicationEnabled = new AtomicBoolean(); // -> no need to put a timeout on the options here, because we want the response to eventually be received // and not log an error if it arrives after the timeout private final TransportRequestOptions stateRequestOptions = TransportRequestOptions.builder() @@ -176,6 +178,14 @@ public PublishClusterStateStats stats() { ); } + public PersistedStateStats getFullDownloadStats() { + return remoteClusterStateService.getFullDownloadStats(); + } + + public PersistedStateStats getDiffDownloadStats() { + return remoteClusterStateService.getDiffDownloadStats(); + } + private PublishWithJoinResponse handleIncomingPublishRequest(BytesTransportRequest request) throws IOException { try (StreamInput in = CompressedStreamUtils.decompressBytes(request, namedWriteableRegistry)) { ClusterState incomingState; @@ -189,7 +199,7 @@ private PublishWithJoinResponse handleIncomingPublishRequest(BytesTransportReque } fullClusterStateReceivedCount.incrementAndGet(); logger.debug("received full cluster state version [{}] with size [{}]", incomingState.version(), request.bytes().length()); - final PublishWithJoinResponse response = acceptState(incomingState); + final PublishWithJoinResponse response = acceptState(incomingState, null); lastSeenClusterState.set(incomingState); return response; } else { @@ -220,7 +230,7 @@ private 
PublishWithJoinResponse handleIncomingPublishRequest(BytesTransportReque incomingState.stateUUID(), request.bytes().length() ); - final PublishWithJoinResponse response = acceptState(incomingState); + final PublishWithJoinResponse response = acceptState(incomingState, null); lastSeenClusterState.compareAndSet(lastSeen, incomingState); return response; } @@ -229,73 +239,82 @@ private PublishWithJoinResponse handleIncomingPublishRequest(BytesTransportReque } // package private for testing - PublishWithJoinResponse handleIncomingRemotePublishRequest(RemotePublishRequest request) throws IOException { - if (transportService.getLocalNode().equals(request.getSourceNode())) { - return acceptRemoteStateOnLocalNode(request); - } - // TODO Make cluster state download non-blocking: https://github.com/opensearch-project/OpenSearch/issues/14102 - ClusterMetadataManifest manifest = remoteClusterStateService.getClusterMetadataManifestByFileName( - request.getClusterUUID(), - request.getManifestFile() - ); - if (manifest == null) { - throw new IllegalStateException("Publication failed as manifest was not found for " + request); - } + PublishWithJoinResponse handleIncomingRemotePublishRequest(RemotePublishRequest request) throws IOException, IllegalStateException { boolean applyFullState = false; - final ClusterState lastSeen = lastSeenClusterState.get(); - if (lastSeen == null) { - logger.debug(() -> "Diff cannot be applied as there is no last cluster state"); - applyFullState = true; - } else if (manifest.getDiffManifest() == null) { - logger.trace(() -> "There is no diff in the manifest"); - applyFullState = true; - } else if (manifest.getDiffManifest().getFromStateUUID().equals(lastSeen.stateUUID()) == false) { - logger.debug(() -> "Last cluster state not compatible with the diff"); - applyFullState = true; - } - - if (applyFullState == true) { - logger.debug( - () -> new ParameterizedMessage( - "Downloading full cluster state for term {}, version {}, stateUUID {}", - manifest.getClusterTerm(), - manifest.getStateVersion(), - manifest.getStateUUID() - ) - ); - ClusterState clusterState = remoteClusterStateService.getClusterStateForManifest( - request.getClusterName(), - manifest, - transportService.getLocalNode().getId(), - true - ); - fullClusterStateReceivedCount.incrementAndGet(); - final PublishWithJoinResponse response = acceptState(clusterState); - lastSeenClusterState.set(clusterState); - return response; - } else { - logger.debug( - () -> new ParameterizedMessage( - "Downloading diff cluster state for term {}, version {}, previousUUID {}, current UUID {}", - manifest.getClusterTerm(), - manifest.getStateVersion(), - manifest.getDiffManifest().getFromStateUUID(), - manifest.getStateUUID() - ) - ); - ClusterState clusterState = remoteClusterStateService.getClusterStateUsingDiff( - manifest, - lastSeen, - transportService.getLocalNode().getId() + try { + if (transportService.getLocalNode().equals(request.getSourceNode())) { + return acceptRemoteStateOnLocalNode(request); + } + // TODO Make cluster state download non-blocking: https://github.com/opensearch-project/OpenSearch/issues/14102 + ClusterMetadataManifest manifest = remoteClusterStateService.getClusterMetadataManifestByFileName( + request.getClusterUUID(), + request.getManifestFile() ); - compatibleClusterStateDiffReceivedCount.incrementAndGet(); - final PublishWithJoinResponse response = acceptState(clusterState); - lastSeenClusterState.compareAndSet(lastSeen, clusterState); - return response; + if (manifest == null) { + throw new 
IllegalStateException("Publication failed as manifest was not found for " + request); + } + final ClusterState lastSeen = lastSeenClusterState.get(); + if (lastSeen == null) { + logger.debug(() -> "Diff cannot be applied as there is no last cluster state"); + applyFullState = true; + } else if (manifest.getDiffManifest() == null) { + logger.debug(() -> "There is no diff in the manifest"); + applyFullState = true; + } else if (manifest.getDiffManifest().getFromStateUUID().equals(lastSeen.stateUUID()) == false) { + logger.debug(() -> "Last cluster state not compatible with the diff"); + applyFullState = true; + } + + if (applyFullState == true) { + logger.debug( + () -> new ParameterizedMessage( + "Downloading full cluster state for term {}, version {}, stateUUID {}", + manifest.getClusterTerm(), + manifest.getStateVersion(), + manifest.getStateUUID() + ) + ); + ClusterState clusterState = remoteClusterStateService.getClusterStateForManifest( + request.getClusterName(), + manifest, + transportService.getLocalNode().getId(), + true + ); + fullClusterStateReceivedCount.incrementAndGet(); + final PublishWithJoinResponse response = acceptState(clusterState, manifest); + lastSeenClusterState.set(clusterState); + return response; + } else { + logger.debug( + () -> new ParameterizedMessage( + "Downloading diff cluster state for term {}, version {}, previousUUID {}, current UUID {}", + manifest.getClusterTerm(), + manifest.getStateVersion(), + manifest.getDiffManifest().getFromStateUUID(), + manifest.getStateUUID() + ) + ); + ClusterState clusterState = remoteClusterStateService.getClusterStateUsingDiff( + manifest, + lastSeen, + transportService.getLocalNode().getId() + ); + compatibleClusterStateDiffReceivedCount.incrementAndGet(); + final PublishWithJoinResponse response = acceptState(clusterState, manifest); + lastSeenClusterState.compareAndSet(lastSeen, clusterState); + return response; + } + } catch (Exception e) { + if (applyFullState) { + remoteClusterStateService.fullDownloadFailed(); + } else { + remoteClusterStateService.diffDownloadFailed(); + } + throw e; } } - private PublishWithJoinResponse acceptState(ClusterState incomingState) { + private PublishWithJoinResponse acceptState(ClusterState incomingState, ClusterMetadataManifest manifest) { // if the state is coming from the current node, use original request instead (see currentPublishRequestToSelf for explanation) if (transportService.getLocalNode().equals(incomingState.nodes().getClusterManagerNode())) { final PublishRequest publishRequest = currentPublishRequestToSelf.get(); @@ -305,6 +324,9 @@ private PublishWithJoinResponse acceptState(ClusterState incomingState) { return handlePublishRequest.apply(publishRequest); } } + if (manifest != null) { + return handlePublishRequest.apply(new RemoteStatePublishRequest(incomingState, manifest)); + } return handlePublishRequest.apply(new PublishRequest(incomingState)); } @@ -332,11 +354,18 @@ public PublicationContext newPublicationContext( boolean isRemotePublicationEnabled, PersistedStateRegistry persistedStateRegistry ) { - final PublicationContext publicationContext = new PublicationContext( - clusterChangedEvent, - isRemotePublicationEnabled, - persistedStateRegistry - ); + if (isRemotePublicationEnabled == true) { + if (allNodesRemotePublicationEnabled.get() == false) { + if (validateRemotePublicationOnAllNodes(clusterChangedEvent.state().nodes()) == true) { + allNodesRemotePublicationEnabled.set(true); + } + } + if (allNodesRemotePublicationEnabled.get() == true) { + // if all nodes 
are remote then create remote publication context + return new RemotePublicationContext(clusterChangedEvent, persistedStateRegistry); + } + } + final PublicationContext publicationContext = new PublicationContext(clusterChangedEvent, persistedStateRegistry); // Build the serializations we expect to need now, early in the process, so that an error during serialization fails the publication // straight away. This isn't watertight since we send diffs on a best-effort basis and may fall back to sending a full state (and @@ -345,6 +374,17 @@ public PublicationContext newPublicationContext( return publicationContext; } + private boolean validateRemotePublicationOnAllNodes(DiscoveryNodes discoveryNodes) { + assert ClusterMetadataManifest.getCodecForVersion(discoveryNodes.getMinNodeVersion()) >= ClusterMetadataManifest.CODEC_V0; + for (DiscoveryNode node : discoveryNodes.getNodes().values()) { + // if a node is non-remote then created local publication context + if (node.isRemoteStatePublicationEnabled() == false) { + return false; + } + } + return true; + } + // package private for testing void setCurrentPublishRequestToSelf(PublishRequest publishRequest) { this.currentPublishRequestToSelf.set(publishRequest); @@ -385,25 +425,19 @@ private static BytesReference serializeDiffClusterState(Diff diff, */ public class PublicationContext { - private final DiscoveryNodes discoveryNodes; - private final ClusterState newState; - private final ClusterState previousState; - private final boolean sendFullVersion; + protected final DiscoveryNodes discoveryNodes; + protected final ClusterState newState; + protected final ClusterState previousState; + protected final boolean sendFullVersion; private final Map serializedStates = new HashMap<>(); private final Map serializedDiffs = new HashMap<>(); - private final boolean sendRemoteState; - private final PersistedStateRegistry persistedStateRegistry; + protected final PersistedStateRegistry persistedStateRegistry; - PublicationContext( - ClusterChangedEvent clusterChangedEvent, - boolean isRemotePublicationEnabled, - PersistedStateRegistry persistedStateRegistry - ) { + PublicationContext(ClusterChangedEvent clusterChangedEvent, PersistedStateRegistry persistedStateRegistry) { discoveryNodes = clusterChangedEvent.state().nodes(); newState = clusterChangedEvent.state(); previousState = clusterChangedEvent.previousState(); sendFullVersion = previousState.getBlocks().disableStatePersistence(); - sendRemoteState = isRemotePublicationEnabled; this.persistedStateRegistry = persistedStateRegistry; } @@ -468,17 +502,7 @@ public void onFailure(Exception e) { } else { responseActionListener = listener; } - // TODO Decide to send remote state before starting publication by checking remote publication on all nodes - if (sendRemoteState && destination.isRemoteStatePublicationEnabled()) { - logger.trace("sending remote cluster state version [{}] to [{}]", newState.version(), destination); - sendRemoteClusterState(destination, publishRequest.getAcceptedState(), responseActionListener); - } else if (sendFullVersion || previousState.nodes().nodeExists(destination) == false) { - logger.trace("sending full cluster state version [{}] to [{}]", newState.version(), destination); - sendFullClusterState(destination, responseActionListener); - } else { - logger.trace("sending cluster state diff for version [{}] to [{}]", newState.version(), destination); - sendClusterStateDiff(destination, responseActionListener); - } + sendClusterState(destination, responseActionListener); } public void 
sendApplyCommit( @@ -517,58 +541,14 @@ public String executor() { ); } - private void sendRemoteClusterState( - final DiscoveryNode destination, - final ClusterState clusterState, - final ActionListener listener - ) { - try { - final String manifestFileName = ((RemotePersistedState) persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE)) - .getLastUploadedManifestFile(); - final RemotePublishRequest remotePublishRequest = new RemotePublishRequest( - discoveryNodes.getLocalNode(), - clusterState.term(), - clusterState.getVersion(), - clusterState.getClusterName().value(), - clusterState.metadata().clusterUUID(), - manifestFileName - ); - final Consumer transportExceptionHandler = exp -> { - logger.debug(() -> new ParameterizedMessage("failed to send remote cluster state to {}", destination), exp); - listener.onFailure(exp); - }; - final TransportResponseHandler responseHandler = new TransportResponseHandler<>() { - - @Override - public PublishWithJoinResponse read(StreamInput in) throws IOException { - return new PublishWithJoinResponse(in); - } - - @Override - public void handleResponse(PublishWithJoinResponse response) { - listener.onResponse(response); - } - - @Override - public void handleException(TransportException exp) { - transportExceptionHandler.accept(exp); - } - - @Override - public String executor() { - return ThreadPool.Names.GENERIC; - } - }; - transportService.sendRequest( - destination, - PUBLISH_REMOTE_STATE_ACTION_NAME, - remotePublishRequest, - stateRequestOptions, - responseHandler - ); - } catch (Exception e) { - logger.warn(() -> new ParameterizedMessage("error sending remote cluster state to {}", destination), e); - listener.onFailure(e); + public void sendClusterState(DiscoveryNode destination, ActionListener listener) { + logger.debug("sending cluster state over transport to node: {}", destination.getName()); + if (sendFullVersion || previousState.nodes().nodeExists(destination) == false) { + logger.trace("sending full cluster state version [{}] to [{}]", newState.version(), destination); + sendFullClusterState(destination, listener); + } else { + logger.trace("sending cluster state diff for version [{}] to [{}]", newState.version(), destination); + sendClusterStateDiff(destination, listener); } } @@ -648,4 +628,69 @@ public String executor() { } } + /** + * An extension of {@code PublicationContext} to support remote cluster state publication + * + * @opensearch.internal + */ + public class RemotePublicationContext extends PublicationContext { + + RemotePublicationContext(ClusterChangedEvent clusterChangedEvent, PersistedStateRegistry persistedStateRegistry) { + super(clusterChangedEvent, persistedStateRegistry); + } + + @Override + public void sendClusterState(final DiscoveryNode destination, final ActionListener listener) { + try { + logger.debug("sending remote cluster state to node: {}", destination.getName()); + final String manifestFileName = ((RemotePersistedState) persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE)) + .getLastUploadedManifestFile(); + final RemotePublishRequest remotePublishRequest = new RemotePublishRequest( + discoveryNodes.getLocalNode(), + newState.term(), + newState.getVersion(), + newState.getClusterName().value(), + newState.metadata().clusterUUID(), + manifestFileName + ); + final Consumer transportExceptionHandler = exp -> { + logger.debug(() -> new ParameterizedMessage("failed to send remote cluster state to {}", destination), exp); + listener.onFailure(exp); + }; + final TransportResponseHandler 
responseHandler = new TransportResponseHandler<>() { + + @Override + public PublishWithJoinResponse read(StreamInput in) throws IOException { + return new PublishWithJoinResponse(in); + } + + @Override + public void handleResponse(PublishWithJoinResponse response) { + listener.onResponse(response); + } + + @Override + public void handleException(TransportException exp) { + transportExceptionHandler.accept(exp); + } + + @Override + public String executor() { + return ThreadPool.Names.GENERIC; + } + }; + transportService.sendRequest( + destination, + PUBLISH_REMOTE_STATE_ACTION_NAME, + remotePublishRequest, + stateRequestOptions, + responseHandler + ); + } catch (Exception e) { + logger.warn(() -> new ParameterizedMessage("error sending remote cluster state to {}", destination), e); + listener.onFailure(e); + } + } + } + } diff --git a/server/src/main/java/org/opensearch/cluster/coordination/RemoteStatePublishRequest.java b/server/src/main/java/org/opensearch/cluster/coordination/RemoteStatePublishRequest.java new file mode 100644 index 0000000000000..5667e6d67d062 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/coordination/RemoteStatePublishRequest.java @@ -0,0 +1,51 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.coordination; + +import org.opensearch.cluster.ClusterState; +import org.opensearch.gateway.remote.ClusterMetadataManifest; + +import java.util.Objects; + +/** + * PublishRequest created by downloading the accepted {@link ClusterState} from Remote Store, using the published {@link ClusterMetadataManifest} + * + * @opensearch.internal + */ +public class RemoteStatePublishRequest extends PublishRequest { + private final ClusterMetadataManifest manifest; + + public RemoteStatePublishRequest(ClusterState acceptedState, ClusterMetadataManifest acceptedManifest) { + super(acceptedState); + this.manifest = acceptedManifest; + } + + public ClusterMetadataManifest getAcceptedManifest() { + return manifest; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + if (!super.equals(o)) return false; + RemoteStatePublishRequest that = (RemoteStatePublishRequest) o; + return Objects.equals(manifest, that.manifest); + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), manifest); + } + + @Override + public String toString() { + return "RemoteStatePublishRequest{" + super.toString() + "manifest=" + manifest + "} "; + } +} diff --git a/server/src/main/java/org/opensearch/cluster/coordination/StartJoinRequest.java b/server/src/main/java/org/opensearch/cluster/coordination/StartJoinRequest.java index de58eb721b28f..287418aaf378e 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/StartJoinRequest.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/StartJoinRequest.java @@ -64,7 +64,7 @@ public StartJoinRequest(StreamInput input) throws IOException { @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); - sourceNode.writeTo(out); + sourceNode.writeToWithAttribute(out); out.writeLong(term); } diff --git a/server/src/main/java/org/opensearch/cluster/health/ClusterIndexHealth.java b/server/src/main/java/org/opensearch/cluster/health/ClusterIndexHealth.java index 19c64965e6941..77d96cb0af792 100644 --- 
a/server/src/main/java/org/opensearch/cluster/health/ClusterIndexHealth.java +++ b/server/src/main/java/org/opensearch/cluster/health/ClusterIndexHealth.java @@ -32,9 +32,11 @@ package org.opensearch.cluster.health; +import org.opensearch.action.admin.cluster.health.ClusterHealthRequest; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.routing.IndexRoutingTable; import org.opensearch.cluster.routing.IndexShardRoutingTable; +import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.common.annotation.PublicApi; import org.opensearch.core.ParseField; import org.opensearch.core.common.io.stream.StreamInput; @@ -141,6 +143,7 @@ public final class ClusterIndexHealth implements Iterable, W private final int relocatingShards; private final int initializingShards; private final int unassignedShards; + private int delayedUnassignedShards; private final int activePrimaryShards; private final ClusterHealthStatus status; private final Map shards; @@ -163,6 +166,7 @@ public ClusterIndexHealth(final IndexMetadata indexMetadata, final IndexRoutingT int computeRelocatingShards = 0; int computeInitializingShards = 0; int computeUnassignedShards = 0; + int computeDelayedUnassignedShards = 0; for (ClusterShardHealth shardHealth : shards.values()) { if (shardHealth.isPrimaryActive()) { computeActivePrimaryShards++; @@ -171,13 +175,9 @@ public ClusterIndexHealth(final IndexMetadata indexMetadata, final IndexRoutingT computeRelocatingShards += shardHealth.getRelocatingShards(); computeInitializingShards += shardHealth.getInitializingShards(); computeUnassignedShards += shardHealth.getUnassignedShards(); + computeDelayedUnassignedShards += shardHealth.getDelayedUnassignedShards(); - if (shardHealth.getStatus() == ClusterHealthStatus.RED) { - computeStatus = ClusterHealthStatus.RED; - } else if (shardHealth.getStatus() == ClusterHealthStatus.YELLOW && computeStatus != ClusterHealthStatus.RED) { - // do not override an existing red - computeStatus = ClusterHealthStatus.YELLOW; - } + computeStatus = getIndexHealthStatus(shardHealth.getStatus(), computeStatus); } if (shards.isEmpty()) { // might be since none has been created yet (two phase index creation) computeStatus = ClusterHealthStatus.RED; @@ -189,6 +189,110 @@ public ClusterIndexHealth(final IndexMetadata indexMetadata, final IndexRoutingT this.relocatingShards = computeRelocatingShards; this.initializingShards = computeInitializingShards; this.unassignedShards = computeUnassignedShards; + this.delayedUnassignedShards = computeDelayedUnassignedShards; + } + + public ClusterIndexHealth( + final IndexMetadata indexMetadata, + final IndexRoutingTable indexRoutingTable, + final ClusterHealthRequest.Level healthLevel + ) { + this.index = indexMetadata.getIndex().getName(); + this.numberOfShards = indexMetadata.getNumberOfShards(); + this.numberOfReplicas = indexMetadata.getNumberOfReplicas(); + + shards = new HashMap<>(); + + // update the index status + ClusterHealthStatus computeStatus = ClusterHealthStatus.GREEN; + int computeActivePrimaryShards = 0; + int computeActiveShards = 0; + int computeRelocatingShards = 0; + int computeInitializingShards = 0; + int computeUnassignedShards = 0; + int computeDelayedUnassignedShards = 0; + + boolean isShardLevelHealthRequired = healthLevel == ClusterHealthRequest.Level.SHARDS; + if (isShardLevelHealthRequired) { + for (IndexShardRoutingTable indexShardRoutingTable : indexRoutingTable) { + int shardId = indexShardRoutingTable.shardId().id(); + ClusterShardHealth 
shardHealth = new ClusterShardHealth(shardId, indexShardRoutingTable); + if (shardHealth.isPrimaryActive()) { + computeActivePrimaryShards++; + } + computeActiveShards += shardHealth.getActiveShards(); + computeRelocatingShards += shardHealth.getRelocatingShards(); + computeInitializingShards += shardHealth.getInitializingShards(); + computeUnassignedShards += shardHealth.getUnassignedShards(); + computeDelayedUnassignedShards += shardHealth.getDelayedUnassignedShards(); + computeStatus = getIndexHealthStatus(shardHealth.getStatus(), computeStatus); + shards.put(shardId, shardHealth); + } + } else { + for (IndexShardRoutingTable indexShardRoutingTable : indexRoutingTable) { + int activeShardsPerShardId = 0; + + List shardRoutings = indexShardRoutingTable.shards(); + int shardRoutingCountPerShardId = shardRoutings.size(); + for (int index = 0; index < shardRoutingCountPerShardId; index++) { + ShardRouting shardRouting = shardRoutings.get(index); + if (shardRouting.active()) { + computeActiveShards++; + activeShardsPerShardId++; + if (shardRouting.relocating()) { + computeRelocatingShards++; + } + } else if (shardRouting.initializing()) { + computeInitializingShards++; + } else if (shardRouting.unassigned()) { + computeUnassignedShards++; + if (shardRouting.unassignedInfo().isDelayed()) { + computeDelayedUnassignedShards++; + } + } + } + ShardRouting primaryShard = indexShardRoutingTable.primaryShard(); + if (primaryShard.active()) { + computeActivePrimaryShards++; + } + ClusterHealthStatus shardHealth = ClusterShardHealth.getShardHealth( + primaryShard, + activeShardsPerShardId, + shardRoutingCountPerShardId + ); + computeStatus = getIndexHealthStatus(shardHealth, computeStatus); + } + } + + if (indexRoutingTable.shards() != null && indexRoutingTable.shards().isEmpty()) { + // might be since none has been created yet (two phase index creation) + computeStatus = ClusterHealthStatus.RED; + } + + this.status = computeStatus; + this.activePrimaryShards = computeActivePrimaryShards; + this.activeShards = computeActiveShards; + this.relocatingShards = computeRelocatingShards; + this.initializingShards = computeInitializingShards; + this.unassignedShards = computeUnassignedShards; + this.delayedUnassignedShards = computeDelayedUnassignedShards; + + } + + public static ClusterHealthStatus getIndexHealthStatus(ClusterHealthStatus shardHealth, ClusterHealthStatus computeStatus) { + switch (shardHealth) { + case RED: + return ClusterHealthStatus.RED; + case YELLOW: + // do not override an existing red + if (computeStatus != ClusterHealthStatus.RED) { + return ClusterHealthStatus.YELLOW; + } else { + return ClusterHealthStatus.RED; + } + default: + return computeStatus; + } } public ClusterIndexHealth(final StreamInput in) throws IOException { @@ -269,6 +373,10 @@ public int getUnassignedShards() { return unassignedShards; } + public int getDelayedUnassignedShards() { + return delayedUnassignedShards; + } + public ClusterHealthStatus getStatus() { return status; } diff --git a/server/src/main/java/org/opensearch/cluster/health/ClusterShardHealth.java b/server/src/main/java/org/opensearch/cluster/health/ClusterShardHealth.java index 1fe88f65248c2..ace4537a5e291 100644 --- a/server/src/main/java/org/opensearch/cluster/health/ClusterShardHealth.java +++ b/server/src/main/java/org/opensearch/cluster/health/ClusterShardHealth.java @@ -50,6 +50,7 @@ import org.opensearch.core.xcontent.XContentParser; import java.io.IOException; +import java.util.List; import java.util.Locale; import java.util.Objects; @@ 
-109,6 +110,7 @@ public final class ClusterShardHealth implements Writeable, ToXContentFragment { private final int relocatingShards; private final int initializingShards; private final int unassignedShards; + private int delayedUnassignedShards; private final boolean primaryActive; public ClusterShardHealth(final int shardId, final IndexShardRoutingTable shardRoutingTable) { @@ -117,7 +119,10 @@ public ClusterShardHealth(final int shardId, final IndexShardRoutingTable shardR int computeRelocatingShards = 0; int computeInitializingShards = 0; int computeUnassignedShards = 0; - for (ShardRouting shardRouting : shardRoutingTable) { + int computeDelayedUnassignedShards = 0; + List shardRoutings = shardRoutingTable.shards(); + for (int index = 0; index < shardRoutings.size(); index++) { + ShardRouting shardRouting = shardRoutings.get(index); if (shardRouting.active()) { computeActiveShards++; if (shardRouting.relocating()) { @@ -128,24 +133,18 @@ public ClusterShardHealth(final int shardId, final IndexShardRoutingTable shardR computeInitializingShards++; } else if (shardRouting.unassigned()) { computeUnassignedShards++; + if (shardRouting.unassignedInfo() != null && shardRouting.unassignedInfo().isDelayed()) { + computeDelayedUnassignedShards++; + } } } - ClusterHealthStatus computeStatus; final ShardRouting primaryRouting = shardRoutingTable.primaryShard(); - if (primaryRouting.active()) { - if (computeActiveShards == shardRoutingTable.size()) { - computeStatus = ClusterHealthStatus.GREEN; - } else { - computeStatus = ClusterHealthStatus.YELLOW; - } - } else { - computeStatus = getInactivePrimaryHealth(primaryRouting); - } - this.status = computeStatus; + this.status = getShardHealth(primaryRouting, computeActiveShards, shardRoutingTable.size()); this.activeShards = computeActiveShards; this.relocatingShards = computeRelocatingShards; this.initializingShards = computeInitializingShards; this.unassignedShards = computeUnassignedShards; + this.delayedUnassignedShards = computeDelayedUnassignedShards; this.primaryActive = primaryRouting.active(); } @@ -208,6 +207,10 @@ public int getUnassignedShards() { return unassignedShards; } + public int getDelayedUnassignedShards() { + return delayedUnassignedShards; + } + @Override public void writeTo(final StreamOutput out) throws IOException { out.writeVInt(shardId); @@ -219,6 +222,27 @@ public void writeTo(final StreamOutput out) throws IOException { out.writeBoolean(primaryActive); } + /** + * Computes the shard health of an index. + *

+ * Shard health is GREEN when the primary and all replica shards are active. + * Shard health is YELLOW when the primary shard is active but at least one replica shard is inactive. + * Shard health is RED when the primary shard is not active. + *

+ */ + public static ClusterHealthStatus getShardHealth(final ShardRouting primaryRouting, final int activeShards, final int totalShards) { + assert primaryRouting != null : "Primary shard routing can't be null"; + if (primaryRouting.active()) { + if (activeShards == totalShards) { + return ClusterHealthStatus.GREEN; + } else { + return ClusterHealthStatus.YELLOW; + } + } else { + return getInactivePrimaryHealth(primaryRouting); + } + } + /** * Checks if an inactive primary shard should cause the cluster health to go RED. *

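For reference, the status mapping encoded by the extracted getShardHealth helper above can be sketched as a small standalone example. The enum and the boolean/int parameters below are simplified stand-ins for ClusterHealthStatus and the ShardRouting-based counts in the real class, not the actual OpenSearch implementation.

// Illustrative sketch only: simplified stand-ins for ClusterHealthStatus and the routing table counts.
enum Health { GREEN, YELLOW, RED }

final class ShardHealthExample {
    // Mirrors the rule documented above: GREEN when the primary is active and every copy is active,
    // YELLOW when the primary is active but some replicas are not, RED when the primary is inactive.
    static Health shardHealth(boolean primaryActive, int activeShards, int totalShards) {
        if (primaryActive) {
            return activeShards == totalShards ? Health.GREEN : Health.YELLOW;
        }
        return Health.RED;
    }

    public static void main(String[] args) {
        System.out.println(shardHealth(true, 3, 3));  // GREEN
        System.out.println(shardHealth(true, 2, 3));  // YELLOW
        System.out.println(shardHealth(false, 2, 3)); // RED
    }
}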
diff --git a/server/src/main/java/org/opensearch/cluster/health/ClusterStateHealth.java b/server/src/main/java/org/opensearch/cluster/health/ClusterStateHealth.java index 083159bffdc2b..f6cfdd3c42e0c 100644 --- a/server/src/main/java/org/opensearch/cluster/health/ClusterStateHealth.java +++ b/server/src/main/java/org/opensearch/cluster/health/ClusterStateHealth.java @@ -31,6 +31,7 @@ package org.opensearch.cluster.health; +import org.opensearch.action.admin.cluster.health.ClusterHealthRequest; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.routing.IndexRoutingTable; @@ -63,6 +64,7 @@ public final class ClusterStateHealth implements Iterable, W private final int activePrimaryShards; private final int initializingShards; private final int unassignedShards; + private int delayedUnassignedShards; private final double activeShardsPercent; private final ClusterHealthStatus status; private final Map indices; @@ -76,6 +78,10 @@ public ClusterStateHealth(final ClusterState clusterState) { this(clusterState, clusterState.metadata().getConcreteAllIndices()); } + public ClusterStateHealth(final ClusterState clusterState, final ClusterHealthRequest.Level clusterHealthLevel) { + this(clusterState, clusterState.metadata().getConcreteAllIndices(), clusterHealthLevel); + } + /** * Creates a new ClusterStateHealth instance considering the current cluster state and the provided index names. * @@ -105,6 +111,7 @@ public ClusterStateHealth(final ClusterState clusterState, final String[] concre int computeRelocatingShards = 0; int computeInitializingShards = 0; int computeUnassignedShards = 0; + int computeDelayedUnassignedShards = 0; for (ClusterIndexHealth indexHealth : indices.values()) { computeActivePrimaryShards += indexHealth.getActivePrimaryShards(); @@ -112,10 +119,76 @@ public ClusterStateHealth(final ClusterState clusterState, final String[] concre computeRelocatingShards += indexHealth.getRelocatingShards(); computeInitializingShards += indexHealth.getInitializingShards(); computeUnassignedShards += indexHealth.getUnassignedShards(); - if (indexHealth.getStatus() == ClusterHealthStatus.RED) { - computeStatus = ClusterHealthStatus.RED; - } else if (indexHealth.getStatus() == ClusterHealthStatus.YELLOW && computeStatus != ClusterHealthStatus.RED) { - computeStatus = ClusterHealthStatus.YELLOW; + computeDelayedUnassignedShards += indexHealth.getDelayedUnassignedShards(); + computeStatus = getClusterHealthStatus(indexHealth, computeStatus); + } + + if (clusterState.blocks().hasGlobalBlockWithStatus(RestStatus.SERVICE_UNAVAILABLE)) { + computeStatus = ClusterHealthStatus.RED; + } + + this.status = computeStatus; + this.activePrimaryShards = computeActivePrimaryShards; + this.activeShards = computeActiveShards; + this.relocatingShards = computeRelocatingShards; + this.initializingShards = computeInitializingShards; + this.unassignedShards = computeUnassignedShards; + this.delayedUnassignedShards = computeDelayedUnassignedShards; + + // shortcut on green + if (ClusterHealthStatus.GREEN.equals(computeStatus)) { + this.activeShardsPercent = 100; + } else { + List shardRoutings = clusterState.getRoutingTable().allShards(); + int activeShardCount = 0; + int totalShardCount = 0; + for (ShardRouting shardRouting : shardRoutings) { + if (shardRouting.active()) activeShardCount++; + totalShardCount++; + } + this.activeShardsPercent = (((double) activeShardCount) / totalShardCount) * 100; + } + } + + public ClusterStateHealth( + final 
ClusterState clusterState, + final String[] concreteIndices, + final ClusterHealthRequest.Level healthLevel + ) { + numberOfNodes = clusterState.nodes().getSize(); + numberOfDataNodes = clusterState.nodes().getDataNodes().size(); + hasDiscoveredClusterManager = clusterState.nodes().getClusterManagerNodeId() != null; + indices = new HashMap<>(); + boolean isIndexOrShardLevelHealthRequired = healthLevel == ClusterHealthRequest.Level.INDICES + || healthLevel == ClusterHealthRequest.Level.SHARDS; + + ClusterHealthStatus computeStatus = ClusterHealthStatus.GREEN; + int computeActivePrimaryShards = 0; + int computeActiveShards = 0; + int computeRelocatingShards = 0; + int computeInitializingShards = 0; + int computeUnassignedShards = 0; + int computeDelayedUnassignedShards = 0; + + for (String index : concreteIndices) { + IndexRoutingTable indexRoutingTable = clusterState.routingTable().index(index); + IndexMetadata indexMetadata = clusterState.metadata().index(index); + if (indexRoutingTable == null) { + continue; + } + + ClusterIndexHealth indexHealth = new ClusterIndexHealth(indexMetadata, indexRoutingTable, healthLevel); + computeActivePrimaryShards += indexHealth.getActivePrimaryShards(); + computeActiveShards += indexHealth.getActiveShards(); + computeRelocatingShards += indexHealth.getRelocatingShards(); + computeInitializingShards += indexHealth.getInitializingShards(); + computeUnassignedShards += indexHealth.getUnassignedShards(); + computeDelayedUnassignedShards += indexHealth.getDelayedUnassignedShards(); + computeStatus = getClusterHealthStatus(indexHealth, computeStatus); + + if (isIndexOrShardLevelHealthRequired) { + // Store ClusterIndexHealth only when the health is requested at Index or Shard level + indices.put(indexHealth.getIndex(), indexHealth); } } @@ -129,9 +202,10 @@ public ClusterStateHealth(final ClusterState clusterState, final String[] concre this.relocatingShards = computeRelocatingShards; this.initializingShards = computeInitializingShards; this.unassignedShards = computeUnassignedShards; + this.delayedUnassignedShards = computeDelayedUnassignedShards; // shortcut on green - if (computeStatus.equals(ClusterHealthStatus.GREEN)) { + if (ClusterHealthStatus.GREEN.equals(computeStatus)) { this.activeShardsPercent = 100; } else { List shardRoutings = clusterState.getRoutingTable().allShards(); @@ -145,6 +219,22 @@ public ClusterStateHealth(final ClusterState clusterState, final String[] concre } } + private static ClusterHealthStatus getClusterHealthStatus(ClusterIndexHealth indexHealth, ClusterHealthStatus computeStatus) { + switch (indexHealth.getStatus()) { + case RED: + return ClusterHealthStatus.RED; + case YELLOW: + // do not override an existing red + if (computeStatus != ClusterHealthStatus.RED) { + return ClusterHealthStatus.YELLOW; + } else { + return ClusterHealthStatus.RED; + } + default: + return computeStatus; + } + } + public ClusterStateHealth(final StreamInput in) throws IOException { activePrimaryShards = in.readVInt(); activeShards = in.readVInt(); @@ -213,6 +303,10 @@ public int getUnassignedShards() { return unassignedShards; } + public int getDelayedUnassignedShards() { + return delayedUnassignedShards; + } + public int getNumberOfNodes() { return this.numberOfNodes; } diff --git a/server/src/main/java/org/opensearch/cluster/metadata/Context.java b/server/src/main/java/org/opensearch/cluster/metadata/Context.java index 4bd6134e8a318..ceaef4dbc8d14 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/Context.java +++ 
b/server/src/main/java/org/opensearch/cluster/metadata/Context.java @@ -26,7 +26,7 @@ * Class encapsulating the context metadata associated with an index template/index. */ @ExperimentalApi -public class Context extends AbstractDiffable implements ToXContentObject { +public class Context extends AbstractDiffable implements ToXContentObject { private static final ParseField NAME = new ParseField("name"); private static final ParseField VERSION = new ParseField("version"); @@ -103,9 +103,9 @@ public void writeTo(StreamOutput out) throws IOException { public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); builder.field(NAME.getPreferredName(), this.name); - builder.field("version", this.version); - if (params != null) { - builder.field("params", this.params); + builder.field(VERSION.getPreferredName(), this.version); + if (this.params != null) { + builder.field(PARAMS.getPreferredName(), this.params); } builder.endObject(); return builder; @@ -127,4 +127,9 @@ public boolean equals(Object o) { public int hashCode() { return Objects.hash(name, version, params); } + + @Override + public String toString() { + return "Context{" + "name='" + name + '\'' + ", version='" + version + '\'' + ", params=" + params + '}'; + } } diff --git a/server/src/main/java/org/opensearch/cluster/metadata/IndexMetadata.java b/server/src/main/java/org/opensearch/cluster/metadata/IndexMetadata.java index df0d2609ad83d..713f8c9fc332c 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/IndexMetadata.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/IndexMetadata.java @@ -54,8 +54,10 @@ import org.opensearch.common.xcontent.XContentHelper; import org.opensearch.core.Assertions; import org.opensearch.core.common.Strings; +import org.opensearch.core.common.io.stream.BufferedChecksumStreamOutput; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.VerifiableWriteable; import org.opensearch.core.common.io.stream.Writeable; import org.opensearch.core.index.Index; import org.opensearch.core.index.shard.ShardId; @@ -69,6 +71,7 @@ import org.opensearch.index.IndexModule; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.seqno.SequenceNumbers; +import org.opensearch.indices.replication.SegmentReplicationSource; import org.opensearch.indices.replication.common.ReplicationType; import java.io.IOException; @@ -87,6 +90,7 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.TreeSet; import java.util.function.Function; import static org.opensearch.cluster.metadata.Metadata.CONTEXT_MODE_PARAM; @@ -102,7 +106,7 @@ * @opensearch.api */ @PublicApi(since = "1.0.0") -public class IndexMetadata implements Diffable, ToXContentFragment { +public class IndexMetadata implements Diffable, ToXContentFragment, VerifiableWriteable { public static final ClusterBlock INDEX_READ_ONLY_BLOCK = new ClusterBlock( 5, @@ -243,6 +247,22 @@ static Setting buildNumberOfShardsSetting() { Property.IndexScope ); + /** + * Setting to control the number of search only replicas for an index. + * A search only replica exists solely to perform read operations for a shard and are designed to achieve + * isolation from writers (primary shards). This means they are not primary eligible and do not have any direct communication + * with their primary. 
Search replicas require the use of Segment Replication on the index and poll their {@link SegmentReplicationSource} for + * updates. //TODO: Once physical isolation is introduced, reference the setting here. + */ + public static final String SETTING_NUMBER_OF_SEARCH_REPLICAS = "index.number_of_search_only_replicas"; + public static final Setting INDEX_NUMBER_OF_SEARCH_REPLICAS_SETTING = Setting.intSetting( + SETTING_NUMBER_OF_SEARCH_REPLICAS, + 0, + 0, + Property.Dynamic, + Property.IndexScope + ); + public static final String SETTING_ROUTING_PARTITION_SIZE = "index.routing_partition_size"; public static final Setting INDEX_ROUTING_PARTITION_SIZE_SETTING = Setting.intSetting( SETTING_ROUTING_PARTITION_SIZE, @@ -639,6 +659,7 @@ public static APIBlock readFrom(StreamInput input) throws IOException { public static final String KEY_PRIMARY_TERMS = "primary_terms"; public static final String REMOTE_STORE_CUSTOM_KEY = "remote_store"; public static final String TRANSLOG_METADATA_KEY = "translog_metadata"; + public static final String CONTEXT_KEY = "context"; public static final String INDEX_STATE_FILE_PREFIX = "state-"; @@ -648,6 +669,7 @@ public static APIBlock readFrom(StreamInput input) throws IOException { private final int numberOfShards; private final int numberOfReplicas; + private final int numberOfSearchOnlyReplicas; private final Index index; private final long version; @@ -689,6 +711,8 @@ public static APIBlock readFrom(StreamInput input) throws IOException { private final int indexTotalShardsPerNodeLimit; + private final Context context; + private IndexMetadata( final Index index, final long version, @@ -699,6 +723,7 @@ private IndexMetadata( final State state, final int numberOfShards, final int numberOfReplicas, + final int numberOfSearchOnlyReplicas, final Settings settings, final Map mappings, final Map aliases, @@ -715,7 +740,8 @@ private IndexMetadata( final ActiveShardCount waitForActiveShards, final Map rolloverInfos, final boolean isSystem, - final int indexTotalShardsPerNodeLimit + final int indexTotalShardsPerNodeLimit, + final Context context ) { this.index = index; @@ -731,7 +757,8 @@ private IndexMetadata( this.state = state; this.numberOfShards = numberOfShards; this.numberOfReplicas = numberOfReplicas; - this.totalNumberOfShards = numberOfShards * (numberOfReplicas + 1); + this.numberOfSearchOnlyReplicas = numberOfSearchOnlyReplicas; + this.totalNumberOfShards = numberOfShards * (numberOfReplicas + numberOfSearchOnlyReplicas + 1); this.settings = settings; this.mappings = Collections.unmodifiableMap(mappings); this.customData = Collections.unmodifiableMap(customData); @@ -751,6 +778,7 @@ private IndexMetadata( this.isSystem = isSystem; this.isRemoteSnapshot = IndexModule.Type.REMOTE_SNAPSHOT.match(this.settings); this.indexTotalShardsPerNodeLimit = indexTotalShardsPerNodeLimit; + this.context = context; assert numberOfShards * routingFactor == routingNumShards : routingNumShards + " must be a multiple of " + numberOfShards; } @@ -833,6 +861,10 @@ public int getNumberOfReplicas() { return numberOfReplicas; } + public int getNumberOfSearchOnlyReplicas() { + return numberOfSearchOnlyReplicas; + } + public int getRoutingPartitionSize() { return routingPartitionSize; } @@ -979,6 +1011,9 @@ public boolean equals(Object o) { if (isSystem != that.isSystem) { return false; } + if (!Objects.equals(context, that.context)) { + return false; + } return true; } @@ -997,6 +1032,7 @@ public int hashCode() { result = 31 * result + inSyncAllocationIds.hashCode(); result = 31 * result + 
rolloverInfos.hashCode(); result = 31 * result + Boolean.hashCode(isSystem); + result = 31 * result + Objects.hashCode(context); return result; } @@ -1041,6 +1077,7 @@ private static class IndexMetadataDiff implements Diff { private final Diff>> inSyncAllocationIds; private final Diff> rolloverInfos; private final boolean isSystem; + private final Context context; IndexMetadataDiff(IndexMetadata before, IndexMetadata after) { index = after.index.getName(); @@ -1063,6 +1100,7 @@ private static class IndexMetadataDiff implements Diff { ); rolloverInfos = DiffableUtils.diff(before.rolloverInfos, after.rolloverInfos, DiffableUtils.getStringKeySerializer()); isSystem = after.isSystem; + context = after.context; } private static final DiffableUtils.DiffableValueReader ALIAS_METADATA_DIFF_VALUE_READER = @@ -1094,6 +1132,11 @@ private static class IndexMetadataDiff implements Diff { ); rolloverInfos = DiffableUtils.readJdkMapDiff(in, DiffableUtils.getStringKeySerializer(), ROLLOVER_INFO_DIFF_VALUE_READER); isSystem = in.readBoolean(); + if (in.getVersion().onOrAfter(Version.V_2_17_0)) { + context = in.readOptionalWriteable(Context::new); + } else { + context = null; + } } @Override @@ -1113,6 +1156,9 @@ public void writeTo(StreamOutput out) throws IOException { inSyncAllocationIds.writeTo(out); rolloverInfos.writeTo(out); out.writeBoolean(isSystem); + if (out.getVersion().onOrAfter(Version.V_2_17_0)) { + out.writeOptionalWriteable(context); + } } @Override @@ -1132,6 +1178,7 @@ public IndexMetadata apply(IndexMetadata part) { builder.inSyncAllocationIds.putAll(inSyncAllocationIds.apply(part.inSyncAllocationIds)); builder.rolloverInfos.putAll(rolloverInfos.apply(part.rolloverInfos)); builder.system(part.isSystem); + builder.context(context); return builder.build(); } } @@ -1173,6 +1220,10 @@ public static IndexMetadata readFrom(StreamInput in) throws IOException { builder.putRolloverInfo(new RolloverInfo(in)); } builder.system(in.readBoolean()); + + if (in.getVersion().onOrAfter(Version.V_2_17_0)) { + builder.context(in.readOptionalWriteable(Context::new)); + } return builder.build(); } @@ -1210,12 +1261,84 @@ public void writeTo(StreamOutput out) throws IOException { cursor.writeTo(out); } out.writeBoolean(isSystem); + + if (out.getVersion().onOrAfter(Version.V_2_17_0)) { + out.writeOptionalWriteable(context); + } + } + + @Override + public void writeVerifiableTo(BufferedChecksumStreamOutput out) throws IOException { + out.writeString(index.getName()); // uuid will come as part of settings + out.writeLong(version); + out.writeVLong(mappingVersion); + out.writeVLong(settingsVersion); + out.writeVLong(aliasesVersion); + out.writeInt(routingNumShards); + out.writeByte(state.id()); + writeSettingsToStream(settings, out); + out.writeVLongArray(primaryTerms); + out.writeMapValues(mappings, (stream, val) -> val.writeVerifiableTo((BufferedChecksumStreamOutput) stream)); + out.writeMapValues(aliases, (stream, val) -> val.writeTo(stream)); + out.writeMap(customData, StreamOutput::writeString, (stream, val) -> val.writeTo(stream)); + out.writeMap( + inSyncAllocationIds, + StreamOutput::writeVInt, + (stream, val) -> DiffableUtils.StringSetValueSerializer.getInstance().write(new TreeSet<>(val), stream) + ); + out.writeMapValues(rolloverInfos, (stream, val) -> val.writeTo(stream)); + out.writeBoolean(isSystem); + if (out.getVersion().onOrAfter(Version.V_2_17_0)) { + out.writeOptionalWriteable(context); + } + } + + @Override + public String toString() { + return new 
StringBuilder().append("IndexMetadata{routingNumShards=") + .append(routingNumShards) + .append(", index=") + .append(index) + .append(", version=") + .append(version) + .append(", state=") + .append(state) + .append(", settingsVersion=") + .append(settingsVersion) + .append(", mappingVersion=") + .append(mappingVersion) + .append(", aliasesVersion=") + .append(aliasesVersion) + .append(", primaryTerms=") + .append(Arrays.toString(primaryTerms)) + .append(", aliases=") + .append(aliases) + .append(", settings=") + .append(settings) + .append(", mappings=") + .append(mappings) + .append(", customData=") + .append(customData) + .append(", inSyncAllocationIds=") + .append(inSyncAllocationIds) + .append(", rolloverInfos=") + .append(rolloverInfos) + .append(", isSystem=") + .append(isSystem) + .append(", context=") + .append(context) + .append("}") + .toString(); } public boolean isSystem() { return isSystem; } + public Context context() { + return context; + } + public boolean isRemoteSnapshot() { return isRemoteSnapshot; } @@ -1251,6 +1374,7 @@ public static class Builder { private final Map rolloverInfos; private Integer routingNumShards; private boolean isSystem; + private Context context; public Builder(String index) { this.index = index; @@ -1278,6 +1402,7 @@ public Builder(IndexMetadata indexMetadata) { this.inSyncAllocationIds = new HashMap<>(indexMetadata.inSyncAllocationIds); this.rolloverInfos = new HashMap<>(indexMetadata.rolloverInfos); this.isSystem = indexMetadata.isSystem; + this.context = indexMetadata.context; } public Builder index(String index) { @@ -1324,6 +1449,11 @@ public Builder numberOfReplicas(int numberOfReplicas) { return this; } + public Builder numberOfSearchReplicas(int numberOfSearchReplicas) { + settings = Settings.builder().put(settings).put(SETTING_NUMBER_OF_SEARCH_REPLICAS, numberOfSearchReplicas).build(); + return this; + } + public Builder routingPartitionSize(int routingPartitionSize) { settings = Settings.builder().put(settings).put(SETTING_ROUTING_PARTITION_SIZE, routingPartitionSize).build(); return this; @@ -1494,6 +1624,15 @@ public boolean isSystem() { return isSystem; } + public Builder context(Context context) { + this.context = context; + return this; + } + + public Context context() { + return context; + } + public IndexMetadata build() { final Map tmpAliases = aliases; Settings tmpSettings = settings; @@ -1511,6 +1650,7 @@ public IndexMetadata build() { throw new IllegalArgumentException("must specify number of replicas for index [" + index + "]"); } final int numberOfReplicas = INDEX_NUMBER_OF_REPLICAS_SETTING.get(settings); + final int numberOfSearchReplicas = INDEX_NUMBER_OF_SEARCH_REPLICAS_SETTING.get(settings); int routingPartitionSize = INDEX_ROUTING_PARTITION_SIZE_SETTING.get(settings); if (routingPartitionSize != 1 && routingPartitionSize >= getRoutingNumShards()) { @@ -1606,6 +1746,7 @@ public IndexMetadata build() { state, numberOfShards, numberOfReplicas, + numberOfSearchReplicas, tmpSettings, mappings, tmpAliases, @@ -1622,7 +1763,8 @@ public IndexMetadata build() { waitForActiveShards, rolloverInfos, isSystem, - indexTotalShardsPerNodeLimit + indexTotalShardsPerNodeLimit, + context ); } @@ -1725,6 +1867,11 @@ public static void toXContent(IndexMetadata indexMetadata, XContentBuilder build builder.endObject(); builder.field(KEY_SYSTEM, indexMetadata.isSystem); + if (indexMetadata.context != null) { + builder.field(CONTEXT_KEY); + indexMetadata.context.toXContent(builder, params); + } + builder.endObject(); } @@ -1806,6 +1953,8 @@ 
public static IndexMetadata fromXContent(XContentParser parser) throws IOExcepti // simply ignored when upgrading from 2.x assert Version.CURRENT.major <= 5; parser.skipChildren(); + } else if (CONTEXT_KEY.equals(currentFieldName)) { + builder.context(Context.fromXContent(parser)); } else { // assume it's custom index metadata builder.putCustom(currentFieldName, parser.mapStrings()); diff --git a/server/src/main/java/org/opensearch/cluster/metadata/IndexTemplateMetadata.java b/server/src/main/java/org/opensearch/cluster/metadata/IndexTemplateMetadata.java index 3d532208bcfe2..b09acc54653c4 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/IndexTemplateMetadata.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/IndexTemplateMetadata.java @@ -44,8 +44,10 @@ import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.common.xcontent.XContentHelper; import org.opensearch.common.xcontent.json.JsonXContent; +import org.opensearch.core.common.io.stream.BufferedChecksumStreamOutput; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.VerifiableWriteable; import org.opensearch.core.xcontent.ToXContent; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; @@ -67,7 +69,7 @@ * @opensearch.api */ @PublicApi(since = "1.0.0") -public class IndexTemplateMetadata extends AbstractDiffable { +public class IndexTemplateMetadata extends AbstractDiffable implements VerifiableWriteable { private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(IndexTemplateMetadata.class); @@ -257,6 +259,17 @@ public void writeTo(StreamOutput out) throws IOException { out.writeOptionalVInt(version); } + @Override + public void writeVerifiableTo(BufferedChecksumStreamOutput out) throws IOException { + out.writeString(name); + out.writeInt(order); + out.writeStringCollection(patterns); + Settings.writeSettingsToStream(settings, out); + out.writeMap(mappings, StreamOutput::writeString, (stream, val) -> val.writeTo(stream)); + out.writeMapValues(aliases, (stream, val) -> val.writeTo(stream)); + out.writeOptionalVInt(version); + } + @Override public String toString() { try { diff --git a/server/src/main/java/org/opensearch/cluster/metadata/MappingMetadata.java b/server/src/main/java/org/opensearch/cluster/metadata/MappingMetadata.java index e8180613c0fa3..5856e7861d91c 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/MappingMetadata.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/MappingMetadata.java @@ -40,8 +40,10 @@ import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.common.xcontent.XContentHelper; import org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.core.common.io.stream.BufferedChecksumStreamOutput; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.VerifiableWriteable; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.index.mapper.DocumentMapper; import org.opensearch.index.mapper.MapperService; @@ -60,7 +62,7 @@ * @opensearch.api */ @PublicApi(since = "1.0.0") -public class MappingMetadata extends AbstractDiffable { +public class MappingMetadata extends AbstractDiffable implements VerifiableWriteable { public static final MappingMetadata EMPTY_MAPPINGS = new 
MappingMetadata(MapperService.SINGLE_MAPPING_NAME, Collections.emptyMap()); private final String type; @@ -164,6 +166,13 @@ public void writeTo(StreamOutput out) throws IOException { out.writeBoolean(routingRequired); } + @Override + public void writeVerifiableTo(BufferedChecksumStreamOutput out) throws IOException { + out.writeString(type()); + source().writeVerifiableTo(out); + out.writeBoolean(routingRequired); + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java b/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java index 6163fd624c838..600f408cc963b 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java @@ -253,7 +253,7 @@ static Custom fromXContent(XContentParser parser, String name) throws IOExceptio public static final String GLOBAL_STATE_FILE_PREFIX = "global-"; - private static final NamedDiffableValueSerializer CUSTOM_VALUE_SERIALIZER = new NamedDiffableValueSerializer<>(Custom.class); + public static final NamedDiffableValueSerializer CUSTOM_VALUE_SERIALIZER = new NamedDiffableValueSerializer<>(Custom.class); private final String clusterUUID; private final boolean clusterUUIDCommitted; @@ -1513,6 +1513,24 @@ public Builder updateNumberOfReplicas(final int numberOfReplicas, final String[] return this; } + /** + * Update the number of search replicas for the specified indices. + * + * @param numberOfSearchReplicas the number of search replicas + * @param indices the indices to update the number of replicas for + * @return the builder + */ + public Builder updateNumberOfSearchReplicas(final int numberOfSearchReplicas, final String[] indices) { + for (String index : indices) { + IndexMetadata indexMetadata = this.indices.get(index); + if (indexMetadata == null) { + throw new IndexNotFoundException(index); + } + put(IndexMetadata.builder(indexMetadata).numberOfSearchReplicas(numberOfSearchReplicas)); + } + return this; + } + public Builder coordinationMetadata(CoordinationMetadata coordinationMetadata) { this.coordinationMetadata = coordinationMetadata; return this; diff --git a/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java b/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java index 50d25b11ef810..8b08927bc146a 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java @@ -49,6 +49,7 @@ import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ack.ClusterStateUpdateResponse; import org.opensearch.cluster.ack.CreateIndexClusterStateUpdateResponse; +import org.opensearch.cluster.applicationtemplates.SystemTemplatesService; import org.opensearch.cluster.block.ClusterBlock; import org.opensearch.cluster.block.ClusterBlockLevel; import org.opensearch.cluster.block.ClusterBlocks; @@ -75,9 +76,12 @@ import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.FeatureFlags; +import org.opensearch.common.util.set.Sets; import org.opensearch.common.xcontent.XContentHelper; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.Strings; +import org.opensearch.core.common.unit.ByteSizeValue; import 
org.opensearch.core.index.Index; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.env.Environment; @@ -85,7 +89,9 @@ import org.opensearch.index.IndexNotFoundException; import org.opensearch.index.IndexService; import org.opensearch.index.IndexSettings; +import org.opensearch.index.compositeindex.CompositeIndexSettings; import org.opensearch.index.compositeindex.CompositeIndexValidator; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeIndexSettings; import org.opensearch.index.mapper.DocumentMapper; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.mapper.MapperService.MergeReason; @@ -98,6 +104,7 @@ import org.opensearch.index.translog.Translog; import org.opensearch.indices.IndexCreationException; import org.opensearch.indices.IndicesService; +import org.opensearch.indices.InvalidIndexContextException; import org.opensearch.indices.InvalidIndexNameException; import org.opensearch.indices.RemoteStoreSettings; import org.opensearch.indices.ShardLimitValidator; @@ -125,27 +132,31 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiConsumer; import java.util.function.BiFunction; -import java.util.function.Function; import java.util.function.Predicate; import java.util.function.Supplier; import java.util.stream.Collectors; import java.util.stream.IntStream; +import java.util.stream.Stream; import static java.util.stream.Collectors.toList; import static org.opensearch.cluster.metadata.IndexMetadata.INDEX_NUMBER_OF_REPLICAS_SETTING; +import static org.opensearch.cluster.metadata.IndexMetadata.INDEX_NUMBER_OF_SEARCH_REPLICAS_SETTING; import static org.opensearch.cluster.metadata.IndexMetadata.INDEX_NUMBER_OF_SHARDS_SETTING; import static org.opensearch.cluster.metadata.IndexMetadata.INDEX_REPLICATION_TYPE_SETTING; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_CREATION_DATE; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_INDEX_UUID; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_SEGMENT_STORE_REPOSITORY; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_STORE_ENABLED; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REPLICATION_TYPE; import static org.opensearch.cluster.metadata.Metadata.DEFAULT_REPLICA_COUNT_SETTING; +import static org.opensearch.cluster.metadata.MetadataIndexTemplateService.findContextTemplateName; import static org.opensearch.index.IndexModule.INDEX_STORE_TYPE_SETTING; +import static org.opensearch.index.IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING; import static org.opensearch.indices.IndicesService.CLUSTER_REPLICATION_TYPE_SETTING; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.isRemoteDataAttributePresent; import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; @@ -494,20 +505,30 @@ private ClusterState applyCreateIndexWithTemporaryService( final IndexMetadata sourceMetadata, final IndexMetadata 
temporaryIndexMeta, final List> mappings, - final Function> aliasSupplier, + final BiFunction, List> aliasSupplier, final List templatesApplied, final BiConsumer metadataTransformer ) throws Exception { // create the index here (on the master) to validate it can be created, as well as adding the mapping return indicesService.withTempIndexService(temporaryIndexMeta, indexService -> { + Settings.Builder tmpSettingsBuilder = Settings.builder().put(temporaryIndexMeta.getSettings()); + + List> updatedMappings = new ArrayList<>(); + updatedMappings.addAll(mappings); + + Template contextTemplate = applyContext(request, currentState, updatedMappings, tmpSettingsBuilder); + try { - updateIndexMappingsAndBuildSortOrder(indexService, request, mappings, sourceMetadata); + updateIndexMappingsAndBuildSortOrder(indexService, request, updatedMappings, sourceMetadata); } catch (Exception e) { logger.log(silent ? Level.DEBUG : Level.INFO, "failed on parsing mappings on index creation [{}]", request.index(), e); throw e; } - final List aliases = aliasSupplier.apply(indexService); + final List aliases = aliasSupplier.apply( + indexService, + Optional.ofNullable(contextTemplate).map(Template::aliases).orElse(Map.of()) + ); final IndexMetadata indexMetadata; try { @@ -515,11 +536,12 @@ private ClusterState applyCreateIndexWithTemporaryService( request.index(), aliases, indexService.mapperService()::documentMapper, - temporaryIndexMeta.getSettings(), + tmpSettingsBuilder.build(), temporaryIndexMeta.getRoutingNumShards(), sourceMetadata, temporaryIndexMeta.isSystem(), - temporaryIndexMeta.getCustomData() + temporaryIndexMeta.getCustomData(), + temporaryIndexMeta.context() ); } catch (Exception e) { logger.info("failed to build index metadata [{}]", request.index()); @@ -541,6 +563,54 @@ private ClusterState applyCreateIndexWithTemporaryService( }); } + Template applyContext( + CreateIndexClusterStateUpdateRequest request, + ClusterState currentState, + List> mappings, + Settings.Builder settingsBuilder + ) throws IOException { + if (request.context() != null) { + ComponentTemplate componentTemplate = MetadataIndexTemplateService.findComponentTemplate( + currentState.metadata(), + request.context() + ); + + if (componentTemplate.template().mappings() != null) { + // Mappings added at last (priority to mappings provided) + mappings.add(MapperService.parseMapping(xContentRegistry, componentTemplate.template().mappings().toString())); + } + + if (componentTemplate.template().settings() != null) { + validateOverlap(settingsBuilder.keys(), componentTemplate.template().settings(), request.index()).ifPresent(message -> { + ValidationException validationException = new ValidationException(); + validationException.addValidationError(message); + throw validationException; + }); + // Settings applied at last + settingsBuilder.put(componentTemplate.template().settings()); + } + + settingsBuilder.put(IndexSettings.INDEX_CONTEXT_CREATED_VERSION.getKey(), componentTemplate.version()); + settingsBuilder.put(IndexSettings.INDEX_CONTEXT_CURRENT_VERSION.getKey(), componentTemplate.version()); + + return componentTemplate.template(); + } + return null; + } + + static Optional validateOverlap(Set requestSettings, Settings contextTemplateSettings, String indexName) { + if (requestSettings.stream().anyMatch(contextTemplateSettings::hasValue)) { + return Optional.of( + "Cannot apply context template as user provide settings have overlap with the included context template." 
+ + "Please remove the settings [" + + Sets.intersection(requestSettings, contextTemplateSettings.keySet()) + + "] to continue using the context for index: " + + indexName + ); + } + return Optional.empty(); + } + /** * Given a state and index settings calculated after applying templates, validate metadata for * the new index, returning an {@link IndexMetadata} for the new index. @@ -567,6 +637,10 @@ IndexMetadata buildAndValidateTemporaryIndexMetadata( tmpImdBuilder.system(isSystem); addRemoteStoreCustomMetadata(tmpImdBuilder, true); + if (request.context() != null) { + tmpImdBuilder.context(request.context()); + } + // Set up everything, now locally create the index to see that things are ok, and apply IndexMetadata tempMetadata = tmpImdBuilder.build(); validateActiveShardCount(request.waitForActiveShards(), tempMetadata); @@ -647,10 +721,10 @@ private ClusterState applyCreateIndexRequestWithV1Templates( null, tmpImd, Collections.singletonList(mappings), - indexService -> resolveAndValidateAliases( + (indexService, contextAlias) -> resolveAndValidateAliases( request.index(), request.aliases(), - MetadataIndexTemplateService.resolveAliases(templates), + Stream.concat(Stream.of(contextAlias), MetadataIndexTemplateService.resolveAliases(templates).stream()).collect(toList()), currentState.metadata(), aliasValidator, // the context is only used for validation so it's fine to pass fake values for the @@ -712,10 +786,13 @@ private ClusterState applyCreateIndexRequestWithV2Template( null, tmpImd, mappings, - indexService -> resolveAndValidateAliases( + (indexService, contextAlias) -> resolveAndValidateAliases( request.index(), request.aliases(), - MetadataIndexTemplateService.resolveAliases(currentState.metadata(), templateName), + Stream.concat( + Stream.of(contextAlias), + MetadataIndexTemplateService.resolveAliases(currentState.metadata(), templateName).stream() + ).collect(toList()), currentState.metadata(), aliasValidator, // the context is only used for validation so it's fine to pass fake values for the @@ -793,7 +870,7 @@ private ClusterState applyCreateIndexRequestWithExistingMetadata( sourceMetadata, tmpImd, Collections.singletonList(mappings), - indexService -> resolveAndValidateAliases( + (indexService, contextTemplate) -> resolveAndValidateAliases( request.index(), request.aliases(), Collections.emptyList(), @@ -962,6 +1039,9 @@ static Settings aggregateIndexSettings( updateReplicationStrategy(indexSettingsBuilder, request.settings(), settings, combinedTemplateSettings, clusterSettings); updateRemoteStoreSettings(indexSettingsBuilder, currentState, clusterSettings, settings, request.index()); + if (FeatureFlags.isEnabled(FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL_SETTING)) { + updateSearchOnlyReplicas(request.settings(), indexSettingsBuilder); + } if (sourceMetadata != null) { assert request.resizeType() != null; @@ -996,11 +1076,28 @@ static Settings aggregateIndexSettings( validateTranslogRetentionSettings(indexSettings); validateStoreTypeSettings(indexSettings); validateRefreshIntervalSettings(request.settings(), clusterSettings); + validateTranslogFlushIntervalSettingsForCompositeIndex(request.settings(), clusterSettings); validateTranslogDurabilitySettings(request.settings(), clusterSettings, settings); - return indexSettings; } + private static void updateSearchOnlyReplicas(Settings requestSettings, Settings.Builder builder) { + if (INDEX_NUMBER_OF_SEARCH_REPLICAS_SETTING.exists(builder) && builder.get(SETTING_NUMBER_OF_SEARCH_REPLICAS) != null) { + if 
(INDEX_NUMBER_OF_SEARCH_REPLICAS_SETTING.get(requestSettings) > 0 + && ReplicationType.parseString(builder.get(INDEX_REPLICATION_TYPE_SETTING.getKey())).equals(ReplicationType.DOCUMENT)) { + throw new IllegalArgumentException( + "To set " + + SETTING_NUMBER_OF_SEARCH_REPLICAS + + ", " + + INDEX_REPLICATION_TYPE_SETTING.getKey() + + " must be set to " + + ReplicationType.SEGMENT + ); + } + builder.put(SETTING_NUMBER_OF_SEARCH_REPLICAS, INDEX_NUMBER_OF_SEARCH_REPLICAS_SETTING.get(requestSettings)); + } + } + /** * Updates index settings to set replication strategy by default based on cluster level settings or remote store * node attributes @@ -1236,7 +1333,8 @@ static IndexMetadata buildIndexMetadata( int routingNumShards, @Nullable IndexMetadata sourceMetadata, boolean isSystem, - Map customData + Map customData, + Context context ) { IndexMetadata.Builder indexMetadataBuilder = createIndexMetadataBuilder(indexName, sourceMetadata, indexSettings, routingNumShards); indexMetadataBuilder.system(isSystem); @@ -1261,6 +1359,8 @@ static IndexMetadata buildIndexMetadata( indexMetadataBuilder.putCustom(entry.getKey(), entry.getValue()); } + indexMetadataBuilder.context(context); + indexMetadataBuilder.state(IndexMetadata.State.OPEN); return indexMetadataBuilder.build(); } @@ -1354,6 +1454,7 @@ private static void validateActiveShardCount(ActiveShardCount waitForActiveShard private void validate(CreateIndexClusterStateUpdateRequest request, ClusterState state) { validateIndexName(request.index(), state); validateIndexSettings(request.index(), request.settings(), forbidPrivateIndexSettings); + validateContext(request); } public void validateIndexSettings(String indexName, final Settings settings, final boolean forbidPrivateIndexSettings) @@ -1644,6 +1745,71 @@ public static void validateTranslogRetentionSettings(Settings indexSettings) { } } + /** + * Validates {@code index.translog.flush_threshold_size} is equal or below the {@code indices.composite_index.translog.max_flush_threshold_size} + * for composite indices based on {{@code index.composite_index}} + * + * @param requestSettings settings passed in during index create/update request + * @param clusterSettings cluster setting + */ + public static void validateTranslogFlushIntervalSettingsForCompositeIndex(Settings requestSettings, ClusterSettings clusterSettings) { + if (StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.exists(requestSettings) == false + || requestSettings.get(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey()) == null) { + return; + } + ByteSizeValue translogFlushSize = INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.get(requestSettings); + ByteSizeValue compositeIndexMaxFlushSize = clusterSettings.get( + CompositeIndexSettings.COMPOSITE_INDEX_MAX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING + ); + if (translogFlushSize.compareTo(compositeIndexMaxFlushSize) > 0) { + throw new IllegalArgumentException( + String.format( + Locale.ROOT, + "You can configure '%s' with upto '%s' for composite index", + INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), + compositeIndexMaxFlushSize + ) + ); + } + } + + /** + * Validates {@code index.translog.flush_threshold_size} is equal or below the {@code indices.composite_index.translog.max_flush_threshold_size} + * for composite indices based on {{@code index.composite_index}} + * This is used during update index settings flow + * + * @param requestSettings settings passed in during index update request + * @param clusterSettings cluster setting + * @param indexSettings index settings + */ + 
public static Optional validateTranslogFlushIntervalSettingsForCompositeIndex( + Settings requestSettings, + ClusterSettings clusterSettings, + Settings indexSettings + ) { + if (INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.exists(requestSettings) == false + || requestSettings.get(INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey()) == null + || StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.exists(indexSettings) == false + || indexSettings.get(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey()) == null) { + return Optional.empty(); + } + ByteSizeValue translogFlushSize = INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.get(requestSettings); + ByteSizeValue compositeIndexMaxFlushSize = clusterSettings.get( + CompositeIndexSettings.COMPOSITE_INDEX_MAX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING + ); + if (translogFlushSize.compareTo(compositeIndexMaxFlushSize) > 0) { + return Optional.of( + String.format( + Locale.ROOT, + "You can configure '%s' with upto '%s' for composite index", + INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), + compositeIndexMaxFlushSize + ) + ); + } + return Optional.empty(); + } + /** * Validates {@code index.refresh_interval} is equal or below the {@code cluster.minimum.index.refresh_interval}. * @@ -1694,4 +1860,25 @@ static void validateTranslogDurabilitySettings(Settings requestSettings, Cluster } } + + void validateContext(CreateIndexClusterStateUpdateRequest request) { + final boolean isContextAllowed = FeatureFlags.isEnabled(FeatureFlags.APPLICATION_BASED_CONFIGURATION_TEMPLATES); + + if (request.context() != null && !isContextAllowed) { + throw new InvalidIndexContextException( + request.context().name(), + request.index(), + "index specifies a context which cannot be used without enabling: " + + SystemTemplatesService.SETTING_APPLICATION_BASED_CONFIGURATION_TEMPLATES_ENABLED.getKey() + ); + } + + if (request.context() != null && findContextTemplateName(clusterService.state().metadata(), request.context()) == null) { + throw new InvalidIndexContextException( + request.context().name(), + request.index(), + "index specifies a context which is not loaded on the cluster." 
+ ); + } + } } diff --git a/server/src/main/java/org/opensearch/cluster/metadata/MetadataIndexTemplateService.java b/server/src/main/java/org/opensearch/cluster/metadata/MetadataIndexTemplateService.java index 6b638c9920c27..e4afc798cc64d 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/MetadataIndexTemplateService.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/MetadataIndexTemplateService.java @@ -71,9 +71,11 @@ import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.index.IndexService; +import org.opensearch.index.IndexSettings; import org.opensearch.index.mapper.MapperParsingException; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.mapper.MapperService.MergeReason; +import org.opensearch.index.translog.Translog; import org.opensearch.indices.IndexTemplateMissingException; import org.opensearch.indices.IndicesService; import org.opensearch.indices.InvalidIndexTemplateException; @@ -100,6 +102,7 @@ import static org.opensearch.cluster.metadata.MetadataCreateDataStreamService.validateTimestampFieldMapping; import static org.opensearch.cluster.metadata.MetadataCreateIndexService.validateRefreshIntervalSettings; +import static org.opensearch.cluster.metadata.MetadataCreateIndexService.validateTranslogFlushIntervalSettingsForCompositeIndex; import static org.opensearch.common.util.concurrent.ThreadContext.ACTION_ORIGIN_TRANSIENT_NAME; import static org.opensearch.indices.cluster.IndicesClusterStateService.AllocatedIndices.IndexRemovalReason.NO_LONGER_ASSIGNED; @@ -452,7 +455,7 @@ static void validateNotInUse(Metadata metadata, String templateNameOrWildcard) { final Set componentsBeingUsed = new HashSet<>(); final List templatesStillUsing = metadata.templatesV2().entrySet().stream().filter(e -> { Set referredComponentTemplates = new HashSet<>(e.getValue().composedOf()); - String systemTemplateUsed = findContextTemplate(metadata, e.getValue().context()); + String systemTemplateUsed = findContextTemplateName(metadata, e.getValue().context()); if (systemTemplateUsed != null) { referredComponentTemplates.add(systemTemplateUsed); } @@ -568,7 +571,7 @@ public static void validateV2TemplateRequest( ); } - if (template.context() != null && findContextTemplate(metadata, template.context()) == null) { + if (template.context() != null && findContextTemplateName(metadata, template.context()) == null) { throw new InvalidIndexTemplateException( name, "index template [" + name + "] specifies a context which is not loaded on the cluster." 
@@ -585,7 +588,12 @@ private void validateComponentTemplateRequest(ComponentTemplate componentTemplat } } - private static String findContextTemplate(Metadata metadata, Context context) { + static ComponentTemplate findComponentTemplate(Metadata metadata, Context context) { + String contextTemplateName = findContextTemplateName(metadata, context); + return metadata.componentTemplates().getOrDefault(contextTemplateName, null); + } + + static String findContextTemplateName(Metadata metadata, Context context) { if (context == null) { return null; } @@ -1246,7 +1254,7 @@ public static List collectMappings(final ClusterState state, // Now use context mappings which take the highest precedence Optional.ofNullable(template.context()) - .map(ctx -> findContextTemplate(state.metadata(), ctx)) + .map(ctx -> findContextTemplateName(state.metadata(), ctx)) .map(name -> state.metadata().componentTemplates().get(name)) .map(ComponentTemplate::template) .map(Template::mappings) @@ -1317,8 +1325,7 @@ private static Settings resolveSettings(Metadata metadata, ComposableIndexTempla Optional.ofNullable(template.template()).map(Template::settings).ifPresent(templateSettings::put); // Add the template referred by context since it will take the highest precedence. - final String systemTemplate = findContextTemplate(metadata, template.context()); - final ComponentTemplate componentTemplate = metadata.componentTemplates().get(systemTemplate); + final ComponentTemplate componentTemplate = findComponentTemplate(metadata, template.context()); Optional.ofNullable(componentTemplate).map(ComponentTemplate::template).map(Template::settings).ifPresent(templateSettings::put); return templateSettings.build(); @@ -1367,8 +1374,7 @@ public static List> resolveAliases(final Metadata met // Now use context referenced template's aliases which take the highest precedence if (template.context() != null) { - final String systemTemplate = findContextTemplate(metadata, template.context()); - final ComponentTemplate componentTemplate = metadata.componentTemplates().get(systemTemplate); + final ComponentTemplate componentTemplate = findComponentTemplate(metadata, template.context()); Optional.ofNullable(componentTemplate.template()).map(Template::aliases).ifPresent(aliases::add); } @@ -1632,8 +1638,10 @@ private void validate(String name, @Nullable Settings settings, List ind ); validationErrors.addAll(indexSettingsValidation); - // validate index refresh interval settings + // validate index refresh interval and translog durability settings validateRefreshIntervalSettings(settings, clusterService.getClusterSettings()); + validateTranslogFlushIntervalSettingsForCompositeIndex(settings, clusterService.getClusterSettings()); + validateTranslogDurabilitySettingsInTemplate(settings, clusterService.getClusterSettings()); } if (indexPatterns.stream().anyMatch(Regex::isMatchAllPattern)) { @@ -1659,6 +1667,29 @@ private void validate(String name, @Nullable Settings settings, List ind } } + /** + * Validates {@code index.translog.durability} is not async with the incoming index template + * if the {@code cluster.remote_store.index.restrict.async-durability} is set to true. 
+ * + * @param requestSettings settings passed during template creation + * @param clusterSettings current cluster settings + */ + private void validateTranslogDurabilitySettingsInTemplate(Settings requestSettings, ClusterSettings clusterSettings) { + if (IndexSettings.INDEX_TRANSLOG_DURABILITY_SETTING.exists(requestSettings) == false + || clusterSettings.get(IndicesService.CLUSTER_REMOTE_INDEX_RESTRICT_ASYNC_DURABILITY_SETTING) == false) { + return; + } + Translog.Durability durability = IndexSettings.INDEX_TRANSLOG_DURABILITY_SETTING.get(requestSettings); + if (durability.equals(Translog.Durability.ASYNC)) { + throw new IllegalArgumentException( + "index setting [index.translog.durability=async] is not allowed as cluster setting [" + + IndicesService.CLUSTER_REMOTE_INDEX_RESTRICT_ASYNC_DURABILITY_SETTING.getKey() + + "=true]" + ); + } + + } + /** * Listener for putting metadata in the template * diff --git a/server/src/main/java/org/opensearch/cluster/metadata/MetadataUpdateSettingsService.java b/server/src/main/java/org/opensearch/cluster/metadata/MetadataUpdateSettingsService.java index 7d4c3512ed757..4e7e31bbb9222 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/MetadataUpdateSettingsService.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/MetadataUpdateSettingsService.java @@ -57,24 +57,33 @@ import org.opensearch.common.settings.IndexScopedSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.action.ActionListener; import org.opensearch.core.index.Index; import org.opensearch.index.IndexSettings; import org.opensearch.indices.IndicesService; import org.opensearch.indices.ShardLimitValidator; +import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.threadpool.ThreadPool; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; +import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Optional; import java.util.Set; import static org.opensearch.action.support.ContextPreservingActionListener.wrapPreservingContext; +import static org.opensearch.cluster.metadata.IndexMetadata.INDEX_REPLICATION_TYPE_SETTING; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS; +import static org.opensearch.cluster.metadata.MetadataCreateIndexService.validateOverlap; import static org.opensearch.cluster.metadata.MetadataCreateIndexService.validateRefreshIntervalSettings; import static org.opensearch.cluster.metadata.MetadataCreateIndexService.validateTranslogDurabilitySettings; +import static org.opensearch.cluster.metadata.MetadataCreateIndexService.validateTranslogFlushIntervalSettingsForCompositeIndex; +import static org.opensearch.cluster.metadata.MetadataIndexTemplateService.findComponentTemplate; import static org.opensearch.common.settings.AbstractScopedSettings.ARCHIVED_SETTINGS_PREFIX; import static org.opensearch.index.IndexSettings.same; @@ -196,6 +205,7 @@ public ClusterState execute(ClusterState currentState) { Set openIndices = new HashSet<>(); Set closeIndices = new HashSet<>(); final String[] actualIndices = new String[request.indices().length]; + final List validationErrors = new ArrayList<>(); for (int i = 0; i < request.indices().length; i++) { Index index = request.indices()[i]; actualIndices[i] = index.getName(); @@ -205,6 +215,25 @@ public ClusterState execute(ClusterState 
currentState) { } else { closeIndices.add(index); } + if (metadata.context() != null) { + validateOverlap( + normalizedSettings.keySet(), + findComponentTemplate(currentState.metadata(), metadata.context()).template().settings(), + index.getName() + ).ifPresent(validationErrors::add); + } + validateTranslogFlushIntervalSettingsForCompositeIndex( + normalizedSettings, + clusterService.getClusterSettings(), + metadata.getSettings() + ).ifPresent(validationErrors::add); + + } + + if (validationErrors.size() > 0) { + ValidationException exception = new ValidationException(); + exception.addValidationErrors(validationErrors); + throw exception; } if (!skippedSettings.isEmpty() && !openIndices.isEmpty()) { @@ -260,6 +289,34 @@ public ClusterState execute(ClusterState currentState) { } } + if (IndexMetadata.INDEX_NUMBER_OF_SEARCH_REPLICAS_SETTING.exists(openSettings)) { + if (FeatureFlags.isEnabled(FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL_SETTING)) { + validateSearchReplicaCountSettings(normalizedSettings, request.indices(), currentState); + } + final int updatedNumberOfSearchReplicas = IndexMetadata.INDEX_NUMBER_OF_SEARCH_REPLICAS_SETTING.get(openSettings); + if (preserveExisting == false) { + // TODO: Honor awareness validation to search replicas. + + // Verify that this won't take us over the cluster shard limit. + int totalNewShards = Arrays.stream(request.indices()) + .mapToInt(i -> getTotalNewShards(i, currentState, updatedNumberOfSearchReplicas)) + .sum(); + Optional error = shardLimitValidator.checkShardLimit(totalNewShards, currentState); + if (error.isPresent()) { + ValidationException ex = new ValidationException(); + ex.addValidationError(error.get()); + throw ex; + } + routingTableBuilder.updateNumberOfSearchReplicas(updatedNumberOfSearchReplicas, actualIndices); + metadataBuilder.updateNumberOfSearchReplicas(updatedNumberOfSearchReplicas, actualIndices); + logger.info( + "updating number_of_Search Replicas to [{}] for indices {}", + updatedNumberOfSearchReplicas, + actualIndices + ); + } + } + if (!openIndices.isEmpty()) { for (Index index : openIndices) { IndexMetadata indexMetadata = metadataBuilder.getSafe(index); @@ -361,7 +418,6 @@ public ClusterState execute(ClusterState currentState) { .routingTable(routingTableBuilder.build()) .blocks(blocks) .build(); - // now, reroute in case things change that require it (like number of replicas) updatedState = allocationService.reroute(updatedState, "settings update"); try { @@ -469,4 +525,27 @@ public ClusterState execute(ClusterState currentState) { } ); } + + /** + * Validates that if we are trying to update search replica count the index is segrep enabled. 
+ * + * @param requestSettings {@link Settings} + * @param indices indices that are changing + * @param currentState {@link ClusterState} current cluster state + */ + private void validateSearchReplicaCountSettings(Settings requestSettings, Index[] indices, ClusterState currentState) { + final int updatedNumberOfSearchReplicas = IndexMetadata.INDEX_NUMBER_OF_SEARCH_REPLICAS_SETTING.get(requestSettings); + if (updatedNumberOfSearchReplicas > 0) { + if (Arrays.stream(indices).allMatch(index -> currentState.metadata().isSegmentReplicationEnabled(index.getName())) == false) { + throw new IllegalArgumentException( + "To set " + + SETTING_NUMBER_OF_SEARCH_REPLICAS + + ", " + + INDEX_REPLICATION_TYPE_SETTING.getKey() + + " must be set to " + + ReplicationType.SEGMENT + ); + } + } + } } diff --git a/server/src/main/java/org/opensearch/cluster/metadata/QueryGroup.java b/server/src/main/java/org/opensearch/cluster/metadata/QueryGroup.java index a971aa58940ba..0eeafdc8f5eed 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/QueryGroup.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/QueryGroup.java @@ -12,11 +12,14 @@ import org.opensearch.cluster.Diff; import org.opensearch.common.UUIDs; import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.annotation.PublicApi; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.xcontent.ToXContentObject; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.wlm.MutableQueryGroupFragment; +import org.opensearch.wlm.MutableQueryGroupFragment.ResiliencyMode; import org.opensearch.wlm.ResourceType; import org.joda.time.Instant; @@ -24,62 +27,59 @@ import java.util.HashMap; import java.util.Map; import java.util.Objects; +import java.util.Optional; /** * Class to define the QueryGroup schema * { * "_id": "fafjafjkaf9ag8a9ga9g7ag0aagaga", * "resource_limits": { - * "memory": 0.4 + * "memory": 0.4, + * "cpu": 0.2 * }, * "resiliency_mode": "enforced", * "name": "analytics", * "updated_at": 4513232415 * } */ -@ExperimentalApi +@PublicApi(since = "2.18.0") public class QueryGroup extends AbstractDiffable implements ToXContentObject { public static final String _ID_STRING = "_id"; public static final String NAME_STRING = "name"; - public static final String RESILIENCY_MODE_STRING = "resiliency_mode"; public static final String UPDATED_AT_STRING = "updated_at"; - public static final String RESOURCE_LIMITS_STRING = "resource_limits"; private static final int MAX_CHARS_ALLOWED_IN_NAME = 50; private final String name; private final String _id; - private final ResiliencyMode resiliencyMode; // It is an epoch in millis private final long updatedAtInMillis; - private final Map resourceLimits; + private final MutableQueryGroupFragment mutableQueryGroupFragment; - public QueryGroup(String name, ResiliencyMode resiliencyMode, Map resourceLimits) { - this(name, UUIDs.randomBase64UUID(), resiliencyMode, resourceLimits, Instant.now().getMillis()); + public QueryGroup(String name, MutableQueryGroupFragment mutableQueryGroupFragment) { + this(name, UUIDs.randomBase64UUID(), mutableQueryGroupFragment, Instant.now().getMillis()); } - public QueryGroup(String name, String _id, ResiliencyMode resiliencyMode, Map resourceLimits, long updatedAt) { + public QueryGroup(String name, String _id, MutableQueryGroupFragment mutableQueryGroupFragment, long updatedAt) { 
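For illustration, a minimal sketch of driving the new search-replica count through the regular update-settings path, assuming a node-level Client is available; the helper and index names are placeholders and not part of this change. The validation introduced above rejects the update unless the index uses segment replication, and the shard-limit check also applies when the reader/writer split feature flag is enabled.

import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest;
import org.opensearch.client.Client;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.common.settings.Settings;
import org.opensearch.core.action.ActionListener;

class SearchReplicaSettingsSketch {

    // Raises the search-only replica count on an index. The update only succeeds when the
    // index uses index.replication.type=SEGMENT; otherwise the validation added in
    // MetadataUpdateSettingsService fails the request with an IllegalArgumentException.
    static void setSearchReplicas(Client client, String index, int count) {
        UpdateSettingsRequest request = new UpdateSettingsRequest(index).settings(
            Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS, count).build()
        );
        client.admin().indices().updateSettings(request, ActionListener.wrap(
            response -> { /* routing table and metadata now carry `count` search replicas per shard */ },
            exception -> { /* e.g. rejected for document-replicated indices or shard-limit breaches */ }
        ));
    }
}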
Objects.requireNonNull(name, "QueryGroup.name can't be null"); - Objects.requireNonNull(resourceLimits, "QueryGroup.resourceLimits can't be null"); - Objects.requireNonNull(resiliencyMode, "QueryGroup.resiliencyMode can't be null"); + Objects.requireNonNull(mutableQueryGroupFragment.getResourceLimits(), "QueryGroup.resourceLimits can't be null"); + Objects.requireNonNull(mutableQueryGroupFragment.getResiliencyMode(), "QueryGroup.resiliencyMode can't be null"); Objects.requireNonNull(_id, "QueryGroup._id can't be null"); validateName(name); - if (resourceLimits.isEmpty()) { + if (mutableQueryGroupFragment.getResourceLimits().isEmpty()) { throw new IllegalArgumentException("QueryGroup.resourceLimits should at least have 1 resource limit"); } - validateResourceLimits(resourceLimits); if (!isValid(updatedAt)) { throw new IllegalArgumentException("QueryGroup.updatedAtInMillis is not a valid epoch"); } this.name = name; this._id = _id; - this.resiliencyMode = resiliencyMode; - this.resourceLimits = resourceLimits; + this.mutableQueryGroupFragment = mutableQueryGroupFragment; this.updatedAtInMillis = updatedAt; } - private static boolean isValid(long updatedAt) { + public static boolean isValid(long updatedAt) { long minValidTimestamp = Instant.ofEpochMilli(0L).getMillis(); // Use Instant.now() to get the current time in seconds since epoch @@ -90,12 +90,22 @@ private static boolean isValid(long updatedAt) { } public QueryGroup(StreamInput in) throws IOException { - this( - in.readString(), - in.readString(), - ResiliencyMode.fromName(in.readString()), - in.readMap((i) -> ResourceType.fromName(i.readString()), StreamInput::readDouble), - in.readLong() + this(in.readString(), in.readString(), new MutableQueryGroupFragment(in), in.readLong()); + } + + public static QueryGroup updateExistingQueryGroup(QueryGroup existingGroup, MutableQueryGroupFragment mutableQueryGroupFragment) { + final Map updatedResourceLimits = new HashMap<>(existingGroup.getResourceLimits()); + final Map mutableFragmentResourceLimits = mutableQueryGroupFragment.getResourceLimits(); + if (mutableFragmentResourceLimits != null && !mutableFragmentResourceLimits.isEmpty()) { + updatedResourceLimits.putAll(mutableFragmentResourceLimits); + } + final ResiliencyMode mode = Optional.ofNullable(mutableQueryGroupFragment.getResiliencyMode()) + .orElse(existingGroup.getResiliencyMode()); + return new QueryGroup( + existingGroup.getName(), + existingGroup.get_id(), + new MutableQueryGroupFragment(mode, updatedResourceLimits), + Instant.now().getMillis() ); } @@ -103,8 +113,7 @@ public QueryGroup(StreamInput in) throws IOException { public void writeTo(StreamOutput out) throws IOException { out.writeString(name); out.writeString(_id); - out.writeString(resiliencyMode.getName()); - out.writeMap(resourceLimits, ResourceType::writeTo, StreamOutput::writeDouble); + mutableQueryGroupFragment.writeTo(out); out.writeLong(updatedAtInMillis); } @@ -114,34 +123,15 @@ public static void validateName(String name) { } } - private void validateResourceLimits(Map resourceLimits) { - for (Map.Entry resource : resourceLimits.entrySet()) { - Double threshold = resource.getValue(); - Objects.requireNonNull(resource.getKey(), "resourceName can't be null"); - Objects.requireNonNull(threshold, "resource limit threshold for" + resource.getKey().getName() + " : can't be null"); - - if (Double.compare(threshold, 0.0) <= 0 || Double.compare(threshold, 1.0) > 0) { - throw new IllegalArgumentException("resource value should be greater than 0 and less or equal to 
1.0"); - } - } - } - @Override public XContentBuilder toXContent(final XContentBuilder builder, final Params params) throws IOException { builder.startObject(); builder.field(_ID_STRING, _id); builder.field(NAME_STRING, name); - builder.field(RESILIENCY_MODE_STRING, resiliencyMode.getName()); - builder.field(UPDATED_AT_STRING, updatedAtInMillis); - // write resource limits - builder.startObject(RESOURCE_LIMITS_STRING); - for (ResourceType resourceType : ResourceType.values()) { - if (resourceLimits.containsKey(resourceType)) { - builder.field(resourceType.getName(), resourceLimits.get(resourceType)); - } + for (String fieldName : MutableQueryGroupFragment.acceptedFieldNames) { + mutableQueryGroupFragment.writeField(builder, fieldName); } - builder.endObject(); - + builder.field(UPDATED_AT_STRING, updatedAtInMillis); builder.endObject(); return builder; } @@ -160,27 +150,30 @@ public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) return false; QueryGroup that = (QueryGroup) o; return Objects.equals(name, that.name) - && Objects.equals(resiliencyMode, that.resiliencyMode) - && Objects.equals(resourceLimits, that.resourceLimits) + && Objects.equals(mutableQueryGroupFragment, that.mutableQueryGroupFragment) && Objects.equals(_id, that._id) && updatedAtInMillis == that.updatedAtInMillis; } @Override public int hashCode() { - return Objects.hash(name, resourceLimits, updatedAtInMillis, _id); + return Objects.hash(name, mutableQueryGroupFragment, updatedAtInMillis, _id); } public String getName() { return name; } + public MutableQueryGroupFragment getMutableQueryGroupFragment() { + return mutableQueryGroupFragment; + } + public ResiliencyMode getResiliencyMode() { - return resiliencyMode; + return getMutableQueryGroupFragment().getResiliencyMode(); } public Map getResourceLimits() { - return resourceLimits; + return getMutableQueryGroupFragment().getResourceLimits(); } public String get_id() { @@ -199,37 +192,6 @@ public static Builder builder() { return new Builder(); } - /** - * This enum models the different QueryGroup resiliency modes - * SOFT - means that this query group can consume more than query group resource limits if node is not in duress - * ENFORCED - means that it will never breach the assigned limits and will cancel as soon as the limits are breached - * MONITOR - it will not cause any cancellation but just log the eligible task cancellations - */ - @ExperimentalApi - public enum ResiliencyMode { - SOFT("soft"), - ENFORCED("enforced"), - MONITOR("monitor"); - - private final String name; - - ResiliencyMode(String mode) { - this.name = mode; - } - - public String getName() { - return name; - } - - public static ResiliencyMode fromName(String s) { - for (ResiliencyMode mode : values()) { - if (mode.getName().equalsIgnoreCase(s)) return mode; - - } - throw new IllegalArgumentException("Invalid value for QueryGroupMode: " + s); - } - } - /** * Builder class for {@link QueryGroup} */ @@ -237,9 +199,8 @@ public static ResiliencyMode fromName(String s) { public static class Builder { private String name; private String _id; - private ResiliencyMode resiliencyMode; + private MutableQueryGroupFragment mutableQueryGroupFragment; private long updatedAt; - private Map resourceLimits; private Builder() {} @@ -257,8 +218,7 @@ public static Builder fromXContent(XContentParser parser) throws IOException { } String fieldName = ""; - // Map to hold resources - final Map resourceLimits = new HashMap<>(); + MutableQueryGroupFragment mutableQueryGroupFragment1 = new 
MutableQueryGroupFragment(); while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { fieldName = parser.currentName(); @@ -267,32 +227,21 @@ public static Builder fromXContent(XContentParser parser) throws IOException { builder._id(parser.text()); } else if (fieldName.equals(NAME_STRING)) { builder.name(parser.text()); - } else if (fieldName.equals(RESILIENCY_MODE_STRING)) { - builder.mode(parser.text()); + } else if (MutableQueryGroupFragment.shouldParse(fieldName)) { + mutableQueryGroupFragment1.parseField(parser, fieldName); } else if (fieldName.equals(UPDATED_AT_STRING)) { builder.updatedAt(parser.longValue()); } else { throw new IllegalArgumentException(fieldName + " is not a valid field in QueryGroup"); } } else if (token == XContentParser.Token.START_OBJECT) { - - if (!fieldName.equals(RESOURCE_LIMITS_STRING)) { - throw new IllegalArgumentException( - "QueryGroup.resourceLimits is an object and expected token was { " + " but found " + token - ); - } - - while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { - if (token == XContentParser.Token.FIELD_NAME) { - fieldName = parser.currentName(); - } else { - resourceLimits.put(ResourceType.fromName(fieldName), parser.doubleValue()); - } + if (!MutableQueryGroupFragment.shouldParse(fieldName)) { + throw new IllegalArgumentException(fieldName + " is not a valid object in QueryGroup"); } - + mutableQueryGroupFragment1.parseField(parser, fieldName); } } - return builder.resourceLimits(resourceLimits); + return builder.mutableQueryGroupFragment(mutableQueryGroupFragment1); } public Builder name(String name) { @@ -305,8 +254,8 @@ public Builder _id(String _id) { return this; } - public Builder mode(String mode) { - this.resiliencyMode = ResiliencyMode.fromName(mode); + public Builder mutableQueryGroupFragment(MutableQueryGroupFragment mutableQueryGroupFragment) { + this.mutableQueryGroupFragment = mutableQueryGroupFragment; return this; } @@ -315,13 +264,12 @@ public Builder updatedAt(long updatedAt) { return this; } - public Builder resourceLimits(Map resourceLimits) { - this.resourceLimits = resourceLimits; - return this; + public QueryGroup build() { + return new QueryGroup(name, _id, mutableQueryGroupFragment, updatedAt); } - public QueryGroup build() { - return new QueryGroup(name, _id, resiliencyMode, resourceLimits, updatedAt); + public MutableQueryGroupFragment getMutableQueryGroupFragment() { + return mutableQueryGroupFragment; } } } diff --git a/server/src/main/java/org/opensearch/cluster/metadata/RepositoriesMetadata.java b/server/src/main/java/org/opensearch/cluster/metadata/RepositoriesMetadata.java index 4b3dc7964a87b..59452e33191d7 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/RepositoriesMetadata.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/RepositoriesMetadata.java @@ -184,6 +184,24 @@ public boolean equalsIgnoreGenerationsWithRepoSkip(@Nullable RepositoriesMetadat .filter(repo -> !reposToSkip.contains(repo.name())) .collect(Collectors.toList()); + return equalsRepository(currentRepositories, otherRepositories); + } + + public boolean equalsIgnoreGenerationsForRepo(@Nullable RepositoriesMetadata other, List reposToValidate) { + if (other == null) { + return false; + } + List currentRepositories = repositories.stream() + .filter(repo -> reposToValidate.contains(repo.name())) + .collect(Collectors.toList()); + List otherRepositories = other.repositories.stream() + .filter(repo -> 
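A minimal sketch of constructing and updating a QueryGroup with the MutableQueryGroupFragment-based API shown above; the ResourceType.MEMORY and ResourceType.CPU constants and the group name are assumptions used only for illustration.

import java.util.Map;

import org.opensearch.cluster.metadata.QueryGroup;
import org.opensearch.wlm.MutableQueryGroupFragment;
import org.opensearch.wlm.MutableQueryGroupFragment.ResiliencyMode;
import org.opensearch.wlm.ResourceType;

class QueryGroupSketch {

    // Resiliency mode and per-resource limits now travel together in the mutable fragment.
    static QueryGroup analyticsGroup() {
        MutableQueryGroupFragment fragment = new MutableQueryGroupFragment(
            ResiliencyMode.ENFORCED,
            Map.of(ResourceType.MEMORY, 0.4, ResourceType.CPU, 0.2)
        );
        return new QueryGroup("analytics", fragment);
    }

    // Only the fields supplied in the fragment are overridden; everything else is carried over
    // from the existing group, and updated_at is refreshed.
    static QueryGroup raiseMemoryLimit(QueryGroup existing) {
        return QueryGroup.updateExistingQueryGroup(
            existing,
            new MutableQueryGroupFragment(existing.getResiliencyMode(), Map.of(ResourceType.MEMORY, 0.6))
        );
    }
}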
reposToValidate.contains(repo.name())) + .collect(Collectors.toList()); + + return equalsRepository(currentRepositories, otherRepositories); + } + + public static boolean equalsRepository(List currentRepositories, List otherRepositories) { if (otherRepositories.size() != currentRepositories.size()) { return false; } diff --git a/server/src/main/java/org/opensearch/cluster/metadata/TemplatesMetadata.java b/server/src/main/java/org/opensearch/cluster/metadata/TemplatesMetadata.java index 6ecc471c5e0ae..c01cd8f373c72 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/TemplatesMetadata.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/TemplatesMetadata.java @@ -10,7 +10,9 @@ import org.opensearch.cluster.AbstractDiffable; import org.opensearch.common.annotation.PublicApi; +import org.opensearch.core.common.io.stream.BufferedChecksumStreamOutput; import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.VerifiableWriteable; import org.opensearch.core.xcontent.ToXContentFragment; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; @@ -27,7 +29,7 @@ * @opensearch.api */ @PublicApi(since = "2.15.0") -public class TemplatesMetadata extends AbstractDiffable implements ToXContentFragment { +public class TemplatesMetadata extends AbstractDiffable implements ToXContentFragment, VerifiableWriteable { public static TemplatesMetadata EMPTY_METADATA = builder().build(); private final Map templates; @@ -65,6 +67,11 @@ public void writeTo(StreamOutput out) throws IOException { } } + @Override + public void writeVerifiableTo(BufferedChecksumStreamOutput out) throws IOException { + out.writeMapValues(templates, (stream, value) -> value.writeVerifiableTo((BufferedChecksumStreamOutput) stream)); + } + @Override public boolean equals(Object o) { if (this == o) return true; @@ -80,6 +87,11 @@ public int hashCode() { return templates != null ? 
templates.hashCode() : 0; } + @Override + public String toString() { + return "TemplatesMetadata{" + "templates=" + templates + '}'; + } + /** * Builder for the templates metadata * @@ -133,16 +145,15 @@ public static TemplatesMetadata fromXContent(XContentParser parser) throws IOExc String currentFieldName = parser.currentName(); if (currentFieldName == null) { token = parser.nextToken(); - if (token == XContentParser.Token.START_OBJECT) { - // move to the field name - token = parser.nextToken(); - } - currentFieldName = parser.currentName(); } - if (currentFieldName != null) { - while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.START_OBJECT) { + // move to the field name + token = parser.nextToken(); + } + if (parser.currentName() != null && token != XContentParser.Token.END_OBJECT) { + do { builder.put(IndexTemplateMetadata.Builder.fromXContent(parser, parser.currentName())); - } + } while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT); } return builder.build(); } diff --git a/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java b/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java index 653f81830ed17..a6f0a457f7f9b 100644 --- a/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java +++ b/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java @@ -37,9 +37,10 @@ import org.opensearch.common.annotation.PublicApi; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; +import org.opensearch.core.common.io.stream.BufferedChecksumStreamOutput; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; -import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.common.io.stream.VerifiableWriteable; import org.opensearch.core.common.transport.TransportAddress; import org.opensearch.core.xcontent.ToXContentFragment; import org.opensearch.core.xcontent.XContentBuilder; @@ -64,6 +65,7 @@ import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_NODE_ATTRIBUTE_KEY_PREFIX; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; /** * A discovery node represents a node that is part of the cluster. 
@@ -71,7 +73,7 @@ * @opensearch.api */ @PublicApi(since = "1.0.0") -public class DiscoveryNode implements Writeable, ToXContentFragment { +public class DiscoveryNode implements VerifiableWriteable, ToXContentFragment { static final String COORDINATING_ONLY = "coordinating_only"; @@ -329,6 +331,7 @@ public DiscoveryNode(StreamInput in) throws IOException { for (int i = 0; i < size; i++) { this.attributes.put(in.readString(), in.readString()); } + int rolesSize = in.readVInt(); final Set roles = new HashSet<>(rolesSize); for (int i = 0; i < rolesSize; i++) { @@ -358,17 +361,51 @@ public DiscoveryNode(StreamInput in) throws IOException { @Override public void writeTo(StreamOutput out) throws IOException { + if (out.getVersion().onOrAfter(Version.V_2_17_0)) { + writeToUtil(out, false); + } else { + writeToUtil(out, true); + } + + } + + public void writeToWithAttribute(StreamOutput out) throws IOException { + writeToUtil(out, true); + } + + public void writeToUtil(StreamOutput out, boolean includeAllAttributes) throws IOException { + writeNodeDetails(out); + + if (includeAllAttributes) { + out.writeVInt(attributes.size()); + for (Map.Entry entry : attributes.entrySet()) { + out.writeString(entry.getKey()); + out.writeString(entry.getValue()); + } + } else { + out.writeVInt(0); + } + + writeRolesAndVersion(out); + } + + @Override + public void writeVerifiableTo(BufferedChecksumStreamOutput out) throws IOException { + writeNodeDetails(out); + out.writeMap(attributes, StreamOutput::writeString, StreamOutput::writeString); + writeRolesAndVersion(out); + } + + private void writeNodeDetails(StreamOutput out) throws IOException { out.writeString(nodeName); out.writeString(nodeId); out.writeString(ephemeralId); out.writeString(hostName); out.writeString(hostAddress); address.writeTo(out); - out.writeVInt(attributes.size()); - for (Map.Entry entry : attributes.entrySet()) { - out.writeString(entry.getKey()); - out.writeString(entry.getValue()); - } + } + + private void writeRolesAndVersion(StreamOutput out) throws IOException { out.writeVInt(roles.size()); for (final DiscoveryNodeRole role : roles) { final DiscoveryNodeRole compatibleRole = role.getCompatibilityRole(out.getVersion()); @@ -473,7 +510,8 @@ public boolean isSearchNode() { * @return true if the node contains remote store node attributes, false otherwise */ public boolean isRemoteStoreNode() { - return this.getAttributes().keySet().stream().anyMatch(key -> key.startsWith(REMOTE_STORE_NODE_ATTRIBUTE_KEY_PREFIX)); + return this.getAttributes().keySet().stream().anyMatch(key -> key.equals(REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY)) + && this.getAttributes().keySet().stream().anyMatch(key -> key.equals(REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY)); } /** diff --git a/server/src/main/java/org/opensearch/cluster/node/DiscoveryNodes.java b/server/src/main/java/org/opensearch/cluster/node/DiscoveryNodes.java index 2ebcd8096893d..52d830aafda38 100644 --- a/server/src/main/java/org/opensearch/cluster/node/DiscoveryNodes.java +++ b/server/src/main/java/org/opensearch/cluster/node/DiscoveryNodes.java @@ -41,8 +41,10 @@ import org.opensearch.common.regex.Regex; import org.opensearch.common.util.set.Sets; import org.opensearch.core.common.Strings; +import org.opensearch.core.common.io.stream.BufferedChecksumStreamOutput; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.VerifiableWriteable; import 
org.opensearch.core.common.transport.TransportAddress; import java.io.IOException; @@ -66,7 +68,7 @@ * @opensearch.api */ @PublicApi(since = "1.0.0") -public class DiscoveryNodes extends AbstractDiffable implements Iterable { +public class DiscoveryNodes extends AbstractDiffable implements Iterable, VerifiableWriteable { public static final DiscoveryNodes EMPTY_NODES = builder().build(); @@ -688,16 +690,67 @@ public String shortSummary() { @Override public void writeTo(StreamOutput out) throws IOException { + writeToUtil((output, value) -> value.writeTo(output), out); + } + + public void writeToWithAttribute(StreamOutput out) throws IOException { + writeToUtil((output, value) -> value.writeToWithAttribute(output), out); + } + + public void writeToUtil(final Writer writer, StreamOutput out) throws IOException { + writeClusterManager(out); + out.writeVInt(nodes.size()); + for (DiscoveryNode node : this) { + writer.write(out, node); + } + } + + @Override + public void writeVerifiableTo(BufferedChecksumStreamOutput out) throws IOException { + writeClusterManager(out); + out.writeMapValues(nodes, (stream, val) -> val.writeVerifiableTo((BufferedChecksumStreamOutput) stream)); + } + + private void writeClusterManager(StreamOutput out) throws IOException { if (clusterManagerNodeId == null) { out.writeBoolean(false); } else { out.writeBoolean(true); out.writeString(clusterManagerNodeId); } - out.writeVInt(nodes.size()); - for (DiscoveryNode node : this) { - node.writeTo(out); - } + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + DiscoveryNodes that = (DiscoveryNodes) o; + return Objects.equals(nodes, that.nodes) + && Objects.equals(dataNodes, that.dataNodes) + && Objects.equals(clusterManagerNodes, that.clusterManagerNodes) + && Objects.equals(ingestNodes, that.ingestNodes) + && Objects.equals(clusterManagerNodeId, that.clusterManagerNodeId) + && Objects.equals(localNodeId, that.localNodeId) + && Objects.equals(minNonClientNodeVersion, that.minNonClientNodeVersion) + && Objects.equals(maxNonClientNodeVersion, that.maxNonClientNodeVersion) + && Objects.equals(maxNodeVersion, that.maxNodeVersion) + && Objects.equals(minNodeVersion, that.minNodeVersion); + } + + @Override + public int hashCode() { + return Objects.hash( + nodes, + dataNodes, + clusterManagerNodes, + ingestNodes, + clusterManagerNodeId, + localNodeId, + minNonClientNodeVersion, + maxNonClientNodeVersion, + maxNodeVersion, + minNodeVersion + ); } public static DiscoveryNodes readFrom(StreamInput in, DiscoveryNode localNode) throws IOException { diff --git a/server/src/main/java/org/opensearch/cluster/routing/IndexRoutingTable.java b/server/src/main/java/org/opensearch/cluster/routing/IndexRoutingTable.java index 7c179f6d4d8fd..9cc3bb21e2d12 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/IndexRoutingTable.java +++ b/server/src/main/java/org/opensearch/cluster/routing/IndexRoutingTable.java @@ -45,8 +45,10 @@ import org.opensearch.cluster.routing.RecoverySource.SnapshotRecoverySource; import org.opensearch.common.Randomness; import org.opensearch.common.annotation.PublicApi; +import org.opensearch.core.common.io.stream.BufferedChecksumStreamOutput; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.VerifiableWriteable; import org.opensearch.core.index.Index; import org.opensearch.core.index.shard.ShardId; @@ -79,7 
+81,10 @@ * @opensearch.api */ @PublicApi(since = "1.0.0") -public class IndexRoutingTable extends AbstractDiffable implements Iterable { +public class IndexRoutingTable extends AbstractDiffable + implements + Iterable, + VerifiableWriteable { private final Index index; private final ShardShuffler shuffler; @@ -139,7 +144,7 @@ boolean validate(Metadata metadata) { // check the replicas for (IndexShardRoutingTable indexShardRoutingTable : this) { int routingNumberOfReplicas = indexShardRoutingTable.size() - 1; - if (routingNumberOfReplicas != indexMetadata.getNumberOfReplicas()) { + if (routingNumberOfReplicas != indexMetadata.getNumberOfReplicas() + indexMetadata.getNumberOfSearchOnlyReplicas()) { throw new IllegalStateException( "Shard [" + indexShardRoutingTable.shardId().id() @@ -157,7 +162,9 @@ boolean validate(Metadata metadata) { ); } final Set inSyncAllocationIds = indexMetadata.inSyncAllocationIds(shardRouting.id()); - if (shardRouting.active() && inSyncAllocationIds.contains(shardRouting.allocationId().getId()) == false) { + if (shardRouting.active() + && inSyncAllocationIds.contains(shardRouting.allocationId().getId()) == false + && shardRouting.isSearchOnly() == false) { throw new IllegalStateException( "active shard routing " + shardRouting @@ -351,6 +358,11 @@ public int hashCode() { return result; } + @Override + public String toString() { + return "IndexRoutingTable{" + "shards=" + shards + ", index=" + index + '}'; + } + public static IndexRoutingTable readFrom(StreamInput in) throws IOException { Index index = new Index(in); Builder builder = new Builder(index); @@ -376,6 +388,11 @@ public void writeTo(StreamOutput out) throws IOException { } } + public void writeVerifiableTo(BufferedChecksumStreamOutput out) throws IOException { + index.writeTo(out); + out.writeMapValues(shards, (stream, value) -> IndexShardRoutingTable.Builder.writeVerifiableTo(value, stream)); + } + public static Builder builder(Index index) { return new Builder(index); } @@ -594,6 +611,17 @@ private Builder initializeEmpty(IndexMetadata indexMetadata, UnassignedInfo unas ) ); } + for (int i = 0; i < indexMetadata.getNumberOfSearchOnlyReplicas(); i++) { + indexShardRoutingBuilder.addShard( + ShardRouting.newUnassigned( + shardId, + false, + true, + PeerRecoverySource.INSTANCE, // TODO: Update to remote store if enabled + unassignedInfo + ) + ); + } shards.put(shardNumber, indexShardRoutingBuilder.build()); } return this; @@ -614,6 +642,26 @@ public Builder addReplica() { return this; } + /** + * Add a Search only replica to the IndexShardRoutingTable + * @return The Builder + */ + public Builder addSearchReplica() { + for (final int shardNumber : shards.keySet()) { + ShardId shardId = new ShardId(index, shardNumber); + // version 0, will get updated when reroute will happen + ShardRouting shard = ShardRouting.newUnassigned( + shardId, + false, + true, + PeerRecoverySource.INSTANCE, // TODO: Change to remote store if enabled + new UnassignedInfo(UnassignedInfo.Reason.REPLICA_ADDED, null) + ); + shards.put(shardNumber, new IndexShardRoutingTable.Builder(shards.get(shard.id())).addShard(shard).build()); + } + return this; + } + public Builder removeReplica() { for (final int shardId : shards.keySet()) { IndexShardRoutingTable indexShard = shards.get(shardId); @@ -629,7 +677,7 @@ public Builder removeReplica() { // first check if there is one that is not assigned to a node, and remove it boolean removed = false; for (ShardRouting shardRouting : indexShard) { - if (!shardRouting.primary() && 
!shardRouting.assignedToNode()) { + if (!shardRouting.primary() && !shardRouting.assignedToNode() && !shardRouting.isSearchOnly()) { builder.removeShard(shardRouting); removed = true; break; @@ -637,7 +685,45 @@ public Builder removeReplica() { } if (!removed) { for (ShardRouting shardRouting : indexShard) { - if (!shardRouting.primary()) { + if (!shardRouting.primary() && !shardRouting.isSearchOnly()) { + builder.removeShard(shardRouting); + break; + } + } + } + shards.put(shardId, builder.build()); + } + return this; + } + + /** + * Remove a Search only replica from the IndexShardRoutingTable + * @return The Builder + */ + public Builder removeSearchReplica() { + for (final int shardId : shards.keySet()) { + IndexShardRoutingTable indexShardRoutingTable = shards.get(shardId); + if (indexShardRoutingTable.searchOnlyReplicas().isEmpty()) { + // nothing to do here! + return this; + } + // re-add all the current ones + IndexShardRoutingTable.Builder builder = new IndexShardRoutingTable.Builder(indexShardRoutingTable.shardId()); + for (ShardRouting shardRouting : indexShardRoutingTable) { + builder.addShard(shardRouting); + } + // first check if there is one that is not assigned to a node, and remove it + boolean removed = false; + for (ShardRouting shardRouting : indexShardRoutingTable) { + if (shardRouting.isSearchOnly() && !shardRouting.assignedToNode()) { + builder.removeShard(shardRouting); + removed = true; + break; + } + } + if (!removed) { + for (ShardRouting shardRouting : indexShardRoutingTable) { + if (shardRouting.isSearchOnly()) { builder.removeShard(shardRouting); break; } diff --git a/server/src/main/java/org/opensearch/cluster/routing/IndexShardRoutingTable.java b/server/src/main/java/org/opensearch/cluster/routing/IndexShardRoutingTable.java index 479143fa9a2f0..f25cb14f65eca 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/IndexShardRoutingTable.java +++ b/server/src/main/java/org/opensearch/cluster/routing/IndexShardRoutingTable.java @@ -34,6 +34,9 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.message.ParameterizedMessage; +import org.opensearch.cluster.AbstractDiffable; +import org.opensearch.cluster.Diff; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.common.Nullable; @@ -60,6 +63,7 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Predicate; import java.util.stream.Collectors; @@ -75,7 +79,7 @@ * @opensearch.api */ @PublicApi(since = "1.0.0") -public class IndexShardRoutingTable implements Iterable { +public class IndexShardRoutingTable extends AbstractDiffable implements Iterable { final ShardShuffler shuffler; // Shuffler for weighted round-robin shard routing. This uses rotation to permute shards. 
@@ -211,6 +215,24 @@ public List getShards() { return shards(); } + /** + * Returns a {@link List} of the search only shards in the RoutingTable + * + * @return a {@link List} of shards + */ + public List searchOnlyReplicas() { + return replicas.stream().filter(ShardRouting::isSearchOnly).collect(Collectors.toList()); + } + + /** + * Returns a {@link List} of the writer replicas (primary eligible) shards in the RoutingTable + * + * @return a {@link List} of shards + */ + public List writerReplicas() { + return replicas.stream().filter(r -> r.isSearchOnly() == false).collect(Collectors.toList()); + } + /** * Returns a {@link List} of active shards * @@ -527,6 +549,12 @@ private static List rankShardsAndUpdateStats( return sortedShards; } + @Override + public void writeTo(StreamOutput out) throws IOException { + this.shardId().getIndex().writeTo(out); + Builder.writeToThin(this, out); + } + private static class NodeRankComparator implements Comparator { private final Map nodeRanks; @@ -619,15 +647,11 @@ public ShardIterator replicaActiveInitializingShardIt() { return new PlainShardIterator(shardId, Collections.emptyList()); } - LinkedList ordered = new LinkedList<>(); - for (ShardRouting replica : shuffler.shuffle(replicas)) { - if (replica.active()) { - ordered.addFirst(replica); - } else if (replica.initializing()) { - ordered.addLast(replica); - } - } - return new PlainShardIterator(shardId, ordered); + return filterAndOrderShards(replica -> true); + } + + public ShardIterator searchReplicaActiveInitializingShardIt() { + return filterAndOrderShards(ShardRouting::isSearchOnly); } /** @@ -658,6 +682,20 @@ public ShardIterator replicaFirstActiveInitializingShardsIt() { return new PlainShardIterator(shardId, ordered); } + private ShardIterator filterAndOrderShards(Predicate filter) { + LinkedList ordered = new LinkedList<>(); + for (ShardRouting replica : shuffler.shuffle(replicas)) { + if (filter.test(replica)) { + if (replica.active()) { + ordered.addFirst(replica); + } else if (replica.initializing()) { + ordered.addLast(replica); + } + } + } + return new PlainShardIterator(shardId, ordered); + } + /** * Returns an iterator on active and initializing shards residing on the provided nodeId. */ @@ -1049,6 +1087,14 @@ private void populateInitializingShardWeightsMap(WeightedRouting weightedRouting } } + public static IndexShardRoutingTable readFrom(StreamInput in) throws IOException { + return IndexShardRoutingTable.Builder.readFrom(in); + } + + public static Diff readDiffFrom(StreamInput in) throws IOException { + return readDiffFrom(IndexShardRoutingTable::readFrom, in); + } + /** * Builder of an index shard routing table. * @@ -1135,6 +1181,27 @@ public static void writeToThin(IndexShardRoutingTable indexShard, StreamOutput o } } + public static void writeVerifiableTo(IndexShardRoutingTable indexShard, StreamOutput out) throws IOException { + out.writeVInt(indexShard.shardId.id()); + out.writeVInt(indexShard.shards.size()); + // Order allocated shards by allocationId + AtomicInteger assignedShardCount = new AtomicInteger(); + indexShard.shards.stream() + .filter(shardRouting -> shardRouting.allocationId() != null) + .sorted(Comparator.comparing(o -> o.allocationId().getId())) + .forEach(shardRouting -> { + try { + assignedShardCount.getAndIncrement(); + shardRouting.writeToThin(out); + } catch (IOException e) { + logger.error(() -> new ParameterizedMessage("Failed to write shard {}. 
Exception {}", indexShard, e)); + throw new RuntimeException("Failed to write IndexShardRoutingTable", e); + } + }); + // is primary assigned + out.writeBoolean(indexShard.primaryShard().allocationId() != null); + out.writeVInt(indexShard.shards.size() - assignedShardCount.get()); + } } @Override diff --git a/server/src/main/java/org/opensearch/cluster/routing/OperationRouting.java b/server/src/main/java/org/opensearch/cluster/routing/OperationRouting.java index 6242247f34a93..fe9e00b250e70 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/OperationRouting.java +++ b/server/src/main/java/org/opensearch/cluster/routing/OperationRouting.java @@ -121,6 +121,7 @@ public class OperationRouting { private volatile boolean isFailOpenEnabled; private volatile boolean isStrictWeightedShardRouting; private volatile boolean ignoreWeightedRouting; + private final boolean isReaderWriterSplitEnabled; public OperationRouting(Settings settings, ClusterSettings clusterSettings) { // whether to ignore awareness attributes when routing requests @@ -141,6 +142,7 @@ public OperationRouting(Settings settings, ClusterSettings clusterSettings) { clusterSettings.addSettingsUpdateConsumer(WEIGHTED_ROUTING_FAILOPEN_ENABLED, this::setFailOpenEnabled); clusterSettings.addSettingsUpdateConsumer(STRICT_WEIGHTED_SHARD_ROUTING_ENABLED, this::setStrictWeightedShardRouting); clusterSettings.addSettingsUpdateConsumer(IGNORE_WEIGHTED_SHARD_ROUTING, this::setIgnoreWeightedRouting); + this.isReaderWriterSplitEnabled = FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL_SETTING.get(settings); } void setUseAdaptiveReplicaSelection(boolean useAdaptiveReplicaSelection) { @@ -254,6 +256,14 @@ public GroupShardsIterator searchShards( preference = Preference.PRIMARY_FIRST.type(); } + if (isReaderWriterSplitEnabled) { + if (preference == null || preference.isEmpty()) { + if (indexMetadataForShard.getNumberOfSearchOnlyReplicas() > 0) { + preference = Preference.SEARCH_REPLICA.type(); + } + } + } + ShardIterator iterator = preferenceActiveShardIterator( shard, clusterState.nodes().getLocalNodeId(), @@ -366,6 +376,8 @@ private ShardIterator preferenceActiveShardIterator( return indexShard.primaryFirstActiveInitializingShardsIt(); case REPLICA_FIRST: return indexShard.replicaFirstActiveInitializingShardsIt(); + case SEARCH_REPLICA: + return indexShard.searchReplicaActiveInitializingShardIt(); case ONLY_LOCAL: return indexShard.onlyNodeActiveInitializingShardsIt(localNodeId); case ONLY_NODES: diff --git a/server/src/main/java/org/opensearch/cluster/routing/Preference.java b/server/src/main/java/org/opensearch/cluster/routing/Preference.java index a1ea01afa118f..093e3d5fd45f8 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/Preference.java +++ b/server/src/main/java/org/opensearch/cluster/routing/Preference.java @@ -73,6 +73,11 @@ public enum Preference { */ REPLICA_FIRST("_replica_first"), + /** + * Route to search replica shards + */ + SEARCH_REPLICA("_search_replica"), + /** * Route to the local shard only */ @@ -127,6 +132,8 @@ public static Preference parse(String preference) { return ONLY_LOCAL; case "_only_nodes": return ONLY_NODES; + case "_search_replica": + return SEARCH_REPLICA; default: throw new IllegalArgumentException("no Preference for [" + preferenceType + "]"); } diff --git a/server/src/main/java/org/opensearch/cluster/routing/RecoverySource.java b/server/src/main/java/org/opensearch/cluster/routing/RecoverySource.java index 8d407c6aff5b6..ae96ea6b73cca 100644 --- 
a/server/src/main/java/org/opensearch/cluster/routing/RecoverySource.java +++ b/server/src/main/java/org/opensearch/cluster/routing/RecoverySource.java @@ -264,6 +264,9 @@ public static class SnapshotRecoverySource extends RecoverySource { private final boolean isSearchableSnapshot; private final boolean remoteStoreIndexShallowCopy; private final String sourceRemoteStoreRepository; + private final String sourceRemoteTranslogRepository; + + private final long pinnedTimestamp; public SnapshotRecoverySource(String restoreUUID, Snapshot snapshot, Version version, IndexId indexId) { this(restoreUUID, snapshot, version, indexId, false, false, null); @@ -277,6 +280,30 @@ public SnapshotRecoverySource( boolean isSearchableSnapshot, boolean remoteStoreIndexShallowCopy, @Nullable String sourceRemoteStoreRepository + ) { + this( + restoreUUID, + snapshot, + version, + indexId, + isSearchableSnapshot, + remoteStoreIndexShallowCopy, + sourceRemoteStoreRepository, + null, + 0L + ); + } + + public SnapshotRecoverySource( + String restoreUUID, + Snapshot snapshot, + Version version, + IndexId indexId, + boolean isSearchableSnapshot, + boolean remoteStoreIndexShallowCopy, + @Nullable String sourceRemoteStoreRepository, + @Nullable String sourceRemoteTranslogRepository, + long pinnedTimestamp ) { this.restoreUUID = restoreUUID; this.snapshot = Objects.requireNonNull(snapshot); @@ -285,6 +312,8 @@ public SnapshotRecoverySource( this.isSearchableSnapshot = isSearchableSnapshot; this.remoteStoreIndexShallowCopy = remoteStoreIndexShallowCopy; this.sourceRemoteStoreRepository = sourceRemoteStoreRepository; + this.sourceRemoteTranslogRepository = sourceRemoteTranslogRepository; + this.pinnedTimestamp = pinnedTimestamp; } SnapshotRecoverySource(StreamInput in) throws IOException { @@ -304,6 +333,13 @@ public SnapshotRecoverySource( remoteStoreIndexShallowCopy = false; sourceRemoteStoreRepository = null; } + if (in.getVersion().onOrAfter(Version.V_2_17_0)) { + sourceRemoteTranslogRepository = in.readOptionalString(); + pinnedTimestamp = in.readLong(); + } else { + sourceRemoteTranslogRepository = null; + pinnedTimestamp = 0L; + } } public String restoreUUID() { @@ -336,10 +372,18 @@ public String sourceRemoteStoreRepository() { return sourceRemoteStoreRepository; } + public String sourceRemoteTranslogRepository() { + return sourceRemoteTranslogRepository; + } + public boolean remoteStoreIndexShallowCopy() { return remoteStoreIndexShallowCopy; } + public long pinnedTimestamp() { + return pinnedTimestamp; + } + @Override protected void writeAdditionalFields(StreamOutput out) throws IOException { out.writeString(restoreUUID); @@ -353,6 +397,10 @@ protected void writeAdditionalFields(StreamOutput out) throws IOException { out.writeBoolean(remoteStoreIndexShallowCopy); out.writeOptionalString(sourceRemoteStoreRepository); } + if (out.getVersion().onOrAfter(Version.V_2_17_0)) { + out.writeOptionalString(sourceRemoteTranslogRepository); + out.writeLong(pinnedTimestamp); + } } @Override @@ -369,7 +417,8 @@ public void addAdditionalFields(XContentBuilder builder, ToXContent.Params param .field("restoreUUID", restoreUUID) .field("isSearchableSnapshot", isSearchableSnapshot) .field("remoteStoreIndexShallowCopy", remoteStoreIndexShallowCopy) - .field("sourceRemoteStoreRepository", sourceRemoteStoreRepository); + .field("sourceRemoteStoreRepository", sourceRemoteStoreRepository) + .field("sourceRemoteTranslogRepository", sourceRemoteTranslogRepository); } @Override @@ -394,8 +443,11 @@ public boolean equals(Object o) { && 
isSearchableSnapshot == that.isSearchableSnapshot && remoteStoreIndexShallowCopy == that.remoteStoreIndexShallowCopy && sourceRemoteStoreRepository != null - ? sourceRemoteStoreRepository.equals(that.sourceRemoteStoreRepository) - : that.sourceRemoteStoreRepository == null; + ? sourceRemoteStoreRepository.equals(that.sourceRemoteStoreRepository) + : that.sourceRemoteStoreRepository == null && sourceRemoteTranslogRepository != null + ? sourceRemoteTranslogRepository.equals(that.sourceRemoteTranslogRepository) + : that.sourceRemoteTranslogRepository == null && pinnedTimestamp == that.pinnedTimestamp; + } @Override @@ -407,10 +459,11 @@ public int hashCode() { version, isSearchableSnapshot, remoteStoreIndexShallowCopy, - sourceRemoteStoreRepository + sourceRemoteStoreRepository, + sourceRemoteTranslogRepository, + pinnedTimestamp ); } - } /** diff --git a/server/src/main/java/org/opensearch/cluster/routing/RoutingNodes.java b/server/src/main/java/org/opensearch/cluster/routing/RoutingNodes.java index b5e74821d41e7..76111f623e0a5 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/RoutingNodes.java +++ b/server/src/main/java/org/opensearch/cluster/routing/RoutingNodes.java @@ -385,7 +385,7 @@ public ShardRouting activeReplicaWithHighestVersion(ShardId shardId) { // be accessible. Therefore, we need to protect against the version being null // (meaning the node will be going away). return assignedShards(shardId).stream() - .filter(shr -> !shr.primary() && shr.active()) + .filter(shr -> !shr.primary() && shr.active() && !shr.isSearchOnly()) .filter(shr -> node(shr.currentNodeId()) != null) .max( Comparator.comparing( @@ -409,7 +409,7 @@ public ShardRouting activeReplicaWithOldestVersion(ShardId shardId) { // It's possible for replicaNodeVersion to be null. Therefore, we need to protect against the version being null // (meaning the node will be going away). 
return assignedShards(shardId).stream() - .filter(shr -> !shr.primary() && shr.active()) + .filter(shr -> !shr.primary() && shr.active() && !shr.isSearchOnly()) .filter(shr -> node(shr.currentNodeId()) != null) .min( Comparator.comparing( @@ -428,7 +428,7 @@ public ShardRouting activeReplicaWithOldestVersion(ShardId shardId) { * are preferred for primary promotion */ public ShardRouting activeReplicaOnRemoteNode(ShardId shardId) { - return assignedShards(shardId).stream().filter(shr -> !shr.primary() && shr.active()).filter((shr) -> { + return assignedShards(shardId).stream().filter(shr -> !shr.primary() && shr.active() && !shr.isSearchOnly()).filter((shr) -> { RoutingNode nd = node(shr.currentNodeId()); return (nd != null && nd.node().isRemoteStoreNode()); }).findFirst().orElse(null); @@ -820,6 +820,7 @@ private ShardRouting cancelRelocation(ShardRouting shard) { private ShardRouting promoteActiveReplicaShardToPrimary(ShardRouting replicaShard) { assert replicaShard.active() : "non-active shard cannot be promoted to primary: " + replicaShard; assert replicaShard.primary() == false : "primary shard cannot be promoted to primary: " + replicaShard; + assert replicaShard.isSearchOnly() == false : "search only replica cannot be promoted to primary: " + replicaShard; ShardRouting primaryShard = replicaShard.moveActiveReplicaToPrimary(); updateAssigned(replicaShard, primaryShard); return primaryShard; diff --git a/server/src/main/java/org/opensearch/cluster/routing/RoutingPool.java b/server/src/main/java/org/opensearch/cluster/routing/RoutingPool.java index db10ad61c7d6d..647e993339476 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/RoutingPool.java +++ b/server/src/main/java/org/opensearch/cluster/routing/RoutingPool.java @@ -11,6 +11,9 @@ import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.routing.allocation.RoutingAllocation; +import org.opensearch.common.util.FeatureFlags; + +import static org.opensearch.action.admin.indices.tiering.TieringUtils.isPartialIndex; /** * {@link RoutingPool} defines the different node types based on the assigned capabilities. The methods @@ -58,6 +61,7 @@ public static RoutingPool getShardPool(ShardRouting shard, RoutingAllocation all * @return {@link RoutingPool} for the given index. */ public static RoutingPool getIndexPool(IndexMetadata indexMetadata) { - return indexMetadata.isRemoteSnapshot() ? REMOTE_CAPABLE : LOCAL_ONLY; + return indexMetadata.isRemoteSnapshot() + || (FeatureFlags.isEnabled(FeatureFlags.TIERED_REMOTE_INDEX) && isPartialIndex(indexMetadata)) ? 
REMOTE_CAPABLE : LOCAL_ONLY; } } diff --git a/server/src/main/java/org/opensearch/cluster/routing/RoutingTable.java b/server/src/main/java/org/opensearch/cluster/routing/RoutingTable.java index 6c7b94f316da2..7128eb44bfb14 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/RoutingTable.java +++ b/server/src/main/java/org/opensearch/cluster/routing/RoutingTable.java @@ -42,8 +42,10 @@ import org.opensearch.common.Nullable; import org.opensearch.common.annotation.PublicApi; import org.opensearch.common.util.iterable.Iterables; +import org.opensearch.core.common.io.stream.BufferedChecksumStreamOutput; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.VerifiableWriteable; import org.opensearch.core.index.Index; import org.opensearch.core.index.shard.ShardId; import org.opensearch.index.IndexNotFoundException; @@ -70,7 +72,7 @@ * @opensearch.api */ @PublicApi(since = "1.0.0") -public class RoutingTable implements Iterable, Diffable { +public class RoutingTable implements Iterable, Diffable, VerifiableWriteable { public static final RoutingTable EMPTY_ROUTING_TABLE = builder().build(); @@ -378,6 +380,10 @@ public Diff diff(RoutingTable previousState) { return new RoutingTableDiff(previousState, this); } + public Diff incrementalDiff(RoutingTable previousState) { + return new RoutingTableIncrementalDiff(previousState, this); + } + public static Diff readDiffFrom(StreamInput in) throws IOException { return new RoutingTableDiff(in); } @@ -403,7 +409,13 @@ public void writeTo(StreamOutput out) throws IOException { } } - private static class RoutingTableDiff implements Diff { + @Override + public void writeVerifiableTo(BufferedChecksumStreamOutput out) throws IOException { + out.writeLong(version); + out.writeMapValues(indicesRouting, (stream, value) -> value.writeVerifiableTo((BufferedChecksumStreamOutput) stream)); + } + + private static class RoutingTableDiff implements Diff, StringKeyDiffProvider { private final long version; @@ -422,6 +434,11 @@ private static class RoutingTableDiff implements Diff { indicesRouting = DiffableUtils.readJdkMapDiff(in, DiffableUtils.getStringKeySerializer(), DIFF_VALUE_READER); } + @Override + public String toString() { + return "RoutingTableDiff{" + "version=" + version + ", indicesRouting=" + indicesRouting + '}'; + } + @Override public RoutingTable apply(RoutingTable part) { return new RoutingTable(version, indicesRouting.apply(part.indicesRouting)); @@ -432,6 +449,11 @@ public void writeTo(StreamOutput out) throws IOException { out.writeLong(version); indicesRouting.writeTo(out); } + + @Override + public DiffableUtils.MapDiff> provideDiff() { + return (DiffableUtils.MapDiff>) indicesRouting; + } } public static Builder builder() { @@ -520,7 +542,7 @@ public Builder updateNumberOfReplicas(final int numberOfReplicas, final String[] // ignore index missing failure, its closed... 
continue; } - int currentNumberOfReplicas = indexRoutingTable.shards().get(0).size() - 1; // remove the required primary + int currentNumberOfReplicas = indexRoutingTable.shards().get(0).writerReplicas().size(); IndexRoutingTable.Builder builder = new IndexRoutingTable.Builder(indexRoutingTable.getIndex()); // re-add all the shards for (IndexShardRoutingTable indexShardRoutingTable : indexRoutingTable) { @@ -541,6 +563,45 @@ public Builder updateNumberOfReplicas(final int numberOfReplicas, final String[] return this; } + /** + * Update the number of search replicas for the specified indices. + * + * @param numberOfSearchReplicas the number of replicas + * @param indices the indices to update the number of replicas for + * @return the builder + */ + public Builder updateNumberOfSearchReplicas(final int numberOfSearchReplicas, final String[] indices) { + if (indicesRouting == null) { + throw new IllegalStateException("once build is called the builder cannot be reused"); + } + for (String index : indices) { + IndexRoutingTable indexRoutingTable = indicesRouting.get(index); + if (indexRoutingTable == null) { + // ignore index missing failure, its closed... + continue; + } + IndexShardRoutingTable shardRoutings = indexRoutingTable.shards().get(0); + int currentNumberOfSearchReplicas = shardRoutings.searchOnlyReplicas().size(); + IndexRoutingTable.Builder builder = new IndexRoutingTable.Builder(indexRoutingTable.getIndex()); + // re-add all the shards + for (IndexShardRoutingTable indexShardRoutingTable : indexRoutingTable) { + builder.addIndexShard(indexShardRoutingTable); + } + if (currentNumberOfSearchReplicas < numberOfSearchReplicas) { + // now, add "empty" ones + for (int i = 0; i < (numberOfSearchReplicas - currentNumberOfSearchReplicas); i++) { + builder.addSearchReplica(); + } + } else if (currentNumberOfSearchReplicas > numberOfSearchReplicas) { + for (int i = 0; i < (currentNumberOfSearchReplicas - numberOfSearchReplicas); i++) { + builder.removeSearchReplica(); + } + } + indicesRouting.put(index, builder.build()); + } + return this; + } + public Builder addAsNew(IndexMetadata indexMetadata) { if (indexMetadata.getState() == IndexMetadata.State.OPEN) { IndexRoutingTable.Builder indexRoutingBuilder = new IndexRoutingTable.Builder(indexMetadata.getIndex()).initializeAsNew( diff --git a/server/src/main/java/org/opensearch/cluster/routing/RoutingTableIncrementalDiff.java b/server/src/main/java/org/opensearch/cluster/routing/RoutingTableIncrementalDiff.java index 3d75b22a8ed7f..13501a431d9f9 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/RoutingTableIncrementalDiff.java +++ b/server/src/main/java/org/opensearch/cluster/routing/RoutingTableIncrementalDiff.java @@ -9,93 +9,85 @@ package org.opensearch.cluster.routing; import org.opensearch.cluster.Diff; +import org.opensearch.cluster.DiffableUtils; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.index.Index; import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; import java.util.Map; +import static org.opensearch.cluster.DiffableUtils.MapDiff; + /** * Represents a difference between {@link RoutingTable} objects that can be serialized and deserialized. 
*/ -public class RoutingTableIncrementalDiff implements Diff { +public class RoutingTableIncrementalDiff implements Diff, StringKeyDiffProvider { - private final Map> diffs; + private final Diff> indicesRouting; - /** - * Constructs a new RoutingTableIncrementalDiff with the given differences. - * - * @param diffs a map containing the differences of {@link IndexRoutingTable}. - */ - public RoutingTableIncrementalDiff(Map> diffs) { - this.diffs = diffs; + private final long version; + + private static final DiffableUtils.DiffableValueSerializer CUSTOM_ROUTING_TABLE_DIFFABLE_VALUE_SERIALIZER = + new DiffableUtils.DiffableValueSerializer<>() { + + @Override + public IndexRoutingTable read(StreamInput in, String key) throws IOException { + return IndexRoutingTable.readFrom(in); + } + + @Override + public Diff readDiff(StreamInput in, String key) throws IOException { + return new RoutingTableIncrementalDiff.IndexRoutingTableIncrementalDiff(in); + } + + @Override + public Diff diff(IndexRoutingTable currentState, IndexRoutingTable previousState) { + return new RoutingTableIncrementalDiff.IndexRoutingTableIncrementalDiff( + currentState.getIndex(), + previousState, + currentState + ); + } + }; + + public RoutingTableIncrementalDiff(RoutingTable before, RoutingTable after) { + version = after.version(); + indicesRouting = DiffableUtils.diff( + before.getIndicesRouting(), + after.getIndicesRouting(), + DiffableUtils.getStringKeySerializer(), + CUSTOM_ROUTING_TABLE_DIFFABLE_VALUE_SERIALIZER + ); } - /** - * Gets the map of differences of {@link IndexRoutingTable}. - * - * @return a map containing the differences. - */ - public Map> getDiffs() { - return diffs; + public RoutingTableIncrementalDiff(StreamInput in) throws IOException { + version = in.readLong(); + indicesRouting = DiffableUtils.readJdkMapDiff( + in, + DiffableUtils.getStringKeySerializer(), + CUSTOM_ROUTING_TABLE_DIFFABLE_VALUE_SERIALIZER + ); } - /** - * Reads a {@link RoutingTableIncrementalDiff} from the given {@link StreamInput}. - * - * @param in the input stream to read from. - * @return the deserialized RoutingTableIncrementalDiff. - * @throws IOException if an I/O exception occurs while reading from the stream. - */ public static RoutingTableIncrementalDiff readFrom(StreamInput in) throws IOException { - int size = in.readVInt(); - Map> diffs = new HashMap<>(); - - for (int i = 0; i < size; i++) { - String key = in.readString(); - Diff diff = IndexRoutingTableIncrementalDiff.readFrom(in); - diffs.put(key, diff); - } - return new RoutingTableIncrementalDiff(diffs); + return new RoutingTableIncrementalDiff(in); } - /** - * Applies the differences to the provided {@link RoutingTable}. - * - * @param part the original RoutingTable to which the differences will be applied. - * @return the updated RoutingTable with the applied differences. - */ @Override public RoutingTable apply(RoutingTable part) { - RoutingTable.Builder builder = new RoutingTable.Builder(); - for (IndexRoutingTable indexRoutingTable : part) { - builder.add(indexRoutingTable); // Add existing index routing tables to builder - } - - // Apply the diffs - for (Map.Entry> entry : diffs.entrySet()) { - builder.add(entry.getValue().apply(part.index(entry.getKey()))); - } - - return builder.build(); + return new RoutingTable(version, indicesRouting.apply(part.getIndicesRouting())); } - /** - * Writes the differences to the given {@link StreamOutput}. - * - * @param out the output stream to write to. 
- * @throws IOException if an I/O exception occurs while writing to the stream. - */ @Override public void writeTo(StreamOutput out) throws IOException { - out.writeVInt(diffs.size()); - for (Map.Entry> entry : diffs.entrySet()) { - out.writeString(entry.getKey()); - entry.getValue().writeTo(out); - } + out.writeLong(version); + indicesRouting.writeTo(out); + } + + @Override + public MapDiff> provideDiff() { + return (MapDiff>) indicesRouting; } /** @@ -103,66 +95,36 @@ public void writeTo(StreamOutput out) throws IOException { */ public static class IndexRoutingTableIncrementalDiff implements Diff { - private final List indexShardRoutingTables; + private final Diff> indexShardRoutingTables; - /** - * Constructs a new IndexShardRoutingTableDiff with the given shard routing tables. - * - * @param indexShardRoutingTables a list of IndexShardRoutingTable representing the differences. - */ - public IndexRoutingTableIncrementalDiff(List indexShardRoutingTables) { - this.indexShardRoutingTables = indexShardRoutingTables; + private final Index index; + + public IndexRoutingTableIncrementalDiff(Index index, IndexRoutingTable before, IndexRoutingTable after) { + this.index = index; + this.indexShardRoutingTables = DiffableUtils.diff(before.getShards(), after.getShards(), DiffableUtils.getIntKeySerializer()); + } + + private static final DiffableUtils.DiffableValueReader DIFF_VALUE_READER = + new DiffableUtils.DiffableValueReader<>(IndexShardRoutingTable::readFrom, IndexShardRoutingTable::readDiffFrom); + + public IndexRoutingTableIncrementalDiff(StreamInput in) throws IOException { + this.index = new Index(in); + this.indexShardRoutingTables = DiffableUtils.readJdkMapDiff(in, DiffableUtils.getIntKeySerializer(), DIFF_VALUE_READER); } - /** - * Applies the differences to the provided {@link IndexRoutingTable}. - * - * @param part the original IndexRoutingTable to which the differences will be applied. - * @return the updated IndexRoutingTable with the applied differences. - */ @Override public IndexRoutingTable apply(IndexRoutingTable part) { - IndexRoutingTable.Builder builder = new IndexRoutingTable.Builder(part.getIndex()); - for (IndexShardRoutingTable shardRoutingTable : part) { - builder.addIndexShard(shardRoutingTable); // Add existing shards to builder - } - - // Apply the diff: update or add the new shard routing tables - for (IndexShardRoutingTable diffShard : indexShardRoutingTables) { - builder.addIndexShard(diffShard); - } - return builder.build(); + return new IndexRoutingTable(index, indexShardRoutingTables.apply(part.getShards())); } - /** - * Writes the differences to the given {@link StreamOutput}. - * - * @param out the output stream to write to. - * @throws IOException if an I/O exception occurs while writing to the stream. - */ @Override public void writeTo(StreamOutput out) throws IOException { - out.writeVInt(indexShardRoutingTables.size()); - for (IndexShardRoutingTable shardRoutingTable : indexShardRoutingTables) { - IndexShardRoutingTable.Builder.writeTo(shardRoutingTable, out); - } + index.writeTo(out); + indexShardRoutingTables.writeTo(out); } - /** - * Reads a {@link IndexRoutingTableIncrementalDiff} from the given {@link StreamInput}. - * - * @param in the input stream to read from. - * @return the deserialized IndexShardRoutingTableDiff. - * @throws IOException if an I/O exception occurs while reading from the stream. 
- */ public static IndexRoutingTableIncrementalDiff readFrom(StreamInput in) throws IOException { - int size = in.readVInt(); - List indexShardRoutingTables = new ArrayList<>(size); - for (int i = 0; i < size; i++) { - IndexShardRoutingTable shardRoutingTable = IndexShardRoutingTable.Builder.readFrom(in); - indexShardRoutingTables.add(shardRoutingTable); - } - return new IndexRoutingTableIncrementalDiff(indexShardRoutingTables); + return new IndexRoutingTableIncrementalDiff(in); } } } diff --git a/server/src/main/java/org/opensearch/cluster/routing/ShardRouting.java b/server/src/main/java/org/opensearch/cluster/routing/ShardRouting.java index 45de045a8fc69..ada35caa1e61e 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/ShardRouting.java +++ b/server/src/main/java/org/opensearch/cluster/routing/ShardRouting.java @@ -32,11 +32,13 @@ package org.opensearch.cluster.routing; +import org.opensearch.Version; import org.opensearch.cluster.routing.RecoverySource.ExistingStoreRecoverySource; import org.opensearch.cluster.routing.RecoverySource.PeerRecoverySource; import org.opensearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; import org.opensearch.common.Nullable; import org.opensearch.common.annotation.PublicApi; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.common.io.stream.Writeable; @@ -67,6 +69,7 @@ public class ShardRouting implements Writeable, ToXContentObject { private final String currentNodeId; private final String relocatingNodeId; private final boolean primary; + private final boolean searchOnly; private final ShardRoutingState state; private final RecoverySource recoverySource; private final UnassignedInfo unassignedInfo; @@ -85,6 +88,7 @@ protected ShardRouting( String currentNodeId, String relocatingNodeId, boolean primary, + boolean searchOnly, ShardRoutingState state, RecoverySource recoverySource, UnassignedInfo unassignedInfo, @@ -95,6 +99,7 @@ protected ShardRouting( this.currentNodeId = currentNodeId; this.relocatingNodeId = relocatingNodeId; this.primary = primary; + this.searchOnly = searchOnly; this.state = state; this.recoverySource = recoverySource; this.unassignedInfo = unassignedInfo; @@ -116,6 +121,31 @@ protected ShardRouting( + this; } + protected ShardRouting( + ShardId shardId, + String relocatingNodeId, + String currentNodeId, + boolean primary, + ShardRoutingState shardRoutingState, + RecoverySource recoverySource, + UnassignedInfo unassignedInfo, + AllocationId allocationId, + long expectedShardSize + ) { + this( + shardId, + relocatingNodeId, + currentNodeId, + primary, + false, + shardRoutingState, + recoverySource, + unassignedInfo, + allocationId, + expectedShardSize + ); + } + @Nullable private ShardRouting initializeTargetRelocatingShard() { if (state == ShardRoutingState.RELOCATING) { @@ -124,6 +154,7 @@ private ShardRouting initializeTargetRelocatingShard() { relocatingNodeId, currentNodeId, primary, + searchOnly, ShardRoutingState.INITIALIZING, PeerRecoverySource.INSTANCE, unassignedInfo, @@ -143,12 +174,26 @@ public static ShardRouting newUnassigned( boolean primary, RecoverySource recoverySource, UnassignedInfo unassignedInfo + ) { + return newUnassigned(shardId, primary, false, recoverySource, unassignedInfo); + } + + /** + * Creates a new unassigned shard, overloaded for bwc for searchOnly addition. 
+ */ + public static ShardRouting newUnassigned( + ShardId shardId, + boolean primary, + boolean search, + RecoverySource recoverySource, + UnassignedInfo unassignedInfo ) { return new ShardRouting( shardId, null, null, primary, + search, ShardRoutingState.UNASSIGNED, recoverySource, unassignedInfo, @@ -280,6 +325,13 @@ public boolean primary() { return this.primary; } + /** + * Returns true iff this shard is a search only replica. + */ + public boolean isSearchOnly() { + return searchOnly; + } + /** * The shard state. */ @@ -306,6 +358,11 @@ public ShardRouting(ShardId shardId, StreamInput in) throws IOException { currentNodeId = in.readOptionalString(); relocatingNodeId = in.readOptionalString(); primary = in.readBoolean(); + if (in.getVersion().onOrAfter(Version.V_2_17_0)) { + searchOnly = in.readBoolean(); + } else { + searchOnly = false; + } state = ShardRoutingState.fromValue(in.readByte()); if (state == ShardRoutingState.UNASSIGNED || state == ShardRoutingState.INITIALIZING) { recoverySource = RecoverySource.readFrom(in); @@ -339,6 +396,9 @@ public void writeToThin(StreamOutput out) throws IOException { out.writeOptionalString(currentNodeId); out.writeOptionalString(relocatingNodeId); out.writeBoolean(primary); + if (out.getVersion().onOrAfter(Version.V_2_17_0)) { + out.writeBoolean(searchOnly); + } out.writeByte(state.value()); if (state == ShardRoutingState.UNASSIGNED || state == ShardRoutingState.INITIALIZING) { recoverySource.writeTo(out); @@ -364,6 +424,7 @@ public ShardRouting updateUnassigned(UnassignedInfo unassignedInfo, RecoverySour currentNodeId, relocatingNodeId, primary, + searchOnly, state, recoverySource, unassignedInfo, @@ -392,6 +453,7 @@ public ShardRouting moveToUnassigned(UnassignedInfo unassignedInfo) { null, null, primary, + searchOnly, ShardRoutingState.UNASSIGNED, recoverySource, unassignedInfo, @@ -419,6 +481,7 @@ public ShardRouting initialize(String nodeId, @Nullable String existingAllocatio nodeId, null, primary, + searchOnly, ShardRoutingState.INITIALIZING, recoverySource, unassignedInfo, @@ -439,6 +502,7 @@ public ShardRouting relocate(String relocatingNodeId, long expectedShardSize) { currentNodeId, relocatingNodeId, primary, + searchOnly, ShardRoutingState.RELOCATING, recoverySource, null, @@ -460,6 +524,7 @@ public ShardRouting cancelRelocation() { currentNodeId, null, primary, + searchOnly, ShardRoutingState.STARTED, recoverySource, null, @@ -483,6 +548,7 @@ public ShardRouting removeRelocationSource() { currentNodeId, null, primary, + searchOnly, state, recoverySource, unassignedInfo, @@ -503,6 +569,7 @@ public ShardRouting reinitializeReplicaShard() { currentNodeId, null, primary, + searchOnly, ShardRoutingState.INITIALIZING, recoverySource, unassignedInfo, @@ -528,6 +595,7 @@ public ShardRouting moveToStarted() { currentNodeId, null, primary, + searchOnly, ShardRoutingState.STARTED, null, null, @@ -546,11 +614,15 @@ public ShardRouting moveActivePrimaryToReplica() { if (!primary) { throw new IllegalShardRoutingStateException(this, "Not a primary shard, can't move to replica"); } + if (searchOnly) { + throw new IllegalShardRoutingStateException(this, "Cannot move a primary to a search only replica"); + } return new ShardRouting( shardId, currentNodeId, relocatingNodeId, false, + false, state, recoverySource, unassignedInfo, @@ -569,11 +641,15 @@ public ShardRouting moveActiveReplicaToPrimary() { if (primary) { throw new IllegalShardRoutingStateException(this, "Already primary, can't move to primary"); } + if (searchOnly) { + throw new 
IllegalShardRoutingStateException(this, "Cannot move a search only replica to primary"); + } return new ShardRouting( shardId, currentNodeId, relocatingNodeId, true, + false, state, recoverySource, unassignedInfo, @@ -811,7 +887,11 @@ public String shortSummary() { if (primary) { sb.append("[P]"); } else { - sb.append("[R]"); + if (searchOnly) { + sb.append("[S]"); + } else { + sb.append("[R]"); + } } if (recoverySource != null) { sb.append(", recovery_source[").append(recoverySource).append("]"); @@ -831,10 +911,11 @@ public String shortSummary() { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject() - .field("state", state()) - .field("primary", primary()) - .field("node", currentNodeId()) + XContentBuilder fieldBuilder = builder.startObject().field("state", state()).field("primary", primary()); + if (FeatureFlags.isEnabled(FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL)) { + fieldBuilder.field("searchOnly", isSearchOnly()); + } + fieldBuilder.field("node", currentNodeId()) .field("relocating_node", relocatingNodeId()) .field("shard", id()) .field("index", getIndexName()); diff --git a/server/src/main/java/org/opensearch/cluster/routing/StringKeyDiffProvider.java b/server/src/main/java/org/opensearch/cluster/routing/StringKeyDiffProvider.java new file mode 100644 index 0000000000000..5d36a238e03ff --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/routing/StringKeyDiffProvider.java @@ -0,0 +1,33 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.routing; + +import org.opensearch.cluster.DiffableUtils; + +import java.util.Map; + +/** + * Interface for providing a difference (diff) between two maps with {@code String} keys and values of type {@code V}. + * This interface is used to compute and obtain the difference between two versions of a map, typically used + * in cluster state updates or other scenarios where changes need to be tracked and propagated efficiently. + * + * @param the type of the values in the map + */ +public interface StringKeyDiffProvider { + + /** + * Provides the difference between two versions of a map with {@code String} keys and values of type {@code V}. + * The difference is represented as a {@link DiffableUtils.MapDiff} object, which can be used to apply the + * changes to another map or to serialize the diff. 
+ * + * @return a {@link DiffableUtils.MapDiff} object representing the difference between the maps + */ + DiffableUtils.MapDiff> provideDiff(); + +} diff --git a/server/src/main/java/org/opensearch/cluster/routing/allocation/AbstractAllocationDecision.java b/server/src/main/java/org/opensearch/cluster/routing/allocation/AbstractAllocationDecision.java index 59a39b358cb70..614e9f49c8726 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/allocation/AbstractAllocationDecision.java +++ b/server/src/main/java/org/opensearch/cluster/routing/allocation/AbstractAllocationDecision.java @@ -107,7 +107,7 @@ public List getNodeDecisions() { @Override public void writeTo(StreamOutput out) throws IOException { - out.writeOptionalWriteable(targetNode); + out.writeOptionalWriteable((stream, node) -> node.writeToWithAttribute(stream), targetNode); if (nodeDecisions != null) { out.writeBoolean(true); out.writeList(nodeDecisions); diff --git a/server/src/main/java/org/opensearch/cluster/routing/allocation/AllocationService.java b/server/src/main/java/org/opensearch/cluster/routing/allocation/AllocationService.java index e29a81a2c131f..78f17c9ff212b 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/allocation/AllocationService.java +++ b/server/src/main/java/org/opensearch/cluster/routing/allocation/AllocationService.java @@ -36,6 +36,7 @@ import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.Version; +import org.opensearch.action.admin.cluster.health.ClusterHealthRequest; import org.opensearch.cluster.ClusterInfoService; import org.opensearch.cluster.ClusterManagerMetrics; import org.opensearch.cluster.ClusterState; @@ -196,7 +197,11 @@ public ClusterState applyStartedShards(ClusterState clusterState, List logger.trace("reroute after balanced shards allocator timed out completed"), + e -> logger.debug("reroute after balanced shards allocator timed out failed", e) + ) + ); + } + } + /** * Returns the currently configured delta threshold */ diff --git a/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/LocalShardsBalancer.java b/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/LocalShardsBalancer.java index adb8ee2cf7e85..7f6a7790d1db0 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/LocalShardsBalancer.java +++ b/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/LocalShardsBalancer.java @@ -30,6 +30,7 @@ import org.opensearch.cluster.routing.allocation.decider.Decision; import org.opensearch.cluster.routing.allocation.decider.DiskThresholdDecider; import org.opensearch.common.collect.Tuple; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.gateway.PriorityComparator; import java.util.ArrayList; @@ -45,6 +46,7 @@ import java.util.stream.Stream; import java.util.stream.StreamSupport; +import static org.opensearch.action.admin.indices.tiering.TieringUtils.isPartialShard; import static org.opensearch.cluster.routing.ShardRoutingState.RELOCATING; /** @@ -552,6 +554,16 @@ private void checkAndAddInEligibleTargetNode(RoutingNode targetNode) { } } + /** + * Checks if the shard can be skipped from the local shard balancer operations + * @param shardRouting the shard to be checked + * @return true if the shard can be skipped, false otherwise + */ + private boolean canShardBeSkipped(ShardRouting shardRouting) { + return (RoutingPool.REMOTE_CAPABLE.equals(RoutingPool.getShardPool(shardRouting, 
allocation)) + && !(FeatureFlags.isEnabled(FeatureFlags.TIERED_REMOTE_INDEX) && isPartialShard(shardRouting, allocation))); + } + /** * Move started shards that can not be allocated to a node anymore *

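To make the double negation in the new LocalShardsBalancer helper easier to read, here is an equivalent sketch with the two conditions split into named booleans (a restatement only; `allocation` is the balancer's RoutingAllocation field and `isPartialShard` is the TieringUtils helper statically imported in the hunk above):

    // A shard is skipped by the local balancer only when it lives in the remote-capable pool
    // and is not a partially cached shard of a tiered index (gated by TIERED_REMOTE_INDEX).
    private boolean canShardBeSkipped(ShardRouting shardRouting) {
        boolean remoteCapable = RoutingPool.REMOTE_CAPABLE.equals(RoutingPool.getShardPool(shardRouting, allocation));
        boolean tieredPartialShard = FeatureFlags.isEnabled(FeatureFlags.TIERED_REMOTE_INDEX)
            && isPartialShard(shardRouting, allocation);
        return remoteCapable && tieredPartialShard == false;
    }
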
@@ -603,7 +615,7 @@ void moveShards() { ShardRouting shardRouting = it.next(); - if (RoutingPool.REMOTE_CAPABLE.equals(RoutingPool.getShardPool(shardRouting, allocation))) { + if (canShardBeSkipped(shardRouting)) { continue; } @@ -669,7 +681,7 @@ void moveShards() { */ @Override MoveDecision decideMove(final ShardRouting shardRouting) { - if (RoutingPool.REMOTE_CAPABLE.equals(RoutingPool.getShardPool(shardRouting, allocation))) { + if (canShardBeSkipped(shardRouting)) { return MoveDecision.NOT_TAKEN; } @@ -758,7 +770,9 @@ private Map buildModelFromAssigned() for (ShardRouting shard : rn) { assert rn.nodeId().equals(shard.currentNodeId()); /* we skip relocating shards here since we expect an initializing shard with the same id coming in */ - if (RoutingPool.LOCAL_ONLY.equals(RoutingPool.getShardPool(shard, allocation)) && shard.state() != RELOCATING) { + if ((RoutingPool.LOCAL_ONLY.equals(RoutingPool.getShardPool(shard, allocation)) + || (FeatureFlags.isEnabled(FeatureFlags.TIERED_REMOTE_INDEX) && isPartialShard(shard, allocation))) + && shard.state() != RELOCATING) { node.addShard(shard); ++totalShardCount; if (logger.isTraceEnabled()) { diff --git a/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/ShardsAllocator.java b/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/ShardsAllocator.java index 29e9acca4e6c2..38aafff6ce3e8 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/ShardsAllocator.java +++ b/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/ShardsAllocator.java @@ -32,6 +32,7 @@ package org.opensearch.cluster.routing.allocation.allocator; +import org.opensearch.cluster.routing.RerouteService; import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision; import org.opensearch.cluster.routing.allocation.MoveDecision; @@ -73,4 +74,6 @@ public interface ShardsAllocator { * the cluster explain API, then this method should throw a {@code UnsupportedOperationException}. */ ShardAllocationDecision decideShardAllocation(ShardRouting shard, RoutingAllocation allocation); + + default void setRerouteService(RerouteService rerouteService) {} } diff --git a/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/SearchReplicaAllocationDecider.java b/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/SearchReplicaAllocationDecider.java new file mode 100644 index 0000000000000..955c396bee4da --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/SearchReplicaAllocationDecider.java @@ -0,0 +1,99 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.cluster.routing.allocation.decider; + +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodeFilters; +import org.opensearch.cluster.routing.RoutingNode; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.allocation.RoutingAllocation; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Setting.Property; +import org.opensearch.common.settings.Settings; +import org.opensearch.node.remotestore.RemoteStoreNodeService; + +import java.util.Map; + +import static org.opensearch.cluster.node.DiscoveryNodeFilters.IP_VALIDATOR; +import static org.opensearch.cluster.node.DiscoveryNodeFilters.OpType.OR; + +/** + * This allocation decider is similar to FilterAllocationDecider but provides + * the option to filter specifically for search replicas. + * The filter behaves similarly to an include for any defined node attribute. + * A search replica can be allocated only to nodes with one of the specified attributes, while + * other shard types will be rejected from nodes with any of the specified attributes. + * @opensearch.internal + */ +public class SearchReplicaAllocationDecider extends AllocationDecider { + + public static final String NAME = "filter"; + private static final String SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_PREFIX = "cluster.routing.allocation.search.replica.dedicated.include"; + public static final Setting.AffixSetting SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_SETTING = Setting.prefixKeySetting( + SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_PREFIX + ".", + key -> Setting.simpleString(key, value -> IP_VALIDATOR.accept(key, value), Property.Dynamic, Property.NodeScope) + ); + + private volatile DiscoveryNodeFilters searchReplicaIncludeFilters; + + private volatile RemoteStoreNodeService.Direction migrationDirection; + private volatile RemoteStoreNodeService.CompatibilityMode compatibilityMode; + + public SearchReplicaAllocationDecider(Settings settings, ClusterSettings clusterSettings) { + setSearchReplicaIncludeFilters(SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_SETTING.getAsMap(settings)); + clusterSettings.addAffixMapUpdateConsumer( + SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_SETTING, + this::setSearchReplicaIncludeFilters, + (a, b) -> {} + ); + } + + @Override + public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + return shouldFilter(shardRouting, node.node(), allocation); + } + + @Override + public Decision canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + return shouldFilter(shardRouting, node.node(), allocation); + } + + private Decision shouldFilter(ShardRouting shardRouting, DiscoveryNode node, RoutingAllocation allocation) { + if (searchReplicaIncludeFilters != null) { + final boolean match = searchReplicaIncludeFilters.match(node); + if (match == false && shardRouting.isSearchOnly()) { + return allocation.decision( + Decision.NO, + NAME, + "node does not match shard setting [%s] filters [%s]", + SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_PREFIX, + searchReplicaIncludeFilters + ); + } + // filter will only apply to search replicas + if (shardRouting.isSearchOnly() == false && match) { + return allocation.decision( + Decision.NO, + NAME, + "only search replicas can be allocated to node with setting [%s] filters [%s]", + SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_PREFIX, + searchReplicaIncludeFilters + ); + } + } + return
allocation.decision(Decision.YES, NAME, "node passes include/exclude/require filters"); + } + + private void setSearchReplicaIncludeFilters(Map filters) { + searchReplicaIncludeFilters = DiscoveryNodeFilters.trimTier( + DiscoveryNodeFilters.buildOrUpdateFromKeyValue(searchReplicaIncludeFilters, OR, filters) + ); + } +} diff --git a/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/TargetPoolAllocationDecider.java b/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/TargetPoolAllocationDecider.java index 76f9f44077ad8..493d23b57d271 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/TargetPoolAllocationDecider.java +++ b/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/TargetPoolAllocationDecider.java @@ -87,6 +87,36 @@ public Decision canForceAllocatePrimary(ShardRouting shardRouting, RoutingNode n return canAllocate(shardRouting, node, allocation); } + @Override + public Decision canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + RoutingPool targetPool = RoutingPool.getShardPool(shardRouting, allocation); + RoutingPool currentNodePool = RoutingPool.getNodePool(allocation.routingNodes().node(shardRouting.currentNodeId())); + if (RoutingPool.REMOTE_CAPABLE.equals(targetPool) && targetPool != currentNodePool) { + logger.debug( + "Shard: [{}] has current pool: [{}], target pool: [{}]. Cannot remain on node: [{}]", + shardRouting.shortSummary(), + currentNodePool.name(), + RoutingPool.REMOTE_CAPABLE.name(), + node.node() + ); + return allocation.decision( + Decision.NO, + NAME, + "Shard %s is allocated on a different pool %s than the target pool %s", + shardRouting.shortSummary(), + currentNodePool, + targetPool + ); + } + return allocation.decision( + Decision.YES, + NAME, + "Routing pools are compatible. 
Shard pool: [%s], node pool: [%s]", + currentNodePool, + targetPool + ); + } + public Decision shouldAutoExpandToNode(IndexMetadata indexMetadata, DiscoveryNode node, RoutingAllocation allocation) { logger.debug("Evaluating node: {} for autoExpandReplica eligibility of index: {}", node, indexMetadata.getIndex()); return canAllocateInTargetPool(indexMetadata, node, allocation); diff --git a/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/ThrottlingAllocationDecider.java b/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/ThrottlingAllocationDecider.java index 61e7aaed5ecff..4bde1e282fe78 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/ThrottlingAllocationDecider.java +++ b/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/ThrottlingAllocationDecider.java @@ -357,7 +357,7 @@ private ShardRouting initializingShard(ShardRouting shardRouting, String current @Override public Decision canMoveAway(ShardRouting shardRouting, RoutingAllocation allocation) { int outgoingRecoveries = 0; - if (!shardRouting.primary()) { + if (!shardRouting.primary() && !shardRouting.isSearchOnly()) { ShardRouting primaryShard = allocation.routingNodes().activePrimary(shardRouting.shardId()); outgoingRecoveries = allocation.routingNodes().getOutgoingRecoveries(primaryShard.currentNodeId()); } else { diff --git a/server/src/main/java/org/opensearch/cluster/routing/remote/InternalRemoteRoutingTableService.java b/server/src/main/java/org/opensearch/cluster/routing/remote/InternalRemoteRoutingTableService.java index 0f1ff3138ef90..220093b428989 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/remote/InternalRemoteRoutingTableService.java +++ b/server/src/main/java/org/opensearch/cluster/routing/remote/InternalRemoteRoutingTableService.java @@ -13,10 +13,10 @@ import org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.action.LatchedActionListener; import org.opensearch.cluster.Diff; -import org.opensearch.cluster.DiffableUtils; import org.opensearch.cluster.routing.IndexRoutingTable; import org.opensearch.cluster.routing.RoutingTable; import org.opensearch.cluster.routing.RoutingTableIncrementalDiff; +import org.opensearch.cluster.routing.StringKeyDiffProvider; import org.opensearch.common.blobstore.BlobPath; import org.opensearch.common.lifecycle.AbstractLifecycleComponent; import org.opensearch.common.remote.RemoteWritableEntityStore; @@ -63,7 +63,7 @@ public class InternalRemoteRoutingTableService extends AbstractLifecycleComponen private final Supplier repositoriesService; private Compressor compressor; private RemoteWritableEntityStore remoteIndexRoutingTableStore; - private RemoteWritableEntityStore remoteRoutingTableDiffStore; + private RemoteWritableEntityStore, RemoteRoutingTableDiff> remoteRoutingTableDiffStore; private final ClusterSettings clusterSettings; private BlobStoreRepository blobStoreRepository; private final ThreadPool threadPool; @@ -95,16 +95,9 @@ public List getIndicesRouting(RoutingTable routingTable) { * @param after current routing table * @return incremental diff of the previous and current routing table */ - public DiffableUtils.MapDiff> getIndicesRoutingMapDiff( - RoutingTable before, - RoutingTable after - ) { - return DiffableUtils.diff( - before.getIndicesRouting(), - after.getIndicesRouting(), - DiffableUtils.getStringKeySerializer(), - CUSTOM_ROUTING_TABLE_DIFFABLE_VALUE_SERIALIZER - ); + @Override + public StringKeyDiffProvider 
getIndicesRoutingMapDiff(RoutingTable before, RoutingTable after) { + return new RoutingTableIncrementalDiff(before, after); } /** @@ -142,18 +135,16 @@ public void getAsyncIndexRoutingDiffWriteAction( String clusterUUID, long term, long version, - Map> indexRoutingTableDiff, + StringKeyDiffProvider routingTableDiff, LatchedActionListener latchedActionListener ) { - RoutingTableIncrementalDiff routingTableIncrementalDiff = new RoutingTableIncrementalDiff(indexRoutingTableDiff); RemoteRoutingTableDiff remoteRoutingTableDiff = new RemoteRoutingTableDiff( - routingTableIncrementalDiff, + (RoutingTableIncrementalDiff) routingTableDiff, clusterUUID, compressor, term, version ); - ActionListener completionListener = ActionListener.wrap( resp -> latchedActionListener.onResponse(remoteRoutingTableDiff.getUploadedMetadata()), ex -> latchedActionListener.onFailure( @@ -209,15 +200,14 @@ public void getAsyncIndexRoutingReadAction( public void getAsyncIndexRoutingTableDiffReadAction( String clusterUUID, String uploadedFilename, - LatchedActionListener latchedActionListener + LatchedActionListener> latchedActionListener ) { - ActionListener actionListener = ActionListener.wrap( + ActionListener> actionListener = ActionListener.wrap( latchedActionListener::onResponse, latchedActionListener::onFailure ); RemoteRoutingTableDiff remoteRoutingTableDiff = new RemoteRoutingTableDiff(uploadedFilename, clusterUUID, compressor); - remoteRoutingTableDiffStore.readAsync(remoteRoutingTableDiff, actionListener); } diff --git a/server/src/main/java/org/opensearch/cluster/routing/remote/NoopRemoteRoutingTableService.java b/server/src/main/java/org/opensearch/cluster/routing/remote/NoopRemoteRoutingTableService.java index 1ebf3206212a1..17687199c39d6 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/remote/NoopRemoteRoutingTableService.java +++ b/server/src/main/java/org/opensearch/cluster/routing/remote/NoopRemoteRoutingTableService.java @@ -10,16 +10,15 @@ import org.opensearch.action.LatchedActionListener; import org.opensearch.cluster.Diff; -import org.opensearch.cluster.DiffableUtils; import org.opensearch.cluster.routing.IndexRoutingTable; import org.opensearch.cluster.routing.RoutingTable; import org.opensearch.cluster.routing.RoutingTableIncrementalDiff; +import org.opensearch.cluster.routing.StringKeyDiffProvider; import org.opensearch.common.lifecycle.AbstractLifecycleComponent; import org.opensearch.gateway.remote.ClusterMetadataManifest; import java.io.IOException; import java.util.List; -import java.util.Map; /** * Noop impl for RemoteRoutingTableService. 
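To see how the reworked incremental diff fits together, here is a minimal lifecycle sketch; `before` and `after` are placeholder RoutingTable instances taken from two successive cluster states, and BytesStreamOutput merely stands in for whatever stream the remote-store layer actually uses:

    // Build a diff between two routing tables, round-trip it through the wire format
    // defined by writeTo/readFrom above, and apply it to recover the newer table.
    RoutingTableIncrementalDiff diff = new RoutingTableIncrementalDiff(before, after);

    BytesStreamOutput out = new BytesStreamOutput();
    diff.writeTo(out);
    RoutingTableIncrementalDiff wireDiff = RoutingTableIncrementalDiff.readFrom(out.bytes().streamInput());

    RoutingTable rebuilt = wireDiff.apply(before);
    assert rebuilt.version() == after.version();
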
@@ -32,16 +31,8 @@ public List getIndicesRouting(RoutingTable routingTable) { } @Override - public DiffableUtils.MapDiff> getIndicesRoutingMapDiff( - RoutingTable before, - RoutingTable after - ) { - return DiffableUtils.diff( - Map.of(), - Map.of(), - DiffableUtils.getStringKeySerializer(), - CUSTOM_ROUTING_TABLE_DIFFABLE_VALUE_SERIALIZER - ); + public StringKeyDiffProvider getIndicesRoutingMapDiff(RoutingTable before, RoutingTable after) { + return new RoutingTableIncrementalDiff(RoutingTable.builder().build(), RoutingTable.builder().build()); } @Override @@ -60,7 +51,7 @@ public void getAsyncIndexRoutingDiffWriteAction( String clusterUUID, long term, long version, - Map> indexRoutingTableDiff, + StringKeyDiffProvider routingTableDiff, LatchedActionListener latchedActionListener ) { // noop @@ -89,7 +80,7 @@ public void getAsyncIndexRoutingReadAction( public void getAsyncIndexRoutingTableDiffReadAction( String clusterUUID, String uploadedFilename, - LatchedActionListener latchedActionListener + LatchedActionListener> latchedActionListener ) { // noop } diff --git a/server/src/main/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableService.java b/server/src/main/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableService.java index 0811a5f3010f4..d7ef3a29aa21f 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableService.java +++ b/server/src/main/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableService.java @@ -10,20 +10,14 @@ import org.opensearch.action.LatchedActionListener; import org.opensearch.cluster.Diff; -import org.opensearch.cluster.DiffableUtils; import org.opensearch.cluster.routing.IndexRoutingTable; -import org.opensearch.cluster.routing.IndexShardRoutingTable; import org.opensearch.cluster.routing.RoutingTable; -import org.opensearch.cluster.routing.RoutingTableIncrementalDiff; +import org.opensearch.cluster.routing.StringKeyDiffProvider; import org.opensearch.common.lifecycle.LifecycleComponent; -import org.opensearch.core.common.io.stream.StreamInput; -import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.gateway.remote.ClusterMetadataManifest; import java.io.IOException; -import java.util.ArrayList; import java.util.List; -import java.util.Map; /** * A Service which provides APIs to upload and download routing table from remote store. 
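Circling back to the SearchReplicaAllocationDecider introduced earlier in this patch, a hedged example of how its dedicated-include filter could be switched on; the attribute name `node_type` and value `search` are purely illustrative, and `client` is assumed to be any node or transport client:

    // Illustrative only: keep search-only replicas on nodes with node.attr.node_type=search,
    // while the decider's second NO branch keeps all other shard copies off those nodes.
    Settings searchReplicaFilter = Settings.builder()
        .put("cluster.routing.allocation.search.replica.dedicated.include.node_type", "search")
        .build();
    client.admin().cluster().prepareUpdateSettings().setPersistentSettings(searchReplicaFilter).get();
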
@@ -32,38 +26,6 @@ */ public interface RemoteRoutingTableService extends LifecycleComponent { - public static final DiffableUtils.DiffableValueSerializer CUSTOM_ROUTING_TABLE_DIFFABLE_VALUE_SERIALIZER = - new DiffableUtils.DiffableValueSerializer() { - @Override - public IndexRoutingTable read(StreamInput in, String key) throws IOException { - return IndexRoutingTable.readFrom(in); - } - - @Override - public void write(IndexRoutingTable value, StreamOutput out) throws IOException { - value.writeTo(out); - } - - @Override - public Diff readDiff(StreamInput in, String key) throws IOException { - return IndexRoutingTable.readDiffFrom(in); - } - - @Override - public Diff diff(IndexRoutingTable currentState, IndexRoutingTable previousState) { - List diffs = new ArrayList<>(); - for (Map.Entry entry : currentState.getShards().entrySet()) { - Integer index = entry.getKey(); - IndexShardRoutingTable currentShardRoutingTable = entry.getValue(); - IndexShardRoutingTable previousShardRoutingTable = previousState.shard(index); - if (previousShardRoutingTable == null || !previousShardRoutingTable.equals(currentShardRoutingTable)) { - diffs.add(currentShardRoutingTable); - } - } - return new RoutingTableIncrementalDiff.IndexRoutingTableIncrementalDiff(diffs); - } - }; - List getIndicesRouting(RoutingTable routingTable); void getAsyncIndexRoutingReadAction( @@ -75,7 +37,7 @@ void getAsyncIndexRoutingReadAction( void getAsyncIndexRoutingTableDiffReadAction( String clusterUUID, String uploadedFilename, - LatchedActionListener latchedActionListener + LatchedActionListener> latchedActionListener ); List getUpdatedIndexRoutingTableMetadata( @@ -83,10 +45,7 @@ List getUpdatedIndexRoutingTableM List allIndicesRouting ); - DiffableUtils.MapDiff> getIndicesRoutingMapDiff( - RoutingTable before, - RoutingTable after - ); + StringKeyDiffProvider getIndicesRoutingMapDiff(RoutingTable before, RoutingTable after); void getAsyncIndexRoutingWriteAction( String clusterUUID, @@ -100,7 +59,7 @@ void getAsyncIndexRoutingDiffWriteAction( String clusterUUID, long term, long version, - Map> indexRoutingTableDiff, + StringKeyDiffProvider routingTableDiff, LatchedActionListener latchedActionListener ); @@ -110,8 +69,8 @@ List getAllUploadedIndicesRouting List indicesRoutingToDelete ); - public void deleteStaleIndexRoutingPaths(List stalePaths) throws IOException; + void deleteStaleIndexRoutingPaths(List stalePaths) throws IOException; - public void deleteStaleIndexRoutingDiffPaths(List stalePaths) throws IOException; + void deleteStaleIndexRoutingDiffPaths(List stalePaths) throws IOException; } diff --git a/server/src/main/java/org/opensearch/cluster/service/ClusterApplier.java b/server/src/main/java/org/opensearch/cluster/service/ClusterApplier.java index 5b3f7f1001779..df31b0e94d734 100644 --- a/server/src/main/java/org/opensearch/cluster/service/ClusterApplier.java +++ b/server/src/main/java/org/opensearch/cluster/service/ClusterApplier.java @@ -49,6 +49,12 @@ public interface ClusterApplier { */ void setInitialState(ClusterState initialState); + /** + * Sets the pre-commit state for the applier. 
+ * @param clusterState state that has been committed by coordinator to store + */ + void setPreCommitState(ClusterState clusterState); + /** * Method to invoke when a new cluster state is available to be applied * diff --git a/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java b/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java index b2548a8976c73..47080cfbde692 100644 --- a/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java +++ b/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java @@ -119,7 +119,7 @@ public class ClusterApplierService extends AbstractLifecycleComponent implements private final Collection clusterStateListeners = new CopyOnWriteArrayList<>(); private final Map timeoutClusterStateListeners = new ConcurrentHashMap<>(); - + private final AtomicReference preCommitState = new AtomicReference<>(); // last state which is yet to be applied private final AtomicReference state; // last applied state private final String nodeName; @@ -750,4 +750,18 @@ protected long currentTimeInMillis() { protected boolean applicationMayFail() { return false; } + + /** + * Pre-commit State of the cluster-applier + * @return ClusterState + */ + public ClusterState preCommitState() { + return preCommitState.get(); + } + + @Override + public void setPreCommitState(ClusterState clusterState) { + preCommitState.set(clusterState); + } + } diff --git a/server/src/main/java/org/opensearch/cluster/service/ClusterManagerTaskThrottler.java b/server/src/main/java/org/opensearch/cluster/service/ClusterManagerTaskThrottler.java index 827f3a12fbce4..39ce218dd801a 100644 --- a/server/src/main/java/org/opensearch/cluster/service/ClusterManagerTaskThrottler.java +++ b/server/src/main/java/org/opensearch/cluster/service/ClusterManagerTaskThrottler.java @@ -33,7 +33,7 @@ *

* Set specific setting to for setting the threshold of throttling of particular task type. * e.g : Set "cluster_manager.throttling.thresholds.put_mapping" to set throttling limit of "put mapping" tasks, - * Set it to default value(-1) to disable the throttling for this task type. + * Set it to default value(-1) to disable the throttling for this task type. */ public class ClusterManagerTaskThrottler implements TaskBatcherListener { private static final Logger logger = LogManager.getLogger(ClusterManagerTaskThrottler.class); @@ -69,7 +69,7 @@ public class ClusterManagerTaskThrottler implements TaskBatcherListener { private final int MIN_THRESHOLD_VALUE = -1; // Disabled throttling private final ClusterManagerTaskThrottlerListener clusterManagerTaskThrottlerListener; - private final ConcurrentMap tasksCount; + final ConcurrentMap tasksCount; private final ConcurrentMap tasksThreshold; private final Supplier minNodeVersionSupplier; @@ -209,30 +209,59 @@ Long getThrottlingLimit(final String taskKey) { return tasksThreshold.get(taskKey); } + private void failFastWhenThrottlingThresholdsAreAlreadyBreached( + final boolean throttlingEnabledWithThreshold, + final Long threshold, + final long existingTaskCount, + final int incomingTaskCount, + final String taskThrottlingKey + ) { + if (throttlingEnabledWithThreshold && shouldThrottle(threshold, existingTaskCount, incomingTaskCount)) { + throw new ClusterManagerThrottlingException("Throttling Exception : Limit exceeded for " + taskThrottlingKey); + } + } + @Override public void onBeginSubmit(List tasks) { - ThrottlingKey clusterManagerThrottlingKey = ((ClusterStateTaskExecutor) tasks.get(0).batchingKey) + final ThrottlingKey clusterManagerThrottlingKey = ((ClusterStateTaskExecutor) tasks.get(0).batchingKey) .getClusterManagerThrottlingKey(); - tasksCount.putIfAbsent(clusterManagerThrottlingKey.getTaskThrottlingKey(), 0L); - tasksCount.computeIfPresent(clusterManagerThrottlingKey.getTaskThrottlingKey(), (key, count) -> { - int size = tasks.size(); - if (clusterManagerThrottlingKey.isThrottlingEnabled()) { - Long threshold = tasksThreshold.get(clusterManagerThrottlingKey.getTaskThrottlingKey()); - if (threshold != null && shouldThrottle(threshold, count, size)) { - clusterManagerTaskThrottlerListener.onThrottle(clusterManagerThrottlingKey.getTaskThrottlingKey(), size); - logger.warn( - "Throwing Throttling Exception for [{}]. 
Trying to add [{}] tasks to queue, limit is set to [{}]", - clusterManagerThrottlingKey.getTaskThrottlingKey(), - tasks.size(), - threshold - ); - throw new ClusterManagerThrottlingException( - "Throttling Exception : Limit exceeded for " + clusterManagerThrottlingKey.getTaskThrottlingKey() - ); - } - } - return count + size; - }); + final String taskThrottlingKey = clusterManagerThrottlingKey.getTaskThrottlingKey(); + final Long threshold = getThrottlingLimit(taskThrottlingKey); + final boolean isThrottlingEnabledWithThreshold = clusterManagerThrottlingKey.isThrottlingEnabled() && threshold != null; + int incomingTaskCount = tasks.size(); + + try { + tasksCount.putIfAbsent(taskThrottlingKey, 0L); + // Perform shallow check before acquiring lock to avoid blocking of network threads + // if throttling is ongoing for a specific task + failFastWhenThrottlingThresholdsAreAlreadyBreached( + isThrottlingEnabledWithThreshold, + threshold, + tasksCount.get(taskThrottlingKey), + incomingTaskCount, + taskThrottlingKey + ); + + tasksCount.computeIfPresent(taskThrottlingKey, (key, existingTaskCount) -> { + failFastWhenThrottlingThresholdsAreAlreadyBreached( + isThrottlingEnabledWithThreshold, + threshold, + existingTaskCount, + incomingTaskCount, + taskThrottlingKey + ); + return existingTaskCount + incomingTaskCount; + }); + } catch (final ClusterManagerThrottlingException e) { + clusterManagerTaskThrottlerListener.onThrottle(taskThrottlingKey, incomingTaskCount); + logger.trace( + "Throwing Throttling Exception for [{}]. Trying to add [{}] tasks to queue, limit is set to [{}]", + taskThrottlingKey, + incomingTaskCount, + threshold + ); + throw e; + } } /** diff --git a/server/src/main/java/org/opensearch/cluster/service/ClusterManagerThrottlingException.java b/server/src/main/java/org/opensearch/cluster/service/ClusterManagerThrottlingException.java index 04fa9fa45d5ea..7a835910c400f 100644 --- a/server/src/main/java/org/opensearch/cluster/service/ClusterManagerThrottlingException.java +++ b/server/src/main/java/org/opensearch/cluster/service/ClusterManagerThrottlingException.java @@ -25,4 +25,10 @@ public ClusterManagerThrottlingException(String msg, Object... args) { public ClusterManagerThrottlingException(StreamInput in) throws IOException { super(in); } + + @Override + public Throwable fillInStackTrace() { + // This is on the hot path; stack traces are expensive to compute and not very useful for this exception, so don't fill it. + return this; + } } diff --git a/server/src/main/java/org/opensearch/cluster/service/ClusterService.java b/server/src/main/java/org/opensearch/cluster/service/ClusterService.java index c3c48dd8b87ef..1a79161d223e2 100644 --- a/server/src/main/java/org/opensearch/cluster/service/ClusterService.java +++ b/server/src/main/java/org/opensearch/cluster/service/ClusterService.java @@ -183,6 +183,14 @@ public ClusterState state() { return clusterApplierService.state(); } + /** + * The state that is persisted to store but may not be applied to cluster. + * @return ClusterState + */ + public ClusterState preCommitState() { + return clusterApplierService.preCommitState(); + } + /** * Adds a high priority applier of updated cluster states. 
*/ diff --git a/server/src/main/java/org/opensearch/common/compress/CompressedXContent.java b/server/src/main/java/org/opensearch/common/compress/CompressedXContent.java index 23fc6353dbad3..daf2ffab2d5e2 100644 --- a/server/src/main/java/org/opensearch/common/compress/CompressedXContent.java +++ b/server/src/main/java/org/opensearch/common/compress/CompressedXContent.java @@ -38,6 +38,7 @@ import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.core.common.io.stream.BufferedChecksumStreamOutput; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.compress.Compressor; @@ -169,6 +170,10 @@ public void writeTo(StreamOutput out) throws IOException { out.writeByteArray(bytes); } + public void writeVerifiableTo(BufferedChecksumStreamOutput out) throws IOException { + out.writeInt(crc32); + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/server/src/main/java/org/opensearch/common/remote/RemoteWriteableBlobEntity.java b/server/src/main/java/org/opensearch/common/remote/RemoteWriteableBlobEntity.java index f034ce2d1adf1..4d9adb48d2a3a 100644 --- a/server/src/main/java/org/opensearch/common/remote/RemoteWriteableBlobEntity.java +++ b/server/src/main/java/org/opensearch/common/remote/RemoteWriteableBlobEntity.java @@ -63,6 +63,17 @@ public String[] getBlobPathTokens() { public abstract String generateBlobFileName(); + /** + * Generate the blob path for the remote entity by adding a custom prefix. + * This custom prefix may be generated by any of the strategies defined in {@link org.opensearch.index.remote.RemoteStoreEnums} + * The default implementation returns the same path as passed in the argument. + * @param blobPath The remote path on which the remote entity is to be uploaded + * @return The modified remote path after adding a custom prefix at which the remote entity will be uploaded. 
+ */ + public BlobPath getPrefixedPath(BlobPath blobPath) { + return blobPath; + } + public String clusterUUID() { return clusterUUID; } diff --git a/server/src/main/java/org/opensearch/common/remote/RemoteWriteableEntityBlobStore.java b/server/src/main/java/org/opensearch/common/remote/RemoteWriteableEntityBlobStore.java index baa44a7c9bde9..b5e074874dd38 100644 --- a/server/src/main/java/org/opensearch/common/remote/RemoteWriteableEntityBlobStore.java +++ b/server/src/main/java/org/opensearch/common/remote/RemoteWriteableEntityBlobStore.java @@ -102,7 +102,7 @@ public BlobPath getBlobPathForUpload(final RemoteWriteableBlobEntity obj) { for (String token : obj.getBlobPathParameters().getPathTokens()) { blobPath = blobPath.add(token); } - return blobPath; + return obj.getPrefixedPath(blobPath); } public BlobPath getBlobPathForDownload(final RemoteWriteableBlobEntity obj) { diff --git a/server/src/main/java/org/opensearch/common/settings/AbstractScopedSettings.java b/server/src/main/java/org/opensearch/common/settings/AbstractScopedSettings.java index 117ed66fcb451..7655135b06d6c 100644 --- a/server/src/main/java/org/opensearch/common/settings/AbstractScopedSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/AbstractScopedSettings.java @@ -790,6 +790,36 @@ public T get(Setting setting) { return setting.get(this.lastSettingsApplied, settings); } + /** + * Returns the value for the given setting if it is explicitly set, + * otherwise will return null instead of default value + **/ + public T getOrNull(Setting setting) { + if (setting.getProperties().contains(scope) == false) { + throw new SettingsException( + "settings scope doesn't match the setting scope [" + this.scope + "] not in [" + setting.getProperties() + "]" + ); + } + if (get(setting.getKey()) == null) { + throw new SettingsException("setting " + setting.getKey() + " has not been registered"); + } + if (setting.exists(lastSettingsApplied)) { + return setting.get(lastSettingsApplied); + } + if (setting.exists(settings)) { + return setting.get(settings); + } + if (setting.fallbackSetting != null) { + if (setting.fallbackSetting.exists(lastSettingsApplied)) { + return setting.fallbackSetting.get(lastSettingsApplied); + } + if (setting.fallbackSetting.exists(settings)) { + return setting.fallbackSetting.get(settings); + } + } + return null; + } + /** * Updates a target settings builder with new, updated or deleted settings from a given settings builder. *

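As a small usage sketch for the new getOrNull accessor above (SOME_SETTING is a placeholder for any registered cluster-scoped Setting; unregistered settings or a scope mismatch still throw SettingsException, as in the hunk):

    // getOrNull returns the configured value only if it was explicitly set, directly or via a
    // fallback setting; otherwise it returns null rather than the setting's registered default.
    Integer explicit = clusterSettings.getOrNull(SOME_SETTING);
    if (explicit != null) {
        // explicitly configured -> honour the override
    } else {
        // not configured anywhere -> caller may skip the override instead of applying a default
    }
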
diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 9a6b3f1118709..09832e2b41b6d 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -76,6 +76,7 @@ import org.opensearch.cluster.routing.allocation.decider.FilterAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.NodeLoadAwareAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.SameShardAllocationDecider; +import org.opensearch.cluster.routing.allocation.decider.SearchReplicaAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider; import org.opensearch.cluster.service.ClusterApplierService; @@ -107,6 +108,7 @@ import org.opensearch.gateway.ShardsBatchGatewayAllocator; import org.opensearch.gateway.remote.RemoteClusterStateCleanupManager; import org.opensearch.gateway.remote.RemoteClusterStateService; +import org.opensearch.gateway.remote.RemoteIndexMetadataManager; import org.opensearch.gateway.remote.model.RemoteRoutingTableBlobStore; import org.opensearch.http.HttpTransportSettings; import org.opensearch.index.IndexModule; @@ -132,6 +134,7 @@ import org.opensearch.indices.fielddata.cache.IndicesFieldDataCache; import org.opensearch.indices.recovery.RecoverySettings; import org.opensearch.indices.store.IndicesStore; +import org.opensearch.ingest.IngestService; import org.opensearch.monitor.fs.FsHealthService; import org.opensearch.monitor.fs.FsService; import org.opensearch.monitor.jvm.JvmGcMonitorService; @@ -149,6 +152,7 @@ import org.opensearch.ratelimitting.admissioncontrol.AdmissionControlSettings; import org.opensearch.ratelimitting.admissioncontrol.settings.CpuBasedAdmissionControllerSettings; import org.opensearch.ratelimitting.admissioncontrol.settings.IoBasedAdmissionControllerSettings; +import org.opensearch.repositories.blobstore.BlobStoreRepository; import org.opensearch.repositories.fs.FsRepository; import org.opensearch.rest.BaseRestHandler; import org.opensearch.script.ScriptService; @@ -406,6 +410,7 @@ public void apply(Settings value, Settings current, Settings previous) { ClusterService.USER_DEFINED_METADATA, ClusterManagerService.MASTER_SERVICE_SLOW_TASK_LOGGING_THRESHOLD_SETTING, // deprecated ClusterManagerService.CLUSTER_MANAGER_SERVICE_SLOW_TASK_LOGGING_THRESHOLD_SETTING, + IngestService.MAX_NUMBER_OF_INGEST_PROCESSORS, SearchService.DEFAULT_SEARCH_TIMEOUT_SETTING, SearchService.DEFAULT_ALLOW_PARTIAL_SEARCH_RESULTS, TransportSearchAction.SHARD_COUNT_LIMIT_SETTING, @@ -547,6 +552,7 @@ public void apply(Settings value, Settings current, Settings previous) { SearchService.MAX_AGGREGATION_REWRITE_FILTERS, SearchService.INDICES_MAX_CLAUSE_COUNT_SETTING, SearchService.CARDINALITY_AGGREGATION_PRUNING_THRESHOLD, + SearchService.KEYWORD_INDEX_OR_DOC_VALUES_ENABLED, CreatePitController.PIT_INIT_KEEP_ALIVE, Node.WRITE_PORTS_FILE_SETTING, Node.NODE_NAME_SETTING, @@ -634,6 +640,7 @@ public void apply(Settings value, Settings current, Settings previous) { HandshakingTransportAddressConnector.PROBE_CONNECT_TIMEOUT_SETTING, HandshakingTransportAddressConnector.PROBE_HANDSHAKE_TIMEOUT_SETTING, SnapshotsService.MAX_CONCURRENT_SNAPSHOT_OPERATIONS_SETTING, + SnapshotsService.MAX_SHARDS_ALLOWED_IN_STATUS_API, 
FsHealthService.ENABLED_SETTING, FsHealthService.REFRESH_INTERVAL_SETTING, FsHealthService.SLOW_PATH_LOGGING_THRESHOLD_SETTING, @@ -727,16 +734,22 @@ public void apply(Settings value, Settings current, Settings previous) { // Remote cluster state settings RemoteClusterStateCleanupManager.REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING, RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING, + RemoteClusterStateService.REMOTE_PUBLICATION_SETTING, INDEX_METADATA_UPLOAD_TIMEOUT_SETTING, GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING, METADATA_MANIFEST_UPLOAD_TIMEOUT_SETTING, RemoteClusterStateService.REMOTE_STATE_READ_TIMEOUT_SETTING, + RemoteClusterStateService.CLUSTER_REMOTE_STORE_STATE_PATH_PREFIX, + RemoteIndexMetadataManager.REMOTE_INDEX_METADATA_PATH_TYPE_SETTING, + RemoteIndexMetadataManager.REMOTE_INDEX_METADATA_PATH_HASH_ALGO_SETTING, RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING, RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING, IndicesService.CLUSTER_REMOTE_INDEX_RESTRICT_ASYNC_DURABILITY_SETTING, IndicesService.CLUSTER_INDEX_RESTRICT_REPLICATION_TYPE_SETTING, RemoteRoutingTableBlobStore.REMOTE_ROUTING_TABLE_PATH_TYPE_SETTING, RemoteRoutingTableBlobStore.REMOTE_ROUTING_TABLE_PATH_HASH_ALGO_SETTING, + RemoteClusterStateService.REMOTE_CLUSTER_STATE_CHECKSUM_VALIDATION_MODE_SETTING, + RemoteRoutingTableBlobStore.CLUSTER_REMOTE_STORE_ROUTING_TABLE_PATH_PREFIX, // Admission Control Settings AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE, @@ -749,8 +762,9 @@ public void apply(Settings value, Settings current, Settings previous) { IoBasedAdmissionControllerSettings.INDEXING_IO_USAGE_LIMIT, // Concurrent segment search settings - SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING, + SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING, // deprecated SearchService.CONCURRENT_SEGMENT_SEARCH_TARGET_MAX_SLICE_COUNT_SETTING, + SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE, RemoteStoreSettings.CLUSTER_REMOTE_INDEX_SEGMENT_METADATA_RETENTION_MAX_COUNT_SETTING, RemoteStoreSettings.CLUSTER_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING, @@ -763,11 +777,15 @@ public void apply(Settings value, Settings current, Settings previous) { RemoteStoreSettings.CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_SCHEDULER_INTERVAL, RemoteStoreSettings.CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_LOOKBACK_INTERVAL, RemoteStoreSettings.CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED, + RemoteStoreSettings.CLUSTER_REMOTE_STORE_SEGMENTS_PATH_PREFIX, + RemoteStoreSettings.CLUSTER_REMOTE_STORE_TRANSLOG_PATH_PREFIX, + BlobStoreRepository.SNAPSHOT_SHARD_PATH_PREFIX_SETTING, SearchService.CLUSTER_ALLOW_DERIVED_FIELD_SETTING, // Composite index settings CompositeIndexSettings.STAR_TREE_INDEX_ENABLED_SETTING, + CompositeIndexSettings.COMPOSITE_INDEX_MAX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING, SystemTemplatesService.SETTING_APPLICATION_BASED_CONFIGURATION_TEMPLATES_ENABLED, @@ -805,6 +823,8 @@ public void apply(Settings value, Settings current, Settings previous) { OpenSearchOnHeapCacheSettings.EXPIRE_AFTER_ACCESS_SETTING.getConcreteSettingForNamespace( CacheType.INDICES_REQUEST_CACHE.getSettingPrefix() ) - ) + ), + List.of(FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL), + List.of(SearchReplicaAllocationDecider.SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_SETTING) ); } diff --git a/server/src/main/java/org/opensearch/common/settings/FeatureFlagSettings.java b/server/src/main/java/org/opensearch/common/settings/FeatureFlagSettings.java index d893d8d92be3b..73375f9eaa413 100644 --- 
a/server/src/main/java/org/opensearch/common/settings/FeatureFlagSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/FeatureFlagSettings.java @@ -37,8 +37,9 @@ protected FeatureFlagSettings( FeatureFlags.TIERED_REMOTE_INDEX_SETTING, FeatureFlags.REMOTE_STORE_MIGRATION_EXPERIMENTAL_SETTING, FeatureFlags.PLUGGABLE_CACHE_SETTING, - FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL_SETTING, FeatureFlags.STAR_TREE_INDEX_SETTING, - FeatureFlags.APPLICATION_BASED_CONFIGURATION_TEMPLATES_SETTING + FeatureFlags.APPLICATION_BASED_CONFIGURATION_TEMPLATES_SETTING, + FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL_SETTING, + FeatureFlags.TERM_VERSION_PRECOMMIT_ENABLE_SETTING ); } diff --git a/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java index 284eb43aa5509..8d56a942c5d6e 100644 --- a/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java @@ -223,6 +223,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { IndexSettings.SEARCHABLE_SNAPSHOT_INDEX_ID, IndexSettings.SEARCHABLE_SNAPSHOT_ID_NAME, IndexSettings.SEARCHABLE_SNAPSHOT_ID_UUID, + IndexSettings.SEARCHABLE_SNAPSHOT_SHARD_PATH_TYPE, // Settings for remote translog IndexSettings.INDEX_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING, @@ -237,7 +238,8 @@ public final class IndexScopedSettings extends AbstractScopedSettings { IndexSettings.INDEX_DOC_ID_FUZZY_SET_FALSE_POSITIVE_PROBABILITY_SETTING, // Settings for concurrent segment search - IndexSettings.INDEX_CONCURRENT_SEGMENT_SEARCH_SETTING, + IndexSettings.INDEX_CONCURRENT_SEGMENT_SEARCH_SETTING, // deprecated + IndexSettings.INDEX_CONCURRENT_SEGMENT_SEARCH_MODE, IndexSettings.INDEX_CONCURRENT_SEGMENT_SEARCH_MAX_SLICE_COUNT, IndexSettings.ALLOW_DERIVED_FIELDS, @@ -248,6 +250,11 @@ public final class IndexScopedSettings extends AbstractScopedSettings { StarTreeIndexSettings.DEFAULT_METRICS_LIST, StarTreeIndexSettings.DEFAULT_DATE_INTERVALS, StarTreeIndexSettings.STAR_TREE_MAX_DATE_INTERVALS_SETTING, + StarTreeIndexSettings.STAR_TREE_MAX_BASE_METRICS_SETTING, + StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING, + + IndexSettings.INDEX_CONTEXT_CREATED_VERSION, + IndexSettings.INDEX_CONTEXT_CURRENT_VERSION, // validate that built-in similarities don't get redefined Setting.groupSetting("index.similarity.", (s) -> { @@ -273,7 +280,9 @@ public final class IndexScopedSettings extends AbstractScopedSettings { */ public static final Map> FEATURE_FLAGGED_INDEX_SETTINGS = Map.of( FeatureFlags.TIERED_REMOTE_INDEX, - List.of(IndexModule.INDEX_STORE_LOCALITY_SETTING, IndexModule.INDEX_TIERING_STATE) + List.of(IndexModule.INDEX_STORE_LOCALITY_SETTING, IndexModule.INDEX_TIERING_STATE), + FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL, + List.of(IndexMetadata.INDEX_NUMBER_OF_SEARCH_REPLICAS_SETTING) ); public static final IndexScopedSettings DEFAULT_SCOPED_SETTINGS = new IndexScopedSettings(Settings.EMPTY, BUILT_IN_INDEX_SETTINGS); diff --git a/server/src/main/java/org/opensearch/common/util/FeatureFlags.java b/server/src/main/java/org/opensearch/common/util/FeatureFlags.java index e2554d61116ad..a5acea004c3b2 100644 --- a/server/src/main/java/org/opensearch/common/util/FeatureFlags.java +++ b/server/src/main/java/org/opensearch/common/util/FeatureFlags.java @@ -67,16 +67,13 @@ public class FeatureFlags { */ public static final String PLUGGABLE_CACHE = 
"opensearch.experimental.feature.pluggable.caching.enabled"; - /** - * Gates the functionality of remote routing table. - */ - public static final String REMOTE_PUBLICATION_EXPERIMENTAL = "opensearch.experimental.feature.remote_store.publication.enabled"; - /** * Gates the functionality of background task execution. */ public static final String BACKGROUND_TASK_EXECUTION_EXPERIMENTAL = "opensearch.experimental.feature.task.background.enabled"; + public static final String READER_WRITER_SPLIT_EXPERIMENTAL = "opensearch.experimental.feature.read.write.split.enabled"; + public static final Setting REMOTE_STORE_MIGRATION_EXPERIMENTAL_SETTING = Setting.boolSetting( REMOTE_STORE_MIGRATION_EXPERIMENTAL, false, @@ -99,8 +96,8 @@ public class FeatureFlags { public static final Setting PLUGGABLE_CACHE_SETTING = Setting.boolSetting(PLUGGABLE_CACHE, false, Property.NodeScope); - public static final Setting REMOTE_PUBLICATION_EXPERIMENTAL_SETTING = Setting.boolSetting( - REMOTE_PUBLICATION_EXPERIMENTAL, + public static final Setting READER_WRITER_SPLIT_EXPERIMENTAL_SETTING = Setting.boolSetting( + READER_WRITER_SPLIT_EXPERIMENTAL, false, Property.NodeScope ); @@ -122,6 +119,22 @@ public class FeatureFlags { Property.NodeScope ); + /** + * Gates the functionality of ApproximatePointRangeQuery where we approximate query results. + */ + public static final String APPROXIMATE_POINT_RANGE_QUERY = "opensearch.experimental.feature.approximate_point_range_query.enabled"; + public static final Setting APPROXIMATE_POINT_RANGE_QUERY_SETTING = Setting.boolSetting( + APPROXIMATE_POINT_RANGE_QUERY, + false, + Property.NodeScope + ); + public static final String TERM_VERSION_PRECOMMIT_ENABLE = "opensearch.experimental.optimization.termversion.precommit.enabled"; + public static final Setting TERM_VERSION_PRECOMMIT_ENABLE_SETTING = Setting.boolSetting( + TERM_VERSION_PRECOMMIT_ENABLE, + false, + Property.NodeScope + ); + private static final List> ALL_FEATURE_FLAG_SETTINGS = List.of( REMOTE_STORE_MIGRATION_EXPERIMENTAL_SETTING, EXTENSIONS_SETTING, @@ -130,9 +143,10 @@ public class FeatureFlags { DATETIME_FORMATTER_CACHING_SETTING, TIERED_REMOTE_INDEX_SETTING, PLUGGABLE_CACHE_SETTING, - REMOTE_PUBLICATION_EXPERIMENTAL_SETTING, STAR_TREE_INDEX_SETTING, - APPLICATION_BASED_CONFIGURATION_TEMPLATES_SETTING + APPLICATION_BASED_CONFIGURATION_TEMPLATES_SETTING, + READER_WRITER_SPLIT_EXPERIMENTAL_SETTING, + TERM_VERSION_PRECOMMIT_ENABLE_SETTING ); /** diff --git a/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java b/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java index 80ba57b7db4a9..b3836edcd7d6c 100644 --- a/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java +++ b/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java @@ -697,11 +697,26 @@ public String getLastUploadedManifestFile() { return lastUploadedManifestFile; } + @Override + public ClusterMetadataManifest getLastAcceptedManifest() { + return lastAcceptedManifest; + } + @Override public void setLastAcceptedState(ClusterState clusterState) { + // for non leader node, update the lastAcceptedClusterState + if (clusterState == null || clusterState.getNodes().isLocalNodeElectedClusterManager() == false) { + lastAcceptedState = clusterState; + return; + } try { final RemoteClusterStateManifestInfo manifestDetails; - if (shouldWriteFullClusterState(clusterState)) { + // Decide the codec version + int codecVersion = ClusterMetadataManifest.getCodecForVersion(clusterState.nodes().getMinNodeVersion()); + assert 
codecVersion >= 0 : codecVersion; + logger.info("codec version is {}", codecVersion); + + if (shouldWriteFullClusterState(clusterState, codecVersion)) { final Optional latestManifest = remoteClusterStateService.getLatestClusterMetadataManifest( clusterState.getClusterName().value(), clusterState.metadata().clusterUUID() @@ -718,7 +733,7 @@ public void setLastAcceptedState(ClusterState clusterState) { clusterState.metadata().clusterUUID() ); } - manifestDetails = remoteClusterStateService.writeFullMetadata(clusterState, previousClusterUUID); + manifestDetails = remoteClusterStateService.writeFullMetadata(clusterState, previousClusterUUID, codecVersion); } else { assert verifyManifestAndClusterState(lastAcceptedManifest, lastAcceptedState) == true : "Previous manifest and previous ClusterState are not in sync"; @@ -730,7 +745,7 @@ assert verifyManifestAndClusterState(lastAcceptedManifest, lastAcceptedState) == } assert verifyManifestAndClusterState(manifestDetails.getClusterMetadataManifest(), clusterState) == true : "Manifest and ClusterState are not in sync"; - lastAcceptedManifest = manifestDetails.getClusterMetadataManifest(); + setLastAcceptedManifest(manifestDetails.getClusterMetadataManifest()); lastAcceptedState = clusterState; lastUploadedManifestFile = manifestDetails.getManifestFileName(); } catch (Exception e) { @@ -739,9 +754,14 @@ assert verifyManifestAndClusterState(manifestDetails.getClusterMetadataManifest( } } + @Override + public void setLastAcceptedManifest(ClusterMetadataManifest manifest) { + this.lastAcceptedManifest = manifest; + } + @Override public PersistedStateStats getStats() { - return remoteClusterStateService.getStats(); + return remoteClusterStateService.getUploadStats(); } private boolean verifyManifestAndClusterState(ClusterMetadataManifest manifest, ClusterState clusterState) { @@ -758,11 +778,13 @@ private boolean verifyManifestAndClusterState(ClusterMetadataManifest manifest, return true; } - private boolean shouldWriteFullClusterState(ClusterState clusterState) { + private boolean shouldWriteFullClusterState(ClusterState clusterState, int codecVersion) { + assert lastAcceptedManifest == null || lastAcceptedManifest.getCodecVersion() <= codecVersion; if (lastAcceptedState == null || lastAcceptedManifest == null - || lastAcceptedState.term() != clusterState.term() - || lastAcceptedManifest.getOpensearchVersion() != Version.CURRENT) { + || (remoteClusterStateService.isRemotePublicationEnabled() == false && lastAcceptedState.term() != clusterState.term()) + || lastAcceptedManifest.getOpensearchVersion() != Version.CURRENT + || lastAcceptedManifest.getCodecVersion() != codecVersion) { return true; } return false; @@ -774,19 +796,32 @@ public void markLastAcceptedStateAsCommitted() { assert lastAcceptedState != null : "Last accepted state is not present"; assert lastAcceptedManifest != null : "Last accepted manifest is not present"; ClusterState clusterState = lastAcceptedState; - if (lastAcceptedState.metadata().clusterUUID().equals(Metadata.UNKNOWN_CLUSTER_UUID) == false - && lastAcceptedState.metadata().clusterUUIDCommitted() == false) { + boolean shouldCommitVotingConfig = shouldCommitVotingConfig(); + boolean isClusterUUIDUnknown = lastAcceptedState.metadata().clusterUUID().equals(Metadata.UNKNOWN_CLUSTER_UUID); + boolean isClusterUUIDCommitted = lastAcceptedState.metadata().clusterUUIDCommitted(); + if (shouldCommitVotingConfig || (isClusterUUIDUnknown == false && isClusterUUIDCommitted == false)) { Metadata.Builder metadataBuilder = 
Metadata.builder(lastAcceptedState.metadata()); - metadataBuilder.clusterUUIDCommitted(true); + if (shouldCommitVotingConfig) { + metadataBuilder = commitVotingConfiguration(lastAcceptedState); + } + if (isClusterUUIDUnknown == false && isClusterUUIDCommitted == false) { + metadataBuilder.clusterUUIDCommitted(true); + } clusterState = ClusterState.builder(lastAcceptedState).metadata(metadataBuilder).build(); } - final RemoteClusterStateManifestInfo committedManifestDetails = remoteClusterStateService.markLastStateAsCommitted( - clusterState, - lastAcceptedManifest - ); - lastAcceptedManifest = committedManifestDetails.getClusterMetadataManifest(); + if (clusterState.getNodes().isLocalNodeElectedClusterManager()) { + final RemoteClusterStateManifestInfo committedManifestDetails = remoteClusterStateService.markLastStateAsCommitted( + clusterState, + lastAcceptedManifest, + shouldCommitVotingConfig + ); + assert committedManifestDetails != null; + setLastAcceptedManifest(committedManifestDetails.getClusterMetadataManifest()); + lastUploadedManifestFile = committedManifestDetails.getManifestFileName(); + } else { + setLastAcceptedManifest(ClusterMetadataManifest.builder(lastAcceptedManifest).committed(true).build()); + } lastAcceptedState = clusterState; - lastUploadedManifestFile = committedManifestDetails.getManifestFileName(); } catch (Exception e) { handleExceptionOnWrite(e); } @@ -797,6 +832,10 @@ public void close() throws IOException { remoteClusterStateService.close(); } + private boolean shouldCommitVotingConfig() { + return !lastAcceptedState.getLastAcceptedConfiguration().equals(lastAcceptedState.getLastCommittedConfiguration()); + } + private void handleExceptionOnWrite(Exception e) { throw ExceptionsHelper.convertToRuntime(e); } diff --git a/server/src/main/java/org/opensearch/gateway/ShardsBatchGatewayAllocator.java b/server/src/main/java/org/opensearch/gateway/ShardsBatchGatewayAllocator.java index d18304ea73ed0..5e2dcbcd70b40 100644 --- a/server/src/main/java/org/opensearch/gateway/ShardsBatchGatewayAllocator.java +++ b/server/src/main/java/org/opensearch/gateway/ShardsBatchGatewayAllocator.java @@ -184,11 +184,11 @@ public void cleanCaches() { // for tests protected ShardsBatchGatewayAllocator() { - this(DEFAULT_SHARD_BATCH_SIZE); + this(DEFAULT_SHARD_BATCH_SIZE, null); } - protected ShardsBatchGatewayAllocator(long batchSize) { - this.rerouteService = null; + protected ShardsBatchGatewayAllocator(long batchSize, RerouteService rerouteService) { + this.rerouteService = rerouteService; this.batchStartedAction = null; this.primaryShardBatchAllocator = null; this.batchStoreAction = null; @@ -297,6 +297,18 @@ public void run() { public void onComplete() { logger.trace("Triggering oncomplete after timeout for [{}] primary shards", timedOutPrimaryShardIds.size()); primaryBatchShardAllocator.allocateUnassignedBatchOnTimeout(timedOutPrimaryShardIds, allocation, true); + if (timedOutPrimaryShardIds.isEmpty() == false) { + logger.trace("scheduling reroute after existing shards allocator timed out for primary shards"); + assert rerouteService != null; + rerouteService.reroute( + "reroute after existing shards allocator timed out", + Priority.HIGH, + ActionListener.wrap( + r -> logger.trace("reroute after existing shards allocator timed out completed"), + e -> logger.debug("reroute after existing shards allocator timed out failed", e) + ) + ); + } } }; } else { @@ -320,6 +332,18 @@ public void run() { public void onComplete() { logger.trace("Triggering oncomplete after timeout for [{}] 
replica shards", timedOutReplicaShardIds.size()); replicaBatchShardAllocator.allocateUnassignedBatchOnTimeout(timedOutReplicaShardIds, allocation, false); + if (timedOutReplicaShardIds.isEmpty() == false) { + logger.trace("scheduling reroute after existing shards allocator timed out for replica shards"); + assert rerouteService != null; + rerouteService.reroute( + "reroute after existing shards allocator timed out", + Priority.HIGH, + ActionListener.wrap( + r -> logger.trace("reroute after existing shards allocator timed out completed"), + e -> logger.debug("reroute after existing shards allocator timed out failed", e) + ) + ); + } } }; } diff --git a/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayMetaState.java b/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayMetaState.java index 647e3632ea0ca..7f634534d1ed6 100644 --- a/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayMetaState.java +++ b/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayMetaState.java @@ -133,7 +133,7 @@ public Request(StreamInput in) throws IOException { } public Request(String... nodesIds) { - super(nodesIds); + super(false, nodesIds); } } diff --git a/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShards.java b/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShards.java index 4b1f611bb88ab..b9cfada1910ce 100644 --- a/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShards.java +++ b/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShards.java @@ -197,7 +197,7 @@ public Request(StreamInput in) throws IOException { } public Request(ShardId shardId, String customDataPath, DiscoveryNode[] nodes) { - super(nodes); + super(false, nodes); this.shardId = Objects.requireNonNull(shardId); this.customDataPath = Objects.requireNonNull(customDataPath); } diff --git a/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShardsBatch.java b/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShardsBatch.java index 89362988b4d85..45ead58f9f195 100644 --- a/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShardsBatch.java +++ b/server/src/main/java/org/opensearch/gateway/TransportNodesListGatewayStartedShardsBatch.java @@ -182,7 +182,7 @@ public Request(StreamInput in) throws IOException { } public Request(DiscoveryNode[] nodes, Map shardAttributes) { - super(nodes); + super(false, nodes); this.shardAttributes = Objects.requireNonNull(shardAttributes); } diff --git a/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java b/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java index 71815b6ee324c..405e5cd784196 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java +++ b/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java @@ -20,10 +20,10 @@ import org.opensearch.core.xcontent.ToXContentFragment; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; -import org.opensearch.gateway.remote.ClusterMetadataManifest.Builder; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -45,7 +45,10 @@ public class ClusterMetadataManifest implements Writeable, ToXContentFragment { // also we introduce index 
routing-metadata, diff and other attributes as part of manifest // required for state publication public static final int CODEC_V3 = 3; // In Codec V3, we have introduced new diff field in diff-manifest's routing_table_diff + public static final int CODEC_V4 = 4; // In Codec V4, we have removed upserts and delete field for routing table in diff manifest and + // added checksum of cluster state. + public static final int[] CODEC_VERSIONS = { CODEC_V0, CODEC_V1, CODEC_V2, CODEC_V3, CODEC_V4 }; private static final ParseField CLUSTER_TERM_FIELD = new ParseField("cluster_term"); private static final ParseField STATE_VERSION_FIELD = new ParseField("state_version"); private static final ParseField CLUSTER_UUID_FIELD = new ParseField("cluster_uuid"); @@ -73,6 +76,7 @@ public class ClusterMetadataManifest implements Writeable, ToXContentFragment { ); private static final ParseField UPLOADED_CLUSTER_STATE_CUSTOM_METADATA = new ParseField("uploaded_cluster_state_custom_metadata"); private static final ParseField DIFF_MANIFEST = new ParseField("diff_manifest"); + private static final ParseField CHECKSUM = new ParseField("checksum"); private static ClusterMetadataManifest.Builder manifestV0Builder(Object[] fields) { return ClusterMetadataManifest.builder() @@ -114,6 +118,10 @@ private static ClusterMetadataManifest.Builder manifestV3Builder(Object[] fields return manifestV2Builder(fields); } + private static ClusterMetadataManifest.Builder manifestV4Builder(Object[] fields) { + return manifestV3Builder(fields).checksum(checksum(fields)); + } + private static long term(Object[] fields) { return (long) fields[0]; } @@ -216,6 +224,10 @@ private static ClusterStateDiffManifest diffManifest(Object[] fields) { return (ClusterStateDiffManifest) fields[23]; } + private static ClusterStateChecksum checksum(Object[] fields) { + return (ClusterStateChecksum) fields[24]; + } + private static final ConstructingObjectParser PARSER_V0 = new ConstructingObjectParser<>( "cluster_metadata_manifest", fields -> manifestV0Builder(fields).build() @@ -236,13 +248,44 @@ private static ClusterStateDiffManifest diffManifest(Object[] fields) { fields -> manifestV3Builder(fields).build() ); - private static final ConstructingObjectParser CURRENT_PARSER = PARSER_V3; + private static final ConstructingObjectParser PARSER_V4 = new ConstructingObjectParser<>( + "cluster_metadata_manifest", + fields -> manifestV4Builder(fields).build() + ); + + private static final ConstructingObjectParser CURRENT_PARSER = PARSER_V4; + + public static final int MANIFEST_CURRENT_CODEC_VERSION = CODEC_V4; + + private static final Map VERSION_TO_CODEC_MAPPING; static { declareParser(PARSER_V0, CODEC_V0); declareParser(PARSER_V1, CODEC_V1); declareParser(PARSER_V2, CODEC_V2); declareParser(PARSER_V3, CODEC_V3); + declareParser(PARSER_V4, CODEC_V4); + + assert Arrays.stream(CODEC_VERSIONS).max().getAsInt() == MANIFEST_CURRENT_CODEC_VERSION; + Map versionToCodecMapping = new HashMap<>(); + for (Version version : Version.getDeclaredVersions(Version.class)) { + if (version.onOrAfter(Version.V_2_10_0) && version.before(Version.V_2_12_0)) { + versionToCodecMapping.put(version, ClusterMetadataManifest.CODEC_V0); + } else if (version.onOrAfter(Version.V_2_12_0) && version.before(Version.V_2_15_0)) { + versionToCodecMapping.put(version, ClusterMetadataManifest.CODEC_V1); + } else if (version.onOrAfter(Version.V_2_15_0) && version.before(Version.V_2_16_0)) { + versionToCodecMapping.put(version, ClusterMetadataManifest.CODEC_V2); + } else if 
(version.onOrAfter(Version.V_2_16_0) && version.before(Version.V_2_17_0)) { + versionToCodecMapping.put(version, ClusterMetadataManifest.CODEC_V3); + } else if (version.onOrAfter(Version.V_2_17_0)) { + versionToCodecMapping.put(version, ClusterMetadataManifest.CODEC_V4); + } + } + VERSION_TO_CODEC_MAPPING = Collections.unmodifiableMap(versionToCodecMapping); + } + + public static int getCodecForVersion(Version version) { + return VERSION_TO_CODEC_MAPPING.getOrDefault(version, -1); } private static void declareParser(ConstructingObjectParser parser, long codec_version) { @@ -324,6 +367,13 @@ private static void declareParser(ConstructingObjectParser= CODEC_V4) { + parser.declareObject( + ConstructingObjectParser.optionalConstructorArg(), + (p, c) -> ClusterStateChecksum.fromXContent(p), + CHECKSUM + ); + } } private final int codecVersion; @@ -351,6 +401,7 @@ private static void declareParser(ConstructingObjectParser uploadedClusterStateCustomMap; private final ClusterStateDiffManifest diffManifest; + private ClusterStateChecksum clusterStateChecksum; public List getIndices() { return indices; @@ -459,6 +510,10 @@ public List getIndicesRouting() { return indicesRouting; } + public ClusterStateChecksum getClusterStateChecksum() { + return clusterStateChecksum; + } + public ClusterMetadataManifest( long clusterTerm, long version, @@ -484,7 +539,8 @@ public ClusterMetadataManifest( UploadedMetadataAttribute uploadedTransientSettingsMetadata, UploadedMetadataAttribute uploadedHashesOfConsistentSettings, Map uploadedClusterStateCustomMap, - ClusterStateDiffManifest diffManifest + ClusterStateDiffManifest diffManifest, + ClusterStateChecksum clusterStateChecksum ) { this.clusterTerm = clusterTerm; this.stateVersion = version; @@ -515,6 +571,7 @@ public ClusterMetadataManifest( this.uploadedClusterStateCustomMap = Collections.unmodifiableMap( uploadedClusterStateCustomMap != null ? 
uploadedClusterStateCustomMap : new HashMap<>() ); + this.clusterStateChecksum = clusterStateChecksum; } public ClusterMetadataManifest(StreamInput in) throws IOException { @@ -528,6 +585,7 @@ public ClusterMetadataManifest(StreamInput in) throws IOException { this.indices = Collections.unmodifiableList(in.readList(UploadedIndexMetadata::new)); this.previousClusterUUID = in.readString(); this.clusterUUIDCommitted = in.readBoolean(); + clusterStateChecksum = null; if (in.getVersion().onOrAfter(Version.V_2_15_0)) { this.codecVersion = in.readInt(); this.uploadedCoordinationMetadata = new UploadedMetadataAttribute(in); @@ -590,6 +648,9 @@ public ClusterMetadataManifest(StreamInput in) throws IOException { this.uploadedHashesOfConsistentSettings = null; this.uploadedClusterStateCustomMap = null; } + if (in.getVersion().onOrAfter(Version.V_2_17_0) && in.readBoolean()) { + clusterStateChecksum = new ClusterStateChecksum(in); + } } public static Builder builder() { @@ -687,6 +748,13 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(CODEC_VERSION_FIELD.getPreferredName(), getCodecVersion()); builder.field(GLOBAL_METADATA_FIELD.getPreferredName(), getGlobalMetadataFileName()); } + if (onOrAfterCodecVersion(CODEC_V4)) { + if (getClusterStateChecksum() != null) { + builder.startObject(CHECKSUM.getPreferredName()); + getClusterStateChecksum().toXContent(builder, params); + builder.endObject(); + } + } return builder; } @@ -746,6 +814,14 @@ public void writeTo(StreamOutput out) throws IOException { out.writeInt(codecVersion); out.writeString(globalMetadataFileName); } + if (out.getVersion().onOrAfter(Version.V_2_17_0)) { + if (clusterStateChecksum != null) { + out.writeBoolean(true); + clusterStateChecksum.writeTo(out); + } else { + out.writeBoolean(false); + } + } } @Override @@ -781,7 +857,8 @@ public boolean equals(Object o) { && Objects.equals(uploadedTransientSettingsMetadata, that.uploadedTransientSettingsMetadata) && Objects.equals(uploadedHashesOfConsistentSettings, that.uploadedHashesOfConsistentSettings) && Objects.equals(uploadedClusterStateCustomMap, that.uploadedClusterStateCustomMap) - && Objects.equals(diffManifest, that.diffManifest); + && Objects.equals(diffManifest, that.diffManifest) + && Objects.equals(clusterStateChecksum, that.clusterStateChecksum); } @Override @@ -811,7 +888,8 @@ public int hashCode() { uploadedTransientSettingsMetadata, uploadedHashesOfConsistentSettings, uploadedClusterStateCustomMap, - diffManifest + diffManifest, + clusterStateChecksum ); } @@ -836,6 +914,10 @@ public static ClusterMetadataManifest fromXContentV2(XContentParser parser) thro return PARSER_V2.parse(parser, null); } + public static ClusterMetadataManifest fromXContentV3(XContentParser parser) throws IOException { + return PARSER_V3.parse(parser, null); + } + public static ClusterMetadataManifest fromXContent(XContentParser parser) throws IOException { return CURRENT_PARSER.parse(parser, null); } @@ -872,6 +954,7 @@ public static class Builder { private UploadedMetadataAttribute hashesOfConsistentSettings; private Map clusterStateCustomMetadataMap; private ClusterStateDiffManifest diffManifest; + private ClusterStateChecksum checksum; public Builder indices(List indices) { this.indices = indices; @@ -1011,6 +1094,11 @@ public Builder diffManifest(ClusterStateDiffManifest diffManifest) { return this; } + public Builder checksum(ClusterStateChecksum checksum) { + this.checksum = checksum; + return this; + } + public Builder() { indices = new 
ArrayList<>(); customMetadataMap = new HashMap<>(); @@ -1043,6 +1131,7 @@ public Builder(ClusterMetadataManifest manifest) { this.diffManifest = manifest.diffManifest; this.hashesOfConsistentSettings = manifest.uploadedHashesOfConsistentSettings; this.clusterStateCustomMetadataMap = manifest.uploadedClusterStateCustomMap; + this.checksum = manifest.clusterStateChecksum; } public ClusterMetadataManifest build() { @@ -1071,7 +1160,8 @@ public ClusterMetadataManifest build() { transientSettingsMetadata, hashesOfConsistentSettings, clusterStateCustomMetadataMap, - diffManifest + diffManifest, + checksum ); } diff --git a/server/src/main/java/org/opensearch/gateway/remote/ClusterStateChecksum.java b/server/src/main/java/org/opensearch/gateway/remote/ClusterStateChecksum.java new file mode 100644 index 0000000000000..d6739c4572d1a --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/ClusterStateChecksum.java @@ -0,0 +1,485 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.metadata.DiffableStringMap; +import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.common.settings.Settings; +import org.opensearch.core.common.io.stream.BufferedChecksumStreamOutput; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.xcontent.ToXContentFragment; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.core.xcontent.XContentParseException; +import org.opensearch.core.xcontent.XContentParser; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; + +import com.jcraft.jzlib.JZlib; + +import static org.opensearch.core.xcontent.XContentParserUtils.ensureExpectedToken; + +/** + * Stores checksum for all components in cluster state. This will be used to ensure cluster state is same across all nodes in the cluster. 
+ */ +public class ClusterStateChecksum implements ToXContentFragment, Writeable { + + static final String ROUTING_TABLE_CS = "routing_table"; + static final String NODES_CS = "discovery_nodes"; + static final String BLOCKS_CS = "blocks"; + static final String CUSTOMS_CS = "customs"; + static final String COORDINATION_MD_CS = "coordination_md"; + static final String SETTINGS_MD_CS = "settings_md"; + static final String TRANSIENT_SETTINGS_MD_CS = "transient_settings_md"; + static final String TEMPLATES_MD_CS = "templates_md"; + static final String CUSTOM_MD_CS = "customs_md"; + static final String HASHES_MD_CS = "hashes_md"; + static final String INDICES_CS = "indices_md"; + private static final String CLUSTER_STATE_CS = "cluster_state"; + private static final int CHECKSUM_SIZE = 8; + private static final Logger logger = LogManager.getLogger(ClusterStateChecksum.class); + + long routingTableChecksum; + long nodesChecksum; + long blocksChecksum; + long clusterStateCustomsChecksum; + long coordinationMetadataChecksum; + long settingMetadataChecksum; + long transientSettingsMetadataChecksum; + long templatesMetadataChecksum; + long customMetadataMapChecksum; + long hashesOfConsistentSettingsChecksum; + long indicesChecksum; + long clusterStateChecksum; + + public ClusterStateChecksum(ClusterState clusterState) { + try ( + BytesStreamOutput out = new BytesStreamOutput(); + BufferedChecksumStreamOutput checksumOut = new BufferedChecksumStreamOutput(out) + ) { + clusterState.routingTable().writeVerifiableTo(checksumOut); + routingTableChecksum = checksumOut.getChecksum(); + + checksumOut.reset(); + clusterState.nodes().writeVerifiableTo(checksumOut); + nodesChecksum = checksumOut.getChecksum(); + + checksumOut.reset(); + clusterState.coordinationMetadata().writeVerifiableTo(checksumOut); + coordinationMetadataChecksum = checksumOut.getChecksum(); + + // Settings create sortedMap by default, so no explicit sorting required here. 
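+            // Each component is checksummed independently: the buffered checksum stream is reset before
+            // every write so the captured value covers only that component. Keeping per-component
+            // checksums lets getMismatchEntities() pinpoint which part of the cluster state diverged;
+            // the individual values are later folded into a single clusterStateChecksum in
+            // createClusterStateChecksum() via JZlib.crc32_combine.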
+ checksumOut.reset(); + Settings.writeSettingsToStream(clusterState.metadata().persistentSettings(), checksumOut); + settingMetadataChecksum = checksumOut.getChecksum(); + + checksumOut.reset(); + Settings.writeSettingsToStream(clusterState.metadata().transientSettings(), checksumOut); + transientSettingsMetadataChecksum = checksumOut.getChecksum(); + + checksumOut.reset(); + clusterState.metadata().templatesMetadata().writeVerifiableTo(checksumOut); + templatesMetadataChecksum = checksumOut.getChecksum(); + + checksumOut.reset(); + checksumOut.writeStringCollection(clusterState.metadata().customs().keySet()); + customMetadataMapChecksum = checksumOut.getChecksum(); + + checksumOut.reset(); + ((DiffableStringMap) clusterState.metadata().hashesOfConsistentSettings()).writeTo(checksumOut); + hashesOfConsistentSettingsChecksum = checksumOut.getChecksum(); + + checksumOut.reset(); + checksumOut.writeMapValues( + clusterState.metadata().indices(), + (stream, value) -> value.writeVerifiableTo((BufferedChecksumStreamOutput) stream) + ); + indicesChecksum = checksumOut.getChecksum(); + + checksumOut.reset(); + clusterState.blocks().writeVerifiableTo(checksumOut); + blocksChecksum = checksumOut.getChecksum(); + + checksumOut.reset(); + checksumOut.writeStringCollection(clusterState.customs().keySet()); + clusterStateCustomsChecksum = checksumOut.getChecksum(); + } catch (IOException e) { + logger.error("Failed to create checksum for cluster state.", e); + throw new RemoteStateTransferException("Failed to create checksum for cluster state.", e); + } + createClusterStateChecksum(); + } + + private void createClusterStateChecksum() { + clusterStateChecksum = JZlib.crc32_combine(routingTableChecksum, nodesChecksum, CHECKSUM_SIZE); + clusterStateChecksum = JZlib.crc32_combine(clusterStateChecksum, blocksChecksum, CHECKSUM_SIZE); + clusterStateChecksum = JZlib.crc32_combine(clusterStateChecksum, clusterStateCustomsChecksum, CHECKSUM_SIZE); + clusterStateChecksum = JZlib.crc32_combine(clusterStateChecksum, coordinationMetadataChecksum, CHECKSUM_SIZE); + clusterStateChecksum = JZlib.crc32_combine(clusterStateChecksum, settingMetadataChecksum, CHECKSUM_SIZE); + clusterStateChecksum = JZlib.crc32_combine(clusterStateChecksum, transientSettingsMetadataChecksum, CHECKSUM_SIZE); + clusterStateChecksum = JZlib.crc32_combine(clusterStateChecksum, templatesMetadataChecksum, CHECKSUM_SIZE); + clusterStateChecksum = JZlib.crc32_combine(clusterStateChecksum, customMetadataMapChecksum, CHECKSUM_SIZE); + clusterStateChecksum = JZlib.crc32_combine(clusterStateChecksum, hashesOfConsistentSettingsChecksum, CHECKSUM_SIZE); + clusterStateChecksum = JZlib.crc32_combine(clusterStateChecksum, indicesChecksum, CHECKSUM_SIZE); + } + + public static ClusterStateChecksum.Builder builder() { + return new ClusterStateChecksum.Builder(); + } + + public ClusterStateChecksum( + long routingTableChecksum, + long nodesChecksum, + long blocksChecksum, + long clusterStateCustomsChecksum, + long coordinationMetadataChecksum, + long settingMetadataChecksum, + long transientSettingsMetadataChecksum, + long templatesMetadataChecksum, + long customMetadataMapChecksum, + long hashesOfConsistentSettingsChecksum, + long indicesChecksum, + long clusterStateChecksum + ) { + this.routingTableChecksum = routingTableChecksum; + this.nodesChecksum = nodesChecksum; + this.blocksChecksum = blocksChecksum; + this.clusterStateCustomsChecksum = clusterStateCustomsChecksum; + this.coordinationMetadataChecksum = coordinationMetadataChecksum; + 
this.settingMetadataChecksum = settingMetadataChecksum; + this.transientSettingsMetadataChecksum = transientSettingsMetadataChecksum; + this.templatesMetadataChecksum = templatesMetadataChecksum; + this.customMetadataMapChecksum = customMetadataMapChecksum; + this.hashesOfConsistentSettingsChecksum = hashesOfConsistentSettingsChecksum; + this.indicesChecksum = indicesChecksum; + this.clusterStateChecksum = clusterStateChecksum; + } + + public ClusterStateChecksum(StreamInput in) throws IOException { + routingTableChecksum = in.readLong(); + nodesChecksum = in.readLong(); + blocksChecksum = in.readLong(); + clusterStateCustomsChecksum = in.readLong(); + coordinationMetadataChecksum = in.readLong(); + settingMetadataChecksum = in.readLong(); + transientSettingsMetadataChecksum = in.readLong(); + templatesMetadataChecksum = in.readLong(); + customMetadataMapChecksum = in.readLong(); + hashesOfConsistentSettingsChecksum = in.readLong(); + indicesChecksum = in.readLong(); + clusterStateChecksum = in.readLong(); + } + + public static ClusterStateChecksum fromXContent(XContentParser parser) throws IOException { + ClusterStateChecksum.Builder builder = new ClusterStateChecksum.Builder(); + if (parser.currentToken() == null) { // fresh parser? move to next token + parser.nextToken(); + } + if (parser.currentToken() == XContentParser.Token.START_OBJECT) { + parser.nextToken(); + } + ensureExpectedToken(XContentParser.Token.FIELD_NAME, parser.currentToken(), parser); + XContentParser.Token token; + String currentFieldName = parser.currentName(); + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (parser.currentToken() == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (token.isValue()) { + switch (currentFieldName) { + case ROUTING_TABLE_CS: + builder.routingTableChecksum(parser.longValue()); + break; + case NODES_CS: + builder.nodesChecksum(parser.longValue()); + break; + case BLOCKS_CS: + builder.blocksChecksum(parser.longValue()); + break; + case CUSTOMS_CS: + builder.clusterStateCustomsChecksum(parser.longValue()); + break; + case COORDINATION_MD_CS: + builder.coordinationMetadataChecksum(parser.longValue()); + break; + case SETTINGS_MD_CS: + builder.settingMetadataChecksum(parser.longValue()); + break; + case TRANSIENT_SETTINGS_MD_CS: + builder.transientSettingsMetadataChecksum(parser.longValue()); + break; + case TEMPLATES_MD_CS: + builder.templatesMetadataChecksum(parser.longValue()); + break; + case CUSTOM_MD_CS: + builder.customMetadataMapChecksum(parser.longValue()); + break; + case HASHES_MD_CS: + builder.hashesOfConsistentSettingsChecksum(parser.longValue()); + break; + case INDICES_CS: + builder.indicesChecksum(parser.longValue()); + break; + case CLUSTER_STATE_CS: + builder.clusterStateChecksum(parser.longValue()); + break; + default: + throw new XContentParseException("Unexpected field [" + currentFieldName + "]"); + } + } else { + throw new XContentParseException("Unexpected token [" + token + "]"); + } + } + return builder.build(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeLong(routingTableChecksum); + out.writeLong(nodesChecksum); + out.writeLong(blocksChecksum); + out.writeLong(clusterStateCustomsChecksum); + out.writeLong(coordinationMetadataChecksum); + out.writeLong(settingMetadataChecksum); + out.writeLong(transientSettingsMetadataChecksum); + out.writeLong(templatesMetadataChecksum); + out.writeLong(customMetadataMapChecksum); + 
out.writeLong(hashesOfConsistentSettingsChecksum); + out.writeLong(indicesChecksum); + out.writeLong(clusterStateChecksum); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.field(ROUTING_TABLE_CS, routingTableChecksum); + builder.field(NODES_CS, nodesChecksum); + builder.field(BLOCKS_CS, blocksChecksum); + builder.field(CUSTOMS_CS, clusterStateCustomsChecksum); + builder.field(COORDINATION_MD_CS, coordinationMetadataChecksum); + builder.field(SETTINGS_MD_CS, settingMetadataChecksum); + builder.field(TRANSIENT_SETTINGS_MD_CS, transientSettingsMetadataChecksum); + builder.field(TEMPLATES_MD_CS, templatesMetadataChecksum); + builder.field(CUSTOM_MD_CS, customMetadataMapChecksum); + builder.field(HASHES_MD_CS, hashesOfConsistentSettingsChecksum); + builder.field(INDICES_CS, indicesChecksum); + builder.field(CLUSTER_STATE_CS, clusterStateChecksum); + return builder; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ClusterStateChecksum that = (ClusterStateChecksum) o; + return routingTableChecksum == that.routingTableChecksum + && nodesChecksum == that.nodesChecksum + && blocksChecksum == that.blocksChecksum + && clusterStateCustomsChecksum == that.clusterStateCustomsChecksum + && coordinationMetadataChecksum == that.coordinationMetadataChecksum + && settingMetadataChecksum == that.settingMetadataChecksum + && transientSettingsMetadataChecksum == that.transientSettingsMetadataChecksum + && templatesMetadataChecksum == that.templatesMetadataChecksum + && customMetadataMapChecksum == that.customMetadataMapChecksum + && hashesOfConsistentSettingsChecksum == that.hashesOfConsistentSettingsChecksum + && indicesChecksum == that.indicesChecksum + && clusterStateChecksum == that.clusterStateChecksum; + } + + @Override + public int hashCode() { + return Objects.hash( + routingTableChecksum, + nodesChecksum, + blocksChecksum, + clusterStateCustomsChecksum, + coordinationMetadataChecksum, + settingMetadataChecksum, + transientSettingsMetadataChecksum, + templatesMetadataChecksum, + customMetadataMapChecksum, + hashesOfConsistentSettingsChecksum, + indicesChecksum, + clusterStateChecksum + ); + } + + @Override + public String toString() { + return "ClusterStateChecksum{" + + "routingTableChecksum=" + + routingTableChecksum + + ", nodesChecksum=" + + nodesChecksum + + ", blocksChecksum=" + + blocksChecksum + + ", clusterStateCustomsChecksum=" + + clusterStateCustomsChecksum + + ", coordinationMetadataChecksum=" + + coordinationMetadataChecksum + + ", settingMetadataChecksum=" + + settingMetadataChecksum + + ", transientSettingsMetadataChecksum=" + + transientSettingsMetadataChecksum + + ", templatesMetadataChecksum=" + + templatesMetadataChecksum + + ", customMetadataMapChecksum=" + + customMetadataMapChecksum + + ", hashesOfConsistentSettingsChecksum=" + + hashesOfConsistentSettingsChecksum + + ", indicesChecksum=" + + indicesChecksum + + ", clusterStateChecksum=" + + clusterStateChecksum + + '}'; + } + + public List getMismatchEntities(ClusterStateChecksum otherClusterStateChecksum) { + if (this.clusterStateChecksum == otherClusterStateChecksum.clusterStateChecksum) { + logger.debug("No mismatch in checksums."); + return List.of(); + } + List mismatches = new ArrayList<>(); + addIfMismatch(this.routingTableChecksum, otherClusterStateChecksum.routingTableChecksum, ROUTING_TABLE_CS, mismatches); + addIfMismatch(this.nodesChecksum, 
otherClusterStateChecksum.nodesChecksum, NODES_CS, mismatches); + addIfMismatch(this.blocksChecksum, otherClusterStateChecksum.blocksChecksum, BLOCKS_CS, mismatches); + addIfMismatch(this.clusterStateCustomsChecksum, otherClusterStateChecksum.clusterStateCustomsChecksum, CUSTOMS_CS, mismatches); + addIfMismatch( + this.coordinationMetadataChecksum, + otherClusterStateChecksum.coordinationMetadataChecksum, + COORDINATION_MD_CS, + mismatches + ); + addIfMismatch(this.settingMetadataChecksum, otherClusterStateChecksum.settingMetadataChecksum, SETTINGS_MD_CS, mismatches); + addIfMismatch( + this.transientSettingsMetadataChecksum, + otherClusterStateChecksum.transientSettingsMetadataChecksum, + TRANSIENT_SETTINGS_MD_CS, + mismatches + ); + addIfMismatch(this.templatesMetadataChecksum, otherClusterStateChecksum.templatesMetadataChecksum, TEMPLATES_MD_CS, mismatches); + addIfMismatch(this.customMetadataMapChecksum, otherClusterStateChecksum.customMetadataMapChecksum, CUSTOM_MD_CS, mismatches); + addIfMismatch( + this.hashesOfConsistentSettingsChecksum, + otherClusterStateChecksum.hashesOfConsistentSettingsChecksum, + HASHES_MD_CS, + mismatches + ); + addIfMismatch(this.indicesChecksum, otherClusterStateChecksum.indicesChecksum, INDICES_CS, mismatches); + + return mismatches; + } + + private void addIfMismatch(long checksum, long otherChecksum, String entityName, List mismatches) { + if (checksum != otherChecksum) { + mismatches.add(entityName); + } + } + + /** + * Builder for ClusterStateChecksum + */ + public static class Builder { + long routingTableChecksum; + long nodesChecksum; + long blocksChecksum; + long clusterStateCustomsChecksum; + long coordinationMetadataChecksum; + long settingMetadataChecksum; + long transientSettingsMetadataChecksum; + long templatesMetadataChecksum; + long customMetadataMapChecksum; + long hashesOfConsistentSettingsChecksum; + long indicesChecksum; + long clusterStateChecksum; + + public Builder routingTableChecksum(long routingTableChecksum) { + this.routingTableChecksum = routingTableChecksum; + return this; + } + + public Builder nodesChecksum(long nodesChecksum) { + this.nodesChecksum = nodesChecksum; + return this; + } + + public Builder blocksChecksum(long blocksChecksum) { + this.blocksChecksum = blocksChecksum; + return this; + } + + public Builder clusterStateCustomsChecksum(long clusterStateCustomsChecksum) { + this.clusterStateCustomsChecksum = clusterStateCustomsChecksum; + return this; + } + + public Builder coordinationMetadataChecksum(long coordinationMetadataChecksum) { + this.coordinationMetadataChecksum = coordinationMetadataChecksum; + return this; + } + + public Builder settingMetadataChecksum(long settingMetadataChecksum) { + this.settingMetadataChecksum = settingMetadataChecksum; + return this; + } + + public Builder transientSettingsMetadataChecksum(long transientSettingsMetadataChecksum) { + this.transientSettingsMetadataChecksum = transientSettingsMetadataChecksum; + return this; + } + + public Builder templatesMetadataChecksum(long templatesMetadataChecksum) { + this.templatesMetadataChecksum = templatesMetadataChecksum; + return this; + } + + public Builder customMetadataMapChecksum(long customMetadataMapChecksum) { + this.customMetadataMapChecksum = customMetadataMapChecksum; + return this; + } + + public Builder hashesOfConsistentSettingsChecksum(long hashesOfConsistentSettingsChecksum) { + this.hashesOfConsistentSettingsChecksum = hashesOfConsistentSettingsChecksum; + return this; + } + + public Builder indicesChecksum(long 
indicesChecksum) { + this.indicesChecksum = indicesChecksum; + return this; + } + + public Builder clusterStateChecksum(long clusterStateChecksum) { + this.clusterStateChecksum = clusterStateChecksum; + return this; + } + + public ClusterStateChecksum build() { + return new ClusterStateChecksum( + routingTableChecksum, + nodesChecksum, + blocksChecksum, + clusterStateCustomsChecksum, + coordinationMetadataChecksum, + settingMetadataChecksum, + transientSettingsMetadataChecksum, + templatesMetadataChecksum, + customMetadataMapChecksum, + hashesOfConsistentSettingsChecksum, + indicesChecksum, + clusterStateChecksum + ); + } + } + +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/ClusterStateDiffManifest.java b/server/src/main/java/org/opensearch/gateway/remote/ClusterStateDiffManifest.java index a3b36ddcff1a7..a97b3e1bc6c4b 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/ClusterStateDiffManifest.java +++ b/server/src/main/java/org/opensearch/gateway/remote/ClusterStateDiffManifest.java @@ -13,6 +13,7 @@ import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.routing.IndexRoutingTable; +import org.opensearch.cluster.routing.StringKeyDiffProvider; import org.opensearch.core.common.Strings; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; @@ -33,6 +34,7 @@ import static org.opensearch.cluster.DiffableUtils.NonDiffableValueSerializer.getAbstractInstance; import static org.opensearch.cluster.DiffableUtils.getStringKeySerializer; import static org.opensearch.core.xcontent.XContentParserUtils.ensureExpectedToken; +import static org.opensearch.gateway.remote.ClusterMetadataManifest.CODEC_V2; import static org.opensearch.gateway.remote.ClusterMetadataManifest.CODEC_V3; /** @@ -81,7 +83,8 @@ public class ClusterStateDiffManifest implements ToXContentFragment, Writeable { public ClusterStateDiffManifest( ClusterState state, ClusterState previousState, - DiffableUtils.MapDiff> routingTableIncrementalDiff, + int codecVersion, + StringKeyDiffProvider routingTableDiff, String indicesRoutingDiffPath ) { fromStateUUID = previousState.stateUUID(); @@ -109,13 +112,14 @@ public ClusterStateDiffManifest( customMetadataUpdated = new ArrayList<>(customDiff.getDiffs().keySet()); customMetadataUpdated.addAll(customDiff.getUpserts().keySet()); customMetadataDeleted = customDiff.getDeletes(); - indicesRoutingUpdated = new ArrayList<>(); indicesRoutingDeleted = new ArrayList<>(); this.indicesRoutingDiffPath = indicesRoutingDiffPath; - if (routingTableIncrementalDiff != null) { - routingTableIncrementalDiff.getUpserts().forEach((k, v) -> indicesRoutingUpdated.add(k)); - indicesRoutingDeleted.addAll(routingTableIncrementalDiff.getDeletes()); + if (codecVersion == CODEC_V2 || codecVersion == CODEC_V3) { + if (routingTableDiff != null && routingTableDiff.provideDiff() != null) { + routingTableDiff.provideDiff().getUpserts().forEach((k, v) -> indicesRoutingUpdated.add(k)); + indicesRoutingDeleted.addAll(routingTableDiff.provideDiff().getDeletes()); + } } hashesOfConsistentSettingsUpdated = !state.metadata() .hashesOfConsistentSettings() @@ -231,18 +235,21 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.endObject(); builder.field(CLUSTER_BLOCKS_UPDATED_FIELD, clusterBlocksUpdated); builder.field(DISCOVERY_NODES_UPDATED_FIELD, discoveryNodesUpdated); - builder.startObject(ROUTING_TABLE_DIFF); - 
builder.startArray(UPSERTS_FIELD); - for (String index : indicesRoutingUpdated) { - builder.value(index); + if (!indicesRoutingUpdated.isEmpty()) { + builder.startArray(UPSERTS_FIELD); + for (String index : indicesRoutingUpdated) { + builder.value(index); + } + builder.endArray(); } - builder.endArray(); - builder.startArray(DELETES_FIELD); - for (String index : indicesRoutingDeleted) { - builder.value(index); + if (!indicesRoutingDeleted.isEmpty()) { + builder.startArray(DELETES_FIELD); + for (String index : indicesRoutingDeleted) { + builder.value(index); + } + builder.endArray(); } - builder.endArray(); if (indicesRoutingDiffPath != null) { builder.field(DIFF_FIELD, indicesRoutingDiffPath); } @@ -345,10 +352,14 @@ public static ClusterStateDiffManifest fromXContent(XContentParser parser, long parser.nextToken(); switch (currentFieldName) { case UPSERTS_FIELD: - builder.indicesRoutingUpdated(convertListToString(parser.listOrderedMap())); + if (codec_version == CODEC_V2 || codec_version == CODEC_V3) { + builder.indicesRoutingUpdated(convertListToString(parser.listOrderedMap())); + } break; case DELETES_FIELD: - builder.indicesRoutingDeleted(convertListToString(parser.listOrderedMap())); + if (codec_version == CODEC_V2 || codec_version == CODEC_V3) { + builder.indicesRoutingDeleted(convertListToString(parser.listOrderedMap())); + } break; case DIFF_FIELD: if (codec_version >= CODEC_V3) { @@ -466,14 +477,14 @@ public boolean isHashesOfConsistentSettingsUpdated() { return hashesOfConsistentSettingsUpdated; } - public List getIndicesRoutingUpdated() { - return indicesRoutingUpdated; - } - public String getIndicesRoutingDiffPath() { return indicesRoutingDiffPath; } + public List getIndicesRoutingUpdated() { + return indicesRoutingUpdated; + } + public List getIndicesRoutingDeleted() { return indicesRoutingDeleted; } @@ -666,6 +677,11 @@ public Builder discoveryNodesUpdated(boolean discoveryNodesUpdated) { return this; } + public Builder indicesRoutingDiffPath(String indicesRoutingDiffPath) { + this.indicesRoutingDiff = indicesRoutingDiffPath; + return this; + } + public Builder indicesRoutingUpdated(List indicesRoutingUpdated) { this.indicesRoutingUpdated = indicesRoutingUpdated; return this; @@ -676,11 +692,6 @@ public Builder indicesRoutingDeleted(List indicesRoutingDeleted) { return this; } - public Builder indicesRoutingDiffPath(String indicesRoutingDiffPath) { - this.indicesRoutingDiff = indicesRoutingDiffPath; - return this; - } - public Builder clusterStateCustomUpdated(List clusterStateCustomUpdated) { this.clusterStateCustomUpdated = clusterStateCustomUpdated; return this; diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateCache.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateCache.java new file mode 100644 index 0000000000000..de36ac4429302 --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateCache.java @@ -0,0 +1,58 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.gateway.remote; + +import org.opensearch.cluster.ClusterName; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.coordination.ClusterStateTermVersion; +import org.opensearch.common.collect.Tuple; + +import java.util.concurrent.atomic.AtomicReference; + +/** + * Cache to Remote Cluster State based on term-version check. The current implementation + * caches the last highest version of cluster-state that was downloaded from cache. + * + * @opensearch.internal + */ +public class RemoteClusterStateCache { + + private final AtomicReference> clusterStateFromCache = new AtomicReference<>(); + + public ClusterState getState(String clusterName, ClusterMetadataManifest manifest) { + Tuple cache = clusterStateFromCache.get(); + if (cache != null) { + ClusterStateTermVersion manifestStateTermVersion = new ClusterStateTermVersion( + new ClusterName(clusterName), + manifest.getClusterUUID(), + manifest.getClusterTerm(), + manifest.getStateVersion() + ); + if (cache.v1().equals(manifestStateTermVersion)) { + return cache.v2(); + } + } + return null; + } + + public void putState(final ClusterState newState) { + if (newState.metadata() == null || newState.coordinationMetadata() == null) { + // ensure the remote cluster state has coordination metadata set + return; + } + + ClusterStateTermVersion cacheStateTermVersion = new ClusterStateTermVersion( + new ClusterName(newState.getClusterName().value()), + newState.metadata().clusterUUID(), + newState.term(), + newState.version() + ); + clusterStateFromCache.set(new Tuple<>(cacheStateTermVersion, newState)); + } +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManager.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManager.java index 02db15477ff95..d9bd9669f138c 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManager.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManager.java @@ -81,7 +81,7 @@ public RemoteClusterStateCleanupManager( RemoteRoutingTableService remoteRoutingTableService ) { this.remoteClusterStateService = remoteClusterStateService; - this.remoteStateStats = remoteClusterStateService.getStats(); + this.remoteStateStats = remoteClusterStateService.getRemoteStateStats(); ClusterSettings clusterSettings = clusterService.getClusterSettings(); this.clusterApplierService = clusterService.getClusterApplierService(); this.staleFileCleanupInterval = clusterSettings.get(REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING); diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index 910f601a81ca8..12d10fd908b44 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -18,6 +18,7 @@ import org.opensearch.cluster.DiffableUtils; import org.opensearch.cluster.block.ClusterBlocks; import org.opensearch.cluster.coordination.CoordinationMetadata; +import org.opensearch.cluster.coordination.PersistedStateStats; import org.opensearch.cluster.metadata.DiffableStringMap; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; @@ -27,11 +28,12 @@ import org.opensearch.cluster.node.DiscoveryNodes.Builder; import org.opensearch.cluster.routing.IndexRoutingTable; import 
org.opensearch.cluster.routing.RoutingTable; -import org.opensearch.cluster.routing.RoutingTableIncrementalDiff; +import org.opensearch.cluster.routing.StringKeyDiffProvider; import org.opensearch.cluster.routing.remote.RemoteRoutingTableService; import org.opensearch.cluster.routing.remote.RemoteRoutingTableServiceFactory; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.Nullable; +import org.opensearch.common.annotation.InternalApi; import org.opensearch.common.blobstore.BlobContainer; import org.opensearch.common.blobstore.BlobStore; import org.opensearch.common.settings.ClusterSettings; @@ -39,7 +41,6 @@ import org.opensearch.common.settings.Setting.Property; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; -import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.util.io.IOUtils; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.io.stream.NamedWriteableRegistry; @@ -70,6 +71,7 @@ import java.io.Closeable; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -89,9 +91,11 @@ import static java.util.Collections.emptyList; import static java.util.Collections.emptyMap; -import static org.opensearch.common.util.FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL; +import static org.opensearch.cluster.ClusterState.CUSTOM_VALUE_SERIALIZER; import static org.opensearch.gateway.PersistedClusterStateService.SLOW_WRITE_LOGGING_THRESHOLD; import static org.opensearch.gateway.remote.ClusterMetadataManifest.CODEC_V2; +import static org.opensearch.gateway.remote.ClusterMetadataManifest.CODEC_V3; +import static org.opensearch.gateway.remote.ClusterMetadataManifest.MANIFEST_CURRENT_CODEC_VERSION; import static org.opensearch.gateway.remote.RemoteClusterStateAttributesManager.CLUSTER_BLOCKS; import static org.opensearch.gateway.remote.RemoteClusterStateAttributesManager.CLUSTER_STATE_ATTRIBUTE; import static org.opensearch.gateway.remote.RemoteClusterStateAttributesManager.DISCOVERY_NODES; @@ -114,10 +118,23 @@ * * @opensearch.internal */ +@InternalApi public class RemoteClusterStateService implements Closeable { private static final Logger logger = LogManager.getLogger(RemoteClusterStateService.class); + /** + * Gates the functionality of remote publication. + */ + public static final String REMOTE_PUBLICATION_SETTING_KEY = "cluster.remote_store.publication.enabled"; + + public static final Setting REMOTE_PUBLICATION_SETTING = Setting.boolSetting( + REMOTE_PUBLICATION_SETTING_KEY, + false, + Property.NodeScope, + Property.Final + ); + /** * Used to specify if cluster state metadata should be published to remote store */ @@ -137,6 +154,59 @@ public class RemoteClusterStateService implements Closeable { Setting.Property.NodeScope ); + public static final Setting REMOTE_CLUSTER_STATE_CHECKSUM_VALIDATION_MODE_SETTING = new Setting<>( + "cluster.remote_store.state.checksum_validation.mode", + RemoteClusterStateValidationMode.NONE.name(), + RemoteClusterStateValidationMode::parseString, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + /** + * Controls the fixed prefix for the cluster state path on remote store. + */ + public static final Setting CLUSTER_REMOTE_STORE_STATE_PATH_PREFIX = Setting.simpleString( + "cluster.remote_store.state.path.prefix", + "", + Property.NodeScope, + Property.Final + ); + + /** + * Validation mode for cluster state checksum. 
+ * None: Validation will be disabled. + * Debug: Validation enabled but only matches checksum and logs failing entities. + * Trace: Matches checksum and downloads full cluster state to find diff in failing entities. Only logs failures. + * Failure: Throws exception on failing validation. + */ + public enum RemoteClusterStateValidationMode { + DEBUG("debug"), + TRACE("trace"), + FAILURE("failure"), + NONE("none"); + + public final String mode; + + RemoteClusterStateValidationMode(String mode) { + this.mode = mode; + } + + public static RemoteClusterStateValidationMode parseString(String mode) { + try { + return RemoteClusterStateValidationMode.valueOf(mode.toUpperCase(Locale.ROOT)); + } catch (IllegalArgumentException e) { + throw new IllegalArgumentException( + "[" + + mode + + "] mode is not supported. " + + "supported modes are [" + + Arrays.toString(RemoteClusterStateValidationMode.values()) + + "]" + ); + } + } + } + private TimeValue remoteStateReadTimeout; private final String nodeId; private final Supplier repositoriesService; @@ -148,6 +218,7 @@ public class RemoteClusterStateService implements Closeable { private BlobStoreTransferService blobStoreTransferService; private RemoteRoutingTableService remoteRoutingTableService; private volatile TimeValue slowWriteLoggingThreshold; + private RemoteClusterStateValidationMode remoteClusterStateValidationMode; private final RemotePersistenceStats remoteStateStats; private RemoteClusterStateCleanupManager remoteClusterStateCleanupManager; @@ -162,7 +233,9 @@ public class RemoteClusterStateService implements Closeable { + "indices, coordination metadata updated : [{}], settings metadata updated : [{}], templates metadata " + "updated : [{}], custom metadata updated : [{}], indices routing updated : [{}]"; private final boolean isPublicationEnabled; + private final String remotePathPrefix; + private final RemoteClusterStateCache remoteClusterStateCache; // ToXContent Params with gateway mode. // We are using gateway context mode to persist all custom metadata. 
public static final ToXContent.Params FORMAT_PARAMS; @@ -194,12 +267,16 @@ public RemoteClusterStateService( clusterSettings.addSettingsUpdateConsumer(SLOW_WRITE_LOGGING_THRESHOLD, this::setSlowWriteLoggingThreshold); this.remoteStateReadTimeout = clusterSettings.get(REMOTE_STATE_READ_TIMEOUT_SETTING); clusterSettings.addSettingsUpdateConsumer(REMOTE_STATE_READ_TIMEOUT_SETTING, this::setRemoteStateReadTimeout); + this.remoteClusterStateValidationMode = REMOTE_CLUSTER_STATE_CHECKSUM_VALIDATION_MODE_SETTING.get(settings); + clusterSettings.addSettingsUpdateConsumer(REMOTE_CLUSTER_STATE_CHECKSUM_VALIDATION_MODE_SETTING, this::setChecksumValidationMode); + this.remoteStateStats = new RemotePersistenceStats(); this.namedWriteableRegistry = namedWriteableRegistry; this.indexMetadataUploadListeners = indexMetadataUploadListeners; - this.isPublicationEnabled = FeatureFlags.isEnabled(REMOTE_PUBLICATION_EXPERIMENTAL) + this.isPublicationEnabled = REMOTE_PUBLICATION_SETTING.get(settings) && RemoteStoreNodeAttribute.isRemoteStoreClusterStateEnabled(settings) && RemoteStoreNodeAttribute.isRemoteRoutingTableEnabled(settings); + this.remotePathPrefix = CLUSTER_REMOTE_STORE_STATE_PATH_PREFIX.get(settings); this.remoteRoutingTableService = RemoteRoutingTableServiceFactory.getService( repositoriesService, settings, @@ -208,6 +285,7 @@ public RemoteClusterStateService( ClusterName.CLUSTER_NAME_SETTING.get(settings).value() ); remoteClusterStateCleanupManager = new RemoteClusterStateCleanupManager(this, clusterService, remoteRoutingTableService); + remoteClusterStateCache = new RemoteClusterStateCache(); } /** @@ -217,7 +295,8 @@ public RemoteClusterStateService( * @return A manifest object which contains the details of uploaded entity metadata. */ @Nullable - public RemoteClusterStateManifestInfo writeFullMetadata(ClusterState clusterState, String previousClusterUUID) throws IOException { + public RemoteClusterStateManifestInfo writeFullMetadata(ClusterState clusterState, String previousClusterUUID, int codecVersion) + throws IOException { final long startTimeNanos = relativeTimeNanosSupplier.getAsLong(); if (clusterState.nodes().isLocalNodeElectedClusterManager() == false) { logger.error("Local node is not elected cluster manager. Exiting"); @@ -244,6 +323,7 @@ public RemoteClusterStateManifestInfo writeFullMetadata(ClusterState clusterStat ClusterStateDiffManifest clusterStateDiffManifest = new ClusterStateDiffManifest( clusterState, ClusterState.EMPTY_STATE, + MANIFEST_CURRENT_CODEC_VERSION, null, null ); @@ -252,12 +332,14 @@ public RemoteClusterStateManifestInfo writeFullMetadata(ClusterState clusterStat uploadedMetadataResults, previousClusterUUID, clusterStateDiffManifest, - false + !remoteClusterStateValidationMode.equals(RemoteClusterStateValidationMode.NONE) ? 
new ClusterStateChecksum(clusterState) : null, + false, + codecVersion ); final long durationMillis = TimeValue.nsecToMSec(relativeTimeNanosSupplier.getAsLong() - startTimeNanos); - remoteStateStats.stateSucceeded(); - remoteStateStats.stateTook(durationMillis); + remoteStateStats.stateUploadSucceeded(); + remoteStateStats.stateUploadTook(durationMillis); if (durationMillis >= slowWriteLoggingThreshold.getMillis()) { logger.warn( "writing cluster state took [{}ms] which is above the warn threshold of [{}]; " @@ -285,12 +367,20 @@ public RemoteClusterStateManifestInfo writeFullMetadata(ClusterState clusterStat * * @return {@link RemoteClusterStateManifestInfo} object containing uploaded manifest detail */ - @Nullable public RemoteClusterStateManifestInfo writeIncrementalMetadata( ClusterState previousClusterState, ClusterState clusterState, ClusterMetadataManifest previousManifest ) throws IOException { + if (previousClusterState == null) { + throw new IllegalArgumentException("previousClusterState cannot be null"); + } + if (clusterState == null) { + throw new IllegalArgumentException("clusterState cannot be null"); + } + if (previousManifest == null) { + throw new IllegalArgumentException("previousManifest cannot be null"); + } logger.trace("WRITING INCREMENTAL STATE"); final long startTimeNanos = relativeTimeNanosSupplier.getAsLong(); @@ -298,7 +388,6 @@ public RemoteClusterStateManifestInfo writeIncrementalMetadata( logger.error("Local node is not elected cluster manager. Exiting"); return null; } - assert previousClusterState.metadata().coordinationMetadata().term() == clusterState.metadata().coordinationMetadata().term(); boolean firstUploadForSplitGlobalMetadata = !previousManifest.hasMetadataAttributesFiles(); @@ -342,12 +431,18 @@ public RemoteClusterStateManifestInfo writeIncrementalMetadata( } final List indicesRoutingToUpload = new ArrayList<>(); - final DiffableUtils.MapDiff> routingTableIncrementalDiff = - remoteRoutingTableService.getIndicesRoutingMapDiff(previousClusterState.getRoutingTable(), clusterState.getRoutingTable()); - - Map> indexRoutingTableDiffs = routingTableIncrementalDiff.getDiffs(); - routingTableIncrementalDiff.getDiffs().forEach((k, v) -> indicesRoutingToUpload.add(clusterState.getRoutingTable().index(k))); - routingTableIncrementalDiff.getUpserts().forEach((k, v) -> indicesRoutingToUpload.add(v)); + final List deletedIndicesRouting = new ArrayList<>(); + final StringKeyDiffProvider routingTableDiff = remoteRoutingTableService.getIndicesRoutingMapDiff( + previousClusterState.getRoutingTable(), + clusterState.getRoutingTable() + ); + if (routingTableDiff != null && routingTableDiff.provideDiff() != null) { + routingTableDiff.provideDiff() + .getDiffs() + .forEach((k, v) -> indicesRoutingToUpload.add(clusterState.getRoutingTable().index(k))); + routingTableDiff.provideDiff().getUpserts().forEach((k, v) -> indicesRoutingToUpload.add(v)); + deletedIndicesRouting.addAll(routingTableDiff.provideDiff().getDeletes()); + } UploadedMetadataResults uploadedMetadataResults; // For migration case from codec V0 or V1 to V2, we have added null check on metadata attribute files, @@ -384,7 +479,7 @@ public RemoteClusterStateManifestInfo writeIncrementalMetadata( clusterStateCustomsDiff.getUpserts(), updateHashesOfConsistentSettings, indicesRoutingToUpload, - indexRoutingTableDiffs + routingTableDiff ); // update the map if the metadata was uploaded @@ -426,13 +521,14 @@ public RemoteClusterStateManifestInfo writeIncrementalMetadata( 
uploadedMetadataResults.uploadedIndicesRoutingMetadata = remoteRoutingTableService.getAllUploadedIndicesRouting( previousManifest, uploadedMetadataResults.uploadedIndicesRoutingMetadata, - routingTableIncrementalDiff.getDeletes() + deletedIndicesRouting ); ClusterStateDiffManifest clusterStateDiffManifest = new ClusterStateDiffManifest( clusterState, previousClusterState, - routingTableIncrementalDiff, + MANIFEST_CURRENT_CODEC_VERSION, + routingTableDiff, uploadedMetadataResults.uploadedIndicesRoutingDiffMetadata != null ? uploadedMetadataResults.uploadedIndicesRoutingDiffMetadata.getUploadedFilename() : null @@ -443,12 +539,14 @@ public RemoteClusterStateManifestInfo writeIncrementalMetadata( uploadedMetadataResults, previousManifest.getPreviousClusterUUID(), clusterStateDiffManifest, - false + !remoteClusterStateValidationMode.equals(RemoteClusterStateValidationMode.NONE) ? new ClusterStateChecksum(clusterState) : null, + false, + previousManifest.getCodecVersion() ); final long durationMillis = TimeValue.nsecToMSec(relativeTimeNanosSupplier.getAsLong() - startTimeNanos); - remoteStateStats.stateSucceeded(); - remoteStateStats.stateTook(durationMillis); + remoteStateStats.stateUploadSucceeded(); + remoteStateStats.stateUploadTook(durationMillis); ParameterizedMessage clusterStateUploadTimeMessage = new ParameterizedMessage( CLUSTER_STATE_UPLOAD_TIME_LOG_STRING, manifestDetails.getClusterMetadataManifest().getStateVersion(), @@ -513,14 +611,18 @@ UploadedMetadataResults writeMetadataInParallel( Map clusterStateCustomToUpload, boolean uploadHashesOfConsistentSettings, List indicesRoutingToUpload, - Map> indexRoutingTableDiff + StringKeyDiffProvider routingTableDiff ) throws IOException { assert Objects.nonNull(indexMetadataUploadListeners) : "indexMetadataUploadListeners can not be null"; int totalUploadTasks = indexToUpload.size() + indexMetadataUploadListeners.size() + customToUpload.size() + (uploadCoordinationMetadata ? 1 : 0) + (uploadSettingsMetadata ? 1 : 0) + (uploadTemplateMetadata ? 1 : 0) + (uploadDiscoveryNodes ? 1 : 0) + (uploadClusterBlock ? 1 : 0) + (uploadTransientSettingMetadata ? 1 : 0) + clusterStateCustomToUpload.size() + (uploadHashesOfConsistentSettings ? 1 : 0) + indicesRoutingToUpload.size() - + (indexRoutingTableDiff != null && !indexRoutingTableDiff.isEmpty() ? 1 : 0); + + ((routingTableDiff != null + && routingTableDiff.provideDiff() != null + && (!routingTableDiff.provideDiff().getDiffs().isEmpty() + || !routingTableDiff.provideDiff().getDeletes().isEmpty() + || !routingTableDiff.provideDiff().getUpserts().isEmpty())) ? 
1 : 0); CountDownLatch latch = new CountDownLatch(totalUploadTasks); List uploadTasks = Collections.synchronizedList(new ArrayList<>(totalUploadTasks)); Map results = new ConcurrentHashMap<>(totalUploadTasks); @@ -659,7 +761,10 @@ UploadedMetadataResults writeMetadataInParallel( indexMetadata, clusterState.metadata().clusterUUID(), blobStoreRepository.getCompressor(), - blobStoreRepository.getNamedXContentRegistry() + blobStoreRepository.getNamedXContentRegistry(), + remoteIndexMetadataManager.getPathTypeSetting(), + remoteIndexMetadataManager.getPathHashAlgoSetting(), + remotePathPrefix ), listener ); @@ -690,13 +795,17 @@ UploadedMetadataResults writeMetadataInParallel( listener ); }); - if (indexRoutingTableDiff != null && !indexRoutingTableDiff.isEmpty()) { + if (routingTableDiff != null + && routingTableDiff.provideDiff() != null + && (!routingTableDiff.provideDiff().getDiffs().isEmpty() + || !routingTableDiff.provideDiff().getDeletes().isEmpty() + || !routingTableDiff.provideDiff().getUpserts().isEmpty())) { uploadTasks.add(RemoteRoutingTableDiff.ROUTING_TABLE_DIFF_FILE); remoteRoutingTableService.getAsyncIndexRoutingDiffWriteAction( clusterState.metadata().clusterUUID(), clusterState.term(), clusterState.version(), - indexRoutingTableDiff, + routingTableDiff, listener ); } @@ -851,18 +960,41 @@ public RemoteClusterStateCleanupManager getCleanupManager() { } @Nullable - public RemoteClusterStateManifestInfo markLastStateAsCommitted(ClusterState clusterState, ClusterMetadataManifest previousManifest) - throws IOException { + public RemoteClusterStateManifestInfo markLastStateAsCommitted( + ClusterState clusterState, + ClusterMetadataManifest previousManifest, + boolean commitVotingConfig + ) throws IOException { assert clusterState != null : "Last accepted cluster state is not set"; if (clusterState.nodes().isLocalNodeElectedClusterManager() == false) { logger.error("Local node is not elected cluster manager. Exiting"); return null; } assert previousManifest != null : "Last cluster metadata manifest is not set"; + UploadedMetadataAttribute uploadedCoordinationMetadata = previousManifest.getCoordinationMetadata(); + if (commitVotingConfig) { + // update the coordination metadata if voting config is committed + uploadedCoordinationMetadata = writeMetadataInParallel( + clusterState, + emptyList(), + emptyMap(), + emptyMap(), + true, + false, + false, + false, + false, + false, + emptyMap(), + false, + emptyList(), + null + ).uploadedCoordinationMetadata; + } UploadedMetadataResults uploadedMetadataResults = new UploadedMetadataResults( previousManifest.getIndices(), previousManifest.getCustomMetadataMap(), - previousManifest.getCoordinationMetadata(), + uploadedCoordinationMetadata, previousManifest.getSettingsMetadata(), previousManifest.getTemplatesMetadata(), previousManifest.getTransientSettingsMetadata(), @@ -878,7 +1010,9 @@ public RemoteClusterStateManifestInfo markLastStateAsCommitted(ClusterState clus uploadedMetadataResults, previousManifest.getPreviousClusterUUID(), previousManifest.getDiffManifest(), - true + !remoteClusterStateValidationMode.equals(RemoteClusterStateValidationMode.NONE) ? 
new ClusterStateChecksum(clusterState) : null, + true, + previousManifest.getCodecVersion() ); if (!previousManifest.isClusterUUIDCommitted() && committedManifestDetails.getClusterMetadataManifest().isClusterUUIDCommitted()) { remoteClusterStateCleanupManager.deleteStaleClusterUUIDs(clusterState, committedManifestDetails.getClusterMetadataManifest()); @@ -902,6 +1036,15 @@ public ClusterMetadataManifest getClusterMetadataManifestByFileName(String clust return remoteManifestManager.getRemoteClusterMetadataManifestByFileName(clusterUUID, fileName); } + public Optional getClusterMetadataManifestByTermVersion( + String clusterName, + String clusterUUID, + long term, + long version + ) { + return remoteManifestManager.getClusterMetadataManifestByTermVersion(clusterName, clusterUUID, term, version); + } + @Override public void close() throws IOException { remoteClusterStateCleanupManager.close(); @@ -962,6 +1105,10 @@ private void setSlowWriteLoggingThreshold(TimeValue slowWriteLoggingThreshold) { this.slowWriteLoggingThreshold = slowWriteLoggingThreshold; } + private void setChecksumValidationMode(RemoteClusterStateValidationMode remoteClusterStateValidationMode) { + this.remoteClusterStateValidationMode = remoteClusterStateValidationMode; + } + // Package private for unit test RemoteRoutingTableService getRemoteRoutingTableService() { return this.remoteRoutingTableService; @@ -1019,7 +1166,7 @@ ClusterState readClusterStateInParallel( Map clusterStateCustomToRead, boolean readIndexRoutingTableDiff, boolean includeEphemeral - ) throws IOException { + ) { int totalReadTasks = indicesToRead.size() + customToRead.size() + (readCoordinationMetadata ? 1 : 0) + (readSettingsMetadata ? 1 : 0) + (readTemplatesMetadata ? 1 : 0) + (readDiscoveryNodes ? 1 : 0) + (readClusterBlocks ? 
1 : 0) @@ -1028,7 +1175,7 @@ ClusterState readClusterStateInParallel( CountDownLatch latch = new CountDownLatch(totalReadTasks); List readResults = Collections.synchronizedList(new ArrayList<>()); List readIndexRoutingTableResults = Collections.synchronizedList(new ArrayList<>()); - AtomicReference readIndexRoutingTableDiffResults = new AtomicReference<>(); + AtomicReference> readIndexRoutingTableDiffResults = new AtomicReference<>(); List exceptionList = Collections.synchronizedList(new ArrayList<>(totalReadTasks)); LatchedActionListener listener = new LatchedActionListener<>(ActionListener.wrap(response -> { @@ -1071,7 +1218,7 @@ ClusterState readClusterStateInParallel( ); } - LatchedActionListener routingTableDiffLatchedActionListener = new LatchedActionListener<>( + LatchedActionListener> routingTableDiffLatchedActionListener = new LatchedActionListener<>( ActionListener.wrap(response -> { logger.debug("Successfully read routing table diff component from remote"); readIndexRoutingTableDiffResults.set(response); @@ -1292,15 +1439,12 @@ ClusterState readClusterStateInParallel( readIndexRoutingTableResults.forEach( indexRoutingTable -> indicesRouting.put(indexRoutingTable.getIndex().getName(), indexRoutingTable) ); - RoutingTableIncrementalDiff routingTableDiff = readIndexRoutingTableDiffResults.get(); + Diff routingTableDiff = readIndexRoutingTableDiffResults.get(); + RoutingTable newRoutingTable = new RoutingTable(manifest.getRoutingTableVersion(), indicesRouting); if (routingTableDiff != null) { - routingTableDiff.getDiffs().forEach((key, diff) -> { - IndexRoutingTable previousIndexRoutingTable = indicesRouting.get(key); - IndexRoutingTable updatedTable = diff.apply(previousIndexRoutingTable); - indicesRouting.put(key, updatedTable); - }); + newRoutingTable = routingTableDiff.apply(previousState.getRoutingTable()); } - clusterStateBuilder.routingTable(new RoutingTable(manifest.getRoutingTableVersion(), indicesRouting)); + clusterStateBuilder.routingTable(newRoutingTable); return clusterStateBuilder.build(); } @@ -1311,8 +1455,15 @@ public ClusterState getClusterStateForManifest( String localNodeId, boolean includeEphemeral ) throws IOException { + ClusterState stateFromCache = remoteClusterStateCache.getState(clusterName, manifest); + if (stateFromCache != null) { + return stateFromCache; + } + + final ClusterState clusterState; + final long startTimeNanos = relativeTimeNanosSupplier.getAsLong(); if (manifest.onOrAfterCodecVersion(CODEC_V2)) { - return readClusterStateInParallel( + clusterState = readClusterStateInParallel( ClusterState.builder(new ClusterName(clusterName)).build(), manifest, manifest.getClusterUUID(), @@ -1331,8 +1482,14 @@ public ClusterState getClusterStateForManifest( false, includeEphemeral ); + + if (includeEphemeral + && !remoteClusterStateValidationMode.equals(RemoteClusterStateValidationMode.NONE) + && manifest.getClusterStateChecksum() != null) { + validateClusterStateFromChecksum(manifest, clusterState, clusterName, localNodeId, true); + } } else { - ClusterState clusterState = readClusterStateInParallel( + ClusterState state = readClusterStateInParallel( ClusterState.builder(new ClusterName(clusterName)).build(), manifest, manifest.getClusterUUID(), @@ -1353,16 +1510,25 @@ public ClusterState getClusterStateForManifest( false ); Metadata.Builder mb = Metadata.builder(remoteGlobalMetadataManager.getGlobalMetadata(manifest.getClusterUUID(), manifest)); - mb.indices(clusterState.metadata().indices()); - return 
ClusterState.builder(clusterState).metadata(mb).build(); + mb.indices(state.metadata().indices()); + clusterState = ClusterState.builder(state).metadata(mb).build(); } - + final long durationMillis = TimeValue.nsecToMSec(relativeTimeNanosSupplier.getAsLong() - startTimeNanos); + remoteStateStats.stateFullDownloadSucceeded(); + remoteStateStats.stateFullDownloadTook(durationMillis); + if (includeEphemeral) { + // cache only if the entire cluster-state is present + remoteClusterStateCache.putState(clusterState); + } + return clusterState; } - public ClusterState getClusterStateUsingDiff(ClusterMetadataManifest manifest, ClusterState previousState, String localNodeId) - throws IOException { + public ClusterState getClusterStateUsingDiff(ClusterMetadataManifest manifest, ClusterState previousState, String localNodeId) { assert manifest.getDiffManifest() != null : "Diff manifest null which is required for downloading cluster state"; + final long startTimeNanos = relativeTimeNanosSupplier.getAsLong(); ClusterStateDiffManifest diff = manifest.getDiffManifest(); + boolean includeEphemeral = true; + List updatedIndices = diff.getIndicesUpdated().stream().map(idx -> { Optional uploadedIndexMetadataOptional = manifest.getIndices() .stream() @@ -1386,9 +1552,11 @@ public ClusterState getClusterStateUsingDiff(ClusterMetadataManifest manifest, C } List updatedIndexRouting = new ArrayList<>(); - updatedIndexRouting.addAll( - remoteRoutingTableService.getUpdatedIndexRoutingTableMetadata(diff.getIndicesRoutingUpdated(), manifest.getIndicesRouting()) - ); + if (manifest.getCodecVersion() == CODEC_V2 || manifest.getCodecVersion() == CODEC_V3) { + updatedIndexRouting.addAll( + remoteRoutingTableService.getUpdatedIndexRoutingTableMetadata(diff.getIndicesRoutingUpdated(), manifest.getIndicesRouting()) + ); + } ClusterState updatedClusterState = readClusterStateInParallel( previousState, @@ -1409,7 +1577,7 @@ public ClusterState getClusterStateUsingDiff(ClusterMetadataManifest manifest, C manifest.getDiffManifest() != null && manifest.getDiffManifest().getIndicesRoutingDiffPath() != null && !manifest.getDiffManifest().getIndicesRoutingDiffPath().isEmpty(), - true + includeEphemeral ); ClusterState.Builder clusterStateBuilder = ClusterState.builder(updatedClusterState); Metadata.Builder metadataBuilder = Metadata.builder(updatedClusterState.metadata()); @@ -1432,16 +1600,195 @@ public ClusterState getClusterStateUsingDiff(ClusterMetadataManifest manifest, C } HashMap indexRoutingTables = new HashMap<>(updatedClusterState.getRoutingTable().getIndicesRouting()); - - for (String indexName : diff.getIndicesRoutingDeleted()) { - indexRoutingTables.remove(indexName); + if (manifest.getCodecVersion() == CODEC_V2 || manifest.getCodecVersion() == CODEC_V3) { + for (String indexName : diff.getIndicesRoutingDeleted()) { + indexRoutingTables.remove(indexName); + } } - return clusterStateBuilder.stateUUID(manifest.getStateUUID()) + ClusterState clusterState = clusterStateBuilder.stateUUID(manifest.getStateUUID()) .version(manifest.getStateVersion()) .metadata(metadataBuilder) .routingTable(new RoutingTable(manifest.getRoutingTableVersion(), indexRoutingTables)) .build(); + if (!remoteClusterStateValidationMode.equals(RemoteClusterStateValidationMode.NONE) && manifest.getClusterStateChecksum() != null) { + validateClusterStateFromChecksum(manifest, clusterState, previousState.getClusterName().value(), localNodeId, false); + } + final long durationMillis = TimeValue.nsecToMSec(relativeTimeNanosSupplier.getAsLong() - 
startTimeNanos); + remoteStateStats.stateDiffDownloadSucceeded(); + remoteStateStats.stateDiffDownloadTook(durationMillis); + + assert includeEphemeral == true; + // newState includes all the fields of cluster-state (includeEphemeral=true always) + remoteClusterStateCache.putState(clusterState); + return clusterState; + } + + void validateClusterStateFromChecksum( + ClusterMetadataManifest manifest, + ClusterState clusterState, + String clusterName, + String localNodeId, + boolean isFullStateDownload + ) { + ClusterStateChecksum newClusterStateChecksum = new ClusterStateChecksum(clusterState); + List failedValidation = newClusterStateChecksum.getMismatchEntities(manifest.getClusterStateChecksum()); + if (failedValidation.isEmpty()) { + return; + } + logger.error( + () -> new ParameterizedMessage( + "Cluster state checksums do not match. Checksum from manifest {}, checksum from created cluster state {}. Entities failing validation {}", + manifest.getClusterStateChecksum(), + newClusterStateChecksum, + failedValidation + ) + ); + if (isFullStateDownload && remoteClusterStateValidationMode.equals(RemoteClusterStateValidationMode.FAILURE)) { + throw new IllegalStateException( + "Cluster state checksums do not match during full state read. Validation failed for " + failedValidation + ); + } + if (remoteClusterStateValidationMode.equals(RemoteClusterStateValidationMode.FAILURE) + || remoteClusterStateValidationMode.equals(RemoteClusterStateValidationMode.TRACE)) { + // download full cluster state and match against state created for the failing entities + ClusterState fullClusterState = readClusterStateInParallel( + ClusterState.builder(new ClusterName(clusterName)).build(), + manifest, + manifest.getClusterUUID(), + localNodeId, + manifest.getIndices(), + manifest.getCustomMetadataMap(), + manifest.getCoordinationMetadata() != null, + manifest.getSettingsMetadata() != null, + manifest.getTransientSettingsMetadata() != null, + manifest.getTemplatesMetadata() != null, + manifest.getDiscoveryNodesMetadata() != null, + manifest.getClusterBlocksMetadata() != null, + manifest.getIndicesRouting(), + manifest.getHashesOfConsistentSettings() != null, + manifest.getClusterStateCustomMap(), + false, + true + ); + for (String failedEntity : failedValidation) { + switch (failedEntity) { + case ClusterStateChecksum.ROUTING_TABLE_CS: + Diff routingTableDiff = fullClusterState.routingTable().diff(clusterState.routingTable()); + logger.error(() -> new ParameterizedMessage("Failing Diff in routing table {}", routingTableDiff)); + break; + case ClusterStateChecksum.NODES_CS: + logger.error( + () -> new ParameterizedMessage( + "Failing Diff in discovery nodes {}", + fullClusterState.nodes().diff(clusterState.nodes()) + ) + ); + break; + case ClusterStateChecksum.BLOCKS_CS: + logger.error( + () -> new ParameterizedMessage( + "Failing Diff in cluster blocks {}", + fullClusterState.blocks().diff(clusterState.blocks()) + ) + ); + break; + case ClusterStateChecksum.CUSTOMS_CS: + logger.error( + () -> new ParameterizedMessage( + "Failing Diff in cluster state customs {}", + DiffableUtils.diff( + clusterState.customs(), + fullClusterState.customs(), + DiffableUtils.getStringKeySerializer(), + CUSTOM_VALUE_SERIALIZER + ) + ) + ); + break; + case ClusterStateChecksum.COORDINATION_MD_CS: + logger.error( + () -> new ParameterizedMessage( + "Failing Diff in coordination md. 
current md {}, full state md {}", + clusterState.metadata().coordinationMetadata(), + fullClusterState.metadata().coordinationMetadata() + ) + ); + break; + case ClusterStateChecksum.TRANSIENT_SETTINGS_MD_CS: + logger.error( + () -> new ParameterizedMessage( + "Failing Diff in transient settings md. current md {}, full state md {}", + clusterState.metadata().transientSettings(), + fullClusterState.metadata().transientSettings() + ) + ); + + break; + case ClusterStateChecksum.SETTINGS_MD_CS: + logger.error( + () -> new ParameterizedMessage( + "Failing Diff in settings md. current md {}, full state md {}", + clusterState.metadata().settings(), + fullClusterState.metadata().settings() + ) + ); + + break; + case ClusterStateChecksum.HASHES_MD_CS: + logger.error( + () -> new ParameterizedMessage( + "Failing Diff in hashes md {}", + ((DiffableStringMap) fullClusterState.metadata().hashesOfConsistentSettings()).diff( + (DiffableStringMap) clusterState.metadata().hashesOfConsistentSettings() + ) + ) + ); + break; + case ClusterStateChecksum.TEMPLATES_MD_CS: + logger.error( + () -> new ParameterizedMessage( + "Failing Diff in templates md{}", + fullClusterState.metadata().templatesMetadata().diff(clusterState.metadata().templatesMetadata()) + ) + ); + break; + case ClusterStateChecksum.CUSTOM_MD_CS: + logger.error( + () -> new ParameterizedMessage( + "Failing Diff in customs md {}", + DiffableUtils.diff( + clusterState.metadata().customs(), + fullClusterState.metadata().customs(), + DiffableUtils.getStringKeySerializer(), + Metadata.CUSTOM_VALUE_SERIALIZER + ) + ) + ); + break; + case ClusterStateChecksum.INDICES_CS: + logger.error( + () -> new ParameterizedMessage( + "Failing Diff in index md {}", + DiffableUtils.diff( + clusterState.metadata().indices(), + fullClusterState.metadata().indices(), + DiffableUtils.getStringKeySerializer() + ) + ) + ); + break; + default: + logger.error(() -> new ParameterizedMessage("Unknown failed entity {}", failedEntity)); + break; + } + } + } + if (remoteClusterStateValidationMode.equals(RemoteClusterStateValidationMode.FAILURE)) { + throw new IllegalStateException( + "Cluster state checksums do not match during diff read. 
Validation failed for " + failedValidation + ); + } } /** @@ -1470,6 +1817,10 @@ public String getLastKnownUUIDFromRemote(String clusterName) { } } + public boolean isRemotePublicationEnabled() { + return this.isPublicationEnabled; + } + public void setRemoteStateReadTimeout(TimeValue remoteStateReadTimeout) { this.remoteStateReadTimeout = remoteStateReadTimeout; } @@ -1637,10 +1988,35 @@ void setRemoteClusterStateAttributesManager(RemoteClusterStateAttributesManager } public void writeMetadataFailed() { - getStats().stateFailed(); + remoteStateStats.stateUploadFailed(); } - public RemotePersistenceStats getStats() { + public RemotePersistenceStats getRemoteStateStats() { return remoteStateStats; } + + public PersistedStateStats getUploadStats() { + return remoteStateStats.getUploadStats(); + } + + public PersistedStateStats getFullDownloadStats() { + return remoteStateStats.getRemoteFullDownloadStats(); + } + + public PersistedStateStats getDiffDownloadStats() { + return remoteStateStats.getRemoteDiffDownloadStats(); + } + + public void fullDownloadFailed() { + remoteStateStats.stateFullDownloadFailed(); + } + + public void diffDownloadFailed() { + remoteStateStats.stateDiffDownloadFailed(); + } + + RemoteClusterStateCache getRemoteClusterStateCache() { + return remoteClusterStateCache; + } + } diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteIndexMetadataManager.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteIndexMetadataManager.java index d1f08a7c2a33d..d8e8ffc68834d 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteIndexMetadataManager.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteIndexMetadataManager.java @@ -20,6 +20,7 @@ import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.gateway.remote.model.RemoteIndexMetadata; import org.opensearch.gateway.remote.model.RemoteReadResult; +import org.opensearch.index.remote.RemoteStoreEnums; import org.opensearch.index.translog.transfer.BlobStoreTransferService; import org.opensearch.repositories.blobstore.BlobStoreRepository; import org.opensearch.threadpool.ThreadPool; @@ -44,11 +45,38 @@ public class RemoteIndexMetadataManager extends AbstractRemoteWritableEntityMana Setting.Property.Deprecated ); + /** + * This setting is used to set the remote index metadata blob store path type strategy. + */ + public static final Setting REMOTE_INDEX_METADATA_PATH_TYPE_SETTING = new Setting<>( + "cluster.remote_store.index_metadata.path_type", + RemoteStoreEnums.PathType.HASHED_PREFIX.toString(), + RemoteStoreEnums.PathType::parseString, + Setting.Property.NodeScope, + Setting.Property.Dynamic + ); + + /** + * This setting is used to set the remote index metadata blob store path hash algorithm strategy. + * This setting will come to effect if the {@link #REMOTE_INDEX_METADATA_PATH_TYPE_SETTING} + * is either {@code HASHED_PREFIX} or {@code HASHED_INFIX}. 
+ */ + public static final Setting REMOTE_INDEX_METADATA_PATH_HASH_ALGO_SETTING = new Setting<>( + "cluster.remote_store.index_metadata.path_hash_algo", + RemoteStoreEnums.PathHashAlgorithm.FNV_1A_BASE64.toString(), + RemoteStoreEnums.PathHashAlgorithm::parseString, + Setting.Property.NodeScope, + Setting.Property.Dynamic + ); + private final Compressor compressor; private final NamedXContentRegistry namedXContentRegistry; private volatile TimeValue indexMetadataUploadTimeout; + private RemoteStoreEnums.PathType pathType; + private RemoteStoreEnums.PathHashAlgorithm pathHashAlgo; + public RemoteIndexMetadataManager( ClusterSettings clusterSettings, String clusterName, @@ -70,7 +98,11 @@ public RemoteIndexMetadataManager( this.namedXContentRegistry = blobStoreRepository.getNamedXContentRegistry(); this.compressor = blobStoreRepository.getCompressor(); this.indexMetadataUploadTimeout = clusterSettings.get(INDEX_METADATA_UPLOAD_TIMEOUT_SETTING); + this.pathType = clusterSettings.get(REMOTE_INDEX_METADATA_PATH_TYPE_SETTING); + this.pathHashAlgo = clusterSettings.get(REMOTE_INDEX_METADATA_PATH_HASH_ALGO_SETTING); clusterSettings.addSettingsUpdateConsumer(INDEX_METADATA_UPLOAD_TIMEOUT_SETTING, this::setIndexMetadataUploadTimeout); + clusterSettings.addSettingsUpdateConsumer(REMOTE_INDEX_METADATA_PATH_TYPE_SETTING, this::setPathTypeSetting); + clusterSettings.addSettingsUpdateConsumer(REMOTE_INDEX_METADATA_PATH_HASH_ALGO_SETTING, this::setPathHashAlgoSetting); } /** @@ -127,4 +159,20 @@ protected ActionListener getWrappedReadListener( ex -> listener.onFailure(new RemoteStateTransferException("Download failed for " + component, remoteEntity, ex)) ); } + + private void setPathTypeSetting(RemoteStoreEnums.PathType pathType) { + this.pathType = pathType; + } + + private void setPathHashAlgoSetting(RemoteStoreEnums.PathHashAlgorithm pathHashAlgo) { + this.pathHashAlgo = pathHashAlgo; + } + + protected RemoteStoreEnums.PathType getPathTypeSetting() { + return pathType; + } + + protected RemoteStoreEnums.PathHashAlgorithm getPathHashAlgoSetting() { + return pathHashAlgo; + } } diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteManifestManager.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteManifestManager.java index 0ccadd7dd18da..b243269fe323e 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteManifestManager.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteManifestManager.java @@ -10,6 +10,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.Version; import org.opensearch.action.LatchedActionListener; import org.opensearch.cluster.ClusterState; @@ -98,7 +99,9 @@ RemoteClusterStateManifestInfo uploadManifest( RemoteClusterStateUtils.UploadedMetadataResults uploadedMetadataResult, String previousClusterUUID, ClusterStateDiffManifest clusterDiffManifest, - boolean committed + ClusterStateChecksum clusterStateChecksum, + boolean committed, + int codecVersion ) { synchronized (this) { ClusterMetadataManifest.Builder manifestBuilder = ClusterMetadataManifest.builder(); @@ -109,7 +112,7 @@ RemoteClusterStateManifestInfo uploadManifest( .opensearchVersion(Version.CURRENT) .nodeId(nodeId) .committed(committed) - .codecVersion(RemoteClusterMetadataManifest.MANIFEST_CURRENT_CODEC_VERSION) + .codecVersion(codecVersion) .indices(uploadedMetadataResult.uploadedIndexMetadata) .previousClusterUUID(previousClusterUUID) 
.clusterUUIDCommitted(clusterState.metadata().clusterUUIDCommitted()) @@ -125,8 +128,10 @@ RemoteClusterStateManifestInfo uploadManifest( .metadataVersion(clusterState.metadata().version()) .transientSettingsMetadata(uploadedMetadataResult.uploadedTransientSettingsMetadata) .clusterStateCustomMetadataMap(uploadedMetadataResult.uploadedClusterStateCustomMetadataMap) - .hashesOfConsistentSettings(uploadedMetadataResult.uploadedHashesOfConsistentSettings); + .hashesOfConsistentSettings(uploadedMetadataResult.uploadedHashesOfConsistentSettings) + .checksum(clusterStateChecksum); final ClusterMetadataManifest manifest = manifestBuilder.build(); + logger.trace(() -> new ParameterizedMessage("[{}] uploading manifest", manifest)); String manifestFileName = writeMetadataManifest(clusterState.metadata().clusterUUID(), manifest); return new RemoteClusterStateManifestInfo(manifest, manifestFileName); } @@ -189,6 +194,17 @@ public Optional getLatestClusterMetadataManifest(String return latestManifestFileName.map(s -> fetchRemoteClusterMetadataManifest(clusterName, clusterUUID, s)); } + public Optional getClusterMetadataManifestByTermVersion( + String clusterName, + String clusterUUID, + long term, + long version + ) { + String prefix = RemoteManifestManager.getManifestFilePrefixForTermVersion(term, version); + Optional latestManifestFileName = getManifestFileNameByPrefix(clusterName, clusterUUID, prefix); + return latestManifestFileName.map(s -> fetchRemoteClusterMetadataManifest(clusterName, clusterUUID, s)); + } + public ClusterMetadataManifest getRemoteClusterMetadataManifestByFileName(String clusterUUID, String filename) throws IllegalStateException { try { @@ -288,7 +304,7 @@ private List getManifestFileNames(String clusterName, String clust } } - static String getManifestFilePrefixForTermVersion(long term, long version) { + public static String getManifestFilePrefixForTermVersion(long term, long version) { return String.join( DELIMITER, RemoteClusterMetadataManifest.MANIFEST, @@ -317,4 +333,14 @@ private Optional getLatestManifestFileName(String clusterName, String cl logger.info("No manifest file present in remote store for cluster name: {}, cluster UUID: {}", clusterName, clusterUUID); return Optional.empty(); } + + private Optional getManifestFileNameByPrefix(String clusterName, String clusterUUID, String filePrefix) + throws IllegalStateException { + List manifestFilesMetadata = getManifestFileNames(clusterName, clusterUUID, filePrefix, 1); + if (manifestFilesMetadata != null && !manifestFilesMetadata.isEmpty()) { + return Optional.of(manifestFilesMetadata.get(0).name()); + } + logger.info("No manifest file present in remote store for cluster name: {}, cluster UUID: {}", clusterName, clusterUUID); + return Optional.empty(); + } } diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemotePersistenceStats.java b/server/src/main/java/org/opensearch/gateway/remote/RemotePersistenceStats.java index 1e7f8f278fb0f..417ebdafd3ba7 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemotePersistenceStats.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemotePersistenceStats.java @@ -10,51 +10,96 @@ import org.opensearch.cluster.coordination.PersistedStateStats; -import java.util.concurrent.atomic.AtomicLong; - /** * Remote state related extended stats. 
* * @opensearch.internal */ -public class RemotePersistenceStats extends PersistedStateStats { - static final String CLEANUP_ATTEMPT_FAILED_COUNT = "cleanup_attempt_failed_count"; - static final String INDEX_ROUTING_FILES_CLEANUP_ATTEMPT_FAILED_COUNT = "index_routing_files_cleanup_attempt_failed_count"; - static final String INDICES_ROUTING_DIFF_FILES_CLEANUP_ATTEMPT_FAILED_COUNT = "indices_routing_diff_files_cleanup_attempt_failed_count"; - static final String REMOTE_UPLOAD = "remote_upload"; - private AtomicLong cleanupAttemptFailedCount = new AtomicLong(0); +public class RemotePersistenceStats { + + RemoteUploadStats remoteUploadStats; + PersistedStateStats remoteDiffDownloadStats; + PersistedStateStats remoteFullDownloadStats; - private AtomicLong indexRoutingFilesCleanupAttemptFailedCount = new AtomicLong(0); - private AtomicLong indicesRoutingDiffFilesCleanupAttemptFailedCount = new AtomicLong(0); + final String FULL_DOWNLOAD_STATS = "remote_full_download"; + final String DIFF_DOWNLOAD_STATS = "remote_diff_download"; public RemotePersistenceStats() { - super(REMOTE_UPLOAD); - addToExtendedFields(CLEANUP_ATTEMPT_FAILED_COUNT, cleanupAttemptFailedCount); - addToExtendedFields(INDEX_ROUTING_FILES_CLEANUP_ATTEMPT_FAILED_COUNT, indexRoutingFilesCleanupAttemptFailedCount); - addToExtendedFields(INDICES_ROUTING_DIFF_FILES_CLEANUP_ATTEMPT_FAILED_COUNT, indicesRoutingDiffFilesCleanupAttemptFailedCount); + remoteUploadStats = new RemoteUploadStats(); + remoteDiffDownloadStats = new PersistedStateStats(DIFF_DOWNLOAD_STATS); + remoteFullDownloadStats = new PersistedStateStats(FULL_DOWNLOAD_STATS); } public void cleanUpAttemptFailed() { - cleanupAttemptFailedCount.incrementAndGet(); + remoteUploadStats.cleanUpAttemptFailed(); } public long getCleanupAttemptFailedCount() { - return cleanupAttemptFailedCount.get(); + return remoteUploadStats.getCleanupAttemptFailedCount(); } public void indexRoutingFilesCleanupAttemptFailed() { - indexRoutingFilesCleanupAttemptFailedCount.incrementAndGet(); + remoteUploadStats.indexRoutingFilesCleanupAttemptFailed(); } public long getIndexRoutingFilesCleanupAttemptFailedCount() { - return indexRoutingFilesCleanupAttemptFailedCount.get(); + return remoteUploadStats.getIndexRoutingFilesCleanupAttemptFailedCount(); } public void indicesRoutingDiffFileCleanupAttemptFailed() { - indicesRoutingDiffFilesCleanupAttemptFailedCount.incrementAndGet(); + remoteUploadStats.indicesRoutingDiffFileCleanupAttemptFailed(); } public long getIndicesRoutingDiffFileCleanupAttemptFailedCount() { - return indicesRoutingDiffFilesCleanupAttemptFailedCount.get(); + return remoteUploadStats.getIndicesRoutingDiffFileCleanupAttemptFailedCount(); + } + + public void stateUploadSucceeded() { + remoteUploadStats.stateSucceeded(); + } + + public void stateUploadTook(long durationMillis) { + remoteUploadStats.stateTook(durationMillis); + } + + public void stateUploadFailed() { + remoteUploadStats.stateFailed(); + } + + public void stateFullDownloadSucceeded() { + remoteFullDownloadStats.stateSucceeded(); + } + + public void stateDiffDownloadSucceeded() { + remoteDiffDownloadStats.stateSucceeded(); + } + + public void stateFullDownloadTook(long durationMillis) { + remoteFullDownloadStats.stateTook(durationMillis); } + + public void stateDiffDownloadTook(long durationMillis) { + remoteDiffDownloadStats.stateTook(durationMillis); + } + + public void stateFullDownloadFailed() { + remoteFullDownloadStats.stateFailed(); + } + + public void stateDiffDownloadFailed() { + 
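+ // Record the failure against the dedicated diff-download stats bucket, kept separate from upload and full-download stats.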
remoteDiffDownloadStats.stateFailed(); + } + + public PersistedStateStats getUploadStats() { + return remoteUploadStats; + } + + public PersistedStateStats getRemoteDiffDownloadStats() { + return remoteDiffDownloadStats; + } + + public PersistedStateStats getRemoteFullDownloadStats() { + return remoteFullDownloadStats; + } + } diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteUploadStats.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteUploadStats.java new file mode 100644 index 0000000000000..9ffef65ae1eba --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteUploadStats.java @@ -0,0 +1,59 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +import org.opensearch.cluster.coordination.PersistedStateStats; + +import java.util.concurrent.atomic.AtomicLong; + +/** + * Upload stats for remote state + * + * @opensearch.internal + */ +public class RemoteUploadStats extends PersistedStateStats { + static final String CLEANUP_ATTEMPT_FAILED_COUNT = "cleanup_attempt_failed_count"; + static final String INDEX_ROUTING_FILES_CLEANUP_ATTEMPT_FAILED_COUNT = "index_routing_files_cleanup_attempt_failed_count"; + static final String INDICES_ROUTING_DIFF_FILES_CLEANUP_ATTEMPT_FAILED_COUNT = "indices_routing_diff_files_cleanup_attempt_failed_count"; + static final String REMOTE_UPLOAD = "remote_upload"; + private AtomicLong cleanupAttemptFailedCount = new AtomicLong(0); + private AtomicLong indexRoutingFilesCleanupAttemptFailedCount = new AtomicLong(0); + private AtomicLong indicesRoutingDiffFilesCleanupAttemptFailedCount = new AtomicLong(0); + + public RemoteUploadStats() { + super(REMOTE_UPLOAD); + addToExtendedFields(CLEANUP_ATTEMPT_FAILED_COUNT, cleanupAttemptFailedCount); + addToExtendedFields(INDEX_ROUTING_FILES_CLEANUP_ATTEMPT_FAILED_COUNT, indexRoutingFilesCleanupAttemptFailedCount); + addToExtendedFields(INDICES_ROUTING_DIFF_FILES_CLEANUP_ATTEMPT_FAILED_COUNT, indicesRoutingDiffFilesCleanupAttemptFailedCount); + } + + public void cleanUpAttemptFailed() { + cleanupAttemptFailedCount.incrementAndGet(); + } + + public long getCleanupAttemptFailedCount() { + return cleanupAttemptFailedCount.get(); + } + + public void indexRoutingFilesCleanupAttemptFailed() { + indexRoutingFilesCleanupAttemptFailedCount.incrementAndGet(); + } + + public long getIndexRoutingFilesCleanupAttemptFailedCount() { + return indexRoutingFilesCleanupAttemptFailedCount.get(); + } + + public void indicesRoutingDiffFileCleanupAttemptFailed() { + indicesRoutingDiffFilesCleanupAttemptFailedCount.incrementAndGet(); + } + + public long getIndicesRoutingDiffFileCleanupAttemptFailedCount() { + return indicesRoutingDiffFilesCleanupAttemptFailedCount.get(); + } +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/model/RemoteClusterMetadataManifest.java b/server/src/main/java/org/opensearch/gateway/remote/model/RemoteClusterMetadataManifest.java index 5f79b690af574..999beaa4e865d 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/model/RemoteClusterMetadataManifest.java +++ b/server/src/main/java/org/opensearch/gateway/remote/model/RemoteClusterMetadataManifest.java @@ -35,7 +35,7 @@ public class RemoteClusterMetadataManifest extends AbstractClusterMetadataWritea public static final int SPLITTED_MANIFEST_FILE_LENGTH = 6; public static final String 
METADATA_MANIFEST_NAME_FORMAT = "%s"; - public static final int MANIFEST_CURRENT_CODEC_VERSION = ClusterMetadataManifest.CODEC_V3; + public static final String COMMITTED = "C"; public static final String PUBLISHED = "P"; @@ -53,6 +53,9 @@ public class RemoteClusterMetadataManifest extends AbstractClusterMetadataWritea public static final ChecksumBlobStoreFormat CLUSTER_METADATA_MANIFEST_FORMAT_V2 = new ChecksumBlobStoreFormat<>("cluster-metadata-manifest", METADATA_MANIFEST_NAME_FORMAT, ClusterMetadataManifest::fromXContentV2); + public static final ChecksumBlobStoreFormat CLUSTER_METADATA_MANIFEST_FORMAT_V3 = + new ChecksumBlobStoreFormat<>("cluster-metadata-manifest", METADATA_MANIFEST_NAME_FORMAT, ClusterMetadataManifest::fromXContentV3); + /** * Manifest format compatible with codec v2, where we introduced codec versions/global metadata. */ @@ -150,8 +153,10 @@ int getManifestCodecVersion() { private ChecksumBlobStoreFormat getClusterMetadataManifestBlobStoreFormat() { long codecVersion = getManifestCodecVersion(); - if (codecVersion == MANIFEST_CURRENT_CODEC_VERSION) { + if (codecVersion == ClusterMetadataManifest.MANIFEST_CURRENT_CODEC_VERSION) { return CLUSTER_METADATA_MANIFEST_FORMAT; + } else if (codecVersion == ClusterMetadataManifest.CODEC_V3) { + return CLUSTER_METADATA_MANIFEST_FORMAT_V3; } else if (codecVersion == ClusterMetadataManifest.CODEC_V2) { return CLUSTER_METADATA_MANIFEST_FORMAT_V2; } else if (codecVersion == ClusterMetadataManifest.CODEC_V1) { diff --git a/server/src/main/java/org/opensearch/gateway/remote/model/RemoteDiscoveryNodes.java b/server/src/main/java/org/opensearch/gateway/remote/model/RemoteDiscoveryNodes.java index 446207a767009..829036c6d122b 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/model/RemoteDiscoveryNodes.java +++ b/server/src/main/java/org/opensearch/gateway/remote/model/RemoteDiscoveryNodes.java @@ -88,7 +88,12 @@ public UploadedMetadata getUploadedMetadata() { @Override public InputStream serialize() throws IOException { - return DISCOVERY_NODES_FORMAT.serialize(discoveryNodes, generateBlobFileName(), getCompressor()).streamInput(); + return DISCOVERY_NODES_FORMAT.serialize( + (out, discoveryNode) -> discoveryNode.writeToWithAttribute(out), + discoveryNodes, + generateBlobFileName(), + getCompressor() + ).streamInput(); } @Override diff --git a/server/src/main/java/org/opensearch/gateway/remote/model/RemoteIndexMetadata.java b/server/src/main/java/org/opensearch/gateway/remote/model/RemoteIndexMetadata.java index 5308f92c633b1..97a115a44484c 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/model/RemoteIndexMetadata.java +++ b/server/src/main/java/org/opensearch/gateway/remote/model/RemoteIndexMetadata.java @@ -9,6 +9,7 @@ package org.opensearch.gateway.remote.model; import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.blobstore.BlobPath; import org.opensearch.common.io.Streams; import org.opensearch.common.remote.AbstractClusterMetadataWriteableBlobEntity; import org.opensearch.common.remote.BlobPathParameters; @@ -17,6 +18,8 @@ import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata; import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedMetadata; import org.opensearch.gateway.remote.RemoteClusterStateUtils; +import org.opensearch.index.remote.RemoteStoreEnums; +import org.opensearch.index.remote.RemoteStorePathStrategy; import org.opensearch.index.remote.RemoteStoreUtils; import 
org.opensearch.repositories.blobstore.ChecksumBlobStoreFormat; @@ -41,15 +44,24 @@ public class RemoteIndexMetadata extends AbstractClusterMetadataWriteableBlobEnt public static final String INDEX = "index"; private IndexMetadata indexMetadata; + private RemoteStoreEnums.PathType pathType; + private RemoteStoreEnums.PathHashAlgorithm pathHashAlgo; + private String fixedPrefix; public RemoteIndexMetadata( final IndexMetadata indexMetadata, final String clusterUUID, final Compressor compressor, - final NamedXContentRegistry namedXContentRegistry + final NamedXContentRegistry namedXContentRegistry, + final RemoteStoreEnums.PathType pathType, + final RemoteStoreEnums.PathHashAlgorithm pathHashAlgo, + final String fixedPrefix ) { super(clusterUUID, compressor, namedXContentRegistry); this.indexMetadata = indexMetadata; + this.pathType = pathType; + this.pathHashAlgo = pathHashAlgo; + this.fixedPrefix = fixedPrefix; } public RemoteIndexMetadata( @@ -86,6 +98,22 @@ public String generateBlobFileName() { return blobFileName; } + @Override + public BlobPath getPrefixedPath(BlobPath blobPath) { + if (pathType == null) { + return blobPath; + } + assert pathHashAlgo != null; + return pathType.path( + RemoteStorePathStrategy.PathInput.builder() + .fixedPrefix(fixedPrefix) + .basePath(blobPath) + .indexUUID(indexMetadata.getIndexUUID()) + .build(), + pathHashAlgo + ); + } + @Override public UploadedMetadata getUploadedMetadata() { assert blobName != null; diff --git a/server/src/main/java/org/opensearch/gateway/remote/model/RemotePinnedTimestamps.java b/server/src/main/java/org/opensearch/gateway/remote/model/RemotePinnedTimestamps.java deleted file mode 100644 index 030491cf8b7b9..0000000000000 --- a/server/src/main/java/org/opensearch/gateway/remote/model/RemotePinnedTimestamps.java +++ /dev/null @@ -1,144 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.gateway.remote.model; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.opensearch.common.io.Streams; -import org.opensearch.common.remote.BlobPathParameters; -import org.opensearch.common.remote.RemoteWriteableBlobEntity; -import org.opensearch.core.common.io.stream.StreamInput; -import org.opensearch.core.common.io.stream.StreamOutput; -import org.opensearch.core.common.io.stream.Writeable; -import org.opensearch.core.compress.Compressor; -import org.opensearch.index.remote.RemoteStoreUtils; -import org.opensearch.repositories.blobstore.ChecksumWritableBlobStoreFormat; - -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; - -import static org.opensearch.gateway.remote.RemoteClusterStateUtils.DELIMITER; - -/** - * Wrapper class for uploading/downloading {@link RemotePinnedTimestamps} to/from remote blob store - * - * @opensearch.internal - */ -public class RemotePinnedTimestamps extends RemoteWriteableBlobEntity { - private static final Logger logger = LogManager.getLogger(RemotePinnedTimestamps.class); - - /** - * Represents a collection of pinned timestamps and their associated pinning entities. - * This class is thread-safe and implements the Writeable interface for serialization. 
- */ - public static class PinnedTimestamps implements Writeable { - private final Map> pinnedTimestampPinningEntityMap; - - public PinnedTimestamps(Map> pinnedTimestampPinningEntityMap) { - this.pinnedTimestampPinningEntityMap = new ConcurrentHashMap<>(pinnedTimestampPinningEntityMap); - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - out.writeMap(pinnedTimestampPinningEntityMap, StreamOutput::writeLong, StreamOutput::writeStringCollection); - } - - public static PinnedTimestamps readFrom(StreamInput in) throws IOException { - return new PinnedTimestamps(in.readMap(StreamInput::readLong, StreamInput::readStringList)); - } - - /** - * Pins a timestamp against a pinning entity. - * - * @param timestamp The timestamp to pin. - * @param pinningEntity The entity pinning the timestamp. - */ - public void pin(Long timestamp, String pinningEntity) { - logger.debug("Pinning timestamp = {} against entity = {}", timestamp, pinningEntity); - pinnedTimestampPinningEntityMap.computeIfAbsent(timestamp, k -> new ArrayList<>()).add(pinningEntity); - } - - /** - * Unpins a timestamp for a specific pinning entity. - * - * @param timestamp The timestamp to unpin. - * @param pinningEntity The entity unpinning the timestamp. - */ - public void unpin(Long timestamp, String pinningEntity) { - logger.debug("Unpinning timestamp = {} against entity = {}", timestamp, pinningEntity); - if (pinnedTimestampPinningEntityMap.containsKey(timestamp) == false - || pinnedTimestampPinningEntityMap.get(timestamp).contains(pinningEntity) == false) { - logger.warn("Timestamp: {} is not pinned by entity: {}", timestamp, pinningEntity); - } - pinnedTimestampPinningEntityMap.compute(timestamp, (k, v) -> { - v.remove(pinningEntity); - return v.isEmpty() ? null : v; - }); - } - - public Map> getPinnedTimestampPinningEntityMap() { - return new HashMap<>(pinnedTimestampPinningEntityMap); - } - } - - public static final String PINNED_TIMESTAMPS = "pinned_timestamps"; - public static final ChecksumWritableBlobStoreFormat PINNED_TIMESTAMPS_FORMAT = new ChecksumWritableBlobStoreFormat<>( - PINNED_TIMESTAMPS, - PinnedTimestamps::readFrom - ); - - private PinnedTimestamps pinnedTimestamps; - - public RemotePinnedTimestamps(String clusterUUID, Compressor compressor) { - super(clusterUUID, compressor); - pinnedTimestamps = new PinnedTimestamps(new HashMap<>()); - } - - @Override - public BlobPathParameters getBlobPathParameters() { - return new BlobPathParameters(List.of(PINNED_TIMESTAMPS), PINNED_TIMESTAMPS); - } - - @Override - public String getType() { - return PINNED_TIMESTAMPS; - } - - @Override - public String generateBlobFileName() { - return this.blobFileName = String.join(DELIMITER, PINNED_TIMESTAMPS, RemoteStoreUtils.invertLong(System.currentTimeMillis())); - } - - @Override - public InputStream serialize() throws IOException { - return PINNED_TIMESTAMPS_FORMAT.serialize(pinnedTimestamps, generateBlobFileName(), getCompressor()).streamInput(); - } - - @Override - public PinnedTimestamps deserialize(InputStream inputStream) throws IOException { - return PINNED_TIMESTAMPS_FORMAT.deserialize(blobName, Streams.readFully(inputStream)); - } - - public void setBlobFileName(String blobFileName) { - this.blobFileName = blobFileName; - } - - public void setPinnedTimestamps(PinnedTimestamps pinnedTimestamps) { - this.pinnedTimestamps = pinnedTimestamps; - } - - public PinnedTimestamps getPinnedTimestamps() { - return pinnedTimestamps; - } -} diff --git 
a/server/src/main/java/org/opensearch/gateway/remote/model/RemoteRoutingTableBlobStore.java b/server/src/main/java/org/opensearch/gateway/remote/model/RemoteRoutingTableBlobStore.java index 1a28c97dc8a77..8490f92d055eb 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/model/RemoteRoutingTableBlobStore.java +++ b/server/src/main/java/org/opensearch/gateway/remote/model/RemoteRoutingTableBlobStore.java @@ -15,6 +15,7 @@ import org.opensearch.common.remote.RemoteWriteableEntityBlobStore; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Setting.Property; import org.opensearch.gateway.remote.RemoteClusterStateUtils; import org.opensearch.gateway.remote.routingtable.RemoteIndexRoutingTable; import org.opensearch.index.remote.RemoteStoreEnums; @@ -58,8 +59,19 @@ public class RemoteRoutingTableBlobStore CLUSTER_REMOTE_STORE_ROUTING_TABLE_PATH_PREFIX = Setting.simpleString( + "cluster.remote_store.routing_table.path.prefix", + "", + Property.NodeScope, + Property.Final + ); + private RemoteStoreEnums.PathType pathType; private RemoteStoreEnums.PathHashAlgorithm pathHashAlgo; + private String pathPrefix; public RemoteRoutingTableBlobStore( BlobStoreTransferService blobStoreTransferService, @@ -79,6 +91,7 @@ public RemoteRoutingTableBlobStore( ); this.pathType = clusterSettings.get(REMOTE_ROUTING_TABLE_PATH_TYPE_SETTING); this.pathHashAlgo = clusterSettings.get(REMOTE_ROUTING_TABLE_PATH_HASH_ALGO_SETTING); + this.pathPrefix = clusterSettings.get(CLUSTER_REMOTE_STORE_ROUTING_TABLE_PATH_PREFIX); clusterSettings.addSettingsUpdateConsumer(REMOTE_ROUTING_TABLE_PATH_TYPE_SETTING, this::setPathTypeSetting); clusterSettings.addSettingsUpdateConsumer(REMOTE_ROUTING_TABLE_PATH_HASH_ALGO_SETTING, this::setPathHashAlgoSetting); } @@ -90,6 +103,7 @@ public BlobPath getBlobPathForUpload(final RemoteWriteableBlobEntity { - - public static final String PINNED_TIMESTAMPS_PATH_TOKEN = "pinned_timestamps"; - private final BlobStoreRepository blobStoreRepository; - - public RemoteStorePinnedTimestampsBlobStore( - BlobStoreTransferService blobStoreTransferService, - BlobStoreRepository blobStoreRepository, - String clusterName, - ThreadPool threadPool, - String executor - ) { - super(blobStoreTransferService, blobStoreRepository, clusterName, threadPool, executor, PINNED_TIMESTAMPS_PATH_TOKEN); - this.blobStoreRepository = blobStoreRepository; - } - - @Override - public BlobPath getBlobPathForUpload(final RemoteWriteableBlobEntity obj) { - return blobStoreRepository.basePath().add(PINNED_TIMESTAMPS_PATH_TOKEN); - } -} diff --git a/server/src/main/java/org/opensearch/gateway/remote/routingtable/RemoteRoutingTableDiff.java b/server/src/main/java/org/opensearch/gateway/remote/routingtable/RemoteRoutingTableDiff.java index 2370417dc14df..b3e0e9e5763b7 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/routingtable/RemoteRoutingTableDiff.java +++ b/server/src/main/java/org/opensearch/gateway/remote/routingtable/RemoteRoutingTableDiff.java @@ -10,6 +10,7 @@ import org.opensearch.cluster.Diff; import org.opensearch.cluster.routing.IndexRoutingTable; +import org.opensearch.cluster.routing.RoutingTable; import org.opensearch.cluster.routing.RoutingTableIncrementalDiff; import org.opensearch.common.io.Streams; import org.opensearch.common.remote.AbstractClusterMetadataWriteableBlobEntity; @@ -22,7 +23,6 @@ import java.io.IOException; import java.io.InputStream; import java.util.List; -import java.util.Map; import 
static org.opensearch.gateway.remote.RemoteClusterStateUtils.DELIMITER; @@ -30,7 +30,8 @@ * Represents a incremental difference between {@link org.opensearch.cluster.routing.RoutingTable} objects that can be serialized and deserialized. * This class is responsible for writing and reading the differences between RoutingTables to and from an input/output stream. */ -public class RemoteRoutingTableDiff extends AbstractClusterMetadataWriteableBlobEntity { +public class RemoteRoutingTableDiff extends AbstractClusterMetadataWriteableBlobEntity> { + private final RoutingTableIncrementalDiff routingTableIncrementalDiff; private long term; @@ -71,18 +72,6 @@ public RemoteRoutingTableDiff( this.version = version; } - /** - * Constructs a new RemoteRoutingTableDiff with the given differences. - * - * @param routingTableIncrementalDiff a RoutingTableIncrementalDiff object containing the differences of {@link IndexRoutingTable}. - * @param clusterUUID the cluster UUID. - * @param compressor the compressor to be used. - */ - public RemoteRoutingTableDiff(RoutingTableIncrementalDiff routingTableIncrementalDiff, String clusterUUID, Compressor compressor) { - super(clusterUUID, compressor); - this.routingTableIncrementalDiff = routingTableIncrementalDiff; - } - /** * Constructs a new RemoteIndexRoutingTableDiff with the given blob name, cluster UUID, and compressor. * @@ -101,9 +90,8 @@ public RemoteRoutingTableDiff(String blobName, String clusterUUID, Compressor co * * @return a map containing the differences. */ - public Map> getDiffs() { - assert routingTableIncrementalDiff != null; - return routingTableIncrementalDiff.getDiffs(); + public Diff getDiffs() { + return routingTableIncrementalDiff; } @Override @@ -144,7 +132,7 @@ public InputStream serialize() throws IOException { } @Override - public RoutingTableIncrementalDiff deserialize(InputStream in) throws IOException { + public Diff deserialize(InputStream in) throws IOException { return REMOTE_ROUTING_TABLE_DIFF_FORMAT.deserialize(blobName, Streams.readFully(in)); } } diff --git a/server/src/main/java/org/opensearch/index/IndexModule.java b/server/src/main/java/org/opensearch/index/IndexModule.java index dc1bf94662385..79de97dc96fba 100644 --- a/server/src/main/java/org/opensearch/index/IndexModule.java +++ b/server/src/main/java/org/opensearch/index/IndexModule.java @@ -73,6 +73,7 @@ import org.opensearch.index.engine.EngineFactory; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.shard.IndexEventListener; +import org.opensearch.index.shard.IndexShard; import org.opensearch.index.shard.IndexingOperationListener; import org.opensearch.index.shard.SearchOperationListener; import org.opensearch.index.similarity.SimilarityService; @@ -629,6 +630,56 @@ public IndexService newIndexService( Supplier clusterDefaultRefreshIntervalSupplier, RecoverySettings recoverySettings, RemoteStoreSettings remoteStoreSettings + ) throws IOException { + return newIndexService( + indexCreationContext, + environment, + xContentRegistry, + shardStoreDeleter, + circuitBreakerService, + bigArrays, + threadPool, + scriptService, + clusterService, + client, + indicesQueryCache, + mapperRegistry, + indicesFieldDataCache, + namedWriteableRegistry, + idFieldDataEnabled, + valuesSourceRegistry, + remoteDirectoryFactory, + translogFactorySupplier, + clusterDefaultRefreshIntervalSupplier, + recoverySettings, + remoteStoreSettings, + (s) -> {} + ); + } + + public IndexService newIndexService( + IndexService.IndexCreationContext indexCreationContext, + 
NodeEnvironment environment, + NamedXContentRegistry xContentRegistry, + IndexService.ShardStoreDeleter shardStoreDeleter, + CircuitBreakerService circuitBreakerService, + BigArrays bigArrays, + ThreadPool threadPool, + ScriptService scriptService, + ClusterService clusterService, + Client client, + IndicesQueryCache indicesQueryCache, + MapperRegistry mapperRegistry, + IndicesFieldDataCache indicesFieldDataCache, + NamedWriteableRegistry namedWriteableRegistry, + BooleanSupplier idFieldDataEnabled, + ValuesSourceRegistry valuesSourceRegistry, + IndexStorePlugin.DirectoryFactory remoteDirectoryFactory, + BiFunction translogFactorySupplier, + Supplier clusterDefaultRefreshIntervalSupplier, + RecoverySettings recoverySettings, + RemoteStoreSettings remoteStoreSettings, + Consumer replicator ) throws IOException { final IndexEventListener eventListener = freeze(); Function> readerWrapperFactory = indexReaderWrapper @@ -689,7 +740,8 @@ public IndexService newIndexService( recoverySettings, remoteStoreSettings, fileCache, - compositeIndexSettings + compositeIndexSettings, + replicator ); success = true; return indexService; diff --git a/server/src/main/java/org/opensearch/index/IndexService.java b/server/src/main/java/org/opensearch/index/IndexService.java index 12b02d3dbd6fa..f1b36194bf62d 100644 --- a/server/src/main/java/org/opensearch/index/IndexService.java +++ b/server/src/main/java/org/opensearch/index/IndexService.java @@ -136,6 +136,7 @@ import static java.util.Collections.emptyMap; import static java.util.Collections.unmodifiableMap; import static org.opensearch.common.collect.MapBuilder.newMapBuilder; +import static org.opensearch.common.util.FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL_SETTING; import static org.opensearch.index.remote.RemoteMigrationIndexMetadataUpdater.indexHasRemoteStoreSettings; /** @@ -174,6 +175,7 @@ public class IndexService extends AbstractIndexComponent implements IndicesClust private volatile AsyncTranslogFSync fsyncTask; private volatile AsyncGlobalCheckpointTask globalCheckpointTask; private volatile AsyncRetentionLeaseSyncTask retentionLeaseSyncTask; + private volatile AsyncReplicationTask asyncReplicationTask; // don't convert to Setting<> and register... 
we only set this in tests and register via a plugin private final String INDEX_TRANSLOG_RETENTION_CHECK_INTERVAL_SETTING = "index.translog.retention.check_interval"; @@ -194,6 +196,7 @@ public class IndexService extends AbstractIndexComponent implements IndicesClust private final RemoteStoreSettings remoteStoreSettings; private final FileCache fileCache; private final CompositeIndexSettings compositeIndexSettings; + private final Consumer replicator; public IndexService( IndexSettings indexSettings, @@ -231,7 +234,8 @@ public IndexService( RecoverySettings recoverySettings, RemoteStoreSettings remoteStoreSettings, FileCache fileCache, - CompositeIndexSettings compositeIndexSettings + CompositeIndexSettings compositeIndexSettings, + Consumer replicator ) { super(indexSettings); this.allowExpensiveQueries = allowExpensiveQueries; @@ -306,11 +310,15 @@ public IndexService( this.trimTranslogTask = new AsyncTrimTranslogTask(this); this.globalCheckpointTask = new AsyncGlobalCheckpointTask(this); this.retentionLeaseSyncTask = new AsyncRetentionLeaseSyncTask(this); + if (READER_WRITER_SPLIT_EXPERIMENTAL_SETTING.get(indexSettings.getNodeSettings())) { + this.asyncReplicationTask = new AsyncReplicationTask(this); + } this.translogFactorySupplier = translogFactorySupplier; this.recoverySettings = recoverySettings; this.remoteStoreSettings = remoteStoreSettings; this.compositeIndexSettings = compositeIndexSettings; this.fileCache = fileCache; + this.replicator = replicator; updateFsyncTaskIfNecessary(); } @@ -386,7 +394,8 @@ public IndexService( recoverySettings, remoteStoreSettings, null, - null + null, + s -> {} ); } @@ -395,6 +404,11 @@ static boolean needsMapperService(IndexSettings indexSettings, IndexCreationCont && indexCreationContext == IndexCreationContext.CREATE_INDEX); // metadata verification needs a mapper service } + // visible for tests + AsyncReplicationTask getReplicationTask() { + return asyncReplicationTask; + } + /** * Context for index creation * @@ -867,7 +881,8 @@ public QueryShardContext newQueryShardContext( IndexSearcher searcher, LongSupplier nowInMillis, String clusterAlias, - boolean validate + boolean validate, + boolean keywordIndexOrDocValuesEnabled ) { final SearchIndexNameMatcher indexNameMatcher = new SearchIndexNameMatcher( index().getName(), @@ -893,10 +908,27 @@ public QueryShardContext newQueryShardContext( indexNameMatcher, allowExpensiveQueries, valuesSourceRegistry, - validate + validate, + keywordIndexOrDocValuesEnabled ); } + /** + * Creates a new QueryShardContext. + *
<p>
+ * Passing a {@code null} {@link IndexSearcher} will return a valid context, however it won't be able to make + * {@link IndexReader}-specific optimizations, such as rewriting containing range queries. + */ + public QueryShardContext newQueryShardContext( + int shardId, + IndexSearcher searcher, + LongSupplier nowInMillis, + String clusterAlias, + boolean validate + ) { + return newQueryShardContext(shardId, searcher, nowInMillis, clusterAlias, validate, false); + } + /** * The {@link ThreadPool} to use for this index. */ @@ -1065,11 +1097,22 @@ public synchronized void updateMetadata(final IndexMetadata currentIndexMetadata } onRefreshIntervalChange(); updateFsyncTaskIfNecessary(); + if (READER_WRITER_SPLIT_EXPERIMENTAL_SETTING.get(indexSettings.getNodeSettings())) { + updateReplicationTask(); + } } metadataListeners.forEach(c -> c.accept(newIndexMetadata)); } + private void updateReplicationTask() { + try { + asyncReplicationTask.close(); + } finally { + asyncReplicationTask = new AsyncReplicationTask(this); + } + } + /** * Called whenever the refresh interval changes. This can happen in 2 cases - * 1. {@code cluster.default.index.refresh_interval} cluster setting changes. The change would only happen for @@ -1334,6 +1377,47 @@ public String toString() { } } + final class AsyncReplicationTask extends BaseAsyncTask { + + AsyncReplicationTask(IndexService indexService) { + super(indexService, indexService.getRefreshInterval()); + } + + @Override + protected void runInternal() { + indexService.maybeSyncSegments(false); + } + + @Override + protected String getThreadPool() { + return ThreadPool.Names.GENERIC; + } + + @Override + public String toString() { + return "replication"; + } + + @Override + protected boolean mustReschedule() { + return indexSettings.isSegRepEnabledOrRemoteNode() && super.mustReschedule(); + } + } + + private void maybeSyncSegments(boolean force) { + if (getRefreshInterval().millis() > 0 || force) { + for (IndexShard shard : this.shards.values()) { + try { + if (shard.routingEntry().isSearchOnly() && shard.routingEntry().active()) { + replicator.accept(shard); + } + } catch (IndexShardClosedException | AlreadyClosedException ex) { + // do nothing + } + } + } + } + final class AsyncTrimTranslogTask extends BaseAsyncTask { AsyncTrimTranslogTask(IndexService indexService) { diff --git a/server/src/main/java/org/opensearch/index/IndexSettings.java b/server/src/main/java/org/opensearch/index/IndexSettings.java index 9cab68d646b6e..554e99764c1a1 100644 --- a/server/src/main/java/org/opensearch/index/IndexSettings.java +++ b/server/src/main/java/org/opensearch/index/IndexSettings.java @@ -48,6 +48,8 @@ import org.opensearch.core.common.unit.ByteSizeUnit; import org.opensearch.core.common.unit.ByteSizeValue; import org.opensearch.core.index.Index; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeIndexSettings; +import org.opensearch.index.remote.RemoteStoreEnums.PathType; import org.opensearch.index.remote.RemoteStorePathStrategy; import org.opensearch.index.remote.RemoteStoreUtils; import org.opensearch.index.translog.Translog; @@ -76,6 +78,9 @@ import static org.opensearch.index.mapper.MapperService.INDEX_MAPPING_NESTED_FIELDS_LIMIT_SETTING; import static org.opensearch.index.mapper.MapperService.INDEX_MAPPING_TOTAL_FIELDS_LIMIT_SETTING; import static org.opensearch.index.store.remote.directory.RemoteSnapshotDirectory.SEARCHABLE_SNAPSHOT_EXTENDED_COMPATIBILITY_MINIMUM_VERSION; +import static 
org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_ALL; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_AUTO; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_NONE; import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_TARGET_MAX_SLICE_COUNT_DEFAULT_VALUE; /** @@ -677,6 +682,14 @@ public static IndexMergePolicy fromString(String text) { Property.InternalIndex ); + public static final Setting SEARCHABLE_SNAPSHOT_SHARD_PATH_TYPE = new Setting<>( + "index.searchable_snapshot.shard_path_type", + PathType.FIXED.toString(), + PathType::parseString, + Property.IndexScope, + Property.InternalIndex + ); + public static final Setting DEFAULT_SEARCH_PIPELINE = new Setting<>( "index.search.default_pipeline", SearchPipelineService.NOOP_PIPELINE_ID, @@ -689,7 +702,26 @@ public static IndexMergePolicy fromString(String text) { "index.search.concurrent_segment_search.enabled", false, Property.IndexScope, - Property.Dynamic + Property.Dynamic, + Property.Deprecated + ); + + public static final Setting INDEX_CONCURRENT_SEGMENT_SEARCH_MODE = Setting.simpleString( + "index.search.concurrent_segment_search.mode", + CONCURRENT_SEGMENT_SEARCH_MODE_NONE, + value -> { + switch (value) { + case CONCURRENT_SEGMENT_SEARCH_MODE_ALL: + case CONCURRENT_SEGMENT_SEARCH_MODE_NONE: + case CONCURRENT_SEGMENT_SEARCH_MODE_AUTO: + // valid setting + break; + default: + throw new IllegalArgumentException("Setting value must be one of [all, none, auto]"); + } + }, + Property.Dynamic, + Property.IndexScope ); public static final Setting INDEX_CONCURRENT_SEGMENT_SEARCH_MAX_SLICE_COUNT = Setting.intSetting( @@ -734,6 +766,22 @@ public static IndexMergePolicy fromString(String text) { Property.IndexScope ); + public static final Setting INDEX_CONTEXT_CREATED_VERSION = Setting.longSetting( + "index.context.created_version", + 0, + 0, + Property.PrivateIndex, + Property.IndexScope + ); + + public static final Setting INDEX_CONTEXT_CURRENT_VERSION = Setting.longSetting( + "index.context.current_version", + 0, + 0, + Property.PrivateIndex, + Property.IndexScope + ); + private final Index index; private final Version version; private final Logger logger; @@ -864,6 +912,8 @@ private void setRetentionLeaseMillis(final TimeValue retentionLease) { */ private volatile double docIdFuzzySetFalsePositiveProbability; + private final boolean isCompositeIndex; + /** * Returns the default search fields for this index. */ @@ -1027,7 +1077,7 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti setEnableFuzzySetForDocId(scopedSettings.get(INDEX_DOC_ID_FUZZY_SET_ENABLED_SETTING)); setDocIdFuzzySetFalsePositiveProbability(scopedSettings.get(INDEX_DOC_ID_FUZZY_SET_FALSE_POSITIVE_PROBABILITY_SETTING)); - + isCompositeIndex = scopedSettings.get(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING); scopedSettings.addSettingsUpdateConsumer( TieredMergePolicyProvider.INDEX_COMPOUND_FORMAT_SETTING, tieredMergePolicyProvider::setNoCFSRatio @@ -1272,6 +1322,10 @@ public int getNumberOfReplicas() { return settings.getAsInt(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, null); } + public boolean isCompositeIndex() { + return isCompositeIndex; + } + /** * Returns true if segment replication is enabled on the index. 
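For context on the new index-level "index.search.concurrent_segment_search.mode" setting above, here is a minimal, self-contained sketch (class and method names are illustrative, not the OpenSearch API) of the same check the setting's validator performs: only "all", "none", and "auto" are accepted, anything else fails fast.

final class ConcurrentSearchModeValidatorSketch {
    static void validate(String value) {
        switch (value) {
            case "all":
            case "none":
            case "auto":
                break; // valid setting
            default:
                throw new IllegalArgumentException("Setting value must be one of [all, none, auto]");
        }
    }

    public static void main(String[] args) {
        validate("auto");     // accepted
        validate("segment");  // throws IllegalArgumentException
    }
}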
* diff --git a/server/src/main/java/org/opensearch/index/codec/composite/CompositeIndexReader.java b/server/src/main/java/org/opensearch/index/codec/composite/CompositeIndexReader.java index a159b0619bcbb..9beb99e4a97cd 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/CompositeIndexReader.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/CompositeIndexReader.java @@ -9,6 +9,7 @@ package org.opensearch.index.codec.composite; import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.compositeindex.datacube.startree.index.CompositeIndexValues; import java.io.IOException; import java.util.List; diff --git a/server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesFormat.java b/server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesFormat.java index e8c69b11b7c88..63fe5a6af76f7 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesFormat.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesFormat.java @@ -37,6 +37,36 @@ public class Composite99DocValuesFormat extends DocValuesFormat { private final DocValuesFormat delegate; private final MapperService mapperService; + /** Data codec name for Composite Doc Values Format */ + public static final String DATA_CODEC_NAME = "Composite99FormatData"; + + /** Meta codec name for Composite Doc Values Format */ + public static final String META_CODEC_NAME = "Composite99FormatMeta"; + + /** Filename extension for the composite index data */ + public static final String DATA_EXTENSION = "cid"; + + /** Filename extension for the composite index meta */ + public static final String META_EXTENSION = "cim"; + + /** Data doc values codec name for Composite Doc Values Format */ + public static final String DATA_DOC_VALUES_CODEC = "Composite99DocValuesData"; + + /** Meta doc values codec name for Composite Doc Values Format */ + public static final String META_DOC_VALUES_CODEC = "Composite99DocValuesMetadata"; + + /** Filename extension for the composite index data doc values */ + public static final String DATA_DOC_VALUES_EXTENSION = "cidvd"; + + /** Filename extension for the composite index meta doc values */ + public static final String META_DOC_VALUES_EXTENSION = "cidvm"; + + /** Initial version for the Composite90DocValuesFormat */ + public static final int VERSION_START = 0; + + /** Current version for the Composite90DocValuesFormat */ + public static final int VERSION_CURRENT = VERSION_START; + // needed for SPI public Composite99DocValuesFormat() { this(new Lucene90DocValuesFormat(), null); diff --git a/server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesReader.java b/server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesReader.java index e3bfe01cfa2d5..7901336151c8e 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesReader.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesReader.java @@ -8,22 +8,47 @@ package org.opensearch.index.codec.composite.composite99; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.CorruptIndexException; +import 
org.apache.lucene.index.DocValues; import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.store.ChecksumIndexInput; +import org.apache.lucene.store.IndexInput; import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.util.io.IOUtils; import org.opensearch.index.codec.composite.CompositeIndexFieldInfo; import org.opensearch.index.codec.composite.CompositeIndexReader; -import org.opensearch.index.codec.composite.CompositeIndexValues; +import org.opensearch.index.codec.composite.LuceneDocValuesProducerFactory; +import org.opensearch.index.compositeindex.CompositeIndexMetadata; +import org.opensearch.index.compositeindex.datacube.Metric; +import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetadata; +import org.opensearch.index.compositeindex.datacube.startree.index.CompositeIndexValues; +import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues; +import org.opensearch.index.mapper.CompositeMappedFieldType; import java.io.IOException; import java.util.ArrayList; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; + +import static org.opensearch.index.compositeindex.CompositeIndexConstants.COMPOSITE_FIELD_MARKER; +import static org.opensearch.index.compositeindex.datacube.startree.fileformats.StarTreeWriter.VERSION_CURRENT; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.fullyQualifiedFieldNameForStarTreeDimensionsDocValues; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.fullyQualifiedFieldNameForStarTreeMetricsDocValues; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.getFieldInfoList; /** * Reader for star tree index and star tree doc values from the segments @@ -32,11 +57,158 @@ */ @ExperimentalApi public class Composite99DocValuesReader extends DocValuesProducer implements CompositeIndexReader { - private DocValuesProducer delegate; + private static final Logger logger = LogManager.getLogger(Composite99DocValuesReader.class); + + private final DocValuesProducer delegate; + private IndexInput dataIn; + private ChecksumIndexInput metaIn; + private final Map compositeIndexInputMap = new LinkedHashMap<>(); + private final Map compositeIndexMetadataMap = new LinkedHashMap<>(); + private final List fields; + private DocValuesProducer compositeDocValuesProducer; + private final List compositeFieldInfos = new ArrayList<>(); + private SegmentReadState readState; - public Composite99DocValuesReader(DocValuesProducer producer, SegmentReadState state) throws IOException { + public Composite99DocValuesReader(DocValuesProducer producer, SegmentReadState readState) throws IOException { this.delegate = producer; - // TODO : read star tree files + this.fields = new ArrayList<>(); + + String metaFileName = IndexFileNames.segmentFileName( + readState.segmentInfo.name, + readState.segmentSuffix, + Composite99DocValuesFormat.META_EXTENSION + ); + + String dataFileName = IndexFileNames.segmentFileName( + readState.segmentInfo.name, + readState.segmentSuffix, + 
Composite99DocValuesFormat.DATA_EXTENSION + ); + + boolean success = false; + try { + + // initialize meta input + dataIn = readState.directory.openInput(dataFileName, readState.context); + CodecUtil.checkIndexHeader( + dataIn, + Composite99DocValuesFormat.DATA_CODEC_NAME, + Composite99DocValuesFormat.VERSION_START, + Composite99DocValuesFormat.VERSION_CURRENT, + readState.segmentInfo.getId(), + readState.segmentSuffix + ); + + // initialize data input + metaIn = readState.directory.openChecksumInput(metaFileName, readState.context); + Throwable priorE = null; + try { + CodecUtil.checkIndexHeader( + metaIn, + Composite99DocValuesFormat.META_CODEC_NAME, + Composite99DocValuesFormat.VERSION_START, + Composite99DocValuesFormat.VERSION_CURRENT, + readState.segmentInfo.getId(), + readState.segmentSuffix + ); + + while (true) { + + // validate magic marker + long magicMarker = metaIn.readLong(); + if (magicMarker == -1) { + break; + } else if (magicMarker < 0) { + throw new CorruptIndexException("Unknown token encountered: " + magicMarker, metaIn); + } else if (COMPOSITE_FIELD_MARKER != magicMarker) { + logger.error("Invalid composite field magic marker"); + throw new IOException("Invalid composite field magic marker"); + } + + int version = metaIn.readVInt(); + if (VERSION_CURRENT != version) { + logger.error("Invalid composite field version"); + throw new IOException("Invalid composite field version"); + } + + // construct composite index metadata + String compositeFieldName = metaIn.readString(); + CompositeMappedFieldType.CompositeFieldType compositeFieldType = CompositeMappedFieldType.CompositeFieldType.fromName( + metaIn.readString() + ); + + switch (compositeFieldType) { + case STAR_TREE: + StarTreeMetadata starTreeMetadata = new StarTreeMetadata( + metaIn, + compositeFieldName, + compositeFieldType, + version + ); + compositeFieldInfos.add(new CompositeIndexFieldInfo(compositeFieldName, compositeFieldType)); + + IndexInput starTreeIndexInput = dataIn.slice( + "star-tree data slice for respective star-tree fields", + starTreeMetadata.getDataStartFilePointer(), + starTreeMetadata.getDataLength() + ); + compositeIndexInputMap.put(compositeFieldName, starTreeIndexInput); + compositeIndexMetadataMap.put(compositeFieldName, starTreeMetadata); + + List dimensionFields = starTreeMetadata.getDimensionFields(); + + // generating star tree unique fields (fully qualified name for dimension and metrics) + for (String dimensions : dimensionFields) { + fields.add(fullyQualifiedFieldNameForStarTreeDimensionsDocValues(compositeFieldName, dimensions)); + } + + // adding metric fields + for (Metric metric : starTreeMetadata.getMetrics()) { + for (MetricStat metricStat : metric.getBaseMetrics()) { + fields.add( + fullyQualifiedFieldNameForStarTreeMetricsDocValues( + compositeFieldName, + metric.getField(), + metricStat.getTypeName() + ) + ); + + } + } + + break; + default: + throw new CorruptIndexException("Invalid composite field type found in the file", dataIn); + } + } + + // populates the dummy list of field infos to fetch doc id set iterators for respective fields. 
+ // the dummy field info is used to fetch the doc id set iterators for respective fields based on field name + FieldInfos fieldInfos = new FieldInfos(getFieldInfoList(fields)); + this.readState = new SegmentReadState(readState.directory, readState.segmentInfo, fieldInfos, readState.context); + + // initialize star-tree doc values producer + + compositeDocValuesProducer = LuceneDocValuesProducerFactory.getDocValuesProducerForCompositeCodec( + Composite99Codec.COMPOSITE_INDEX_CODEC_NAME, + this.readState, + Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC, + Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite99DocValuesFormat.META_DOC_VALUES_CODEC, + Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION + ); + + } catch (Throwable t) { + priorE = t; + } finally { + CodecUtil.checkFooter(metaIn, priorE); + } + success = true; + } finally { + if (success == false) { + IOUtils.closeWhileHandlingException(this); + } + } } @Override @@ -67,24 +239,63 @@ public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { @Override public void checkIntegrity() throws IOException { delegate.checkIntegrity(); - // Todo : check integrity of composite index related [star tree] files + CodecUtil.checksumEntireFile(dataIn); } @Override public void close() throws IOException { delegate.close(); - // Todo: close composite index related files [star tree] files + boolean success = false; + try { + IOUtils.close(metaIn, dataIn); + IOUtils.close(compositeDocValuesProducer); + success = true; + } finally { + if (!success) { + IOUtils.closeWhileHandlingException(metaIn, dataIn); + } + compositeIndexInputMap.clear(); + compositeIndexMetadataMap.clear(); + fields.clear(); + metaIn = null; + dataIn = null; + } } @Override public List getCompositeIndexFields() { - // todo : read from file formats and get the field names. - return new ArrayList<>(); + return compositeFieldInfos; } @Override public CompositeIndexValues getCompositeIndexValues(CompositeIndexFieldInfo compositeIndexFieldInfo) throws IOException { - // TODO : read compositeIndexValues [starTreeValues] from star tree files - throw new UnsupportedOperationException(); + + switch (compositeIndexFieldInfo.getType()) { + case STAR_TREE: + return new StarTreeValues( + compositeIndexMetadataMap.get(compositeIndexFieldInfo.getField()), + compositeIndexInputMap.get(compositeIndexFieldInfo.getField()), + compositeDocValuesProducer, + this.readState + ); + + default: + throw new CorruptIndexException("Unsupported composite index field type: ", compositeIndexFieldInfo.getType().getName()); + } + } + + /** + * Returns the sorted numeric doc values for the given sorted numeric field. + * If the sorted numeric field is null, it returns an empty doc id set iterator. + *
<p>
+ * Sorted numeric field can be null for cases where the segment doesn't hold a particular value. + * + * @param sortedNumeric the sorted numeric doc values for a field + * @return empty sorted numeric values if the field is not present, else sortedNumeric + */ + public static SortedNumericDocValues getSortedNumericDocValues(SortedNumericDocValues sortedNumeric) { + return sortedNumeric == null ? DocValues.emptySortedNumeric() : sortedNumeric; + } + } diff --git a/server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesWriter.java b/server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesWriter.java index 74ab7d423998e..0d4e35f7c3ab8 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesWriter.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/composite99/Composite99DocValuesWriter.java @@ -8,34 +8,44 @@ package org.opensearch.index.codec.composite.composite99; +import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.EmptyDocValuesProducer; import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.MergeState; import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.IndexOutput; import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.util.io.IOUtils; import org.opensearch.index.codec.composite.CompositeIndexFieldInfo; import org.opensearch.index.codec.composite.CompositeIndexReader; -import org.opensearch.index.codec.composite.CompositeIndexValues; -import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; +import org.opensearch.index.codec.composite.LuceneDocValuesConsumerFactory; import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; import org.opensearch.index.compositeindex.datacube.startree.builder.StarTreesBuilder; +import org.opensearch.index.compositeindex.datacube.startree.index.CompositeIndexValues; +import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues; import org.opensearch.index.mapper.CompositeMappedFieldType; import org.opensearch.index.mapper.DocCountFieldMapper; import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.mapper.StarTreeMapper; import java.io.IOException; +import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; /** @@ -52,21 +62,28 @@ public class Composite99DocValuesWriter extends DocValuesConsumer { AtomicReference mergeState = new AtomicReference<>(); private final Set compositeMappedFieldTypes; private final Set compositeFieldSet; + private DocValuesConsumer composite99DocValuesConsumer; + + public IndexOutput dataOut; + public IndexOutput metaOut; private final Set segmentFieldSet; private final boolean segmentHasCompositeFields; + private final AtomicInteger fieldNumberAcrossCompositeFields; 
private final Map fieldProducerMap = new HashMap<>(); - public Composite99DocValuesWriter(DocValuesConsumer delegate, SegmentWriteState segmentWriteState, MapperService mapperService) { + public Composite99DocValuesWriter(DocValuesConsumer delegate, SegmentWriteState segmentWriteState, MapperService mapperService) + throws IOException { this.delegate = delegate; this.state = segmentWriteState; this.mapperService = mapperService; + this.fieldNumberAcrossCompositeFields = new AtomicInteger(); this.compositeMappedFieldTypes = mapperService.getCompositeFieldTypes(); compositeFieldSet = new HashSet<>(); segmentFieldSet = new HashSet<>(); // TODO : add integ test for this - for (FieldInfo fi : segmentWriteState.fieldInfos) { + for (FieldInfo fi : this.state.fieldInfos) { if (DocValuesType.SORTED_NUMERIC.equals(fi.getDocValuesType())) { segmentFieldSet.add(fi.name); } else if (fi.name.equals(DocCountFieldMapper.NAME)) { @@ -76,6 +93,56 @@ public Composite99DocValuesWriter(DocValuesConsumer delegate, SegmentWriteState for (CompositeMappedFieldType type : compositeMappedFieldTypes) { compositeFieldSet.addAll(type.fields()); } + + boolean success = false; + try { + + // Get consumer write state with DocIdSetIterator.NO_MORE_DOCS as segment doc count, + // so that all the fields are sparse numeric doc values and not dense numeric doc values + SegmentWriteState consumerWriteState = getSegmentWriteState(segmentWriteState); + + this.composite99DocValuesConsumer = LuceneDocValuesConsumerFactory.getDocValuesConsumerForCompositeCodec( + consumerWriteState, + Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC, + Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite99DocValuesFormat.META_DOC_VALUES_CODEC, + Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION + ); + + String dataFileName = IndexFileNames.segmentFileName( + this.state.segmentInfo.name, + this.state.segmentSuffix, + Composite99DocValuesFormat.DATA_EXTENSION + ); + dataOut = this.state.directory.createOutput(dataFileName, this.state.context); + CodecUtil.writeIndexHeader( + dataOut, + Composite99DocValuesFormat.DATA_CODEC_NAME, + Composite99DocValuesFormat.VERSION_CURRENT, + this.state.segmentInfo.getId(), + this.state.segmentSuffix + ); + + String metaFileName = IndexFileNames.segmentFileName( + this.state.segmentInfo.name, + this.state.segmentSuffix, + Composite99DocValuesFormat.META_EXTENSION + ); + metaOut = this.state.directory.createOutput(metaFileName, this.state.context); + CodecUtil.writeIndexHeader( + metaOut, + Composite99DocValuesFormat.META_CODEC_NAME, + Composite99DocValuesFormat.VERSION_CURRENT, + this.state.segmentInfo.getId(), + this.state.segmentSuffix + ); + + success = true; + } finally { + if (success == false) { + IOUtils.closeWhileHandlingException(this); + } + } // check if there are any composite fields which are part of the segment // TODO : add integ test where there are no composite fields in a segment, test both flush and merge cases segmentHasCompositeFields = Collections.disjoint(segmentFieldSet, compositeFieldSet) == false; @@ -117,6 +184,26 @@ public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) @Override public void close() throws IOException { delegate.close(); + boolean success = false; + try { + if (metaOut != null) { + metaOut.writeLong(-1); // write EOF marker + CodecUtil.writeFooter(metaOut); // write checksum + } + if (dataOut != null) { + CodecUtil.writeFooter(dataOut); // write checksum + } + + success = true; + } finally { + if (success) { + IOUtils.close(dataOut, 
metaOut, composite99DocValuesConsumer); + } else { + IOUtils.closeWhileHandlingException(dataOut, metaOut, composite99DocValuesConsumer); + } + metaOut = dataOut = null; + composite99DocValuesConsumer = null; + } } private void createCompositeIndicesIfPossible(DocValuesProducer valuesProducer, FieldInfo field) throws IOException { @@ -135,9 +222,9 @@ private void createCompositeIndicesIfPossible(DocValuesProducer valuesProducer, // we have all the required fields to build composite fields if (compositeFieldSet.isEmpty()) { for (CompositeMappedFieldType mappedType : compositeMappedFieldTypes) { - if (mappedType.getCompositeIndexType().equals(CompositeMappedFieldType.CompositeFieldType.STAR_TREE)) { - try (StarTreesBuilder starTreesBuilder = new StarTreesBuilder(state, mapperService)) { - starTreesBuilder.build(fieldProducerMap); + if (mappedType instanceof StarTreeMapper.StarTreeFieldType) { + try (StarTreesBuilder starTreesBuilder = new StarTreesBuilder(state, mapperService, fieldNumberAcrossCompositeFields)) { + starTreesBuilder.build(metaOut, dataOut, fieldProducerMap, composite99DocValuesConsumer); } } } @@ -175,6 +262,7 @@ public void merge(MergeState mergeState) throws IOException { /** * Merges composite fields from multiple segments + * * @param mergeState merge state */ private void mergeCompositeFields(MergeState mergeState) throws IOException { @@ -183,6 +271,7 @@ private void mergeCompositeFields(MergeState mergeState) throws IOException { /** * Merges star tree data fields from multiple segments + * * @param mergeState merge state */ private void mergeStarTreeFields(MergeState mergeState) throws IOException { @@ -205,7 +294,7 @@ private void mergeStarTreeFields(MergeState mergeState) throws IOException { CompositeIndexValues compositeIndexValues = reader.getCompositeIndexValues(fieldInfo); if (compositeIndexValues instanceof StarTreeValues) { StarTreeValues starTreeValues = (StarTreeValues) compositeIndexValues; - List fieldsList = starTreeSubsPerField.getOrDefault(fieldInfo.getField(), Collections.emptyList()); + List fieldsList = starTreeSubsPerField.getOrDefault(fieldInfo.getField(), new ArrayList<>()); if (starTreeField == null) { starTreeField = starTreeValues.getStarTreeField(); } @@ -223,8 +312,36 @@ private void mergeStarTreeFields(MergeState mergeState) throws IOException { } } } - try (StarTreesBuilder starTreesBuilder = new StarTreesBuilder(state, mapperService)) { - starTreesBuilder.buildDuringMerge(starTreeSubsPerField); + try (StarTreesBuilder starTreesBuilder = new StarTreesBuilder(state, mapperService, fieldNumberAcrossCompositeFields)) { + starTreesBuilder.buildDuringMerge(metaOut, dataOut, starTreeSubsPerField, composite99DocValuesConsumer); } } + + private static SegmentWriteState getSegmentWriteState(SegmentWriteState segmentWriteState) { + + SegmentInfo segmentInfo = new SegmentInfo( + segmentWriteState.segmentInfo.dir, + segmentWriteState.segmentInfo.getVersion(), + segmentWriteState.segmentInfo.getMinVersion(), + segmentWriteState.segmentInfo.name, + DocIdSetIterator.NO_MORE_DOCS, + segmentWriteState.segmentInfo.getUseCompoundFile(), + segmentWriteState.segmentInfo.getHasBlocks(), + segmentWriteState.segmentInfo.getCodec(), + segmentWriteState.segmentInfo.getDiagnostics(), + segmentWriteState.segmentInfo.getId(), + segmentWriteState.segmentInfo.getAttributes(), + segmentWriteState.segmentInfo.getIndexSort() + ); + + return new SegmentWriteState( + segmentWriteState.infoStream, + segmentWriteState.directory, + segmentInfo, + segmentWriteState.fieldInfos, + 
segmentWriteState.segUpdates, + segmentWriteState.context + ); + } + } diff --git a/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/StarTreeValues.java b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/StarTreeValues.java deleted file mode 100644 index 8378a4063b7ca..0000000000000 --- a/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/StarTreeValues.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.index.codec.composite.datacube.startree; - -import org.apache.lucene.search.DocIdSetIterator; -import org.opensearch.common.annotation.ExperimentalApi; -import org.opensearch.index.codec.composite.CompositeIndexValues; -import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; -import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNode; - -import java.util.Map; - -/** - * Concrete class that holds the star tree associated values from the segment - * - * @opensearch.experimental - */ -@ExperimentalApi -public class StarTreeValues implements CompositeIndexValues { - private final StarTreeField starTreeField; - private final StarTreeNode root; - private final Map dimensionDocValuesIteratorMap; - private final Map metricDocValuesIteratorMap; - private final Map attributes; - - public StarTreeValues( - StarTreeField starTreeField, - StarTreeNode root, - Map dimensionDocValuesIteratorMap, - Map metricDocValuesIteratorMap, - Map attributes - ) { - this.starTreeField = starTreeField; - this.root = root; - this.dimensionDocValuesIteratorMap = dimensionDocValuesIteratorMap; - this.metricDocValuesIteratorMap = metricDocValuesIteratorMap; - this.attributes = attributes; - } - - @Override - public CompositeIndexValues getValues() { - return this; - } - - public StarTreeField getStarTreeField() { - return starTreeField; - } - - public StarTreeNode getRoot() { - return root; - } - - public Map getDimensionDocValuesIteratorMap() { - return dimensionDocValuesIteratorMap; - } - - public Map getMetricDocValuesIteratorMap() { - return metricDocValuesIteratorMap; - } - - public Map getAttributes() { - return attributes; - } -} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java b/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java index 9402675ff39d9..81e19f6cdb1a3 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java @@ -23,4 +23,9 @@ public class CompositeIndexConstants { */ public static final String SEGMENT_DOCS_COUNT = "segmentDocsCount"; + /** + * Represents the key to fetch number of total star tree documents in a segment. 
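As a reading aid for the meta-stream handshake between Composite99DocValuesWriter (which writes a -1 EOF marker before the checksum footer in close()) and Composite99DocValuesReader (which loops until it sees that marker), the following self-contained sketch uses plain java.io instead of Lucene's IndexOutput/IndexInput; the marker value and field name are illustrative, not the real COMPOSITE_FIELD_MARKER or file layout.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

// One record per composite field, terminated by a -1 sentinel long, so the reader
// can loop until it hits the end-of-fields marker.
final class MetaFramingSketch {
    static final long FIELD_MARKER = 0xC0FFEEL; // stand-in for the real magic marker

    public static void main(String[] args) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        try (DataOutputStream out = new DataOutputStream(bytes)) {
            out.writeLong(FIELD_MARKER);        // per-field magic marker
            out.writeUTF("star_tree_field_1");  // field payload (heavily simplified)
            out.writeLong(-1L);                 // EOF marker, mirrors metaOut.writeLong(-1)
        }
        try (DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes.toByteArray()))) {
            while (true) {
                long marker = in.readLong();
                if (marker == -1L) {
                    break;                      // end of composite fields
                }
                if (marker != FIELD_MARKER) {
                    throw new IOException("Invalid composite field magic marker");
                }
                System.out.println("composite field: " + in.readUTF());
            }
        }
    }
}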
+ */ + public static final String STAR_TREE_DOCS_COUNT = "starTreeDocsCount"; + } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexSettings.java b/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexSettings.java index 014dd22426a10..a29e642d30f05 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexSettings.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexSettings.java @@ -13,6 +13,8 @@ import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; import org.opensearch.common.util.FeatureFlags; +import org.opensearch.core.common.unit.ByteSizeUnit; +import org.opensearch.core.common.unit.ByteSizeValue; /** * Cluster level settings for composite indices @@ -37,12 +39,23 @@ public class CompositeIndexSettings { Setting.Property.Dynamic ); + /** + * This sets the max flush threshold size for composite index + */ + public static final Setting COMPOSITE_INDEX_MAX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING = Setting.byteSizeSetting( + "indices.composite_index.translog.max_flush_threshold_size", + new ByteSizeValue(512, ByteSizeUnit.MB), + new ByteSizeValue(128, ByteSizeUnit.MB), + new ByteSizeValue(Long.MAX_VALUE, ByteSizeUnit.BYTES), + Setting.Property.NodeScope, + Setting.Property.Dynamic + ); + private volatile boolean starTreeIndexCreationEnabled; public CompositeIndexSettings(Settings settings, ClusterSettings clusterSettings) { this.starTreeIndexCreationEnabled = STAR_TREE_INDEX_ENABLED_SETTING.get(settings); clusterSettings.addSettingsUpdateConsumer(STAR_TREE_INDEX_ENABLED_SETTING, this::starTreeIndexCreationEnabled); - } private void starTreeIndexCreationEnabled(boolean value) { diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/DimensionFactory.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/DimensionFactory.java index 6a09e947217f5..3c418c68fe8ad 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/DimensionFactory.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/DimensionFactory.java @@ -12,9 +12,7 @@ import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.common.xcontent.support.XContentMapValues; import org.opensearch.index.compositeindex.datacube.startree.StarTreeIndexSettings; -import org.opensearch.index.mapper.DateFieldMapper; import org.opensearch.index.mapper.Mapper; -import org.opensearch.index.mapper.NumberFieldMapper; import java.util.ArrayList; import java.util.List; @@ -55,11 +53,13 @@ public static Dimension parseAndCreateDimension( Map dimensionMap, Mapper.TypeParser.ParserContext c ) { - if (builder instanceof DateFieldMapper.Builder) { + if (builder.getSupportedDataCubeDimensionType().isPresent() + && builder.getSupportedDataCubeDimensionType().get().equals(DimensionType.DATE)) { return parseAndCreateDateDimension(name, dimensionMap, c); - } else if (builder instanceof NumberFieldMapper.Builder) { - return new NumericDimension(name); - } + } else if (builder.getSupportedDataCubeDimensionType().isPresent() + && builder.getSupportedDataCubeDimensionType().get().equals(DimensionType.NUMERIC)) { + return new NumericDimension(name); + } throw new IllegalArgumentException( String.format(Locale.ROOT, "unsupported field type associated with star tree dimension [%s]", name) ); diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/DimensionType.java 
b/server/src/main/java/org/opensearch/index/compositeindex/datacube/DimensionType.java new file mode 100644 index 0000000000000..4b9faea331752 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/DimensionType.java @@ -0,0 +1,31 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube; + +/** + * Represents the types of dimensions supported in a data cube. + *
<p>
+ * This enum defines the possible types of dimensions that can be used + * in a data cube structure within the composite index. + * + * @opensearch.experimental + */ +public enum DimensionType { + /** + * Represents a numeric dimension type. + * This is used for dimensions that contain numerical values. + */ + NUMERIC, + + /** + * Represents a date dimension type. + * This is used for dimensions that contain date or timestamp values. + */ + DATE +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/Metric.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/Metric.java index 9accb0201170a..be16f1e9886cd 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/Metric.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/Metric.java @@ -13,6 +13,7 @@ import org.opensearch.core.xcontent.XContentBuilder; import java.io.IOException; +import java.util.ArrayList; import java.util.List; import java.util.Objects; @@ -23,10 +24,18 @@ public class Metric implements ToXContent { private final String field; private final List metrics; + private final List baseMetrics; public Metric(String field, List metrics) { this.field = field; this.metrics = metrics; + this.baseMetrics = new ArrayList<>(); + for (MetricStat metricStat : metrics) { + if (metricStat.isDerivedMetric()) { + continue; + } + baseMetrics.add(metricStat); + } } public String getField() { @@ -37,6 +46,13 @@ public List getMetrics() { return metrics; } + /** + * Returns only the base metrics + */ + public List getBaseMetrics() { + return baseMetrics; + } + @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeIndexSettings.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeIndexSettings.java index ce389a99b3626..e665831b83d93 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeIndexSettings.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeIndexSettings.java @@ -26,6 +26,7 @@ public class StarTreeIndexSettings { public static int STAR_TREE_MAX_DIMENSIONS_DEFAULT = 10; + public static int STAR_TREE_MAX_BASE_METRICS_DEFAULT = 100; /** * This setting determines the max number of star tree fields that can be part of composite index mapping. For each * star tree field, we will generate associated star tree index. @@ -52,6 +53,19 @@ public class StarTreeIndexSettings { Setting.Property.Final ); + /** + * This setting determines the max number of dimensions that can be part of star tree index field. Number of + * dimensions and associated cardinality has direct effect of star tree index size and query performance. + */ + public static final Setting STAR_TREE_MAX_BASE_METRICS_SETTING = Setting.intSetting( + "index.composite_index.star_tree.field.max_base_metrics", + STAR_TREE_MAX_BASE_METRICS_DEFAULT, + 4, + 100, + Setting.Property.IndexScope, + Setting.Property.Final + ); + /** * This setting determines the max number of date intervals that can be part of star tree date field. 
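The Metric change above precomputes a base-metric list by dropping derived stats in the constructor. A minimal standalone sketch of that filtering follows; the enum here is illustrative, while OpenSearch's MetricStat makes the same decision via isDerivedMetric(), with derived stats such as AVG reconstructed from base stats (SUM, VALUE_COUNT) at query time.

import java.util.ArrayList;
import java.util.List;

final class BaseMetricsSketch {
    enum Stat {
        SUM(false), VALUE_COUNT(false), AVG(true);

        final boolean derived;

        Stat(boolean derived) {
            this.derived = derived;
        }
    }

    static List<Stat> baseMetrics(List<Stat> all) {
        List<Stat> base = new ArrayList<>();
        for (Stat stat : all) {
            if (stat.derived == false) { // skip derived metrics, keep only what must be stored
                base.add(stat);
            }
        }
        return base;
    }

    public static void main(String[] args) {
        System.out.println(baseMetrics(List.of(Stat.SUM, Stat.AVG, Stat.VALUE_COUNT))); // [SUM, VALUE_COUNT]
    }
}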
*/ @@ -108,4 +122,11 @@ public static Rounding.DateTimeUnit getTimeUnit(String expression) { } return DateHistogramAggregationBuilder.DATE_FIELD_UNITS.get(expression); } + + public static final Setting IS_COMPOSITE_INDEX_SETTING = Setting.boolSetting( + "index.composite_index", + false, + Setting.Property.IndexScope, + Setting.Property.Final + ); } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregator.java index e79abe0f170b3..2bdbab1cd5b81 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregator.java @@ -7,22 +7,23 @@ */ package org.opensearch.index.compositeindex.datacube.startree.aggregators; -import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericType; +import org.opensearch.index.mapper.FieldValueConverter; +import org.opensearch.index.mapper.NumberFieldMapper; /** * Count value aggregator for star tree * * @opensearch.experimental */ -class CountValueAggregator implements ValueAggregator { +public class CountValueAggregator implements ValueAggregator { public static final long DEFAULT_INITIAL_VALUE = 1L; - private static final StarTreeNumericType VALUE_AGGREGATOR_TYPE = StarTreeNumericType.LONG; + private static final FieldValueConverter VALUE_AGGREGATOR_TYPE = NumberFieldMapper.NumberType.LONG; public CountValueAggregator() {} @Override - public StarTreeNumericType getAggregatedValueType() { + public FieldValueConverter getAggregatedValueType() { return VALUE_AGGREGATOR_TYPE; } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/DocCountAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/DocCountAggregator.java index 0896fa54e9f46..0f354aad063c2 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/DocCountAggregator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/DocCountAggregator.java @@ -8,7 +8,8 @@ package org.opensearch.index.compositeindex.datacube.startree.aggregators; -import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericType; +import org.opensearch.index.mapper.FieldValueConverter; +import org.opensearch.index.mapper.NumberFieldMapper; /** * Aggregator to handle '_doc_count' field @@ -17,12 +18,12 @@ */ public class DocCountAggregator implements ValueAggregator { - private static final StarTreeNumericType VALUE_AGGREGATOR_TYPE = StarTreeNumericType.LONG; + private static final FieldValueConverter VALUE_AGGREGATOR_TYPE = NumberFieldMapper.NumberType.LONG; public DocCountAggregator() {} @Override - public StarTreeNumericType getAggregatedValueType() { + public FieldValueConverter getAggregatedValueType() { return VALUE_AGGREGATOR_TYPE; } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MaxValueAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MaxValueAggregator.java index 8e7bb44238e40..34a3bdc32226d 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MaxValueAggregator.java +++ 
b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MaxValueAggregator.java @@ -7,7 +7,7 @@ */ package org.opensearch.index.compositeindex.datacube.startree.aggregators; -import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericType; +import org.opensearch.index.mapper.FieldValueConverter; /** * Max value aggregator for star tree @@ -16,8 +16,8 @@ */ class MaxValueAggregator extends StatelessDoubleValueAggregator { - public MaxValueAggregator(StarTreeNumericType starTreeNumericType) { - super(starTreeNumericType, null); + public MaxValueAggregator(FieldValueConverter fieldValueConverter) { + super(fieldValueConverter, null); } @Override diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MetricAggregatorInfo.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MetricAggregatorInfo.java index 8b6db2a183bf8..487e714c9e44e 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MetricAggregatorInfo.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MetricAggregatorInfo.java @@ -8,8 +8,7 @@ package org.opensearch.index.compositeindex.datacube.startree.aggregators; import org.opensearch.index.compositeindex.datacube.MetricStat; -import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericType; -import org.opensearch.index.fielddata.IndexNumericFieldData; +import org.opensearch.index.mapper.FieldValueConverter; import java.util.Comparator; import java.util.Objects; @@ -27,15 +26,15 @@ public class MetricAggregatorInfo implements Comparable { private final MetricStat metricStat; private final String field; private final ValueAggregator valueAggregators; - private final StarTreeNumericType starTreeNumericType; + private final FieldValueConverter fieldValueConverter; /** * Constructor for MetricAggregatorInfo */ - public MetricAggregatorInfo(MetricStat metricStat, String field, String starFieldName, IndexNumericFieldData.NumericType numericType) { + public MetricAggregatorInfo(MetricStat metricStat, String field, String starFieldName, FieldValueConverter fieldValueConverter) { this.metricStat = metricStat; - this.starTreeNumericType = StarTreeNumericType.fromNumericType(numericType); - this.valueAggregators = ValueAggregatorFactory.getValueAggregator(metricStat, this.starTreeNumericType); + this.fieldValueConverter = fieldValueConverter; + this.valueAggregators = ValueAggregatorFactory.getValueAggregator(metricStat, this.fieldValueConverter); this.field = field; this.starFieldName = starFieldName; this.metric = toFieldName(); @@ -72,8 +71,8 @@ public ValueAggregator getValueAggregators() { /** * @return star tree aggregated value type */ - public StarTreeNumericType getAggregatedValueType() { - return starTreeNumericType; + public FieldValueConverter getNumericFieldConverter() { + return fieldValueConverter; } /** diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MinValueAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MinValueAggregator.java index 46e9188b5dc2f..b085447be758e 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MinValueAggregator.java +++ 
b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MinValueAggregator.java @@ -7,7 +7,7 @@ */ package org.opensearch.index.compositeindex.datacube.startree.aggregators; -import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericType; +import org.opensearch.index.mapper.FieldValueConverter; /** * Min value aggregator for star tree @@ -16,8 +16,8 @@ */ class MinValueAggregator extends StatelessDoubleValueAggregator { - public MinValueAggregator(StarTreeNumericType starTreeNumericType) { - super(starTreeNumericType, null); + public MinValueAggregator(FieldValueConverter fieldValueConverter) { + super(fieldValueConverter, null); } @Override diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/StatelessDoubleValueAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/StatelessDoubleValueAggregator.java index 30a1c47c0ee9b..d83a828fb0f9c 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/StatelessDoubleValueAggregator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/StatelessDoubleValueAggregator.java @@ -7,7 +7,8 @@ */ package org.opensearch.index.compositeindex.datacube.startree.aggregators; -import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericType; +import org.opensearch.index.mapper.FieldValueConverter; +import org.opensearch.index.mapper.NumberFieldMapper; /** * This is an abstract class that defines the common methods for all double value aggregators @@ -17,17 +18,17 @@ */ abstract class StatelessDoubleValueAggregator implements ValueAggregator { - protected final StarTreeNumericType starTreeNumericType; + protected final FieldValueConverter fieldValueConverter; protected final Double identityValue; - private static final StarTreeNumericType VALUE_AGGREGATOR_TYPE = StarTreeNumericType.DOUBLE; + private static final FieldValueConverter VALUE_AGGREGATOR_TYPE = NumberFieldMapper.NumberType.DOUBLE; - public StatelessDoubleValueAggregator(StarTreeNumericType starTreeNumericType, Double identityValue) { - this.starTreeNumericType = starTreeNumericType; + public StatelessDoubleValueAggregator(FieldValueConverter fieldValueConverter, Double identityValue) { + this.fieldValueConverter = fieldValueConverter; this.identityValue = identityValue; } @Override - public StarTreeNumericType getAggregatedValueType() { + public FieldValueConverter getAggregatedValueType() { return VALUE_AGGREGATOR_TYPE; } @@ -36,7 +37,7 @@ public Double getInitialAggregatedValueForSegmentDocValue(Long segmentDocValue) if (segmentDocValue == null) { return getIdentityMetricValue(); } - return starTreeNumericType.getDoubleValue(segmentDocValue); + return fieldValueConverter.toDoubleValue(segmentDocValue); } @Override @@ -57,7 +58,7 @@ public Double toAggregatedValueType(Long value) { if (value == null) { return getIdentityMetricValue(); } - return VALUE_AGGREGATOR_TYPE.getDoubleValue(value); + return VALUE_AGGREGATOR_TYPE.toDoubleValue(value); } catch (Exception e) { throw new IllegalStateException("Cannot convert " + value + " to sortable aggregation type", e); } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java index 
ef97a9b603df3..81eb3558915be 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregator.java @@ -7,7 +7,8 @@ */ package org.opensearch.index.compositeindex.datacube.startree.aggregators; -import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericType; +import org.opensearch.index.mapper.FieldValueConverter; +import org.opensearch.index.mapper.NumberFieldMapper; import org.opensearch.search.aggregations.metrics.CompensatedSum; /** @@ -22,17 +23,17 @@ */ class SumValueAggregator implements ValueAggregator { - private final StarTreeNumericType starTreeNumericType; - private static final StarTreeNumericType VALUE_AGGREGATOR_TYPE = StarTreeNumericType.DOUBLE; + private final FieldValueConverter fieldValueConverter; + private static final FieldValueConverter VALUE_AGGREGATOR_TYPE = NumberFieldMapper.NumberType.DOUBLE; private CompensatedSum kahanSummation = new CompensatedSum(0, 0); - public SumValueAggregator(StarTreeNumericType starTreeNumericType) { - this.starTreeNumericType = starTreeNumericType; + public SumValueAggregator(FieldValueConverter fieldValueConverter) { + this.fieldValueConverter = fieldValueConverter; } @Override - public StarTreeNumericType getAggregatedValueType() { + public FieldValueConverter getAggregatedValueType() { return VALUE_AGGREGATOR_TYPE; } @@ -41,7 +42,7 @@ public Double getInitialAggregatedValueForSegmentDocValue(Long segmentDocValue) kahanSummation.reset(0, 0); // add takes care of the sum and compensation internally if (segmentDocValue != null) { - kahanSummation.add(starTreeNumericType.getDoubleValue(segmentDocValue)); + kahanSummation.add(fieldValueConverter.toDoubleValue(segmentDocValue)); } else { kahanSummation.add(getIdentityMetricValue()); } @@ -55,7 +56,7 @@ public Double mergeAggregatedValueAndSegmentValue(Double value, Long segmentDocV assert value == null || kahanSummation.value() == value; // add takes care of the sum and compensation internally if (segmentDocValue != null) { - kahanSummation.add(starTreeNumericType.getDoubleValue(segmentDocValue)); + kahanSummation.add(fieldValueConverter.toDoubleValue(segmentDocValue)); } else { kahanSummation.add(getIdentityMetricValue()); } @@ -92,7 +93,7 @@ public Double toAggregatedValueType(Long value) { if (value == null) { return getIdentityMetricValue(); } - return VALUE_AGGREGATOR_TYPE.getDoubleValue(value); + return VALUE_AGGREGATOR_TYPE.toDoubleValue(value); } catch (Exception e) { throw new IllegalStateException("Cannot convert " + value + " to sortable aggregation type", e); } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java index d5ca7f3493087..8eac45669c063 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregator.java @@ -7,7 +7,7 @@ */ package org.opensearch.index.compositeindex.datacube.startree.aggregators; -import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericType; +import org.opensearch.index.mapper.FieldValueConverter; /** * A value aggregator that pre-aggregates on the input values 
for a specific type of aggregation. @@ -19,7 +19,7 @@ public interface ValueAggregator { /** * Returns the data type of the aggregated value. */ - StarTreeNumericType getAggregatedValueType(); + FieldValueConverter getAggregatedValueType(); /** * Returns the initial aggregated value. diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregatorFactory.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregatorFactory.java index bdc381110365d..aac5d2b2855d3 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregatorFactory.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregatorFactory.java @@ -8,7 +8,7 @@ package org.opensearch.index.compositeindex.datacube.startree.aggregators; import org.opensearch.index.compositeindex.datacube.MetricStat; -import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericType; +import org.opensearch.index.mapper.FieldValueConverter; /** * Value aggregator factory for a given aggregation type @@ -22,20 +22,20 @@ private ValueAggregatorFactory() {} * Returns a new instance of value aggregator for the given aggregation type. * * @param aggregationType Aggregation type - * @param starTreeNumericType Numeric type associated with star tree field ( as specified in index mapping ) + * @param fieldValueConverter Numeric type converter associated with star tree field ( as specified in index mapping ) * @return Value aggregator */ - public static ValueAggregator getValueAggregator(MetricStat aggregationType, StarTreeNumericType starTreeNumericType) { + public static ValueAggregator getValueAggregator(MetricStat aggregationType, FieldValueConverter fieldValueConverter) { switch (aggregationType) { // avg aggregator will be covered in the part of query (using count and sum) case SUM: - return new SumValueAggregator(starTreeNumericType); + return new SumValueAggregator(fieldValueConverter); case VALUE_COUNT: return new CountValueAggregator(); case MIN: - return new MinValueAggregator(starTreeNumericType); + return new MinValueAggregator(fieldValueConverter); case MAX: - return new MaxValueAggregator(starTreeNumericType); + return new MaxValueAggregator(fieldValueConverter); case DOC_COUNT: return new DocCountAggregator(); default: diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/numerictype/StarTreeNumericType.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/numerictype/StarTreeNumericType.java deleted file mode 100644 index 57fe573a6a93c..0000000000000 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/numerictype/StarTreeNumericType.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ - -package org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype; - -import org.opensearch.index.fielddata.IndexNumericFieldData; - -import java.util.function.Function; - -/** - * Enum to map Star Tree Numeric Types to Lucene's Numeric Type - * - * @opensearch.experimental - */ -public enum StarTreeNumericType { - - // TODO: Handle scaled floats - HALF_FLOAT(IndexNumericFieldData.NumericType.HALF_FLOAT, StarTreeNumericTypeConverters::halfFloatPointToDouble), - FLOAT(IndexNumericFieldData.NumericType.FLOAT, StarTreeNumericTypeConverters::floatPointToDouble), - LONG(IndexNumericFieldData.NumericType.LONG, StarTreeNumericTypeConverters::longToDouble), - DOUBLE(IndexNumericFieldData.NumericType.DOUBLE, StarTreeNumericTypeConverters::sortableLongtoDouble), - INT(IndexNumericFieldData.NumericType.INT, StarTreeNumericTypeConverters::intToDouble), - SHORT(IndexNumericFieldData.NumericType.SHORT, StarTreeNumericTypeConverters::shortToDouble), - BYTE(IndexNumericFieldData.NumericType.BYTE, StarTreeNumericTypeConverters::bytesToDouble), - UNSIGNED_LONG(IndexNumericFieldData.NumericType.UNSIGNED_LONG, StarTreeNumericTypeConverters::unsignedlongToDouble); - - final IndexNumericFieldData.NumericType numericType; - final Function converter; - - StarTreeNumericType(IndexNumericFieldData.NumericType numericType, Function converter) { - this.numericType = numericType; - this.converter = converter; - } - - public double getDoubleValue(long rawValue) { - return this.converter.apply(rawValue); - } - - public static StarTreeNumericType fromNumericType(IndexNumericFieldData.NumericType numericType) { - switch (numericType) { - case HALF_FLOAT: - return StarTreeNumericType.HALF_FLOAT; - case FLOAT: - return StarTreeNumericType.FLOAT; - case LONG: - return StarTreeNumericType.LONG; - case DOUBLE: - return StarTreeNumericType.DOUBLE; - case INT: - return StarTreeNumericType.INT; - case SHORT: - return StarTreeNumericType.SHORT; - case UNSIGNED_LONG: - return StarTreeNumericType.UNSIGNED_LONG; - case BYTE: - return StarTreeNumericType.BYTE; - default: - throw new UnsupportedOperationException("Unknown numeric type [" + numericType + "]"); - } - } -} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/numerictype/StarTreeNumericTypeConverters.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/numerictype/StarTreeNumericTypeConverters.java deleted file mode 100644 index eb7647c4f9851..0000000000000 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/numerictype/StarTreeNumericTypeConverters.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
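Both the StarTreeNumericType enum removed above and the StarTreeNumericTypeConverters helpers removed just below are superseded by the FieldValueConverter contract, which NumberFieldMapper.NumberType fulfils (see the VALUE_AGGREGATOR_TYPE constants earlier in this change). A minimal sketch of the DOUBLE round trip under the new API; the class name and sample value are illustrative only:

    import org.apache.lucene.util.NumericUtils;
    import org.opensearch.index.mapper.FieldValueConverter;
    import org.opensearch.index.mapper.NumberFieldMapper;

    // Illustrative sketch: DOUBLE metric values are stored as sortable longs, and
    // FieldValueConverter#toDoubleValue restores the original double, taking over
    // from the deleted StarTreeNumericTypeConverters.sortableLongtoDouble helper.
    class FieldValueConverterSketch {
        public static void main(String[] args) {
            FieldValueConverter converter = NumberFieldMapper.NumberType.DOUBLE;
            long sortable = NumericUtils.doubleToSortableLong(42.5);
            double restored = converter.toDoubleValue(sortable); // 42.5 again
            System.out.println(restored);
        }
    }
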
- */ - -package org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype; - -import org.apache.lucene.sandbox.document.HalfFloatPoint; -import org.apache.lucene.util.NumericUtils; -import org.opensearch.common.Numbers; -import org.opensearch.common.annotation.ExperimentalApi; - -/** - * Numeric converters used during aggregations of metric values - * - * @opensearch.experimental - */ -@ExperimentalApi -public class StarTreeNumericTypeConverters { - - public static double halfFloatPointToDouble(Long value) { - return HalfFloatPoint.sortableShortToHalfFloat((short) value.longValue()); - } - - public static double floatPointToDouble(Long value) { - return NumericUtils.sortableIntToFloat((int) value.longValue()); - } - - public static double longToDouble(Long value) { - return (double) value; - } - - public static double intToDouble(Long value) { - return (double) value; - } - - public static double shortToDouble(Long value) { - return (double) value; - } - - public static Double sortableLongtoDouble(Long value) { - return NumericUtils.sortableLongToDouble(value); - } - - public static double unsignedlongToDouble(Long value) { - return Numbers.unsignedLongToDouble(value); - } - - public static double bytesToDouble(Long value) { - byte[] bytes = new byte[8]; - NumericUtils.longToSortableBytes(value, bytes, 0); - return NumericUtils.sortableLongToDouble(NumericUtils.sortableBytesToLong(bytes, 0)); - } - -} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractDocumentsFileManager.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractDocumentsFileManager.java index 4214a46b2fc1c..327fd26c00608 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractDocumentsFileManager.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractDocumentsFileManager.java @@ -19,13 +19,16 @@ import org.opensearch.index.compositeindex.datacube.startree.StarTreeDocument; import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; -import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericTypeConverters; import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeDocumentBitSetUtil; +import org.opensearch.index.mapper.FieldValueConverter; import java.io.Closeable; import java.io.IOException; import java.util.List; +import static org.opensearch.index.mapper.NumberFieldMapper.NumberType.DOUBLE; +import static org.opensearch.index.mapper.NumberFieldMapper.NumberType.LONG; + /** * Abstract class for managing star tree file operations. * @@ -89,24 +92,22 @@ protected int writeDimensions(StarTreeDocument starTreeDocument, IndexOutput out protected int writeMetrics(StarTreeDocument starTreeDocument, IndexOutput output, boolean isAggregatedDoc) throws IOException { int numBytes = 0; for (int i = 0; i < starTreeDocument.metrics.length; i++) { - switch (metricAggregatorInfos.get(i).getValueAggregators().getAggregatedValueType()) { - case LONG: + FieldValueConverter aggregatedValueType = metricAggregatorInfos.get(i).getValueAggregators().getAggregatedValueType(); + if (aggregatedValueType.equals(LONG)) { + output.writeLong(starTreeDocument.metrics[i] == null ? 
0L : (Long) starTreeDocument.metrics[i]); + numBytes += Long.BYTES; + } else if (aggregatedValueType.equals(DOUBLE)) { + if (isAggregatedDoc) { + long val = NumericUtils.doubleToSortableLong( + starTreeDocument.metrics[i] == null ? 0.0 : (Double) starTreeDocument.metrics[i] + ); + output.writeLong(val); + } else { output.writeLong(starTreeDocument.metrics[i] == null ? 0L : (Long) starTreeDocument.metrics[i]); - numBytes += Long.BYTES; - break; - case DOUBLE: - if (isAggregatedDoc) { - long val = NumericUtils.doubleToSortableLong( - starTreeDocument.metrics[i] == null ? 0.0 : (Double) starTreeDocument.metrics[i] - ); - output.writeLong(val); - } else { - output.writeLong(starTreeDocument.metrics[i] == null ? 0L : (Long) starTreeDocument.metrics[i]); - } - numBytes += Long.BYTES; - break; - default: - throw new IllegalStateException("Unsupported metric type"); + } + numBytes += Long.BYTES; + } else { + throw new IllegalStateException("Unsupported metric type"); } } numBytes += StarTreeDocumentBitSetUtil.writeBitSet(starTreeDocument.metrics, output); @@ -157,22 +158,20 @@ protected long readDimensions(Long[] dimensions, RandomAccessInput input, long o protected long readMetrics(RandomAccessInput input, long offset, int numMetrics, Object[] metrics, boolean isAggregatedDoc) throws IOException { for (int i = 0; i < numMetrics; i++) { - switch (metricAggregatorInfos.get(i).getValueAggregators().getAggregatedValueType()) { - case LONG: - metrics[i] = input.readLong(offset); - offset += Long.BYTES; - break; - case DOUBLE: - long val = input.readLong(offset); - if (isAggregatedDoc) { - metrics[i] = StarTreeNumericTypeConverters.sortableLongtoDouble(val); - } else { - metrics[i] = val; - } - offset += Long.BYTES; - break; - default: - throw new IllegalStateException("Unsupported metric type"); + FieldValueConverter aggregatedValueType = metricAggregatorInfos.get(i).getValueAggregators().getAggregatedValueType(); + if (aggregatedValueType.equals(LONG)) { + metrics[i] = input.readLong(offset); + offset += Long.BYTES; + } else if (aggregatedValueType.equals(DOUBLE)) { + long val = input.readLong(offset); + if (isAggregatedDoc) { + metrics[i] = DOUBLE.toDoubleValue(val); + } else { + metrics[i] = val; + } + offset += Long.BYTES; + } else { + throw new IllegalStateException("Unsupported metric type"); } } offset += StarTreeDocumentBitSetUtil.readBitSet(input, offset, metrics, index -> null); diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java index d3105b4ae23c7..a1d638616f2aa 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java @@ -9,15 +9,18 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.EmptyDocValuesProducer; import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.index.VectorEncoding; -import org.apache.lucene.index.VectorSimilarityFunction; -import 
org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.SortedNumericDocValuesWriterWrapper; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.Counter; +import org.apache.lucene.util.NumericUtils; import org.opensearch.index.compositeindex.datacube.Dimension; import org.opensearch.index.compositeindex.datacube.Metric; import org.opensearch.index.compositeindex.datacube.MetricStat; @@ -26,28 +29,34 @@ import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; import org.opensearch.index.compositeindex.datacube.startree.aggregators.ValueAggregator; +import org.opensearch.index.compositeindex.datacube.startree.fileformats.StarTreeWriter; +import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues; import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode; import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNodeType; import org.opensearch.index.compositeindex.datacube.startree.utils.SequentialDocValuesIterator; -import org.opensearch.index.fielddata.IndexNumericFieldData; import org.opensearch.index.mapper.DocCountFieldMapper; +import org.opensearch.index.mapper.FieldMapper; +import org.opensearch.index.mapper.FieldValueConverter; import org.opensearch.index.mapper.Mapper; import org.opensearch.index.mapper.MapperService; -import org.opensearch.index.mapper.NumberFieldMapper; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.ALL; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.fullyQualifiedFieldNameForStarTreeDimensionsDocValues; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.fullyQualifiedFieldNameForStarTreeMetricsDocValues; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.getFieldInfo; +import static org.opensearch.index.mapper.NumberFieldMapper.NumberType.DOUBLE; +import static org.opensearch.index.mapper.NumberFieldMapper.NumberType.LONG; /** * Builder for star tree. Defines the algorithm to construct star-tree @@ -76,19 +85,30 @@ public abstract class BaseStarTreeBuilder implements StarTreeBuilder { protected final InMemoryTreeNode rootNode = getNewNode(); protected final StarTreeField starTreeField; - private final SegmentWriteState state; - static String NUM_SEGMENT_DOCS = "numSegmentDocs"; + private final SegmentWriteState writeState; + + private final IndexOutput metaOut; + private final IndexOutput dataOut; /** * Reads all the configuration related to dimensions and metrics, builds a star-tree based on the different construction parameters. 
* * @param starTreeField holds the configuration for the star tree - * @param state stores the segment write state + * @param writeState stores the segment write writeState * @param mapperService helps to find the original type of the field */ - protected BaseStarTreeBuilder(StarTreeField starTreeField, SegmentWriteState state, MapperService mapperService) { + protected BaseStarTreeBuilder( + IndexOutput metaOut, + IndexOutput dataOut, + StarTreeField starTreeField, + SegmentWriteState writeState, + MapperService mapperService + ) { logger.debug("Building star tree : {}", starTreeField.getName()); + this.metaOut = metaOut; + this.dataOut = dataOut; + this.starTreeField = starTreeField; StarTreeFieldConfiguration starTreeFieldSpec = starTreeField.getStarTreeConfig(); @@ -96,8 +116,8 @@ protected BaseStarTreeBuilder(StarTreeField starTreeField, SegmentWriteState sta this.numDimensions = dimensionsSplitOrder.size(); this.skipStarNodeCreationForDimensions = new HashSet<>(); - this.totalSegmentDocs = state.segmentInfo.maxDoc(); - this.state = state; + this.totalSegmentDocs = writeState.segmentInfo.maxDoc(); + this.writeState = writeState; Set skipStarNodeCreationForDimensions = starTreeFieldSpec.getSkipStarNodeCreationInDims(); @@ -125,19 +145,16 @@ public List generateMetricAggregatorInfos(MapperService ma MetricStat.DOC_COUNT, metric.getField(), starTreeField.getName(), - IndexNumericFieldData.NumericType.LONG + LONG ); metricAggregatorInfos.add(metricAggregatorInfo); continue; } - for (MetricStat metricStat : metric.getMetrics()) { - if (metricStat.isDerivedMetric()) { - continue; - } - IndexNumericFieldData.NumericType numericType; + for (MetricStat metricStat : metric.getBaseMetrics()) { + FieldValueConverter fieldValueConverter; Mapper fieldMapper = mapperService.documentMapper().mappers().getMapper(metric.getField()); - if (fieldMapper instanceof NumberFieldMapper) { - numericType = ((NumberFieldMapper) fieldMapper).fieldType().numericType(); + if (fieldMapper instanceof FieldMapper && ((FieldMapper) fieldMapper).fieldType() instanceof FieldValueConverter) { + fieldValueConverter = (FieldValueConverter) ((FieldMapper) fieldMapper).fieldType(); } else { logger.error("unsupported mapper type"); throw new IllegalStateException("unsupported mapper type"); @@ -147,7 +164,7 @@ public List generateMetricAggregatorInfos(MapperService ma metricStat, metric.getField(), starTreeField.getName(), - numericType + fieldValueConverter ); metricAggregatorInfos.add(metricAggregatorInfo); } @@ -155,6 +172,224 @@ public List generateMetricAggregatorInfos(MapperService ma return metricAggregatorInfos; } + /** + * Generates the configuration required to perform aggregation for all the metrics on a field + * + * @return list of MetricAggregatorInfo + */ + public List getMetricReaders(SegmentWriteState state, Map fieldProducerMap) + throws IOException { + + List metricReaders = new ArrayList<>(); + for (Metric metric : this.starTreeField.getMetrics()) { + for (MetricStat metricStat : metric.getBaseMetrics()) { + SequentialDocValuesIterator metricReader; + FieldInfo metricFieldInfo = state.fieldInfos.fieldInfo(metric.getField()); + if (metricStat.equals(MetricStat.DOC_COUNT)) { + // _doc_count is numeric field , so we convert to sortedNumericDocValues and get iterator + metricReader = getIteratorForNumericField(fieldProducerMap, metricFieldInfo, DocCountFieldMapper.NAME); + } else { + if (metricFieldInfo == null) { + metricFieldInfo = getFieldInfo(metric.getField(), DocValuesType.SORTED_NUMERIC); + } + 
metricReader = new SequentialDocValuesIterator( + fieldProducerMap.get(metricFieldInfo.name).getSortedNumeric(metricFieldInfo) + ); + } + metricReaders.add(metricReader); + } + } + return metricReaders; + } + + /** + * Builds the star tree from the original segment documents + * + * @param fieldProducerMap contain s the docValues producer to get docValues associated with each field + * @param fieldNumberAcrossStarTrees maintains a counter for the number of star-tree fields + * @param starTreeDocValuesConsumer consumes the generated star-tree docValues + * @throws IOException when we are unable to build star-tree + */ + public void build( + Map fieldProducerMap, + AtomicInteger fieldNumberAcrossStarTrees, + DocValuesConsumer starTreeDocValuesConsumer + ) throws IOException { + long startTime = System.currentTimeMillis(); + logger.debug("Star-tree build is a go with star tree field {}", starTreeField.getName()); + + List metricReaders = getMetricReaders(writeState, fieldProducerMap); + List dimensionsSplitOrder = starTreeField.getDimensionsOrder(); + SequentialDocValuesIterator[] dimensionReaders = new SequentialDocValuesIterator[dimensionsSplitOrder.size()]; + for (int i = 0; i < numDimensions; i++) { + String dimension = dimensionsSplitOrder.get(i).getField(); + FieldInfo dimensionFieldInfo = writeState.fieldInfos.fieldInfo(dimension); + if (dimensionFieldInfo == null) { + dimensionFieldInfo = getFieldInfo(dimension, DocValuesType.SORTED_NUMERIC); + } + dimensionReaders[i] = new SequentialDocValuesIterator( + fieldProducerMap.get(dimensionFieldInfo.name).getSortedNumeric(dimensionFieldInfo) + ); + } + Iterator starTreeDocumentIterator = sortAndAggregateSegmentDocuments(dimensionReaders, metricReaders); + logger.debug("Sorting and aggregating star-tree in ms : {}", (System.currentTimeMillis() - startTime)); + build(starTreeDocumentIterator, fieldNumberAcrossStarTrees, starTreeDocValuesConsumer); + logger.debug("Finished Building star-tree in ms : {}", (System.currentTimeMillis() - startTime)); + } + + /** + * Builds the star tree using sorted and aggregated star-tree Documents + * + * @param starTreeDocumentIterator contains the sorted and aggregated documents + * @param fieldNumberAcrossStarTrees maintains a counter for the number of star-tree fields + * @param starTreeDocValuesConsumer consumes the generated star-tree docValues + * @throws IOException when we are unable to build star-tree + */ + public void build( + Iterator starTreeDocumentIterator, + AtomicInteger fieldNumberAcrossStarTrees, + DocValuesConsumer starTreeDocValuesConsumer + ) throws IOException { + int numSegmentStarTreeDocument = totalSegmentDocs; + + appendDocumentsToStarTree(starTreeDocumentIterator); + int numStarTreeDocument = numStarTreeDocs; + logger.debug("Generated star tree docs : [{}] from segment docs : [{}]", numStarTreeDocument, numSegmentStarTreeDocument); + + if (numStarTreeDocs == 0) { + // serialize the star tree data + serializeStarTree(numStarTreeDocument, numStarTreeDocs); + return; + } + + constructStarTree(rootNode, 0, numStarTreeDocs); + int numStarTreeDocumentUnderStarNode = numStarTreeDocs - numStarTreeDocument; + logger.debug( + "Finished constructing star-tree, got [ {} ] tree nodes and [ {} ] starTreeDocument under star-node", + numStarTreeNodes, + numStarTreeDocumentUnderStarNode + ); + + createAggregatedDocs(rootNode); + int numAggregatedStarTreeDocument = numStarTreeDocs - numStarTreeDocument - numStarTreeDocumentUnderStarNode; + logger.debug("Finished creating aggregated documents : {}", 
numAggregatedStarTreeDocument); + + // Create doc values indices in disk + createSortedDocValuesIndices(starTreeDocValuesConsumer, fieldNumberAcrossStarTrees); + + // serialize star-tree + serializeStarTree(numStarTreeDocument, numStarTreeDocs); + } + + void appendDocumentsToStarTree(Iterator starTreeDocumentIterator) throws IOException { + while (starTreeDocumentIterator.hasNext()) { + appendToStarTree(starTreeDocumentIterator.next()); + } + } + + private void serializeStarTree(int numSegmentStarTreeDocument, int numStarTreeDocs) throws IOException { + // serialize the star tree data + long dataFilePointer = dataOut.getFilePointer(); + StarTreeWriter starTreeWriter = new StarTreeWriter(); + long totalStarTreeDataLength = starTreeWriter.writeStarTree(dataOut, rootNode, numStarTreeNodes, starTreeField.getName()); + + // serialize the star tree meta + starTreeWriter.writeStarTreeMetadata( + metaOut, + starTreeField, + metricAggregatorInfos, + numStarTreeNodes, + numSegmentStarTreeDocument, + numStarTreeDocs, + dataFilePointer, + totalStarTreeDataLength + ); + } + + private void createSortedDocValuesIndices(DocValuesConsumer docValuesConsumer, AtomicInteger fieldNumberAcrossStarTrees) + throws IOException { + List dimensionWriters = new ArrayList<>(); + List metricWriters = new ArrayList<>(); + FieldInfo[] dimensionFieldInfoList = new FieldInfo[starTreeField.getDimensionsOrder().size()]; + FieldInfo[] metricFieldInfoList = new FieldInfo[metricAggregatorInfos.size()]; + for (int i = 0; i < dimensionFieldInfoList.length; i++) { + final FieldInfo fi = getFieldInfo( + fullyQualifiedFieldNameForStarTreeDimensionsDocValues( + starTreeField.getName(), + starTreeField.getDimensionsOrder().get(i).getField() + ), + DocValuesType.SORTED_NUMERIC, + fieldNumberAcrossStarTrees.getAndIncrement() + ); + dimensionFieldInfoList[i] = fi; + dimensionWriters.add(new SortedNumericDocValuesWriterWrapper(fi, Counter.newCounter())); + } + for (int i = 0; i < metricAggregatorInfos.size(); i++) { + + final FieldInfo fi = getFieldInfo( + fullyQualifiedFieldNameForStarTreeMetricsDocValues( + starTreeField.getName(), + metricAggregatorInfos.get(i).getField(), + metricAggregatorInfos.get(i).getMetricStat().getTypeName() + ), + DocValuesType.SORTED_NUMERIC, + fieldNumberAcrossStarTrees.getAndIncrement() + ); + + metricFieldInfoList[i] = fi; + metricWriters.add(new SortedNumericDocValuesWriterWrapper(fi, Counter.newCounter())); + } + + for (int docId = 0; docId < numStarTreeDocs; docId++) { + StarTreeDocument starTreeDocument = getStarTreeDocument(docId); + for (int i = 0; i < starTreeDocument.dimensions.length; i++) { + if (starTreeDocument.dimensions[i] != null) { + dimensionWriters.get(i).addValue(docId, starTreeDocument.dimensions[i]); + } + } + + for (int i = 0; i < starTreeDocument.metrics.length; i++) { + try { + FieldValueConverter aggregatedValueType = metricAggregatorInfos.get(i).getValueAggregators().getAggregatedValueType(); + if (aggregatedValueType.equals(LONG)) { + if (starTreeDocument.metrics[i] != null) { + metricWriters.get(i).addValue(docId, (long) starTreeDocument.metrics[i]); + } + } else if (aggregatedValueType.equals(DOUBLE)) { + if (starTreeDocument.metrics[i] != null) { + metricWriters.get(i).addValue(docId, NumericUtils.doubleToSortableLong((Double) starTreeDocument.metrics[i])); + } + } else { + throw new IllegalStateException("Unknown metric doc value type"); + } + } catch (IllegalArgumentException e) { + logger.error("could not parse the value, exiting creation of star tree"); + } + } + } + + 
addStarTreeDocValueFields(docValuesConsumer, dimensionWriters, dimensionFieldInfoList, starTreeField.getDimensionsOrder().size()); + addStarTreeDocValueFields(docValuesConsumer, metricWriters, metricFieldInfoList, metricAggregatorInfos.size()); + } + + private void addStarTreeDocValueFields( + DocValuesConsumer docValuesConsumer, + List docValuesWriters, + FieldInfo[] fieldInfoList, + int fieldCount + ) throws IOException { + for (int i = 0; i < fieldCount; i++) { + final int writerIndex = i; + DocValuesProducer docValuesProducer = new EmptyDocValuesProducer() { + @Override + public SortedNumericDocValues getSortedNumeric(FieldInfo field) { + return docValuesWriters.get(writerIndex).getDocValues(); + } + }; + docValuesConsumer.addSortedNumericField(fieldInfoList[i], docValuesProducer); + } + } + /** * Get star tree document from the segment for the current docId with the dimensionReaders and metricReaders */ @@ -376,6 +611,7 @@ protected StarTreeDocument reduceSegmentStarTreeDocuments( */ private static Long getLong(Object metric) { Long metricValue = null; + if (metric instanceof Long) { metricValue = (long) metric; } @@ -422,89 +658,6 @@ public StarTreeDocument reduceStarTreeDocuments(StarTreeDocument aggregatedDocum } } - /** - * Builds the star tree from the original segment documents - * - * @param fieldProducerMap contain s the docValues producer to get docValues associated with each field - * @throws IOException when we are unable to build star-tree - */ - public void build(Map fieldProducerMap) throws IOException { - long startTime = System.currentTimeMillis(); - logger.debug("Star-tree build is a go with star tree field {}", starTreeField.getName()); - if (totalSegmentDocs == 0) { - logger.debug("No documents found in the segment"); - return; - } - List metricReaders = getMetricReaders(state, fieldProducerMap); - List dimensionsSplitOrder = starTreeField.getDimensionsOrder(); - SequentialDocValuesIterator[] dimensionReaders = new SequentialDocValuesIterator[dimensionsSplitOrder.size()]; - for (int i = 0; i < numDimensions; i++) { - String dimension = dimensionsSplitOrder.get(i).getField(); - FieldInfo dimensionFieldInfo = state.fieldInfos.fieldInfo(dimension); - if (dimensionFieldInfo == null) { - dimensionFieldInfo = getFieldInfo(dimension, DocValuesType.SORTED_NUMERIC); - } - dimensionReaders[i] = new SequentialDocValuesIterator( - fieldProducerMap.get(dimensionFieldInfo.name).getSortedNumeric(dimensionFieldInfo) - ); - } - Iterator starTreeDocumentIterator = sortAndAggregateSegmentDocuments(dimensionReaders, metricReaders); - logger.debug("Sorting and aggregating star-tree in ms : {}", (System.currentTimeMillis() - startTime)); - build(starTreeDocumentIterator); - logger.debug("Finished Building star-tree in ms : {}", (System.currentTimeMillis() - startTime)); - } - - private static FieldInfo getFieldInfo(String field, DocValuesType docValuesType) { - return new FieldInfo( - field, - 1, // This is filled as part of doc values creation and is not used otherwise - false, - false, - false, - IndexOptions.NONE, - docValuesType, - -1, - Collections.emptyMap(), - 0, - 0, - 0, - 0, - VectorEncoding.FLOAT32, - VectorSimilarityFunction.EUCLIDEAN, - false, - false - ); - } - - /** - * Generates the configuration required to perform aggregation for all the metrics on a field - * - * @return list of MetricAggregatorInfo - */ - public List getMetricReaders(SegmentWriteState state, Map fieldProducerMap) - throws IOException { - List metricReaders = new ArrayList<>(); - for (Metric metric : 
this.starTreeField.getMetrics()) { - for (MetricStat metricStat : metric.getMetrics()) { - SequentialDocValuesIterator metricReader = null; - FieldInfo metricFieldInfo = state.fieldInfos.fieldInfo(metric.getField()); - if (metricStat.equals(MetricStat.DOC_COUNT)) { - // _doc_count is numeric field , so we convert to sortedNumericDocValues and get iterator - metricReader = getIteratorForNumericField(fieldProducerMap, metricFieldInfo, DocCountFieldMapper.NAME); - } else { - if (metricFieldInfo == null) { - metricFieldInfo = getFieldInfo(metric.getField(), DocValuesType.SORTED_NUMERIC); - } - metricReader = new SequentialDocValuesIterator( - fieldProducerMap.get(metricFieldInfo.name).getSortedNumeric(metricFieldInfo) - ); - } - metricReaders.add(metricReader); - } - } - return metricReaders; - } - /** * Converts numericDocValues to sortedNumericDocValues and returns SequentialDocValuesIterator */ @@ -524,45 +677,6 @@ private SequentialDocValuesIterator getIteratorForNumericField( return sequentialDocValuesIterator; } - /** - * Builds the star tree using Star-Tree Document - * - * @param starTreeDocumentIterator contains the sorted and aggregated documents - * @throws IOException when we are unable to build star-tree - */ - void build(Iterator starTreeDocumentIterator) throws IOException { - int numSegmentStarTreeDocument = totalSegmentDocs; - - while (starTreeDocumentIterator.hasNext()) { - appendToStarTree(starTreeDocumentIterator.next()); - } - int numStarTreeDocument = numStarTreeDocs; - logger.debug("Generated star tree docs : [{}] from segment docs : [{}]", numStarTreeDocument, numSegmentStarTreeDocument); - - if (numStarTreeDocs == 0) { - // TODO: Uncomment when segment codec and file formats is ready - // StarTreeBuilderUtils.serializeTree(indexOutput, rootNode, dimensionsSplitOrder, numNodes); - return; - } - - constructStarTree(rootNode, 0, numStarTreeDocs); - int numStarTreeDocumentUnderStarNode = numStarTreeDocs - numStarTreeDocument; - logger.debug( - "Finished constructing star-tree, got [ {} ] tree nodes and [ {} ] starTreeDocument under star-node", - numStarTreeNodes, - numStarTreeDocumentUnderStarNode - ); - - createAggregatedDocs(rootNode); - int numAggregatedStarTreeDocument = numStarTreeDocs - numStarTreeDocument - numStarTreeDocumentUnderStarNode; - logger.debug("Finished creating aggregated documents : {}", numAggregatedStarTreeDocument); - - // TODO: When StarTreeFactory Codec is ready - // Create doc values indices in disk - // Serialize and save in disk - // Write star tree metadata for off heap implementation - } - /** * Adds a document to star-tree * @@ -584,6 +698,20 @@ private InMemoryTreeNode getNewNode() { return new InMemoryTreeNode(); } + /** + * Returns a new star-tree node + * @param dimensionId dimension id of the star-tree node + * @param startDocId start doc id of the star-tree node + * @param endDocId end doc id of the star-tree node + * @param nodeType node type of the star-tree node + * @param dimensionValue dimension value of the star-tree node + * @return + */ + private InMemoryTreeNode getNewNode(int dimensionId, int startDocId, int endDocId, byte nodeType, long dimensionValue) { + numStarTreeNodes++; + return new InMemoryTreeNode(dimensionId, startDocId, endDocId, nodeType, dimensionValue); + } + /** * Implements the algorithm to construct a star-tree * @@ -594,63 +722,73 @@ private InMemoryTreeNode getNewNode() { */ private void constructStarTree(InMemoryTreeNode node, int startDocId, int endDocId) throws IOException { - int childDimensionId = 
node.dimensionId + 1; + int childDimensionId = node.getDimensionId() + 1; if (childDimensionId == numDimensions) { return; } // Construct all non-star children nodes - node.childDimensionId = childDimensionId; - Map children = constructNonStarNodes(startDocId, endDocId, childDimensionId); - node.children = children; + node.setChildDimensionId(childDimensionId); + constructNonStarNodes(node, startDocId, endDocId, childDimensionId); // Construct star-node if required - if (!skipStarNodeCreationForDimensions.contains(childDimensionId) && children.size() > 1) { - children.put((long) ALL, constructStarNode(startDocId, endDocId, childDimensionId)); + if (!skipStarNodeCreationForDimensions.contains(childDimensionId) && node.getChildren().size() > 1) { + node.addChildNode(constructStarNode(startDocId, endDocId, childDimensionId), (long) ALL); + } + + // Further split star node if needed + if (node.getChildStarNode() != null + && (node.getChildStarNode().getEndDocId() - node.getChildStarNode().getStartDocId() > maxLeafDocuments)) { + constructStarTree(node.getChildStarNode(), node.getChildStarNode().getStartDocId(), node.getChildStarNode().getEndDocId()); } // Further split on child nodes if required - for (InMemoryTreeNode child : children.values()) { - if (child.endDocId - child.startDocId > maxLeafDocuments) { - constructStarTree(child, child.startDocId, child.endDocId); + for (InMemoryTreeNode child : node.getChildren().values()) { + if (child.getEndDocId() - child.getStartDocId() > maxLeafDocuments) { + constructStarTree(child, child.getStartDocId(), child.getEndDocId()); } } + } /** * Constructs non star tree nodes * + * @param node parent node * @param startDocId start document id (inclusive) * @param endDocId end document id (exclusive) * @param dimensionId id of the dimension in the star tree - * @return root node with non-star nodes constructed + * * @throws IOException throws an exception if we are unable to construct non-star nodes */ - private Map constructNonStarNodes(int startDocId, int endDocId, int dimensionId) throws IOException { - Map nodes = new HashMap<>(); + private void constructNonStarNodes(InMemoryTreeNode node, int startDocId, int endDocId, int dimensionId) throws IOException { int nodeStartDocId = startDocId; Long nodeDimensionValue = getDimensionValue(startDocId, dimensionId); for (int i = startDocId + 1; i < endDocId; i++) { Long dimensionValue = getDimensionValue(i, dimensionId); if (Objects.equals(dimensionValue, nodeDimensionValue) == false) { - InMemoryTreeNode child = getNewNode(); - child.dimensionId = dimensionId; - child.dimensionValue = nodeDimensionValue != null ? nodeDimensionValue : ALL; - child.startDocId = nodeStartDocId; - child.endDocId = i; - nodes.put(nodeDimensionValue, child); + addChildNode(node, i, dimensionId, nodeStartDocId, nodeDimensionValue); nodeStartDocId = i; nodeDimensionValue = dimensionValue; } } - InMemoryTreeNode lastNode = getNewNode(); - lastNode.dimensionId = dimensionId; - lastNode.dimensionValue = nodeDimensionValue != null ? 
nodeDimensionValue : ALL; - lastNode.startDocId = nodeStartDocId; - lastNode.endDocId = endDocId; - nodes.put(nodeDimensionValue, lastNode); - return nodes; + addChildNode(node, endDocId, dimensionId, nodeStartDocId, nodeDimensionValue); + } + + private void addChildNode(InMemoryTreeNode node, int endDocId, int dimensionId, int nodeStartDocId, Long nodeDimensionValue) { + long childNodeDimensionValue; + byte childNodeType; + if (nodeDimensionValue == null) { + childNodeDimensionValue = ALL; + childNodeType = StarTreeNodeType.NULL.getValue(); + } else { + childNodeDimensionValue = nodeDimensionValue; + childNodeType = StarTreeNodeType.DEFAULT.getValue(); + } + + InMemoryTreeNode lastNode = getNewNode(dimensionId, nodeStartDocId, endDocId, childNodeType, childNodeDimensionValue); + node.addChildNode(lastNode, nodeDimensionValue); } /** @@ -663,17 +801,10 @@ private Map constructNonStarNodes(int startDocId, int en * @throws IOException throws an exception if we are unable to construct non-star nodes */ private InMemoryTreeNode constructStarNode(int startDocId, int endDocId, int dimensionId) throws IOException { - InMemoryTreeNode starNode = getNewNode(); - starNode.dimensionId = dimensionId; - starNode.dimensionValue = ALL; - starNode.nodeType = StarTreeNodeType.STAR.getValue(); - starNode.startDocId = numStarTreeDocs; + int starNodeStartDocId = numStarTreeDocs; Iterator starTreeDocumentIterator = generateStarTreeDocumentsForStarNode(startDocId, endDocId, dimensionId); - while (starTreeDocumentIterator.hasNext()) { - appendToStarTree(starTreeDocumentIterator.next()); - } - starNode.endDocId = numStarTreeDocs; - return starNode; + appendDocumentsToStarTree(starTreeDocumentIterator); + return getNewNode(dimensionId, starNodeStartDocId, numStarTreeDocs, StarTreeNodeType.STAR.getValue(), ALL); } /** @@ -685,57 +816,56 @@ private InMemoryTreeNode constructStarNode(int startDocId, int endDocId, int dim */ private StarTreeDocument createAggregatedDocs(InMemoryTreeNode node) throws IOException { StarTreeDocument aggregatedStarTreeDocument = null; - if (node.children == null) { - // For leaf node - if (node.startDocId == node.endDocId - 1) { + // For leaf node + if (!node.hasChild()) { + + if (node.getStartDocId() == node.getEndDocId() - 1) { // If it has only one document, use it as the aggregated document - aggregatedStarTreeDocument = getStarTreeDocument(node.startDocId); - node.aggregatedDocId = node.startDocId; + aggregatedStarTreeDocument = getStarTreeDocument(node.getStartDocId()); + node.setAggregatedDocId(node.getStartDocId()); } else { // If it has multiple documents, aggregate all of them - for (int i = node.startDocId; i < node.endDocId; i++) { + for (int i = node.getStartDocId(); i < node.getEndDocId(); i++) { aggregatedStarTreeDocument = reduceStarTreeDocuments(aggregatedStarTreeDocument, getStarTreeDocument(i)); } if (null == aggregatedStarTreeDocument) { throw new IllegalStateException("aggregated star-tree document is null after reducing the documents"); } - for (int i = node.dimensionId + 1; i < numDimensions; i++) { + for (int i = node.getDimensionId() + 1; i < numDimensions; i++) { aggregatedStarTreeDocument.dimensions[i] = STAR_IN_DOC_VALUES_INDEX; } - node.aggregatedDocId = numStarTreeDocs; + node.setAggregatedDocId(numStarTreeDocs); appendToStarTree(aggregatedStarTreeDocument); } } else { // For non-leaf node - if (node.children.containsKey((long) ALL)) { + if (node.getChildStarNode() != null) { // If it has star child, use the star child aggregated document directly - for 
(InMemoryTreeNode child : node.children.values()) { - if (child.nodeType == StarTreeNodeType.STAR.getValue()) { - aggregatedStarTreeDocument = createAggregatedDocs(child); - node.aggregatedDocId = child.aggregatedDocId; - } else { - createAggregatedDocs(child); - } + aggregatedStarTreeDocument = createAggregatedDocs(node.getChildStarNode()); + node.setAggregatedDocId(node.getChildStarNode().getAggregatedDocId()); + + for (InMemoryTreeNode child : node.getChildren().values()) { + createAggregatedDocs(child); } } else { // If no star child exists, aggregate all aggregated documents from non-star children - if (node.children.values().size() == 1) { - for (InMemoryTreeNode child : node.children.values()) { + if (node.getChildren().values().size() == 1) { + for (InMemoryTreeNode child : node.getChildren().values()) { aggregatedStarTreeDocument = reduceStarTreeDocuments(aggregatedStarTreeDocument, createAggregatedDocs(child)); - node.aggregatedDocId = child.aggregatedDocId; + node.setAggregatedDocId(child.getAggregatedDocId()); } } else { - for (InMemoryTreeNode child : node.children.values()) { + for (InMemoryTreeNode child : node.getChildren().values()) { aggregatedStarTreeDocument = reduceStarTreeDocuments(aggregatedStarTreeDocument, createAggregatedDocs(child)); } if (null == aggregatedStarTreeDocument) { throw new IllegalStateException("aggregated star-tree document is null after reducing the documents"); } - for (int i = node.dimensionId + 1; i < numDimensions; i++) { + for (int i = node.getDimensionId() + 1; i < numDimensions; i++) { aggregatedStarTreeDocument.dimensions[i] = STAR_IN_DOC_VALUES_INDEX; } - node.aggregatedDocId = numStarTreeDocs; + node.setAggregatedDocId(numStarTreeDocs); appendToStarTree(aggregatedStarTreeDocument); } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilder.java index f63b0cb0cc77d..1613b7c5a3ac0 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilder.java @@ -10,14 +10,18 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.IndexOutput; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.common.util.io.IOUtils; -import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.Metric; +import org.opensearch.index.compositeindex.datacube.MetricStat; import org.opensearch.index.compositeindex.datacube.startree.StarTreeDocument; import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues; import org.opensearch.index.compositeindex.datacube.startree.utils.SequentialDocValuesIterator; import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeDocumentsSorter; import org.opensearch.index.mapper.MapperService; @@ -29,11 +33,15 @@ import java.util.Collections; import java.util.Iterator; import java.util.List; -import java.util.Map; 
import java.util.Objects; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.opensearch.index.compositeindex.CompositeIndexConstants.SEGMENT_DOCS_COUNT; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.fullyQualifiedFieldNameForStarTreeMetricsDocValues; /** * Off-heap implementation of the star tree builder. + * * @opensearch.experimental */ @ExperimentalApi @@ -46,12 +54,20 @@ public class OffHeapStarTreeBuilder extends BaseStarTreeBuilder { * Builds star tree based on star tree field configuration consisting of dimensions, metrics and star tree index * specific configuration. * + * @param metaOut an index output to write star-tree metadata + * @param dataOut an index output to write star-tree data * @param starTreeField holds the configuration for the star tree * @param state stores the segment write state * @param mapperService helps to find the original type of the field */ - protected OffHeapStarTreeBuilder(StarTreeField starTreeField, SegmentWriteState state, MapperService mapperService) throws IOException { - super(starTreeField, state, mapperService); + protected OffHeapStarTreeBuilder( + IndexOutput metaOut, + IndexOutput dataOut, + StarTreeField starTreeField, + SegmentWriteState state, + MapperService mapperService + ) throws IOException { + super(metaOut, dataOut, starTreeField, state, mapperService); segmentDocumentFileManager = new SegmentDocsFileManager(state, starTreeField, metricAggregatorInfos); try { starTreeDocumentFileManager = new StarTreeDocsFileManager(state, starTreeField, metricAggregatorInfos); @@ -73,10 +89,14 @@ public void appendStarTreeDocument(StarTreeDocument starTreeDocument) throws IOE * @param starTreeValuesSubs contains the star tree values from multiple segments */ @Override - public void build(List starTreeValuesSubs) throws IOException { + public void build( + List starTreeValuesSubs, + AtomicInteger fieldNumberAcrossStarTrees, + DocValuesConsumer starTreeDocValuesConsumer + ) throws IOException { boolean success = false; try { - build(mergeStarTrees(starTreeValuesSubs)); + build(mergeStarTrees(starTreeValuesSubs), fieldNumberAcrossStarTrees, starTreeDocValuesConsumer); success = true; } finally { starTreeDocumentFileManager.deleteFiles(success); @@ -133,15 +153,23 @@ Iterator mergeStarTrees(List starTreeValuesSub .size()]; for (int i = 0; i < dimensionsSplitOrder.size(); i++) { String dimension = dimensionsSplitOrder.get(i).getField(); - dimensionReaders[i] = new SequentialDocValuesIterator(starTreeValues.getDimensionDocValuesIteratorMap().get(dimension)); + dimensionReaders[i] = new SequentialDocValuesIterator(starTreeValues.getDimensionDocIdSetIterator(dimension)); } List metricReaders = new ArrayList<>(); - for (Map.Entry metricDocValuesEntry : starTreeValues.getMetricDocValuesIteratorMap().entrySet()) { - metricReaders.add(new SequentialDocValuesIterator(metricDocValuesEntry.getValue())); + // get doc id set iterators for metrics + for (Metric metric : starTreeValues.getStarTreeField().getMetrics()) { + for (MetricStat metricStat : metric.getBaseMetrics()) { + String metricFullName = fullyQualifiedFieldNameForStarTreeMetricsDocValues( + starTreeValues.getStarTreeField().getName(), + metric.getField(), + metricStat.getTypeName() + ); + metricReaders.add(new SequentialDocValuesIterator(starTreeValues.getMetricDocIdSetIterator(metricFullName))); + } } int currentDocId = 0; int numSegmentDocs = Integer.parseInt( - starTreeValues.getAttributes().getOrDefault(NUM_SEGMENT_DOCS, 
String.valueOf(DocIdSetIterator.NO_MORE_DOCS)) + starTreeValues.getAttributes().getOrDefault(SEGMENT_DOCS_COUNT, String.valueOf(DocIdSetIterator.NO_MORE_DOCS)) ); while (currentDocId < numSegmentDocs) { StarTreeDocument starTreeDocument = getStarTreeDocument(currentDocId, dimensionReaders, metricReaders); diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java index 8ff111d3b41d9..1a5c906ad413b 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java @@ -7,13 +7,17 @@ */ package org.opensearch.index.compositeindex.datacube.startree.builder; +import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.IndexOutput; import org.opensearch.common.annotation.ExperimentalApi; -import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.Metric; +import org.opensearch.index.compositeindex.datacube.MetricStat; import org.opensearch.index.compositeindex.datacube.startree.StarTreeDocument; import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues; import org.opensearch.index.compositeindex.datacube.startree.utils.SequentialDocValuesIterator; import org.opensearch.index.mapper.MapperService; @@ -22,8 +26,11 @@ import java.util.Arrays; import java.util.Iterator; import java.util.List; -import java.util.Map; import java.util.Objects; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.opensearch.index.compositeindex.CompositeIndexConstants.SEGMENT_DOCS_COUNT; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.fullyQualifiedFieldNameForStarTreeMetricsDocValues; /** * On heap single tree builder @@ -38,12 +45,20 @@ public class OnHeapStarTreeBuilder extends BaseStarTreeBuilder { /** * Constructor for OnHeapStarTreeBuilder * + * @param metaOut an index output to write star-tree metadata + * @param dataOut an index output to write star-tree data * @param starTreeField star-tree field * @param segmentWriteState segment write state * @param mapperService helps with the numeric type of field */ - public OnHeapStarTreeBuilder(StarTreeField starTreeField, SegmentWriteState segmentWriteState, MapperService mapperService) { - super(starTreeField, segmentWriteState, mapperService); + public OnHeapStarTreeBuilder( + IndexOutput metaOut, + IndexOutput dataOut, + StarTreeField starTreeField, + SegmentWriteState segmentWriteState, + MapperService mapperService + ) throws IOException { + super(metaOut, dataOut, starTreeField, segmentWriteState, mapperService); } @Override @@ -84,12 +99,16 @@ public Iterator sortAndAggregateSegmentDocuments( // TODO : we can save empty iterator for dimensions which are not part of segment starTreeDocuments[currentDocId] = getSegmentStarTreeDocument(currentDocId, dimensionReaders, metricReaders); } - return sortAndAggregateStarTreeDocuments(starTreeDocuments); + return sortAndAggregateStarTreeDocuments(starTreeDocuments, 
false); } @Override - public void build(List starTreeValuesSubs) throws IOException { - build(mergeStarTrees(starTreeValuesSubs)); + public void build( + List starTreeValuesSubs, + AtomicInteger fieldNumberAcrossStarTrees, + DocValuesConsumer starTreeDocValuesConsumer + ) throws IOException { + build(mergeStarTrees(starTreeValuesSubs), fieldNumberAcrossStarTrees, starTreeDocValuesConsumer); } /** @@ -119,17 +138,26 @@ StarTreeDocument[] getSegmentsStarTreeDocuments(List starTreeVal for (int i = 0; i < dimensionsSplitOrder.size(); i++) { String dimension = dimensionsSplitOrder.get(i).getField(); - dimensionReaders[i] = new SequentialDocValuesIterator(starTreeValues.getDimensionDocValuesIteratorMap().get(dimension)); + dimensionReaders[i] = new SequentialDocValuesIterator(starTreeValues.getDimensionDocIdSetIterator(dimension)); } List metricReaders = new ArrayList<>(); - for (Map.Entry metricDocValuesEntry : starTreeValues.getMetricDocValuesIteratorMap().entrySet()) { - metricReaders.add(new SequentialDocValuesIterator(metricDocValuesEntry.getValue())); + // get doc id set iterators for metrics + for (Metric metric : starTreeValues.getStarTreeField().getMetrics()) { + for (MetricStat metricStat : metric.getBaseMetrics()) { + String metricFullName = fullyQualifiedFieldNameForStarTreeMetricsDocValues( + starTreeValues.getStarTreeField().getName(), + metric.getField(), + metricStat.getTypeName() + ); + metricReaders.add(new SequentialDocValuesIterator(starTreeValues.getMetricDocIdSetIterator(metricFullName))); + + } } int currentDocId = 0; int numSegmentDocs = Integer.parseInt( - starTreeValues.getAttributes().getOrDefault(NUM_SEGMENT_DOCS, String.valueOf(DocIdSetIterator.NO_MORE_DOCS)) + starTreeValues.getAttributes().getOrDefault(SEGMENT_DOCS_COUNT, String.valueOf(DocIdSetIterator.NO_MORE_DOCS)) ); while (currentDocId < numSegmentDocs) { starTreeDocuments.add(getStarTreeDocument(currentDocId, dimensionReaders, metricReaders)); @@ -140,10 +168,6 @@ StarTreeDocument[] getSegmentsStarTreeDocuments(List starTreeVal return starTreeDocuments.toArray(starTreeDocumentsArr); } - Iterator sortAndAggregateStarTreeDocuments(StarTreeDocument[] starTreeDocuments) { - return sortAndAggregateStarTreeDocuments(starTreeDocuments, false); - } - /** * Sort, aggregates and merges the star-tree documents * diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeBuilder.java index 357f48c0cc726..23415ddf29132 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeBuilder.java @@ -8,14 +8,16 @@ package org.opensearch.index.compositeindex.datacube.startree.builder; +import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesProducer; import org.opensearch.common.annotation.ExperimentalApi; -import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; +import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues; import java.io.Closeable; import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; /** * A star-tree builder that builds a single star-tree. 
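The hunk below threads a shared field-number counter and a DocValuesConsumer through both build overloads. A rough sketch of the merge-time call shape under those signatures; the helper class and variable names are hypothetical, and the builder is assumed to come from StarTreesBuilder:

    import java.io.IOException;
    import java.util.List;
    import java.util.concurrent.atomic.AtomicInteger;
    import org.apache.lucene.codecs.DocValuesConsumer;
    import org.opensearch.index.compositeindex.datacube.startree.builder.StarTreeBuilder;
    import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues;

    // Hypothetical helper: the merge-time overload is invoked once per star-tree
    // field, with the StarTreeValues collected from the segments being merged.
    class StarTreeMergeSketch {
        static void mergeOne(
            StarTreeBuilder builder,
            List<StarTreeValues> starTreeValuesSubs,
            AtomicInteger fieldNumberAcrossStarTrees,
            DocValuesConsumer starTreeDocValuesConsumer
        ) throws IOException {
            try (builder) {
                builder.build(starTreeValuesSubs, fieldNumberAcrossStarTrees, starTreeDocValuesConsumer);
            }
        }
    }
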
@@ -27,17 +29,29 @@ public interface StarTreeBuilder extends Closeable { /** * Builds the star tree from the original segment documents * - * @param fieldProducerMap contains the docValues producer to get docValues associated with each field + * @param fieldProducerMap contains the docValues producer to get docValues associated with each field + * @param fieldNumberAcrossStarTrees maintains the unique field number across the fields in the star tree + * @param starTreeDocValuesConsumer consumer of star-tree doc values * @throws IOException when we are unable to build star-tree */ - void build(Map fieldProducerMap) throws IOException; + void build( + Map fieldProducerMap, + AtomicInteger fieldNumberAcrossStarTrees, + DocValuesConsumer starTreeDocValuesConsumer + ) throws IOException; /** - * Builds the star tree using StarTreeFactory values from multiple segments + * Builds the star tree using Star Tree values from multiple segments * * @param starTreeValuesSubs contains the star tree values from multiple segments + * @param fieldNumberAcrossStarTrees maintains the unique field number across the fields in the star tree + * @param starTreeDocValuesConsumer consumer of star-tree doc values * @throws IOException when we are unable to build star-tree */ - void build(List starTreeValuesSubs) throws IOException; + void build( + List starTreeValuesSubs, + AtomicInteger fieldNumberAcrossStarTrees, + DocValuesConsumer starTreeDocValuesConsumer + ) throws IOException; } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilder.java index 3b376d7c34351..bc598c9aeab7c 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilder.java @@ -10,11 +10,13 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.store.IndexOutput; import org.opensearch.common.annotation.ExperimentalApi; -import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues; import org.opensearch.index.mapper.CompositeMappedFieldType; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.mapper.StarTreeMapper; @@ -25,6 +27,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; /** * Builder to construct star-trees based on multiple star-tree fields. 
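For orientation, the hunks below rework how StarTreesBuilder is constructed and driven during a flush. A minimal caller sketch follows; it assumes the generic types that this flattened diff drops (for example Map<String, DocValuesProducer> for the producer map), and the wrapper class itself is hypothetical rather than part of the change.

import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.IndexOutput;
import org.opensearch.index.compositeindex.datacube.startree.builder.StarTreesBuilder;
import org.opensearch.index.mapper.MapperService;

import java.io.IOException;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;

class StarTreesBuilderUsageSketch {
    // The AtomicInteger is shared so doc-values field numbers stay unique across all star-trees in the segment.
    void flushStarTrees(SegmentWriteState state, MapperService mapperService,
                        IndexOutput metaOut, IndexOutput dataOut,
                        Map<String, DocValuesProducer> fieldProducerMap,
                        DocValuesConsumer starTreeDocValuesConsumer) throws IOException {
        AtomicInteger fieldNumberAcrossStarTrees = new AtomicInteger();
        try (StarTreesBuilder builder = new StarTreesBuilder(state, mapperService, fieldNumberAcrossStarTrees)) {
            builder.build(metaOut, dataOut, fieldProducerMap, starTreeDocValuesConsumer);
        }
    }
}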
@@ -39,8 +42,9 @@ public class StarTreesBuilder implements Closeable { private final List starTreeFields; private final SegmentWriteState state; private final MapperService mapperService; + private AtomicInteger fieldNumberAcrossStarTrees; - public StarTreesBuilder(SegmentWriteState segmentWriteState, MapperService mapperService) { + public StarTreesBuilder(SegmentWriteState segmentWriteState, MapperService mapperService, AtomicInteger fieldNumberAcrossStarTrees) { List starTreeFields = new ArrayList<>(); for (CompositeMappedFieldType compositeMappedFieldType : mapperService.getCompositeFieldTypes()) { if (compositeMappedFieldType instanceof StarTreeMapper.StarTreeFieldType) { @@ -58,12 +62,24 @@ public StarTreesBuilder(SegmentWriteState segmentWriteState, MapperService mappe this.starTreeFields = starTreeFields; this.state = segmentWriteState; this.mapperService = mapperService; + this.fieldNumberAcrossStarTrees = fieldNumberAcrossStarTrees; } /** - * Builds the star-trees. + * Builds all star-trees for given star-tree fields. + * + * @param metaOut an IndexInput for star-tree metadata + * @param dataOut an IndexInput for star-tree data + * @param fieldProducerMap fetches iterators for the fields (dimensions and metrics) + * @param starTreeDocValuesConsumer a consumer to write star-tree doc values + * @throws IOException when an error occurs while building the star-trees */ - public void build(Map fieldProducerMap) throws IOException { + public void build( + IndexOutput metaOut, + IndexOutput dataOut, + Map fieldProducerMap, + DocValuesConsumer starTreeDocValuesConsumer + ) throws IOException { if (starTreeFields.isEmpty()) { logger.debug("no star-tree fields found, returning from star-tree builder"); return; @@ -75,8 +91,8 @@ public void build(Map fieldProducerMap) throws IOExce // Build all star-trees for (StarTreeField starTreeField : starTreeFields) { - try (StarTreeBuilder starTreeBuilder = getStarTreeBuilder(starTreeField, state, mapperService)) { - starTreeBuilder.build(fieldProducerMap); + try (StarTreeBuilder starTreeBuilder = getStarTreeBuilder(metaOut, dataOut, starTreeField, state, mapperService)) { + starTreeBuilder.build(fieldProducerMap, fieldNumberAcrossStarTrees, starTreeDocValuesConsumer); } } logger.debug("Took {} ms to build {} star-trees with star-tree fields", System.currentTimeMillis() - startTime, numStarTrees); @@ -90,9 +106,17 @@ public void close() throws IOException { /** * Merges star tree fields from multiple segments * + * @param metaOut an IndexInput for star-tree metadata + * @param dataOut an IndexInput for star-tree data * @param starTreeValuesSubsPerField starTreeValuesSubs per field + * @param starTreeDocValuesConsumer a consumer to write star-tree doc values */ - public void buildDuringMerge(final Map> starTreeValuesSubsPerField) throws IOException { + public void buildDuringMerge( + IndexOutput metaOut, + IndexOutput dataOut, + final Map> starTreeValuesSubsPerField, + DocValuesConsumer starTreeDocValuesConsumer + ) throws IOException { logger.debug("Starting merge of {} star-trees with star-tree fields", starTreeValuesSubsPerField.size()); long startTime = System.currentTimeMillis(); for (Map.Entry> entry : starTreeValuesSubsPerField.entrySet()) { @@ -102,8 +126,8 @@ public void buildDuringMerge(final Map> starTreeVal continue; } StarTreeField starTreeField = starTreeValuesList.get(0).getStarTreeField(); - try (StarTreeBuilder builder = getStarTreeBuilder(starTreeField, state, mapperService)) { - builder.build(starTreeValuesList); + try 
(StarTreeBuilder builder = getStarTreeBuilder(metaOut, dataOut, starTreeField, state, mapperService)) { + builder.build(starTreeValuesList, fieldNumberAcrossStarTrees, starTreeDocValuesConsumer); } } logger.debug( @@ -116,13 +140,18 @@ public void buildDuringMerge(final Map> starTreeVal /** * Get star-tree builder based on build mode. */ - StarTreeBuilder getStarTreeBuilder(StarTreeField starTreeField, SegmentWriteState state, MapperService mapperService) - throws IOException { + StarTreeBuilder getStarTreeBuilder( + IndexOutput metaOut, + IndexOutput dataOut, + StarTreeField starTreeField, + SegmentWriteState state, + MapperService mapperService + ) throws IOException { switch (starTreeField.getStarTreeConfig().getBuildMode()) { case ON_HEAP: - return new OnHeapStarTreeBuilder(starTreeField, state, mapperService); + return new OnHeapStarTreeBuilder(metaOut, dataOut, starTreeField, state, mapperService); case OFF_HEAP: - return new OffHeapStarTreeBuilder(starTreeField, state, mapperService); + return new OffHeapStarTreeBuilder(metaOut, dataOut, starTreeField, state, mapperService); default: throw new IllegalArgumentException( String.format( diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/StarTreeWriter.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/StarTreeWriter.java index 7f1839024eea7..e5890be3ccb5b 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/StarTreeWriter.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/StarTreeWriter.java @@ -11,7 +11,7 @@ import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; import org.opensearch.index.compositeindex.datacube.startree.fileformats.data.StarTreeDataWriter; -import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetaWriter; +import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetadataWriter; import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode; import java.io.IOException; @@ -54,6 +54,7 @@ public long writeStarTree(IndexOutput dataOut, InMemoryTreeNode rootNode, int nu * @param metricAggregatorInfos metric aggregator infos * @param numNodes number of nodes in the star tree * @param segmentAggregatedCount segment aggregated count + * @param numStarTreeDocs the total number of star tree documents for the segment * @param dataFilePointer data file pointer * @param dataFileLength data file length * @throws IOException when star-tree data serialization fails @@ -64,15 +65,17 @@ public void writeStarTreeMetadata( List metricAggregatorInfos, Integer numNodes, Integer segmentAggregatedCount, + Integer numStarTreeDocs, long dataFilePointer, long dataFileLength ) throws IOException { - StarTreeMetaWriter.writeStarTreeMetadata( + StarTreeMetadataWriter.writeStarTreeMetadata( metaOut, starTreeField, metricAggregatorInfos, numNodes, segmentAggregatedCount, + numStarTreeDocs, dataFilePointer, dataFileLength ); diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeDataWriter.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeDataWriter.java index 32feb78a4db3d..5fe70d3c075cf 100644 --- 
a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeDataWriter.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeDataWriter.java @@ -14,10 +14,7 @@ import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode; import java.io.IOException; -import java.util.ArrayList; -import java.util.Comparator; import java.util.LinkedList; -import java.util.List; import java.util.Queue; import static org.opensearch.index.compositeindex.datacube.startree.fileformats.node.FixedLengthStarTreeNode.SERIALIZABLE_DATA_SIZE_IN_BYTES; @@ -66,21 +63,26 @@ private static void writeStarTreeNodes(IndexOutput output, InMemoryTreeNode root while (!queue.isEmpty()) { InMemoryTreeNode node = queue.remove(); - if (node.children == null || node.children.isEmpty()) { + if (!node.hasChild()) { writeStarTreeNode(output, node, ALL, ALL); } else { - // Sort all children nodes based on dimension value - List sortedChildren = new ArrayList<>(node.children.values()); - sortedChildren.sort( - Comparator.comparingInt(InMemoryTreeNode::getNodeType).thenComparingLong(InMemoryTreeNode::getDimensionValue) - ); - + int totalNumberOfChildren = 0; int firstChildId = currentNodeId + queue.size() + 1; - int lastChildId = firstChildId + sortedChildren.size() - 1; + + if (node.getChildStarNode() != null) { + totalNumberOfChildren++; + queue.add(node.getChildStarNode()); + } + + if (node.getChildren() != null) { + totalNumberOfChildren = totalNumberOfChildren + node.getChildren().values().size(); + queue.addAll(node.getChildren().values()); + } + + int lastChildId = firstChildId + totalNumberOfChildren - 1; writeStarTreeNode(output, node, firstChildId, lastChildId); - queue.addAll(sortedChildren); } currentNodeId++; @@ -97,12 +99,12 @@ private static void writeStarTreeNodes(IndexOutput output, InMemoryTreeNode root * @throws IOException if an I/O error occurs while writing the node */ private static void writeStarTreeNode(IndexOutput output, InMemoryTreeNode node, int firstChildId, int lastChildId) throws IOException { - output.writeInt(node.dimensionId); - output.writeLong(node.dimensionValue); - output.writeInt(node.startDocId); - output.writeInt(node.endDocId); - output.writeInt(node.aggregatedDocId); - output.writeByte(node.nodeType); + output.writeInt(node.getDimensionId()); + output.writeLong(node.getDimensionValue()); + output.writeInt(node.getStartDocId()); + output.writeInt(node.getEndDocId()); + output.writeInt(node.getAggregatedDocId()); + output.writeByte(node.getNodeType()); output.writeInt(firstChildId); output.writeInt(lastChildId); } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java index 7519c85562a8c..7352c215ee390 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java @@ -11,6 +11,7 @@ import org.apache.logging.log4j.Logger; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.store.IndexInput; +import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.compositeindex.CompositeIndexMetadata; import org.opensearch.index.compositeindex.datacube.Metric; import 
org.opensearch.index.compositeindex.datacube.MetricStat; @@ -30,6 +31,7 @@ * * @opensearch.experimental */ +@ExperimentalApi public class StarTreeMetadata extends CompositeIndexMetadata { private static final Logger logger = LogManager.getLogger(StarTreeMetadata.class); @@ -72,12 +74,12 @@ public class StarTreeMetadata extends CompositeIndexMetadata { /** * The total number of documents aggregated in this star-tree segment. */ - private final Integer segmentAggregatedDocCount; + private final int segmentAggregatedDocCount; /** * The maximum number of documents allowed in a leaf node. */ - private final Integer maxLeafDocs; + private final int maxLeafDocs; /** * Set of dimensions for which star node creation should be skipped. @@ -99,6 +101,11 @@ public class StarTreeMetadata extends CompositeIndexMetadata { */ private final long dataLength; + /** + * The number of star tree documents in the star tree. + */ + private final int starTreeDocCount; + /** * A star tree metadata constructor to initialize star tree metadata from the segment file (.cim) using index input. * @@ -124,6 +131,7 @@ public StarTreeMetadata( this.dimensionFields = readStarTreeDimensions(); this.metrics = readMetricEntries(); this.segmentAggregatedDocCount = readSegmentAggregatedDocCount(); + this.starTreeDocCount = readStarTreeDocCount(); this.maxLeafDocs = readMaxLeafDocs(); this.skipStarNodeCreationInDims = readSkipStarNodeCreationInDims(); this.starTreeBuildMode = readBuildMode(); @@ -146,6 +154,7 @@ public StarTreeMetadata( * @param dimensionFields list of dimension fields * @param metrics list of metric entries * @param segmentAggregatedDocCount segment aggregated doc count + * @param starTreeDocCount the total number of star tree documents for the segment * @param maxLeafDocs max leaf docs * @param skipStarNodeCreationInDims set of dimensions to skip star node creation * @param starTreeBuildMode star tree build mode @@ -161,6 +170,7 @@ public StarTreeMetadata( List dimensionFields, List metrics, Integer segmentAggregatedDocCount, + Integer starTreeDocCount, Integer maxLeafDocs, Set skipStarNodeCreationInDims, StarTreeFieldConfiguration.StarTreeBuildMode starTreeBuildMode, @@ -176,6 +186,7 @@ public StarTreeMetadata( this.dimensionFields = dimensionFields; this.metrics = metrics; this.segmentAggregatedDocCount = segmentAggregatedDocCount; + this.starTreeDocCount = starTreeDocCount; this.maxLeafDocs = maxLeafDocs; this.skipStarNodeCreationInDims = skipStarNodeCreationInDims; this.starTreeBuildMode = starTreeBuildMode; @@ -184,7 +195,7 @@ public StarTreeMetadata( } private int readNumberOfNodes() throws IOException { - return meta.readInt(); + return meta.readVInt(); } private int readDimensionsCount() throws IOException { @@ -209,22 +220,47 @@ private int readMetricsCount() throws IOException { private List readMetricEntries() throws IOException { int metricCount = readMetricsCount(); - Map starTreeMetricMap = new LinkedHashMap<>(); + Map> starTreeMetricStatMap = new LinkedHashMap<>(); for (int i = 0; i < metricCount; i++) { String metricName = meta.readString(); int metricStatOrdinal = meta.readVInt(); MetricStat metricStat = MetricStat.fromMetricOrdinal(metricStatOrdinal); - Metric metric = starTreeMetricMap.computeIfAbsent(metricName, field -> new Metric(field, new ArrayList<>())); - metric.getMetrics().add(metricStat); + List metricStats = starTreeMetricStatMap.computeIfAbsent(metricName, field -> new ArrayList<>()); + metricStats.add(metricStat); } + List starTreeMetricMap = new ArrayList<>(); + for (Map.Entry> 
metricStatsEntry : starTreeMetricStatMap.entrySet()) { + addEligibleDerivedMetrics(metricStatsEntry.getValue()); + starTreeMetricMap.add(new Metric(metricStatsEntry.getKey(), metricStatsEntry.getValue())); - return new ArrayList<>(starTreeMetricMap.values()); + } + return starTreeMetricMap; + } + + /** + * Add derived metrics if all associated base metrics are present + */ + private void addEligibleDerivedMetrics(List metricStatsList) { + Set metricStatsSet = new HashSet<>(metricStatsList); + for (MetricStat metric : MetricStat.values()) { + if (metric.isDerivedMetric() && !metricStatsSet.contains(metric)) { + List sourceMetrics = metric.getBaseMetrics(); + if (metricStatsSet.containsAll(sourceMetrics)) { + metricStatsList.add(metric); + metricStatsSet.add(metric); + } + } + } } private int readSegmentAggregatedDocCount() throws IOException { return meta.readVInt(); } + private Integer readStarTreeDocCount() throws IOException { + return meta.readVInt(); + } + private int readMaxLeafDocs() throws IOException { return meta.readVInt(); } @@ -296,16 +332,25 @@ public List getMetrics() { * * @return the aggregated document count for the star-tree. */ - public Integer getSegmentAggregatedDocCount() { + public int getSegmentAggregatedDocCount() { return segmentAggregatedDocCount; } + /** + * Returns the total number of star tree documents in the segment + * + * @return the number of star tree documents in the segment + */ + public int getStarTreeDocCount() { + return starTreeDocCount; + } + /** * Returns the max leaf docs for the star-tree. * * @return the max leaf docs. */ - public Integer getMaxLeafDocs() { + public int getMaxLeafDocs() { return maxLeafDocs; } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaWriter.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadataWriter.java similarity index 90% rename from server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaWriter.java rename to server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadataWriter.java index 2515c1efc3aed..1c04350e25047 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaWriter.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadataWriter.java @@ -29,9 +29,9 @@ * * @opensearch.experimental */ -public class StarTreeMetaWriter { +public class StarTreeMetadataWriter { - private static final Logger logger = LogManager.getLogger(StarTreeMetaWriter.class); + private static final Logger logger = LogManager.getLogger(StarTreeMetadataWriter.class); /** * Writes the star-tree metadata. 
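The hunks that follow thread the new numStarTreeDocs value through the static write path. As a reference for callers, a condensed sketch of the delegation performed by StarTreeWriter above is shown here; the generic type List<MetricAggregatorInfo> is assumed from the surrounding imports, and the wrapper class is hypothetical, not part of the change.

import org.apache.lucene.store.IndexOutput;
import org.opensearch.index.compositeindex.datacube.startree.StarTreeField;
import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo;
import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetadataWriter;

import java.io.IOException;
import java.util.List;

class StarTreeMetadataWriteSketch {
    static void writeMetadata(IndexOutput metaOut, StarTreeField starTreeField,
                              List<MetricAggregatorInfo> metricAggregatorInfos,
                              int numNodes, int segmentAggregatedCount, int numStarTreeDocs,
                              long dataFilePointer, long dataFileLength) throws IOException {
        // numStarTreeDocs (new in this change) records the total star-tree documents for the segment,
        // written alongside the pre-existing segment aggregated doc count.
        StarTreeMetadataWriter.writeStarTreeMetadata(
            metaOut, starTreeField, metricAggregatorInfos,
            numNodes, segmentAggregatedCount, numStarTreeDocs,
            dataFilePointer, dataFileLength
        );
    }
}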
@@ -51,6 +51,7 @@ public static void writeStarTreeMetadata( List metricAggregatorInfos, Integer numNodes, Integer segmentAggregatedCount, + Integer numStarTreeDocs, long dataFilePointer, long dataFileLength ) throws IOException { @@ -60,7 +61,16 @@ public static void writeStarTreeMetadata( writeMetaHeader(metaOut); // TODO: Replace the parameters with StarTreeMetadata class object - writeMeta(metaOut, metricAggregatorInfos, starTreeField, numNodes, segmentAggregatedCount, dataFilePointer, dataFileLength); + writeMeta( + metaOut, + metricAggregatorInfos, + starTreeField, + numNodes, + segmentAggregatedCount, + numStarTreeDocs, + dataFilePointer, + dataFileLength + ); logger.debug( "Star tree meta size in bytes : {} for star-tree field {}", @@ -81,6 +91,7 @@ private static void writeMetaHeader(IndexOutput metaOut) throws IOException { // version metaOut.writeVInt(VERSION_CURRENT); + } /** @@ -91,6 +102,7 @@ private static void writeMetaHeader(IndexOutput metaOut) throws IOException { * @param starTreeField the star tree field * @param numNodes number of nodes in the star tree * @param segmentAggregatedDocCount the aggregated document count for the segment + * @param numStarTreeDocs the total number of star tree documents for the segment * @param dataFilePointer the file pointer to the start of the star-tree data * @param dataFileLength the length of the star-tree data file * @throws IOException if an I/O error occurs while writing the metadata @@ -101,6 +113,7 @@ private static void writeMeta( StarTreeField starTreeField, int numNodes, Integer segmentAggregatedDocCount, + Integer numStarTreeDocs, long dataFilePointer, long dataFileLength ) throws IOException { @@ -112,7 +125,7 @@ private static void writeMeta( metaOut.writeString(CompositeMappedFieldType.CompositeFieldType.STAR_TREE.getName()); // number of nodes - metaOut.writeInt(numNodes); + metaOut.writeVInt(numNodes); // number of dimensions // TODO: Revisit the number of dimensions for timestamps (as we will split timestamp into min, hour, etc.) 
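A side effect of the writeInt to writeVInt switch above is that the node count is now variable-length encoded, so the reader side (StarTreeMetadata.readNumberOfNodes, changed to readVInt earlier in this diff) has to stay symmetric. A small illustrative round trip using Lucene's in-memory data output and input, not code from this change:

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteBuffersDataOutput;

import java.io.IOException;

class VIntRoundTripSketch {
    static int roundTrip(int numNodes) throws IOException {
        ByteBuffersDataOutput out = new ByteBuffersDataOutput();
        out.writeVInt(numNodes);                                  // writer side: StarTreeMetadataWriter.writeMeta
        ByteArrayDataInput in = new ByteArrayDataInput(out.toArrayCopy());
        return in.readVInt();                                     // reader side: StarTreeMetadata.readNumberOfNodes
    }
}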
@@ -137,6 +150,9 @@ private static void writeMeta( // segment aggregated document count metaOut.writeVInt(segmentAggregatedDocCount); + // segment star tree document count + metaOut.writeVInt(numStarTreeDocs); + // max leaf docs metaOut.writeVInt(starTreeField.getStarTreeConfig().maxLeafDocs()); diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNode.java index 89ac4af51e221..df2ce9096bfc1 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNode.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNode.java @@ -201,7 +201,6 @@ public StarTreeNode getChildForDimensionValue(Long dimensionValue) throws IOExce StarTreeNode resultStarTreeNode = null; if (null != dimensionValue) { resultStarTreeNode = binarySearchChild(dimensionValue); - assert null != resultStarTreeNode; } return resultStarTreeNode; } @@ -250,12 +249,11 @@ private FixedLengthStarTreeNode binarySearchChild(long dimensionValue) throws IO low++; } - // if the current node is null node, increment the low to reduce the search space - if (matchStarTreeNodeTypeOrNull(new FixedLengthStarTreeNode(in, low), StarTreeNodeType.NULL) != null) { - low++; - } - int high = getInt(LAST_CHILD_ID_OFFSET); + // if the current node is null node, decrement the high to reduce the search space + if (matchStarTreeNodeTypeOrNull(new FixedLengthStarTreeNode(in, high), StarTreeNodeType.NULL) != null) { + high--; + } while (low <= high) { int mid = low + (high - low) / 2; diff --git a/server/src/main/java/org/opensearch/index/codec/composite/CompositeIndexValues.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/index/CompositeIndexValues.java similarity index 86% rename from server/src/main/java/org/opensearch/index/codec/composite/CompositeIndexValues.java rename to server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/index/CompositeIndexValues.java index f8848aceab343..3c5c6b71849f2 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/CompositeIndexValues.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/index/CompositeIndexValues.java @@ -6,7 +6,7 @@ * compatible open source license. */ -package org.opensearch.index.codec.composite; +package org.opensearch.index.compositeindex.datacube.startree.index; import org.opensearch.common.annotation.ExperimentalApi; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/index/StarTreeValues.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/index/StarTreeValues.java new file mode 100644 index 0000000000000..a34bbbe9ee738 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/index/StarTreeValues.java @@ -0,0 +1,274 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.compositeindex.datacube.startree.index; + +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.IndexInput; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.compositeindex.CompositeIndexMetadata; +import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.Metric; +import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.compositeindex.datacube.ReadDimension; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; +import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetadata; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeFactory; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNode; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Supplier; + +import static org.opensearch.index.codec.composite.composite99.Composite99DocValuesReader.getSortedNumericDocValues; +import static org.opensearch.index.compositeindex.CompositeIndexConstants.SEGMENT_DOCS_COUNT; +import static org.opensearch.index.compositeindex.CompositeIndexConstants.STAR_TREE_DOCS_COUNT; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.fullyQualifiedFieldNameForStarTreeDimensionsDocValues; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.fullyQualifiedFieldNameForStarTreeMetricsDocValues; + +/** + * Concrete class that holds the star tree associated values from the segment + * + * @opensearch.experimental + */ +@ExperimentalApi +public class StarTreeValues implements CompositeIndexValues { + + /** + * Representing the star tree field configuration. + */ + private final StarTreeField starTreeField; + + /** + * The root node of the star tree. + */ + private final StarTreeNode root; + + /** + * A map containing suppliers for DocIdSetIterators for dimensions. + */ + private final Map> dimensionDocValuesIteratorMap; + + /** + * A map containing suppliers for DocIdSetIterators for metrics. + */ + private final Map> metricDocValuesIteratorMap; + + /** + * A map containing attributes associated with the star tree values. + */ + private final Map attributes; + + /** + * A metadata for the star-tree + */ + private final StarTreeMetadata starTreeMetadata; + + /** + * Constructs a new StarTreeValues object with the provided parameters. + * Used for testing. + * + * @param starTreeField The StarTreeField object representing the star tree field configuration. + * @param root The root node of the star tree. + * @param dimensionDocValuesIteratorMap A map containing suppliers for DocIdSetIterators for dimensions. + * @param metricDocValuesIteratorMap A map containing suppliers for DocIdSetIterators for metrics. + * @param attributes A map containing attributes associated with the star tree values. 
+ */ + public StarTreeValues( + StarTreeField starTreeField, + StarTreeNode root, + Map> dimensionDocValuesIteratorMap, + Map> metricDocValuesIteratorMap, + Map attributes, + StarTreeMetadata compositeIndexMetadata + ) { + this.starTreeField = starTreeField; + this.root = root; + this.dimensionDocValuesIteratorMap = dimensionDocValuesIteratorMap; + this.metricDocValuesIteratorMap = metricDocValuesIteratorMap; + this.attributes = attributes; + this.starTreeMetadata = compositeIndexMetadata; + } + + /** + * Constructs a new StarTreeValues object by reading the data from the segment + * + * @param compositeIndexMetadata The CompositeIndexMetadata object containing metadata for the composite index. + * @param compositeIndexDataIn The IndexInput object for reading the composite index data. + * @param compositeDocValuesProducer The DocValuesProducer object for producing doc values. + * @param readState The SegmentReadState object representing the state of the segment being read. + * @throws IOException If an I/O error occurs while reading the data. + */ + public StarTreeValues( + CompositeIndexMetadata compositeIndexMetadata, + IndexInput compositeIndexDataIn, + DocValuesProducer compositeDocValuesProducer, + SegmentReadState readState + ) throws IOException { + + starTreeMetadata = (StarTreeMetadata) compositeIndexMetadata; + + // build skip star node dimensions + Set skipStarNodeCreationInDims = starTreeMetadata.getSkipStarNodeCreationInDims(); + + // build dimensions + List readDimensions = new ArrayList<>(); + for (String dimension : starTreeMetadata.getDimensionFields()) { + readDimensions.add(new ReadDimension(dimension)); + } + + // star-tree field + this.starTreeField = new StarTreeField( + starTreeMetadata.getCompositeFieldName(), + readDimensions, + starTreeMetadata.getMetrics(), + new StarTreeFieldConfiguration( + starTreeMetadata.getMaxLeafDocs(), + skipStarNodeCreationInDims, + starTreeMetadata.getStarTreeBuildMode() + ) + ); + + this.root = StarTreeFactory.createStarTree(compositeIndexDataIn, starTreeMetadata); + + // get doc id set iterators for metrics and dimensions + dimensionDocValuesIteratorMap = new LinkedHashMap<>(); + metricDocValuesIteratorMap = new LinkedHashMap<>(); + + // get doc id set iterators for dimensions + for (String dimension : starTreeMetadata.getDimensionFields()) { + dimensionDocValuesIteratorMap.put(dimension, () -> { + try { + SortedNumericDocValues dimensionSortedNumericDocValues = null; + if (readState != null) { + FieldInfo dimensionfieldInfo = readState.fieldInfos.fieldInfo( + fullyQualifiedFieldNameForStarTreeDimensionsDocValues(starTreeField.getName(), dimension) + ); + if (dimensionfieldInfo != null) { + dimensionSortedNumericDocValues = compositeDocValuesProducer.getSortedNumeric(dimensionfieldInfo); + } + } + return getSortedNumericDocValues(dimensionSortedNumericDocValues); + } catch (IOException e) { + throw new RuntimeException("Error loading dimension DocIdSetIterator", e); + } + }); + } + + // get doc id set iterators for metrics + for (Metric metric : starTreeMetadata.getMetrics()) { + for (MetricStat metricStat : metric.getBaseMetrics()) { + String metricFullName = fullyQualifiedFieldNameForStarTreeMetricsDocValues( + starTreeField.getName(), + metric.getField(), + metricStat.getTypeName() + ); + metricDocValuesIteratorMap.put(metricFullName, () -> { + try { + SortedNumericDocValues metricSortedNumericDocValues = null; + if (readState != null) { + FieldInfo metricFieldInfo = readState.fieldInfos.fieldInfo(metricFullName); + if 
(metricFieldInfo != null) { + metricSortedNumericDocValues = compositeDocValuesProducer.getSortedNumeric(metricFieldInfo); + } + } + return getSortedNumericDocValues(metricSortedNumericDocValues); + } catch (IOException e) { + throw new RuntimeException("Error loading metric DocIdSetIterator", e); + } + }); + } + } + + // create star-tree attributes map + + // Create an unmodifiable view of the map + attributes = Map.of( + SEGMENT_DOCS_COUNT, + String.valueOf(starTreeMetadata.getSegmentAggregatedDocCount()), + STAR_TREE_DOCS_COUNT, + String.valueOf(starTreeMetadata.getStarTreeDocCount()) + ); + + } + + @Override + public CompositeIndexValues getValues() { + return this; + } + + /** + * Returns an object representing the star tree field configuration. + * + * @return The StarTreeField object representing the star tree field configuration. + */ + public StarTreeField getStarTreeField() { + return starTreeField; + } + + /** + * Returns the root node of the star tree. + * + * @return The root node of the star tree. + */ + public StarTreeNode getRoot() { + return root; + } + + /** + * Returns the map containing attributes associated with the star tree values. + * + * @return The map containing attributes associated with the star tree values. + */ + public Map getAttributes() { + return attributes; + } + + /** + * Returns the DocIdSetIterator for the specified dimension. + * + * @param dimension The name of the dimension. + * @return The DocIdSetIterator for the specified dimension. + */ + public DocIdSetIterator getDimensionDocIdSetIterator(String dimension) { + + if (dimensionDocValuesIteratorMap.containsKey(dimension)) { + return dimensionDocValuesIteratorMap.get(dimension).get(); + } + + throw new IllegalArgumentException("dimension [" + dimension + "] does not exist in the segment."); + } + + /** + * Returns the DocIdSetIterator for the specified fully qualified metric name. + * + * @param fullyQualifiedMetricName The fully qualified name of the metric. + * @return The DocIdSetIterator for the specified fully qualified metric name. 
+ */ + public DocIdSetIterator getMetricDocIdSetIterator(String fullyQualifiedMetricName) { + + if (metricDocValuesIteratorMap.containsKey(fullyQualifiedMetricName)) { + return metricDocValuesIteratorMap.get(fullyQualifiedMetricName).get(); + } + + throw new IllegalArgumentException("metric [" + fullyQualifiedMetricName + "] does not exist in the segment."); + } + + public int getStarTreeDocumentCount() { + return starTreeMetadata.getStarTreeDocCount(); + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/package-info.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/index/package-info.java similarity index 57% rename from server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/package-info.java rename to server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/index/package-info.java index 67808ad51289a..06029042ab407 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/package-info.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/index/package-info.java @@ -7,6 +7,6 @@ */ /** - * classes responsible for handling all star tree structures and operations as part of codec + * Classes responsible for handling all star tree values from the segment */ -package org.opensearch.index.codec.composite.datacube.startree; +package org.opensearch.index.compositeindex.datacube.startree.index; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/InMemoryTreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/InMemoryTreeNode.java index 20f7dcf184391..c3bf4475f75c2 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/InMemoryTreeNode.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/InMemoryTreeNode.java @@ -7,14 +7,15 @@ */ package org.opensearch.index.compositeindex.datacube.startree.node; +import org.opensearch.common.SetOnce; import org.opensearch.common.annotation.ExperimentalApi; +import java.util.LinkedHashMap; import java.util.Map; import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.ALL; /** - * /** * Represents a node in a tree data structure, specifically designed for a star-tree implementation. * A star-tree node will represent both star and non-star nodes. * @@ -23,45 +24,70 @@ @ExperimentalApi public class InMemoryTreeNode { + public InMemoryTreeNode() { + this.dimensionId = ALL; + this.startDocId = ALL; + this.endDocId = ALL; + this.nodeType = (byte) 0; + this.dimensionValue = ALL; + this.childStarNode = new SetOnce<>(); + this.children = new LinkedHashMap<>(); + } + + public InMemoryTreeNode(int dimensionId, int startDocId, int endDocId, byte nodeType, long dimensionValue) { + this.dimensionId = dimensionId; + this.startDocId = startDocId; + this.endDocId = endDocId; + this.nodeType = nodeType; + this.dimensionValue = dimensionValue; + this.childStarNode = new SetOnce<>(); + this.children = new LinkedHashMap<>(); + } + /** * The dimension id for the dimension (field) associated with this star-tree node. */ - public int dimensionId = ALL; + private final int dimensionId; /** * The starting document id (inclusive) associated with this star-tree node. */ - public int startDocId = ALL; + private final int startDocId; /** * The ending document id (exclusive) associated with this star-tree node. 
*/ - public int endDocId = ALL; + private final int endDocId; /** * The aggregated document id associated with this star-tree node. */ - public int aggregatedDocId = ALL; + private int aggregatedDocId = ALL; /** * The child dimension identifier associated with this star-tree node. */ - public int childDimensionId = ALL; + private int childDimensionId = ALL; /** * The value of the dimension associated with this star-tree node. */ - public long dimensionValue = ALL; + private final long dimensionValue; /** * A byte indicating whether the node is star node, null node or default node (with dimension value present). */ - public byte nodeType = 0; + private byte nodeType; /** * A map containing the child nodes of this star-tree node, keyed by their dimension id. */ - public Map children; + private final Map children; + + /** + * A map containing the child star node of this star-tree node. + */ + private final SetOnce childStarNode; public long getDimensionValue() { return dimensionValue; @@ -71,4 +97,74 @@ public byte getNodeType() { return nodeType; } + public boolean hasChild() { + return !(this.children.isEmpty() && this.childStarNode.get() == null); + } + + public int getDimensionId() { + return dimensionId; + } + + public int getStartDocId() { + return startDocId; + } + + public int getEndDocId() { + return endDocId; + } + + public void setNodeType(byte nodeType) { + this.nodeType = nodeType; + } + + public void addChildNode(InMemoryTreeNode childNode, Long dimensionValue) { + if (childNode.getNodeType() == StarTreeNodeType.STAR.getValue()) { + this.childStarNode.set(childNode); + } else { + this.children.put(dimensionValue, childNode); + assert assertStarTreeChildOrder(childNode); + } + } + + public Map getChildren() { + return children; + } + + public InMemoryTreeNode getChildStarNode() { + return childStarNode.get(); + } + + public int getChildDimensionId() { + return childDimensionId; + } + + public void setChildDimensionId(int childDimensionId) { + this.childDimensionId = childDimensionId; + } + + public int getAggregatedDocId() { + return aggregatedDocId; + } + + public void setAggregatedDocId(int aggregatedDocId) { + this.aggregatedDocId = aggregatedDocId; + } + + private boolean assertStarTreeChildOrder(InMemoryTreeNode childNode) { + if (childNode.nodeType != StarTreeNodeType.NULL.getValue() && !this.children.isEmpty()) { + InMemoryTreeNode lastNode = null; + for (Map.Entry entry : this.children.entrySet()) { + lastNode = entry.getValue(); + } + assert lastNode.dimensionValue <= childNode.dimensionValue; + } else if (childNode.nodeType == StarTreeNodeType.NULL.getValue() && !this.children.isEmpty()) { + InMemoryTreeNode lastNode = null; + for (Map.Entry entry : this.children.entrySet()) { + lastNode = entry.getValue(); + } + assert lastNode.nodeType == StarTreeNodeType.NULL.getValue(); + } + return true; + } + } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeType.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeType.java index 4c4725e78ff15..9d7a31bda4deb 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeType.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeType.java @@ -9,7 +9,7 @@ package org.opensearch.index.compositeindex.datacube.startree.node; /** - * Represents the different types of nodes in a StarTreeFactory data structure. 
+ * Represents the different types of nodes in a Star Tree data structure. * *
* In order to handle different node types, we use a byte value to represent the node type. @@ -23,11 +23,11 @@ *
* The node type can be one of the following: *
* @@ -42,17 +42,17 @@ public enum StarTreeNodeType { * Represents a star node type. * */ - STAR("star", (byte) -2), + STAR("star", (byte) -1), /** - * Represents a null node type. + * Represents a default node type. */ - NULL("null", (byte) -1), + DEFAULT("default", (byte) 0), /** - * Represents a default node type. + * Represents a null node type. */ - DEFAULT("default", (byte) 0); + NULL("null", (byte) 1); private final String name; private final byte value; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/package-info.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/package-info.java index 516d5b5a012ab..19d12bc6318d7 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/package-info.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/package-info.java @@ -8,5 +8,7 @@ /** * Holds classes associated with star tree node + * + * @opensearch.experimental */ package org.opensearch.index.compositeindex.datacube.startree.node; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIterator.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIterator.java index 400d7a1c00104..061841d3e140a 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIterator.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/SequentialDocValuesIterator.java @@ -28,6 +28,11 @@ public class SequentialDocValuesIterator { */ private final DocIdSetIterator docIdSetIterator; + /** + * The value associated with the latest document. + */ + private Long docValue; + /** * The id of the latest document. */ @@ -47,10 +52,19 @@ public SequentialDocValuesIterator(DocIdSetIterator docIdSetIterator) { * * @return the id of the latest document */ - int getDocId() { + public int getDocId() { return docId; } + /** + * Sets the id of the latest document. + * + * @param docId the ID of the latest document + */ + private void setDocId(int docId) { + this.docId = docId; + } + /** * Returns the DocIdSetIterator associated with this instance. 
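In the SequentialDocValuesIterator hunks below, nextDoc(currentDocId) only advances the wrapped DocIdSetIterator when the cached doc id is still behind the requested one. A hedged usage sketch, not taken from this PR, with a hypothetical caller class:

import org.apache.lucene.search.DocIdSetIterator;
import org.opensearch.index.compositeindex.datacube.startree.utils.SequentialDocValuesIterator;

import java.io.IOException;

class SequentialDocValuesIteratorSketch {
    static void walk(DocIdSetIterator dimensionIterator, int numSegmentDocs) throws IOException {
        SequentialDocValuesIterator reader = new SequentialDocValuesIterator(dimensionIterator);
        for (int doc = 0; doc < numSegmentDocs; doc++) {
            int positionedAt = reader.nextDoc(doc); // no-op when the iterator is already at or past doc
            if (positionedAt == doc) {
                // a star-tree builder would read the dimension or metric value for this doc here
            }
        }
    }
}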
* @@ -65,7 +79,7 @@ public int nextDoc(int currentDocId) throws IOException { if (docId >= currentDocId) { return docId; } - docId = this.docIdSetIterator.nextDoc(); + setDocId(this.docIdSetIterator.nextDoc()); return docId; } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtils.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtils.java index dc155df4eafca..2aae0d4ca7e29 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtils.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtils.java @@ -74,7 +74,7 @@ public static FieldInfo[] getFieldInfoList(List fields) { int fieldNumber = 0; for (String fieldName : fields) { - fieldInfoList[fieldNumber] = getFieldInfo(fieldName, fieldNumber); + fieldInfoList[fieldNumber] = getFieldInfo(fieldName, DocValuesType.SORTED_NUMERIC, fieldNumber); fieldNumber++; } return fieldInfoList; @@ -83,10 +83,11 @@ public static FieldInfo[] getFieldInfoList(List fields) { /** * Get new field info instance for a given field name and field number * @param fieldName name of the field + * @param docValuesType doc value type of the field * @param fieldNumber number of the field * @return new field info instance */ - public static FieldInfo getFieldInfo(String fieldName, int fieldNumber) { + public static FieldInfo getFieldInfo(String fieldName, DocValuesType docValuesType, int fieldNumber) { return new FieldInfo( fieldName, fieldNumber, @@ -94,7 +95,40 @@ public static FieldInfo getFieldInfo(String fieldName, int fieldNumber) { false, true, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, - DocValuesType.SORTED_NUMERIC, + docValuesType, + -1, + Collections.emptyMap(), + 0, + 0, + 0, + 0, + VectorEncoding.FLOAT32, + VectorSimilarityFunction.EUCLIDEAN, + false, + false + ); + } + + /** + * Get new field info instance for a given field name and field number. + * It's a dummy field info to fetch doc id set iterators based on field name. + *

+ * Actual field infos uses fieldNumberAcrossStarTrees parameter to achieve consistent + * and unique field numbers across fields and across multiple star trees + * + * @param fieldName name of the field + * @param docValuesType doc value type of the field + * @return new field info instance + */ + public static FieldInfo getFieldInfo(String fieldName, DocValuesType docValuesType) { + return new FieldInfo( + fieldName, + 0, + false, + false, + true, + IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, + docValuesType, -1, Collections.emptyMap(), 0, diff --git a/server/src/main/java/org/opensearch/index/engine/SegmentsStats.java b/server/src/main/java/org/opensearch/index/engine/SegmentsStats.java index 34aecfc62b8b2..8a5a4a5a94ce6 100644 --- a/server/src/main/java/org/opensearch/index/engine/SegmentsStats.java +++ b/server/src/main/java/org/opensearch/index/engine/SegmentsStats.java @@ -41,6 +41,7 @@ import org.opensearch.core.xcontent.ToXContentFragment; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.index.ReplicationStats; +import org.opensearch.index.codec.composite.composite99.Composite99DocValuesFormat; import org.opensearch.index.codec.fuzzy.FuzzyFilterPostingsFormat; import org.opensearch.index.remote.RemoteSegmentStats; @@ -97,6 +98,10 @@ public class SegmentsStats implements Writeable, ToXContentFragment { Map.entry("tvd", "Term Vector Documents"), Map.entry("tvf", "Term Vector Fields"), Map.entry("liv", "Live Documents"), + Map.entry(Composite99DocValuesFormat.DATA_EXTENSION, "Composite Index"), + Map.entry(Composite99DocValuesFormat.META_EXTENSION, "Composite Index"), + Map.entry(Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION, "Composite Index DocValues"), + Map.entry(Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION, "Composite Index DocValues"), Map.entry(FuzzyFilterPostingsFormat.FUZZY_FILTER_FILE_EXTENSION, "Fuzzy Filter") ); diff --git a/server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java index cf8703209fb37..bc50f507010c4 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java @@ -61,6 +61,8 @@ import org.opensearch.index.query.QueryRewriteContext; import org.opensearch.index.query.QueryShardContext; import org.opensearch.search.DocValueFormat; +import org.opensearch.search.approximate.ApproximateIndexOrDocValuesQuery; +import org.opensearch.search.approximate.ApproximatePointRangeQuery; import org.opensearch.search.lookup.SearchLookup; import java.io.IOException; @@ -80,6 +82,7 @@ import java.util.function.Supplier; import static org.opensearch.common.time.DateUtils.toLong; +import static org.apache.lucene.document.LongPoint.pack; /** * A {@link FieldMapper} for dates. @@ -108,6 +111,21 @@ public static DateFormatter getDefaultDateTimeFormatter() { : LEGACY_DEFAULT_DATE_TIME_FORMATTER; } + public static Query getDefaultQuery(Query pointRangeQuery, Query dvQuery, String name, long l, long u) { + return FeatureFlags.isEnabled(FeatureFlags.APPROXIMATE_POINT_RANGE_QUERY_SETTING) + ? 
new ApproximateIndexOrDocValuesQuery( + pointRangeQuery, + new ApproximatePointRangeQuery(name, pack(new long[] { l }).bytes, pack(new long[] { u }).bytes, new long[] { l }.length) { + @Override + protected String toString(int dimension, byte[] value) { + return Long.toString(LongPoint.decodeDimension(value, 0)); + } + }, + dvQuery + ) + : new IndexOrDocValuesQuery(pointRangeQuery, dvQuery); + } + /** * Resolution of the date time * @@ -331,6 +349,7 @@ public DateFieldMapper build(BuilderContext context) { Long nullTimestamp = parseNullValue(ft); return new DateFieldMapper(name, ft, multiFieldsBuilder.build(this, context), copyTo.build(), nullTimestamp, resolution, this); } + } public static final TypeParser MILLIS_PARSER = new TypeParser((n, c) -> { @@ -463,24 +482,22 @@ public Query rangeQuery( } DateMathParser parser = forcedDateParser == null ? dateMathParser : forcedDateParser; return dateRangeQuery(lowerTerm, upperTerm, includeLower, includeUpper, timeZone, parser, context, resolution, (l, u) -> { + Query pointRangeQuery = isSearchable() ? LongPoint.newRangeQuery(name(), l, u) : null; + Query dvQuery = hasDocValues() ? SortedNumericDocValuesField.newSlowRangeQuery(name(), l, u) : null; if (isSearchable() && hasDocValues()) { - Query query = LongPoint.newRangeQuery(name(), l, u); - Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery(name(), l, u); - query = new IndexOrDocValuesQuery(query, dvQuery); - + Query query = getDefaultQuery(pointRangeQuery, dvQuery, name(), l, u); if (context.indexSortedOnField(name())) { query = new IndexSortSortedNumericDocValuesRangeQuery(name(), l, u, query); } return query; } if (hasDocValues()) { - Query query = SortedNumericDocValuesField.newSlowRangeQuery(name(), l, u); if (context.indexSortedOnField(name())) { - query = new IndexSortSortedNumericDocValuesRangeQuery(name(), l, u, query); + dvQuery = new IndexSortSortedNumericDocValuesRangeQuery(name(), l, u, dvQuery); } - return query; + return dvQuery; } - return LongPoint.newRangeQuery(name(), l, u); + return pointRangeQuery; }); } diff --git a/server/src/main/java/org/opensearch/index/mapper/DocCountFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/DocCountFieldMapper.java index 0a461fff38fac..db4770adf6666 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DocCountFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/DocCountFieldMapper.java @@ -75,6 +75,11 @@ protected List> getParameters() { public DocCountFieldMapper build(BuilderContext context) { return new DocCountFieldMapper(); } + + @Override + public boolean isDataCubeMetricSupported() { + return true; + } } /** diff --git a/server/src/main/java/org/opensearch/index/mapper/DocumentParser.java b/server/src/main/java/org/opensearch/index/mapper/DocumentParser.java index b03026d560dbf..50ff816695156 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DocumentParser.java +++ b/server/src/main/java/org/opensearch/index/mapper/DocumentParser.java @@ -661,6 +661,17 @@ private static void parseNonDynamicArray(ParseContext context, ObjectMapper mapp throws IOException { XContentParser parser = context.parser(); XContentParser.Token token; + // block array values for composite index fields + if (context.indexSettings().isCompositeIndex() && context.mapperService().isFieldPartOfCompositeIndex(arrayFieldName)) { + throw new MapperParsingException( + String.format( + Locale.ROOT, + "object mapping for [%s] with array for [%s] cannot be accepted as field is also part of composite index 
mapping which does not accept arrays", + mapper.name(), + arrayFieldName + ) + ); + } final String[] paths = splitAndValidatePath(lastFieldName); while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { if (token == XContentParser.Token.START_OBJECT) { diff --git a/server/src/main/java/org/opensearch/index/mapper/FieldValueConverter.java b/server/src/main/java/org/opensearch/index/mapper/FieldValueConverter.java new file mode 100644 index 0000000000000..cb16dba6ab640 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/mapper/FieldValueConverter.java @@ -0,0 +1,30 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.mapper; + +/** + *

+ * Implementations of this interface should define the conversion logic + * from a sortable long value to a double value, taking into account any necessary + * scaling, normalization, or other transformations required by the specific + * field type. + * + * @opensearch.experimental + */ +public interface FieldValueConverter { + + /** + * Converts the Lucene representation of the value as a long to an actual double representation + * + * @param value the long value to be converted + * @return the corresponding double value + */ + double toDoubleValue(long value); + +} diff --git a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java index 2116ac522b705..11ff601b3fd6d 100644 --- a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java @@ -392,6 +392,9 @@ public Query termsQuery(List values, QueryShardContext context) { failIfNotIndexedAndNoDocValues(); // has index and doc_values enabled if (isSearchable() && hasDocValues()) { + if (!context.keywordFieldIndexOrDocValuesEnabled()) { + return super.termsQuery(values, context); + } BytesRef[] bytesRefs = new BytesRef[values.size()]; for (int i = 0; i < bytesRefs.length; i++) { bytesRefs[i] = indexedValueForSearch(values.get(i)); @@ -429,6 +432,9 @@ public Query prefixQuery( } failIfNotIndexedAndNoDocValues(); if (isSearchable() && hasDocValues()) { + if (!context.keywordFieldIndexOrDocValuesEnabled()) { + return super.prefixQuery(value, method, caseInsensitive, context); + } Query indexQuery = super.prefixQuery(value, method, caseInsensitive, context); Query dvQuery = super.prefixQuery(value, MultiTermQuery.DOC_VALUES_REWRITE, caseInsensitive, context); return new IndexOrDocValuesQuery(indexQuery, dvQuery); @@ -461,6 +467,9 @@ public Query regexpQuery( } failIfNotIndexedAndNoDocValues(); if (isSearchable() && hasDocValues()) { + if (!context.keywordFieldIndexOrDocValuesEnabled()) { + return super.regexpQuery(value, syntaxFlags, matchFlags, maxDeterminizedStates, method, context); + } Query indexQuery = super.regexpQuery(value, syntaxFlags, matchFlags, maxDeterminizedStates, method, context); Query dvQuery = super.regexpQuery( value, @@ -549,6 +558,9 @@ public Query fuzzyQuery( ); } if (isSearchable() && hasDocValues()) { + if (!context.keywordFieldIndexOrDocValuesEnabled()) { + return super.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, method, context); + } Query indexQuery = super.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, method, context); Query dvQuery = super.fuzzyQuery( value, @@ -591,6 +603,9 @@ public Query wildcardQuery( // wildcard // query text if (isSearchable() && hasDocValues()) { + if (!context.keywordFieldIndexOrDocValuesEnabled()) { + return super.wildcardQuery(value, method, caseInsensitive, true, context); + } Query indexQuery = super.wildcardQuery(value, method, caseInsensitive, true, context); Query dvQuery = super.wildcardQuery(value, MultiTermQuery.DOC_VALUES_REWRITE, caseInsensitive, true, context); return new IndexOrDocValuesQuery(indexQuery, dvQuery); diff --git a/server/src/main/java/org/opensearch/index/mapper/Mapper.java b/server/src/main/java/org/opensearch/index/mapper/Mapper.java index 46a5050d4fc18..87fdd8266a795 100644 --- a/server/src/main/java/org/opensearch/index/mapper/Mapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/Mapper.java @@ -100,7 
+100,7 @@ public Version indexCreatedVersionOrDefault(@Nullable Version defaultValue) { * @opensearch.api */ @PublicApi(since = "1.0.0") - public abstract static class Builder { + public abstract static class Builder implements MapperBuilderProperties { public String name; diff --git a/server/src/main/java/org/opensearch/index/mapper/MapperBuilderProperties.java b/server/src/main/java/org/opensearch/index/mapper/MapperBuilderProperties.java new file mode 100644 index 0000000000000..ce7b8f28b0b29 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/mapper/MapperBuilderProperties.java @@ -0,0 +1,41 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.mapper; + +import org.opensearch.index.compositeindex.datacube.DimensionType; + +import java.util.Optional; + +/** + * An interface that defines properties for MapperBuilder implementations. + * + * @opensearch.experimental + */ +public interface MapperBuilderProperties { + + /** + * Indicates whether the implementation supports data cube dimensions. + * + * @return an Optional containing the supported DimensionType if data cube dimensions are supported, + * or an empty Optional if not supported + */ + default Optional getSupportedDataCubeDimensionType() { + return Optional.empty(); + } + + /** + * Indicates whether the implementation supports data cube metrics. + * + * @return true if data cube metrics are supported, false otherwise + */ + default boolean isDataCubeMetricSupported() { + return false; + } + +} diff --git a/server/src/main/java/org/opensearch/index/mapper/MapperService.java b/server/src/main/java/org/opensearch/index/mapper/MapperService.java index 530a3092a5aa7..84b0b1d69432d 100644 --- a/server/src/main/java/org/opensearch/index/mapper/MapperService.java +++ b/server/src/main/java/org/opensearch/index/mapper/MapperService.java @@ -227,6 +227,7 @@ public enum MergeReason { private final BooleanSupplier idFieldDataEnabled; private volatile Set compositeMappedFieldTypes; + private volatile Set fieldsPartOfCompositeMappings; public MapperService( IndexSettings indexSettings, @@ -547,9 +548,18 @@ private synchronized Map internalMerge(DocumentMapper ma // initialize composite fields post merge this.compositeMappedFieldTypes = getCompositeFieldTypesFromMapper(); + buildCompositeFieldLookup(); return results; } + private void buildCompositeFieldLookup() { + Set fieldsPartOfCompositeMappings = new HashSet<>(); + for (CompositeMappedFieldType fieldType : compositeMappedFieldTypes) { + fieldsPartOfCompositeMappings.addAll(fieldType.fields()); + } + this.fieldsPartOfCompositeMappings = fieldsPartOfCompositeMappings; + } + private boolean assertSerialization(DocumentMapper mapper) { // capture the source now, it may change due to concurrent parsing final CompressedXContent mappingSource = mapper.mappingSource(); @@ -676,6 +686,10 @@ private Set getCompositeFieldTypesFromMapper() { return compositeMappedFieldTypes; } + public boolean isFieldPartOfCompositeIndex(String field) { + return fieldsPartOfCompositeMappings.contains(field); + } + public ObjectMapper getObjectMapper(String name) { return this.mapper == null ? 
null : this.mapper.objectMappers().get(name); } diff --git a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java index 9286b5c64b5f2..43e975f95757b 100644 --- a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java @@ -63,6 +63,7 @@ import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.core.xcontent.XContentParser.Token; +import org.opensearch.index.compositeindex.datacube.DimensionType; import org.opensearch.index.document.SortedUnsignedLongDocValuesRangeQuery; import org.opensearch.index.document.SortedUnsignedLongDocValuesSetQuery; import org.opensearch.index.fielddata.IndexFieldData; @@ -84,6 +85,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Optional; import java.util.function.BiFunction; import java.util.function.Function; import java.util.function.Supplier; @@ -172,6 +174,22 @@ public NumberFieldMapper build(BuilderContext context) { MappedFieldType ft = new NumberFieldType(buildFullName(context), this); return new NumberFieldMapper(name, ft, multiFieldsBuilder.build(this, context), copyTo.build(), this); } + + @Override + public Optional getSupportedDataCubeDimensionType() { + + // unsigned long is not supported as dimension for star tree + if (type.numericType.equals(NumericType.UNSIGNED_LONG)) { + return Optional.empty(); + } + + return Optional.of(DimensionType.NUMERIC); + } + + @Override + public boolean isDataCubeMetricSupported() { + return true; + } } /** @@ -179,7 +197,7 @@ public NumberFieldMapper build(BuilderContext context) { * * @opensearch.internal */ - public enum NumberType implements NumericPointEncoder { + public enum NumberType implements NumericPointEncoder, FieldValueConverter { HALF_FLOAT("half_float", NumericType.HALF_FLOAT) { @Override public Float parse(Object value, boolean coerce) { @@ -209,6 +227,11 @@ public byte[] encodePoint(Number value) { return point; } + @Override + public double toDoubleValue(long value) { + return HalfFloatPoint.sortableShortToHalfFloat((short) value); + } + @Override public Float parse(XContentParser parser, boolean coerce) throws IOException { float parsed = parser.floatValue(coerce); @@ -353,6 +376,11 @@ public byte[] encodePoint(Number value) { return point; } + @Override + public double toDoubleValue(long value) { + return NumericUtils.sortableIntToFloat((int) value); + } + @Override public Float parse(XContentParser parser, boolean coerce) throws IOException { float parsed = parser.floatValue(coerce); @@ -486,6 +514,11 @@ public byte[] encodePoint(Number value) { return point; } + @Override + public double toDoubleValue(long value) { + return NumericUtils.sortableLongToDouble(value); + } + @Override public Double parse(XContentParser parser, boolean coerce) throws IOException { double parsed = parser.doubleValue(coerce); @@ -618,6 +651,11 @@ public byte[] encodePoint(Number value) { return point; } + @Override + public double toDoubleValue(long value) { + return objectToDouble(value); + } + @Override public Short parse(XContentParser parser, boolean coerce) throws IOException { int value = parser.intValue(coerce); @@ -697,6 +735,11 @@ public byte[] encodePoint(Number value) { return point; } + @Override + public double toDoubleValue(long value) { + return (double) value; + } + @Override public Short parse(XContentParser 
parser, boolean coerce) throws IOException { return parser.shortValue(coerce); @@ -772,6 +815,11 @@ public byte[] encodePoint(Number value) { return point; } + @Override + public double toDoubleValue(long value) { + return (double) value; + } + @Override public Integer parse(XContentParser parser, boolean coerce) throws IOException { return parser.intValue(coerce); @@ -943,6 +991,11 @@ public byte[] encodePoint(Number value) { return point; } + @Override + public double toDoubleValue(long value) { + return (double) value; + } + @Override public Long parse(XContentParser parser, boolean coerce) throws IOException { return parser.longValue(coerce); @@ -1070,6 +1123,11 @@ public byte[] encodePoint(Number value) { return point; } + @Override + public double toDoubleValue(long value) { + return Numbers.unsignedLongToDouble(value); + } + @Override public BigInteger parse(XContentParser parser, boolean coerce) throws IOException { return parser.bigIntegerValue(coerce); @@ -1182,12 +1240,16 @@ Number valueForSearch(String value) { this.parser = new TypeParser((n, c) -> new Builder(n, this, c.getSettings())); } - /** Get the associated type name. */ + /** + * Get the associated type name. + */ public final String typeName() { return name; } - /** Get the associated numeric type */ + /** + * Get the associated numeric type + */ public final NumericType numericType() { return numericType; } @@ -1486,7 +1548,7 @@ protected String toString(byte[] value) { * * @opensearch.internal */ - public static class NumberFieldType extends SimpleMappedFieldType implements NumericPointEncoder { + public static class NumberFieldType extends SimpleMappedFieldType implements NumericPointEncoder, FieldValueConverter { private final NumberType type; private final boolean coerce; @@ -1652,6 +1714,11 @@ public Number parsePoint(byte[] value) { public byte[] encodePoint(Number value) { return type.encodePoint(value); } + + @Override + public double toDoubleValue(long value) { + return type.toDoubleValue(value); + } } private final NumberType type; diff --git a/server/src/main/java/org/opensearch/index/mapper/ObjectMapper.java b/server/src/main/java/org/opensearch/index/mapper/ObjectMapper.java index 533e6ca73d737..dd984373fc9df 100644 --- a/server/src/main/java/org/opensearch/index/mapper/ObjectMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/ObjectMapper.java @@ -440,6 +440,15 @@ protected static void parseCompositeField( + " feature flag in the JVM options" ); } + if (StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.get(parserContext.getSettings()) == false) { + throw new IllegalArgumentException( + String.format( + Locale.ROOT, + "Set '%s' as true as part of index settings to use star tree index", + StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey() + ) + ); + } Iterator> iterator = compositeNode.entrySet().iterator(); if (compositeNode.size() > StarTreeIndexSettings.STAR_TREE_MAX_FIELDS_SETTING.get(parserContext.getSettings())) { throw new IllegalArgumentException( diff --git a/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java b/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java index e52d6a621e4e8..52dab17e0b0bb 100644 --- a/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java @@ -22,6 +22,7 @@ import org.opensearch.search.lookup.SearchLookup; import java.util.ArrayList; +import java.util.HashSet; import java.util.LinkedHashSet; import java.util.LinkedList; import 
java.util.List; @@ -61,11 +62,6 @@ public ParametrizedFieldMapper.Builder getMergeBuilder() { */ public static class Builder extends ParametrizedFieldMapper.Builder { private ObjectMapper.Builder objbuilder; - private static final Set> ALLOWED_DIMENSION_MAPPER_BUILDERS = Set.of( - NumberFieldMapper.Builder.class, - DateFieldMapper.Builder.class - ); - private static final Set> ALLOWED_METRIC_MAPPER_BUILDERS = Set.of(NumberFieldMapper.Builder.class); @SuppressWarnings("unchecked") private final Parameter config = new Parameter<>(CONFIG, false, () -> null, (name, context, nodeObj) -> { @@ -155,8 +151,20 @@ private List buildDimensions(String fieldName, Map ma String.format(Locale.ROOT, "Atleast two dimensions are required to build star tree index field [%s]", fieldName) ); } + Set dimensionFieldNames = new HashSet<>(); for (Object dim : dimList) { - dimensions.add(getDimension(fieldName, dim, context)); + Dimension dimension = getDimension(fieldName, dim, context); + if (dimensionFieldNames.add(dimension.getField()) == false) { + throw new IllegalArgumentException( + String.format( + Locale.ROOT, + "Duplicate dimension [%s] present as part star tree index field [%s]", + dimension.getField(), + fieldName + ) + ); + } + dimensions.add(dimension); } } else { throw new MapperParsingException( @@ -223,6 +231,7 @@ private List buildMetrics(String fieldName, Map map, Map } if (metricsFromInput instanceof List) { List metricsList = (List) metricsFromInput; + Set metricFieldNames = new HashSet<>(); for (Object metric : metricsList) { Map metricMap = (Map) metric; String name = (String) XContentMapValues.extractValue(CompositeDataCubeFieldType.NAME, metricMap); @@ -232,7 +241,18 @@ private List buildMetrics(String fieldName, Map map, Map } metricMap.remove(CompositeDataCubeFieldType.NAME); if (objbuilder == null || objbuilder.mappersBuilders == null) { - metrics.add(getMetric(name, metricMap, context)); + Metric metricFromParser = getMetric(name, metricMap, context); + if (metricFieldNames.add(metricFromParser.getField()) == false) { + throw new IllegalArgumentException( + String.format( + Locale.ROOT, + "Duplicate metrics [%s] present as part star tree index field [%s]", + metricFromParser.getField(), + fieldName + ) + ); + } + metrics.add(metricFromParser); } else { Optional meticBuilder = findMapperBuilderByName(name, this.objbuilder.mappersBuilders); if (meticBuilder.isEmpty()) { @@ -243,7 +263,18 @@ private List buildMetrics(String fieldName, Map map, Map String.format(Locale.ROOT, "non-numeric field type is associated with star tree metric [%s]", this.name) ); } - metrics.add(getMetric(name, metricMap, context)); + Metric metricFromParser = getMetric(name, metricMap, context); + if (metricFieldNames.add(metricFromParser.getField()) == false) { + throw new IllegalArgumentException( + String.format( + Locale.ROOT, + "Duplicate metrics [%s] present as part star tree index field [%s]", + metricFromParser.getField(), + fieldName + ) + ); + } + metrics.add(metricFromParser); DocumentMapperParser.checkNoRemainingFields( metricMap, context.indexVersionCreated(), @@ -254,6 +285,28 @@ private List buildMetrics(String fieldName, Map map, Map } else { throw new MapperParsingException(String.format(Locale.ROOT, "unable to parse metrics for star tree field [%s]", this.name)); } + int numBaseMetrics = 0; + for (Metric metric : metrics) { + numBaseMetrics += metric.getBaseMetrics().size(); + } + if (numBaseMetrics > context.getSettings() + .getAsInt( + 
StarTreeIndexSettings.STAR_TREE_MAX_BASE_METRICS_SETTING.getKey(), + StarTreeIndexSettings.STAR_TREE_MAX_BASE_METRICS_DEFAULT + )) { + throw new IllegalArgumentException( + String.format( + Locale.ROOT, + "There cannot be more than [%s] base metrics for star tree field [%s]", + context.getSettings() + .getAsInt( + StarTreeIndexSettings.STAR_TREE_MAX_BASE_METRICS_SETTING.getKey(), + StarTreeIndexSettings.STAR_TREE_MAX_BASE_METRICS_DEFAULT + ), + fieldName + ) + ); + } Metric docCountMetric = new Metric(DocCountFieldMapper.NAME, List.of(MetricStat.DOC_COUNT)); metrics.add(docCountMetric); return metrics; @@ -318,11 +371,11 @@ protected List> getParameters() { } private static boolean isBuilderAllowedForDimension(Mapper.Builder builder) { - return ALLOWED_DIMENSION_MAPPER_BUILDERS.stream().anyMatch(allowedType -> allowedType.isInstance(builder)); + return builder.getSupportedDataCubeDimensionType().isPresent(); } private static boolean isBuilderAllowedForMetric(Mapper.Builder builder) { - return ALLOWED_METRIC_MAPPER_BUILDERS.stream().anyMatch(allowedType -> allowedType.isInstance(builder)); + return builder.isDataCubeMetricSupported(); } private Optional findMapperBuilderByName(String field, List mappersBuilders) { diff --git a/server/src/main/java/org/opensearch/index/mapper/WildcardFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/WildcardFieldMapper.java index 4998a822917b4..e43e3bda692e7 100644 --- a/server/src/main/java/org/opensearch/index/mapper/WildcardFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/WildcardFieldMapper.java @@ -40,7 +40,7 @@ import org.apache.lucene.util.automaton.RegExp; import org.opensearch.common.lucene.BytesRefs; import org.opensearch.common.lucene.Lucene; -import org.opensearch.common.regex.Regex; +import org.opensearch.common.lucene.search.AutomatonQueries; import org.opensearch.common.unit.Fuzziness; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.index.analysis.IndexAnalyzers; @@ -430,22 +430,27 @@ public Query wildcardQuery(String value, MultiTermQuery.RewriteMethod method, bo finalValue = value; } Predicate matchPredicate; - if (value.contains("?")) { - Automaton automaton = WildcardQuery.toAutomaton(new Term(name(), finalValue)); - CompiledAutomaton compiledAutomaton = new CompiledAutomaton(automaton); + Automaton automaton = WildcardQuery.toAutomaton(new Term(name(), finalValue)); + CompiledAutomaton compiledAutomaton = new CompiledAutomaton(automaton); + if (compiledAutomaton.type == CompiledAutomaton.AUTOMATON_TYPE.SINGLE) { + // when type equals SINGLE, #compiledAutomaton.runAutomaton is null matchPredicate = s -> { if (caseInsensitive) { s = s.toLowerCase(Locale.ROOT); } - BytesRef valueBytes = BytesRefs.toBytesRef(s); - return compiledAutomaton.runAutomaton.run(valueBytes.bytes, valueBytes.offset, valueBytes.length); + return s.equals(finalValue); }; + } else if (compiledAutomaton.type == CompiledAutomaton.AUTOMATON_TYPE.ALL) { + return existsQuery(context); + } else if (compiledAutomaton.type == CompiledAutomaton.AUTOMATON_TYPE.NONE) { + return new MatchNoDocsQuery("Wildcard expression matches nothing"); } else { matchPredicate = s -> { if (caseInsensitive) { s = s.toLowerCase(Locale.ROOT); } - return Regex.simpleMatch(finalValue, s); + BytesRef valueBytes = BytesRefs.toBytesRef(s); + return compiledAutomaton.runAutomaton.run(valueBytes.bytes, valueBytes.offset, valueBytes.length); }; } @@ -460,7 +465,7 @@ public Query wildcardQuery(String value, MultiTermQuery.RewriteMethod 
method, bo return existsQuery(context); } } else { - approximation = matchAllTermsQuery(name(), requiredNGrams); + approximation = matchAllTermsQuery(name(), requiredNGrams, caseInsensitive); } return new WildcardMatchingQuery(name(), approximation, matchPredicate, value, context, this); } @@ -468,11 +473,18 @@ public Query wildcardQuery(String value, MultiTermQuery.RewriteMethod method, bo // Package-private for testing static Set getRequiredNGrams(String value) { Set terms = new HashSet<>(); + + if (value.isEmpty()) { + return terms; + } + int pos = 0; + String rawSequence = null; String currentSequence = null; if (!value.startsWith("?") && !value.startsWith("*")) { // Can add prefix term - currentSequence = getNonWildcardSequence(value, 0); + rawSequence = getNonWildcardSequence(value, 0); + currentSequence = performEscape(rawSequence); if (currentSequence.length() == 1) { terms.add(new String(new char[] { 0, currentSequence.charAt(0) })); } else { @@ -480,10 +492,11 @@ static Set getRequiredNGrams(String value) { } } else { pos = findNonWildcardSequence(value, pos); - currentSequence = getNonWildcardSequence(value, pos); + rawSequence = getNonWildcardSequence(value, pos); } while (pos < value.length()) { - boolean isEndOfValue = pos + currentSequence.length() == value.length(); + boolean isEndOfValue = pos + rawSequence.length() == value.length(); + currentSequence = performEscape(rawSequence); if (!currentSequence.isEmpty() && currentSequence.length() < 3 && !isEndOfValue && pos > 0) { // If this is a prefix or suffix of length < 3, then we already have a longer token including the anchor. terms.add(currentSequence); @@ -502,8 +515,8 @@ static Set getRequiredNGrams(String value) { terms.add(new String(new char[] { a, b, 0 })); } } - pos = findNonWildcardSequence(value, pos + currentSequence.length()); - currentSequence = getNonWildcardSequence(value, pos); + pos = findNonWildcardSequence(value, pos + rawSequence.length()); + rawSequence = getNonWildcardSequence(value, pos); } return terms; } @@ -511,7 +524,7 @@ static Set getRequiredNGrams(String value) { private static String getNonWildcardSequence(String value, int startFrom) { for (int i = startFrom; i < value.length(); i++) { char c = value.charAt(i); - if (c == '?' || c == '*') { + if ((c == '?' 
|| c == '*') && (i == 0 || value.charAt(i - 1) != '\\')) { return value.substring(startFrom, i); } } @@ -529,6 +542,22 @@ private static int findNonWildcardSequence(String value, int startFrom) { return value.length(); } + private static String performEscape(String str) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < str.length(); i++) { + if (str.charAt(i) == '\\' && (i + 1) < str.length()) { + char c = str.charAt(i + 1); + if (c == '*' || c == '?') { + i++; + } + } + sb.append(str.charAt(i)); + } + assert !sb.toString().contains("\\*"); + assert !sb.toString().contains("\\?"); + return sb.toString(); + } + @Override public Query regexpQuery( String value, @@ -635,7 +664,7 @@ public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower @Override public Query termQueryCaseInsensitive(Object value, QueryShardContext context) { - return wildcardQuery(value.toString(), MultiTermQuery.CONSTANT_SCORE_REWRITE, true, context); + return wildcardQuery(BytesRefs.toString(value), MultiTermQuery.CONSTANT_SCORE_REWRITE, true, context); } @Override @@ -649,8 +678,8 @@ public Query termsQuery(List values, QueryShardContext context) { Set expectedValues = new HashSet<>(); StringBuilder pattern = new StringBuilder(); for (Object value : values) { - String stringVal = value.toString(); - builder.add(matchAllTermsQuery(name(), getRequiredNGrams(stringVal)), BooleanClause.Occur.SHOULD); + String stringVal = BytesRefs.toString(value); + builder.add(matchAllTermsQuery(name(), getRequiredNGrams(stringVal), false), BooleanClause.Occur.SHOULD); expectedValues.add(stringVal); if (pattern.length() > 0) { pattern.append('|'); @@ -660,10 +689,16 @@ public Query termsQuery(List values, QueryShardContext context) { return new WildcardMatchingQuery(name(), builder.build(), expectedValues::contains, pattern.toString(), context, this); } - private static BooleanQuery matchAllTermsQuery(String fieldName, Set terms) { + private static BooleanQuery matchAllTermsQuery(String fieldName, Set terms, boolean caseInsensitive) { BooleanQuery.Builder matchAllTermsBuilder = new BooleanQuery.Builder(); + Query query; for (String term : terms) { - matchAllTermsBuilder.add(new TermQuery(new Term(fieldName, term)), BooleanClause.Occur.FILTER); + if (caseInsensitive) { + query = AutomatonQueries.caseInsensitiveTermQuery(new Term(fieldName, term)); + } else { + query = new TermQuery(new Term(fieldName, term)); + } + matchAllTermsBuilder.add(query, BooleanClause.Occur.FILTER); } return matchAllTermsBuilder.build(); } diff --git a/server/src/main/java/org/opensearch/index/query/AbstractGeometryQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/AbstractGeometryQueryBuilder.java index 9fb857e33bfee..3823b524df6fe 100644 --- a/server/src/main/java/org/opensearch/index/query/AbstractGeometryQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/AbstractGeometryQueryBuilder.java @@ -67,7 +67,9 @@ * * @opensearch.internal */ -public abstract class AbstractGeometryQueryBuilder> extends AbstractQueryBuilder { +public abstract class AbstractGeometryQueryBuilder> extends AbstractQueryBuilder + implements + WithFieldName { public static final String DEFAULT_SHAPE_INDEX_NAME = "shapes"; public static final String DEFAULT_SHAPE_FIELD_NAME = "shape"; @@ -218,6 +220,7 @@ protected void doWriteTo(StreamOutput out) throws IOException { /** * @return the name of the field that will be queried */ + @Override public String fieldName() { return fieldName; } diff --git 
a/server/src/main/java/org/opensearch/index/query/BaseTermQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/BaseTermQueryBuilder.java index c4d9437a60c75..deb36a26e4c86 100644 --- a/server/src/main/java/org/opensearch/index/query/BaseTermQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/BaseTermQueryBuilder.java @@ -47,7 +47,7 @@ * * @opensearch.internal */ -public abstract class BaseTermQueryBuilder> extends AbstractQueryBuilder { +public abstract class BaseTermQueryBuilder> extends AbstractQueryBuilder implements WithFieldName { public static final ParseField VALUE_FIELD = new ParseField("value"); @@ -153,6 +153,7 @@ protected void doWriteTo(StreamOutput out) throws IOException { } /** Returns the field name used in this query. */ + @Override public String fieldName() { return this.fieldName; } diff --git a/server/src/main/java/org/opensearch/index/query/CommonTermsQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/CommonTermsQueryBuilder.java index 652cae86da0dc..24b10851cbe10 100644 --- a/server/src/main/java/org/opensearch/index/query/CommonTermsQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/CommonTermsQueryBuilder.java @@ -67,7 +67,7 @@ * @opensearch.internal */ @Deprecated -public class CommonTermsQueryBuilder extends AbstractQueryBuilder { +public class CommonTermsQueryBuilder extends AbstractQueryBuilder implements WithFieldName { public static final String COMMON_TERMS_QUERY_DEPRECATION_MSG = "[match] query which can efficiently " + "skip blocks of documents if the total number of hits is not tracked"; @@ -152,6 +152,7 @@ protected void doWriteTo(StreamOutput out) throws IOException { out.writeFloat(cutoffFrequency); } + @Override public String fieldName() { return this.fieldName; } diff --git a/server/src/main/java/org/opensearch/index/query/DistanceFeatureQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/DistanceFeatureQueryBuilder.java index 1d9f0479c6b17..e4f3d8556158a 100644 --- a/server/src/main/java/org/opensearch/index/query/DistanceFeatureQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/DistanceFeatureQueryBuilder.java @@ -57,7 +57,7 @@ * * @opensearch.internal */ -public class DistanceFeatureQueryBuilder extends AbstractQueryBuilder { +public class DistanceFeatureQueryBuilder extends AbstractQueryBuilder implements WithFieldName { public static final String NAME = "distance_feature"; private static final ParseField FIELD_FIELD = new ParseField("field"); @@ -136,7 +136,8 @@ protected Query doToQuery(QueryShardContext context) throws IOException { return fieldType.distanceFeatureQuery(origin.origin(), pivot, 1.0f, context); } - String fieldName() { + @Override + public String fieldName() { return field; } diff --git a/server/src/main/java/org/opensearch/index/query/ExistsQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/ExistsQueryBuilder.java index 6ae40fe1b1e64..f6b24e98f0f71 100644 --- a/server/src/main/java/org/opensearch/index/query/ExistsQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/ExistsQueryBuilder.java @@ -59,7 +59,7 @@ * * @opensearch.internal */ -public class ExistsQueryBuilder extends AbstractQueryBuilder { +public class ExistsQueryBuilder extends AbstractQueryBuilder implements WithFieldName { public static final String NAME = "exists"; public static final ParseField FIELD_FIELD = new ParseField("field"); @@ -89,6 +89,7 @@ protected void doWriteTo(StreamOutput out) throws IOException { /** * 
@return the field name that has to exist for this query to match */ + @Override public String fieldName() { return this.fieldName; } diff --git a/server/src/main/java/org/opensearch/index/query/FieldMaskingSpanQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/FieldMaskingSpanQueryBuilder.java index 4e73d87b07b7a..7846098b55071 100644 --- a/server/src/main/java/org/opensearch/index/query/FieldMaskingSpanQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/FieldMaskingSpanQueryBuilder.java @@ -53,7 +53,10 @@ * * @opensearch.internal */ -public class FieldMaskingSpanQueryBuilder extends AbstractQueryBuilder implements SpanQueryBuilder { +public class FieldMaskingSpanQueryBuilder extends AbstractQueryBuilder + implements + SpanQueryBuilder, + WithFieldName { public static final String NAME = "span_field_masking"; public static final ParseField SPAN_FIELD_MASKING_FIELD = new ParseField(NAME, "field_masking_span"); @@ -100,6 +103,7 @@ protected void doWriteTo(StreamOutput out) throws IOException { /** * @return the field name for this query */ + @Override public String fieldName() { return this.fieldName; } diff --git a/server/src/main/java/org/opensearch/index/query/GeoBoundingBoxQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/GeoBoundingBoxQueryBuilder.java index 1fade8601e2a6..52e96159cf06e 100644 --- a/server/src/main/java/org/opensearch/index/query/GeoBoundingBoxQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/GeoBoundingBoxQueryBuilder.java @@ -66,7 +66,7 @@ * * @opensearch.internal * */ -public class GeoBoundingBoxQueryBuilder extends AbstractQueryBuilder { +public class GeoBoundingBoxQueryBuilder extends AbstractQueryBuilder implements WithFieldName { public static final String NAME = "geo_bounding_box"; /** Default type for executing this query (memory as of this writing). */ @@ -263,6 +263,7 @@ public GeoExecType type() { } /** Returns the name of the field to base the bounding box computation on. */ + @Override public String fieldName() { return this.fieldName; } diff --git a/server/src/main/java/org/opensearch/index/query/GeoDistanceQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/GeoDistanceQueryBuilder.java index 8d126f19a204c..79377ba01701f 100644 --- a/server/src/main/java/org/opensearch/index/query/GeoDistanceQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/GeoDistanceQueryBuilder.java @@ -63,7 +63,7 @@ * * @opensearch.internal */ -public class GeoDistanceQueryBuilder extends AbstractQueryBuilder { +public class GeoDistanceQueryBuilder extends AbstractQueryBuilder implements WithFieldName { public static final String NAME = "geo_distance"; /** Default for distance unit computation. */ @@ -129,6 +129,7 @@ protected void doWriteTo(StreamOutput out) throws IOException { } /** Name of the field this query is operating on. 
*/ + @Override public String fieldName() { return this.fieldName; } diff --git a/server/src/main/java/org/opensearch/index/query/GeoPolygonQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/GeoPolygonQueryBuilder.java index 47eafa3893384..e120f04ed9351 100644 --- a/server/src/main/java/org/opensearch/index/query/GeoPolygonQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/GeoPolygonQueryBuilder.java @@ -61,7 +61,7 @@ * * @opensearch.internal */ -public class GeoPolygonQueryBuilder extends AbstractQueryBuilder { +public class GeoPolygonQueryBuilder extends AbstractQueryBuilder implements WithFieldName { public static final String NAME = "geo_polygon"; /** @@ -131,6 +131,7 @@ protected void doWriteTo(StreamOutput out) throws IOException { out.writeBoolean(ignoreUnmapped); } + @Override public String fieldName() { return fieldName; } diff --git a/server/src/main/java/org/opensearch/index/query/MatchBoolPrefixQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/MatchBoolPrefixQueryBuilder.java index 7ceb17203e837..2c2c2de943e2f 100644 --- a/server/src/main/java/org/opensearch/index/query/MatchBoolPrefixQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/MatchBoolPrefixQueryBuilder.java @@ -61,7 +61,7 @@ * * @opensearch.internal */ -public class MatchBoolPrefixQueryBuilder extends AbstractQueryBuilder { +public class MatchBoolPrefixQueryBuilder extends AbstractQueryBuilder implements WithFieldName { public static final String NAME = "match_bool_prefix"; @@ -127,6 +127,7 @@ protected void doWriteTo(StreamOutput out) throws IOException { } /** Returns the field name used in this query. */ + @Override public String fieldName() { return this.fieldName; } diff --git a/server/src/main/java/org/opensearch/index/query/MatchPhrasePrefixQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/MatchPhrasePrefixQueryBuilder.java index d61a5957627ea..3337a31658ca1 100644 --- a/server/src/main/java/org/opensearch/index/query/MatchPhrasePrefixQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/MatchPhrasePrefixQueryBuilder.java @@ -52,7 +52,7 @@ * * @opensearch.internal */ -public class MatchPhrasePrefixQueryBuilder extends AbstractQueryBuilder { +public class MatchPhrasePrefixQueryBuilder extends AbstractQueryBuilder implements WithFieldName { public static final String NAME = "match_phrase_prefix"; public static final ParseField MAX_EXPANSIONS_FIELD = new ParseField("max_expansions"); public static final ParseField ZERO_TERMS_QUERY_FIELD = new ParseField("zero_terms_query"); @@ -104,6 +104,7 @@ protected void doWriteTo(StreamOutput out) throws IOException { } /** Returns the field name used in this query. 
*/ + @Override public String fieldName() { return this.fieldName; } diff --git a/server/src/main/java/org/opensearch/index/query/MatchPhraseQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/MatchPhraseQueryBuilder.java index 6cdf6c6600304..97e03d53d38f1 100644 --- a/server/src/main/java/org/opensearch/index/query/MatchPhraseQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/MatchPhraseQueryBuilder.java @@ -52,7 +52,7 @@ * * @opensearch.internal */ -public class MatchPhraseQueryBuilder extends AbstractQueryBuilder { +public class MatchPhraseQueryBuilder extends AbstractQueryBuilder implements WithFieldName { public static final String NAME = "match_phrase"; public static final ParseField SLOP_FIELD = new ParseField("slop"); public static final ParseField ZERO_TERMS_QUERY_FIELD = new ParseField("zero_terms_query"); @@ -100,6 +100,7 @@ protected void doWriteTo(StreamOutput out) throws IOException { } /** Returns the field name used in this query. */ + @Override public String fieldName() { return this.fieldName; } diff --git a/server/src/main/java/org/opensearch/index/query/MatchQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/MatchQueryBuilder.java index 5e9e6a3660e76..593077d18951e 100644 --- a/server/src/main/java/org/opensearch/index/query/MatchQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/MatchQueryBuilder.java @@ -56,7 +56,7 @@ * * @opensearch.internal */ -public class MatchQueryBuilder extends AbstractQueryBuilder { +public class MatchQueryBuilder extends AbstractQueryBuilder implements WithFieldName { private static final String CUTOFF_FREQUENCY_DEPRECATION_MSG = "you can omit this option, " + "the [match] query can skip block of documents efficiently if the total number of hits is not tracked"; @@ -171,6 +171,7 @@ protected void doWriteTo(StreamOutput out) throws IOException { } /** Returns the field name used in this query. */ + @Override public String fieldName() { return this.fieldName; } diff --git a/server/src/main/java/org/opensearch/index/query/MultiTermQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/MultiTermQueryBuilder.java index b8e88da4741bb..f854df79f1ee8 100644 --- a/server/src/main/java/org/opensearch/index/query/MultiTermQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/MultiTermQueryBuilder.java @@ -36,9 +36,4 @@ * * @opensearch.internal */ -public interface MultiTermQueryBuilder extends QueryBuilder { - /** - * Get the field name for this query. 
- */ - String fieldName(); -} +public interface MultiTermQueryBuilder extends QueryBuilder, WithFieldName {} diff --git a/server/src/main/java/org/opensearch/index/query/QueryShardContext.java b/server/src/main/java/org/opensearch/index/query/QueryShardContext.java index 91313092d8d28..bccead2b029d0 100644 --- a/server/src/main/java/org/opensearch/index/query/QueryShardContext.java +++ b/server/src/main/java/org/opensearch/index/query/QueryShardContext.java @@ -125,6 +125,7 @@ public class QueryShardContext extends QueryRewriteContext { private final ValuesSourceRegistry valuesSourceRegistry; private BitSetProducer parentFilter; private DerivedFieldResolver derivedFieldResolver; + private boolean keywordIndexOrDocValuesEnabled; public QueryShardContext( int shardId, @@ -208,7 +209,55 @@ public QueryShardContext( ), allowExpensiveQueries, valuesSourceRegistry, - validate + validate, + false + ); + } + + public QueryShardContext( + int shardId, + IndexSettings indexSettings, + BigArrays bigArrays, + BitsetFilterCache bitsetFilterCache, + TriFunction, IndexFieldData> indexFieldDataLookup, + MapperService mapperService, + SimilarityService similarityService, + ScriptService scriptService, + NamedXContentRegistry xContentRegistry, + NamedWriteableRegistry namedWriteableRegistry, + Client client, + IndexSearcher searcher, + LongSupplier nowInMillis, + String clusterAlias, + Predicate indexNameMatcher, + BooleanSupplier allowExpensiveQueries, + ValuesSourceRegistry valuesSourceRegistry, + boolean validate, + boolean keywordIndexOrDocValuesEnabled + ) { + this( + shardId, + indexSettings, + bigArrays, + bitsetFilterCache, + indexFieldDataLookup, + mapperService, + similarityService, + scriptService, + xContentRegistry, + namedWriteableRegistry, + client, + searcher, + nowInMillis, + indexNameMatcher, + new Index( + RemoteClusterAware.buildRemoteIndexName(clusterAlias, indexSettings.getIndex().getName()), + indexSettings.getIndex().getUUID() + ), + allowExpensiveQueries, + valuesSourceRegistry, + validate, + keywordIndexOrDocValuesEnabled ); } @@ -231,7 +280,8 @@ public QueryShardContext(QueryShardContext source) { source.fullyQualifiedIndex, source.allowExpensiveQueries, source.valuesSourceRegistry, - source.validate() + source.validate(), + source.keywordIndexOrDocValuesEnabled ); } @@ -253,7 +303,8 @@ private QueryShardContext( Index fullyQualifiedIndex, BooleanSupplier allowExpensiveQueries, ValuesSourceRegistry valuesSourceRegistry, - boolean validate + boolean validate, + boolean keywordIndexOrDocValuesEnabled ) { super(xContentRegistry, namedWriteableRegistry, client, nowInMillis, validate); this.shardId = shardId; @@ -277,6 +328,7 @@ private QueryShardContext( emptyList(), indexSettings.isDerivedFieldAllowed() ); + this.keywordIndexOrDocValuesEnabled = keywordIndexOrDocValuesEnabled; } private void reset() { @@ -414,6 +466,14 @@ public void setDerivedFieldResolver(DerivedFieldResolver derivedFieldResolver) { this.derivedFieldResolver = derivedFieldResolver; } + public boolean keywordFieldIndexOrDocValuesEnabled() { + return keywordIndexOrDocValuesEnabled; + } + + public void setKeywordFieldIndexOrDocValuesEnabled(boolean keywordIndexOrDocValuesEnabled) { + this.keywordIndexOrDocValuesEnabled = keywordIndexOrDocValuesEnabled; + } + public void setAllowUnmappedFields(boolean allowUnmappedFields) { this.allowUnmappedFields = allowUnmappedFields; } diff --git a/server/src/main/java/org/opensearch/index/query/SpanNearQueryBuilder.java 
b/server/src/main/java/org/opensearch/index/query/SpanNearQueryBuilder.java index 30a1c29c29126..179673f500a92 100644 --- a/server/src/main/java/org/opensearch/index/query/SpanNearQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/SpanNearQueryBuilder.java @@ -322,7 +322,7 @@ public void visit(QueryBuilderVisitor visitor) { * * @opensearch.internal */ - public static class SpanGapQueryBuilder implements SpanQueryBuilder { + public static class SpanGapQueryBuilder implements SpanQueryBuilder, WithFieldName { public static final String NAME = "span_gap"; /** Name of field to match against. */ @@ -358,6 +358,7 @@ public SpanGapQueryBuilder(StreamInput in) throws IOException { /** * @return fieldName The name of the field */ + @Override public String fieldName() { return fieldName; } diff --git a/server/src/main/java/org/opensearch/index/query/TermsQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/TermsQueryBuilder.java index 4b92d6a1f5460..dbd141e00f81c 100644 --- a/server/src/main/java/org/opensearch/index/query/TermsQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/TermsQueryBuilder.java @@ -79,7 +79,7 @@ * * @opensearch.internal */ -public class TermsQueryBuilder extends AbstractQueryBuilder { +public class TermsQueryBuilder extends AbstractQueryBuilder implements WithFieldName { public static final String NAME = "terms"; private final String fieldName; @@ -269,6 +269,7 @@ protected void doWriteTo(StreamOutput out) throws IOException { } } + @Override public String fieldName() { return this.fieldName; } diff --git a/server/src/main/java/org/opensearch/index/query/WithFieldName.java b/server/src/main/java/org/opensearch/index/query/WithFieldName.java new file mode 100644 index 0000000000000..adfd789ac3707 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/query/WithFieldName.java @@ -0,0 +1,21 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.query; + +/** + * Interface for classes with a fieldName method + * + * @opensearch.internal + */ +public interface WithFieldName { + /** + * Get the field name for this query. + */ + String fieldName(); +} diff --git a/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java b/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java index d3c6fc9d1f3bf..03d841d13b7f7 100644 --- a/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java +++ b/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java @@ -227,7 +227,8 @@ private RemoteRestoreResult executeRestore( .build(); } - IndexId indexId = new IndexId(indexName, updatedIndexMetadata.getIndexUUID()); + // This instance of IndexId is not related to Snapshot Restore. Hence, we are using the ctor without pathType. 
+ IndexId indexId = new IndexId(indexName, updatedIndexMetadata.getIndexUUID(), IndexId.DEFAULT_SHARD_PATH_TYPE); if (metadataFromRemoteStore == false) { Map indexShardRoutingTableMap = currentState.routingTable() diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteIndexPath.java b/server/src/main/java/org/opensearch/index/remote/RemoteIndexPath.java index 899ff16c9d607..e686bb60140c0 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteIndexPath.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteIndexPath.java @@ -20,6 +20,7 @@ import org.opensearch.index.remote.RemoteStoreEnums.PathType; import org.opensearch.index.remote.RemoteStorePathStrategy.PathInput; import org.opensearch.index.remote.RemoteStorePathStrategy.ShardDataPathInput; +import org.opensearch.indices.RemoteStoreSettings; import java.io.IOException; import java.util.Collections; @@ -68,6 +69,7 @@ public class RemoteIndexPath implements ToXContentFragment { private final Iterable basePath; private final PathType pathType; private final PathHashAlgorithm pathHashAlgorithm; + private final RemoteStoreSettings remoteStoreSettings; /** * This keeps the map of paths that would be present in the content of the index path file. For eg - It is possible @@ -82,7 +84,8 @@ public RemoteIndexPath( Iterable basePath, PathType pathType, PathHashAlgorithm pathHashAlgorithm, - Map> pathCreationMap + Map> pathCreationMap, + RemoteStoreSettings remoteStoreSettings ) { if (Objects.isNull(pathCreationMap) || Objects.isNull(pathType) @@ -119,6 +122,7 @@ public RemoteIndexPath( this.pathType = pathType; this.pathHashAlgorithm = pathHashAlgorithm; this.pathCreationMap = pathCreationMap; + this.remoteStoreSettings = remoteStoreSettings; } @Override @@ -148,6 +152,11 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws .shardId(Integer.toString(shardNo)) .dataCategory(dataCategory) .dataType(type) + .fixedPrefix( + dataCategory == TRANSLOG + ? 
remoteStoreSettings.getTranslogPathFixedPrefix() + : remoteStoreSettings.getSegmentsPathFixedPrefix() + ) .build(); builder.value(pathType.path(pathInput, pathHashAlgorithm).buildAsString()); } diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteIndexPathUploader.java b/server/src/main/java/org/opensearch/index/remote/RemoteIndexPathUploader.java index d5617bdfd94a7..5878dff03acc2 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteIndexPathUploader.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteIndexPathUploader.java @@ -25,6 +25,7 @@ import org.opensearch.gateway.remote.IndexMetadataUploadListener; import org.opensearch.gateway.remote.RemoteStateTransferException; import org.opensearch.index.remote.RemoteStoreEnums.PathType; +import org.opensearch.indices.RemoteStoreSettings; import org.opensearch.node.Node; import org.opensearch.node.remotestore.RemoteStoreNodeAttribute; import org.opensearch.repositories.RepositoriesService; @@ -79,6 +80,7 @@ public class RemoteIndexPathUploader extends IndexMetadataUploadListener { private final Settings settings; private final boolean isRemoteDataAttributePresent; private final boolean isTranslogSegmentRepoSame; + private final RemoteStoreSettings remoteStoreSettings; private final Supplier repositoriesService; private volatile TimeValue metadataUploadTimeout; @@ -89,7 +91,8 @@ public RemoteIndexPathUploader( ThreadPool threadPool, Settings settings, Supplier repositoriesService, - ClusterSettings clusterSettings + ClusterSettings clusterSettings, + RemoteStoreSettings remoteStoreSettings ) { super(threadPool, ThreadPool.Names.GENERIC); this.settings = Objects.requireNonNull(settings); @@ -100,6 +103,7 @@ public RemoteIndexPathUploader( Objects.requireNonNull(clusterSettings); metadataUploadTimeout = clusterSettings.get(GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING); clusterSettings.addSettingsUpdateConsumer(GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING, this::setMetadataUploadTimeout); + this.remoteStoreSettings = remoteStoreSettings; } @Override @@ -208,7 +212,8 @@ private void writePathToRemoteStore( basePath, pathType, hashAlgorithm, - pathCreationMap + pathCreationMap, + remoteStoreSettings ); String fileName = generateFileName(indexUUID, idxMD.getVersion(), remoteIndexPath.getVersion()); REMOTE_INDEX_PATH_FORMAT.writeAsyncWithUrgentPriority(remoteIndexPath, blobContainer, fileName, actionListener); diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteStoreEnums.java b/server/src/main/java/org/opensearch/index/remote/RemoteStoreEnums.java index b0376c97e6994..c00e1a6a96abd 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteStoreEnums.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteStoreEnums.java @@ -13,6 +13,7 @@ import org.opensearch.common.annotation.PublicApi; import org.opensearch.common.blobstore.BlobPath; import org.opensearch.common.hash.FNV1a; +import org.opensearch.core.common.Strings; import org.opensearch.index.remote.RemoteStorePathStrategy.PathInput; import java.util.HashMap; @@ -107,7 +108,11 @@ boolean requiresHashAlgorithm() { @Override public BlobPath generatePath(PathInput pathInput, PathHashAlgorithm hashAlgorithm) { assert Objects.nonNull(hashAlgorithm) : "hashAlgorithm is expected to be non-null"; - return BlobPath.cleanPath().add(hashAlgorithm.hash(pathInput)).add(pathInput.basePath()).add(pathInput.fixedSubPath()); + String fixedPrefix = pathInput.fixedPrefix(); + return BlobPath.cleanPath() + .add(Strings.isNullOrEmpty(fixedPrefix) ? 
hashAlgorithm.hash(pathInput) : fixedPrefix + hashAlgorithm.hash(pathInput)) + .add(pathInput.basePath()) + .add(pathInput.fixedSubPath()); } @Override @@ -119,7 +124,10 @@ boolean requiresHashAlgorithm() { @Override public BlobPath generatePath(PathInput pathInput, PathHashAlgorithm hashAlgorithm) { assert Objects.nonNull(hashAlgorithm) : "hashAlgorithm is expected to be non-null"; - return pathInput.basePath().add(hashAlgorithm.hash(pathInput)).add(pathInput.fixedSubPath()); + String fixedPrefix = pathInput.fixedPrefix(); + return pathInput.basePath() + .add(Strings.isNullOrEmpty(fixedPrefix) ? hashAlgorithm.hash(pathInput) : fixedPrefix + hashAlgorithm.hash(pathInput)) + .add(pathInput.fixedSubPath()); } @Override @@ -207,7 +215,7 @@ public enum PathHashAlgorithm { @Override String hash(PathInput pathInput) { StringBuilder input = new StringBuilder(); - for (String path : pathInput.fixedSubPath().toArray()) { + for (String path : pathInput.hashPath().toArray()) { input.append(path); } long hash = FNV1a.hash64(input.toString()); @@ -222,7 +230,7 @@ String hash(PathInput pathInput) { @Override String hash(PathInput pathInput) { StringBuilder input = new StringBuilder(); - for (String path : pathInput.fixedSubPath().toArray()) { + for (String path : pathInput.hashPath().toArray()) { input.append(path); } long hash = FNV1a.hash64(input.toString()); diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteStorePathStrategy.java b/server/src/main/java/org/opensearch/index/remote/RemoteStorePathStrategy.java index d0250790068f7..ee28f47664614 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteStorePathStrategy.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteStorePathStrategy.java @@ -17,6 +17,7 @@ import org.opensearch.index.remote.RemoteStoreEnums.DataType; import org.opensearch.index.remote.RemoteStoreEnums.PathHashAlgorithm; import org.opensearch.index.remote.RemoteStoreEnums.PathType; +import org.opensearch.repositories.blobstore.BlobStoreRepository; import java.util.Objects; @@ -82,10 +83,12 @@ public BlobPath generatePath(PathInput pathInput) { public static class PathInput { private final BlobPath basePath; private final String indexUUID; + private final String fixedPrefix; public PathInput(Builder builder) { this.basePath = Objects.requireNonNull(builder.basePath); this.indexUUID = Objects.requireNonNull(builder.indexUUID); + this.fixedPrefix = Objects.isNull(builder.fixedPrefix) ? "" : builder.fixedPrefix; } BlobPath basePath() { @@ -96,10 +99,18 @@ String indexUUID() { return indexUUID; } + String fixedPrefix() { + return fixedPrefix; + } + BlobPath fixedSubPath() { return BlobPath.cleanPath().add(indexUUID); } + BlobPath hashPath() { + return fixedSubPath(); + } + /** * Returns a new builder for {@link PathInput}. */ @@ -121,17 +132,23 @@ public void assertIsValid() { public static class Builder> { private BlobPath basePath; private String indexUUID; + private String fixedPrefix; public T basePath(BlobPath basePath) { this.basePath = basePath; return self(); } - public Builder indexUUID(String indexUUID) { + public T indexUUID(String indexUUID) { this.indexUUID = indexUUID; return self(); } + public T fixedPrefix(String fixedPrefix) { + this.fixedPrefix = fixedPrefix; + return self(); + } + protected T self() { return (T) this; } @@ -142,6 +159,65 @@ public PathInput build() { } } + /** + * A subclass of {@link PathInput} that represents the input required to generate a path + * for a shard in a snapshot. 
It includes the base path, index UUID, and shard ID. + * + * @opensearch.internal + */ + public static class SnapshotShardPathInput extends PathInput { + private final String shardId; + + public SnapshotShardPathInput(SnapshotShardPathInput.Builder builder) { + super(builder); + this.shardId = Objects.requireNonNull(builder.shardId); + } + + @Override + BlobPath fixedSubPath() { + return BlobPath.cleanPath().add(BlobStoreRepository.INDICES_DIR).add(super.fixedSubPath()).add(shardId); + } + + @Override + BlobPath hashPath() { + return BlobPath.cleanPath().add(shardId).add(indexUUID()); + } + + public String shardId() { + return shardId; + } + + /** + * Returns a new builder for {@link SnapshotShardPathInput}. + */ + public static SnapshotShardPathInput.Builder builder() { + return new SnapshotShardPathInput.Builder(); + } + + /** + * Builder for {@link SnapshotShardPathInput}. + * + * @opensearch.internal + */ + public static class Builder extends PathInput.Builder { + private String shardId; + + public SnapshotShardPathInput.Builder shardId(String shardId) { + this.shardId = shardId; + return this; + } + + @Override + protected SnapshotShardPathInput.Builder self() { + return this; + } + + public SnapshotShardPathInput build() { + return new SnapshotShardPathInput(this); + } + } + } + /** * Wrapper class for the data aware path input required to generate path for remote store uploads. This input is * composed of the parent inputs, shard id, data category and data type. @@ -204,16 +280,6 @@ public static class Builder extends PathInput.Builder { private DataCategory dataCategory; private DataType dataType; - public Builder basePath(BlobPath basePath) { - super.basePath = basePath; - return this; - } - - public Builder indexUUID(String indexUUID) { - super.indexUUID = indexUUID; - return this; - } - public Builder shardId(String shardId) { this.shardId = shardId; return this; diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java b/server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java index 871e2eb3ce47f..edcfe17d31fb8 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java @@ -542,8 +542,11 @@ public static List filterOutMetadataFilesBasedOnAge( if (RemoteStoreSettings.isPinnedTimestampsEnabled() == false) { return new ArrayList<>(metadataFiles); } - long maximumAllowedTimestamp = lastSuccessfulFetchOfPinnedTimestamps - RemoteStoreSettings.getPinnedTimestampsLookbackInterval() - .getMillis(); + // We allow now() - loopback interval to be pinned. 
Also, the actual pinning can take at most loopback interval + // This means the pinned timestamp can be available for read after at most (2 * loopback interval) + long maximumAllowedTimestamp = lastSuccessfulFetchOfPinnedTimestamps - (2 * RemoteStoreSettings + .getPinnedTimestampsLookbackInterval() + .getMillis()); List metadataFilesWithMinAge = new ArrayList<>(); for (String metadataFileName : metadataFiles) { long metadataTimestamp = getTimestampFunction.apply(metadataFileName); diff --git a/server/src/main/java/org/opensearch/index/seqno/ReplicationTracker.java b/server/src/main/java/org/opensearch/index/seqno/ReplicationTracker.java index b9cb5e92d0ed1..1e43827afeb47 100644 --- a/server/src/main/java/org/opensearch/index/seqno/ReplicationTracker.java +++ b/server/src/main/java/org/opensearch/index/seqno/ReplicationTracker.java @@ -1251,12 +1251,13 @@ public ReplicationCheckpoint getLatestReplicationCheckpoint() { return this.latestReplicationCheckpoint; } - private boolean isPrimaryRelocation(String allocationId) { + // skip any shard that is a relocating primary or search only replica (not tracked by primary) + private boolean shouldSkipReplicationTimer(String allocationId) { Optional shardRouting = routingTable.shards() .stream() .filter(routing -> routing.allocationId().getId().equals(allocationId)) .findAny(); - return shardRouting.isPresent() && shardRouting.get().primary(); + return shardRouting.isPresent() && (shardRouting.get().primary() || shardRouting.get().isSearchOnly()); } private void createReplicationLagTimers() { @@ -1268,7 +1269,7 @@ private void createReplicationLagTimers() { // it is possible for a shard to be in-sync but not yet removed from the checkpoints collection after a failover event. if (cps.inSync && replicationGroup.getUnavailableInSyncShards().contains(allocationId) == false - && isPrimaryRelocation(allocationId) == false + && shouldSkipReplicationTimer(allocationId) == false && latestReplicationCheckpoint.isAheadOf(cps.visibleReplicationCheckpoint) && (indexSettings.isSegRepLocalEnabled() == true || isShardOnRemoteEnabledNode.apply(routingTable.getByAllocationId(allocationId).currentNodeId()))) { @@ -1302,7 +1303,7 @@ public synchronized void startReplicationLagTimers(ReplicationCheckpoint checkpo final CheckpointState cps = e.getValue(); if (cps.inSync && replicationGroup.getUnavailableInSyncShards().contains(allocationId) == false - && isPrimaryRelocation(e.getKey()) == false + && shouldSkipReplicationTimer(e.getKey()) == false && latestReplicationCheckpoint.isAheadOf(cps.visibleReplicationCheckpoint) && cps.checkpointTimers.containsKey(latestReplicationCheckpoint)) { cps.checkpointTimers.get(latestReplicationCheckpoint).start(); @@ -1330,7 +1331,7 @@ public synchronized Set getSegmentReplicationStats entry -> entry.getKey().equals(this.shardAllocationId) == false && entry.getValue().inSync && replicationGroup.getUnavailableInSyncShards().contains(entry.getKey()) == false - && isPrimaryRelocation(entry.getKey()) == false + && shouldSkipReplicationTimer(entry.getKey()) == false /*Check if the current primary shard is migrating to remote and all the other shard copies of the same index still hasn't completely moved over to the remote enabled nodes. 
Ensures that: diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 4b18df7a7829d..6e12e4ed3da1a 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -150,6 +150,7 @@ import org.opensearch.index.recovery.RecoveryStats; import org.opensearch.index.refresh.RefreshStats; import org.opensearch.index.remote.RemoteSegmentStats; +import org.opensearch.index.remote.RemoteStorePathStrategy; import org.opensearch.index.remote.RemoteStoreStatsTrackerFactory; import org.opensearch.index.search.stats.SearchStats; import org.opensearch.index.search.stats.ShardSearchStats; @@ -2479,6 +2480,10 @@ private void loadGlobalCheckpointToReplicationTracker() throws IOException { * Operations from the translog will be replayed to bring lucene up to date. **/ public void openEngineAndRecoverFromTranslog() throws IOException { + openEngineAndRecoverFromTranslog(true); + } + + public void openEngineAndRecoverFromTranslog(boolean syncFromRemote) throws IOException { recoveryState.validateCurrentStage(RecoveryState.Stage.INDEX); maybeCheckIndex(); recoveryState.setStage(RecoveryState.Stage.TRANSLOG); @@ -2499,7 +2504,16 @@ public void openEngineAndRecoverFromTranslog() throws IOException { loadGlobalCheckpointToReplicationTracker(); } - innerOpenEngineAndTranslog(replicationTracker); + if (isSnapshotV2Restore()) { + translogConfig.setDownloadRemoteTranslogOnInit(false); + } + + innerOpenEngineAndTranslog(replicationTracker, syncFromRemote); + + if (isSnapshotV2Restore()) { + translogConfig.setDownloadRemoteTranslogOnInit(true); + } + getEngine().translogManager() .recoverFromTranslog(translogRecoveryRunner, getEngine().getProcessedLocalCheckpoint(), Long.MAX_VALUE); } @@ -2561,7 +2575,7 @@ private void innerOpenEngineAndTranslog(LongSupplier globalCheckpointSupplier, b if (shardRouting.primary()) { if (syncFromRemote) { syncRemoteTranslogAndUpdateGlobalCheckpoint(); - } else { + } else if (isSnapshotV2Restore() == false) { // we will enter this block when we do not want to recover from remote translog. // currently only during snapshot restore, we are coming into this block. 
// here, as while initiliazing remote translog we cannot skip downloading translog files, @@ -2607,6 +2621,11 @@ private void innerOpenEngineAndTranslog(LongSupplier globalCheckpointSupplier, b recoveryState.validateCurrentStage(RecoveryState.Stage.TRANSLOG); } + private boolean isSnapshotV2Restore() { + return routingEntry().recoverySource().getType() == RecoverySource.Type.SNAPSHOT + && ((SnapshotRecoverySource) routingEntry().recoverySource()).pinnedTimestamp() > 0; + } + private boolean assertSequenceNumbersInCommit() throws IOException { final Map userData = fetchUserData(); assert userData.containsKey(SequenceNumbers.LOCAL_CHECKPOINT_KEY) : "commit point doesn't contains a local checkpoint"; @@ -2892,7 +2911,26 @@ public void restoreFromSnapshotAndRemoteStore( assert recoveryState.getRecoverySource().getType() == RecoverySource.Type.SNAPSHOT : "invalid recovery type: " + recoveryState.getRecoverySource(); StoreRecovery storeRecovery = new StoreRecovery(shardId, logger); - storeRecovery.recoverFromSnapshotAndRemoteStore(this, repository, repositoriesService, listener, threadPool); + SnapshotRecoverySource recoverySource = (SnapshotRecoverySource) recoveryState().getRecoverySource(); + if (recoverySource.pinnedTimestamp() != 0) { + storeRecovery.recoverShallowSnapshotV2( + this, + repository, + repositoriesService, + listener, + remoteStoreSettings.getSegmentsPathFixedPrefix(), + threadPool + ); + } else { + storeRecovery.recoverFromSnapshotAndRemoteStore( + this, + repository, + repositoriesService, + listener, + remoteStoreSettings.getSegmentsPathFixedPrefix(), + threadPool + ); + } } catch (Exception e) { listener.onFailure(e); } @@ -5000,16 +5038,33 @@ public void syncTranslogFilesFromRemoteTranslog() throws IOException { TranslogFactory translogFactory = translogFactorySupplier.apply(indexSettings, shardRouting); assert translogFactory instanceof RemoteBlobStoreInternalTranslogFactory; Repository repository = ((RemoteBlobStoreInternalTranslogFactory) translogFactory).getRepository(); + syncTranslogFilesFromGivenRemoteTranslog( + repository, + shardId, + indexSettings.getRemoteStorePathStrategy(), + indexSettings().isTranslogMetadataEnabled(), + 0 + ); + } + + public void syncTranslogFilesFromGivenRemoteTranslog( + Repository repository, + ShardId shardId, + RemoteStorePathStrategy remoteStorePathStrategy, + boolean isTranslogMetadataEnabled, + long timestamp + ) throws IOException { RemoteFsTranslog.download( repository, shardId, getThreadPool(), shardPath().resolveTranslog(), - indexSettings.getRemoteStorePathStrategy(), + remoteStorePathStrategy, remoteStoreSettings, logger, shouldSeedRemoteStore(), - indexSettings().isTranslogMetadataEnabled() + isTranslogMetadataEnabled, + timestamp ); } @@ -5098,15 +5153,13 @@ public void syncSegmentsFromRemoteSegmentStore(boolean overrideLocal, final Runn * Downloads segments from given remote segment store for a specific commit. 
* @param overrideLocal flag to override local segment files with those in remote store * @param sourceRemoteDirectory RemoteSegmentDirectory Instance from which we need to sync segments - * @param primaryTerm Primary Term for shard at the time of commit operation for which we are syncing segments - * @param commitGeneration commit generation at the time of commit operation for which we are syncing segments * @throws IOException if exception occurs while reading segments from remote store */ public void syncSegmentsFromGivenRemoteSegmentStore( boolean overrideLocal, RemoteSegmentStoreDirectory sourceRemoteDirectory, - long primaryTerm, - long commitGeneration + RemoteSegmentMetadata remoteSegmentMetadata, + boolean pinnedTimestamp ) throws IOException { logger.trace("Downloading segments from given remote segment store"); RemoteSegmentStoreDirectory remoteDirectory = null; @@ -5142,12 +5195,29 @@ public void syncSegmentsFromGivenRemoteSegmentStore( overrideLocal, () -> {} ); - if (segmentsNFile != null) { + if (pinnedTimestamp) { + final SegmentInfos infosSnapshot = store.buildSegmentInfos( + remoteSegmentMetadata.getSegmentInfosBytes(), + remoteSegmentMetadata.getGeneration() + ); + long processedLocalCheckpoint = Long.parseLong(infosSnapshot.getUserData().get(LOCAL_CHECKPOINT_KEY)); + // delete any other commits, we want to start the engine only from a new commit made with the downloaded infos bytes. + // Extra segments will be wiped on engine open. + for (String file : List.of(store.directory().listAll())) { + if (file.startsWith(IndexFileNames.SEGMENTS)) { + store.deleteQuiet(file); + } + } + assert Arrays.stream(store.directory().listAll()).filter(f -> f.startsWith(IndexFileNames.SEGMENTS)).findAny().isEmpty() + : "There should not be any segments file in the dir"; + store.commitSegmentInfos(infosSnapshot, processedLocalCheckpoint, processedLocalCheckpoint); + } else if (segmentsNFile != null) { try ( ChecksumIndexInput indexInput = new BufferedChecksumIndexInput( storeDirectory.openInput(segmentsNFile, IOContext.DEFAULT) ) ) { + long commitGeneration = SegmentInfos.generationFromSegmentsFileName(segmentsNFile); SegmentInfos infosSnapshot = SegmentInfos.readCommit(store.directory(), indexInput, commitGeneration); long processedLocalCheckpoint = Long.parseLong(infosSnapshot.getUserData().get(LOCAL_CHECKPOINT_KEY)); if (remoteStore != null) { diff --git a/server/src/main/java/org/opensearch/index/shard/ReplicationGroup.java b/server/src/main/java/org/opensearch/index/shard/ReplicationGroup.java index f585267f21832..ccfaf50da1c6b 100644 --- a/server/src/main/java/org/opensearch/index/shard/ReplicationGroup.java +++ b/server/src/main/java/org/opensearch/index/shard/ReplicationGroup.java @@ -72,7 +72,8 @@ public ReplicationGroup( this.replicationTargets = new ArrayList<>(); this.skippedShards = new ArrayList<>(); for (final ShardRouting shard : routingTable) { - if (shard.unassigned()) { + // search only replicas never receive any replicated operations + if (shard.unassigned() || shard.isSearchOnly()) { assert shard.primary() == false : "primary shard should not be unassigned in a replication group: " + shard; skippedShards.add(shard); } else { diff --git a/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java b/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java index 8d689e8769728..6933e4e161dd1 100644 --- a/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java +++ b/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java @@ -58,12 
+58,15 @@ import org.opensearch.index.engine.Engine; import org.opensearch.index.engine.EngineException; import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.remote.RemoteStorePathStrategy; +import org.opensearch.index.remote.RemoteStoreUtils; import org.opensearch.index.seqno.SequenceNumbers; import org.opensearch.index.snapshots.IndexShardRestoreFailedException; import org.opensearch.index.snapshots.blobstore.RemoteStoreShardShallowCopySnapshot; import org.opensearch.index.store.RemoteSegmentStoreDirectory; import org.opensearch.index.store.RemoteSegmentStoreDirectoryFactory; import org.opensearch.index.store.Store; +import org.opensearch.index.store.remote.metadata.RemoteSegmentMetadata; import org.opensearch.index.translog.Checkpoint; import org.opensearch.index.translog.Translog; import org.opensearch.index.translog.TranslogHeader; @@ -72,6 +75,7 @@ import org.opensearch.repositories.IndexId; import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.Repository; +import org.opensearch.repositories.RepositoryData; import org.opensearch.threadpool.ThreadPool; import java.io.IOException; @@ -367,6 +371,7 @@ void recoverFromSnapshotAndRemoteStore( Repository repository, RepositoriesService repositoriesService, ActionListener listener, + String segmentsPathFixedPrefix, ThreadPool threadPool ) { try { @@ -397,7 +402,8 @@ void recoverFromSnapshotAndRemoteStore( RemoteSegmentStoreDirectoryFactory directoryFactory = new RemoteSegmentStoreDirectoryFactory( () -> repositoriesService, - threadPool + threadPool, + segmentsPathFixedPrefix ); RemoteSegmentStoreDirectory sourceRemoteDirectory = (RemoteSegmentStoreDirectory) directoryFactory.newDirectory( remoteStoreRepository, @@ -405,14 +411,14 @@ void recoverFromSnapshotAndRemoteStore( shardId, shallowCopyShardMetadata.getRemoteStorePathStrategy() ); - sourceRemoteDirectory.initializeToSpecificCommit( + RemoteSegmentMetadata remoteSegmentMetadata = sourceRemoteDirectory.initializeToSpecificCommit( primaryTerm, commitGeneration, recoverySource.snapshot().getSnapshotId().getUUID() ); - indexShard.syncSegmentsFromGivenRemoteSegmentStore(true, sourceRemoteDirectory, primaryTerm, commitGeneration); + indexShard.syncSegmentsFromGivenRemoteSegmentStore(true, sourceRemoteDirectory, remoteSegmentMetadata, false); final Store store = indexShard.store(); - if (indexShard.indexSettings.isRemoteTranslogStoreEnabled() == false) { + if (indexShard.indexSettings.isRemoteStoreEnabled() == false) { bootstrap(indexShard, store); } else { bootstrapForSnapshot(indexShard, store); @@ -441,6 +447,98 @@ void recoverFromSnapshotAndRemoteStore( } } + void recoverShallowSnapshotV2( + final IndexShard indexShard, + Repository repository, + RepositoriesService repositoriesService, + ActionListener listener, + String segmentsPathFixedPrefix, + ThreadPool threadPool + ) { + try { + if (canRecover(indexShard)) { + indexShard.preRecovery(); + RecoverySource.Type recoveryType = indexShard.recoveryState().getRecoverySource().getType(); + assert recoveryType == RecoverySource.Type.SNAPSHOT : "expected snapshot recovery type: " + recoveryType; + SnapshotRecoverySource recoverySource = (SnapshotRecoverySource) indexShard.recoveryState().getRecoverySource(); + indexShard.prepareForIndexRecovery(); + + assert recoverySource.pinnedTimestamp() != 0; + final StepListener repositoryDataListener = new StepListener<>(); + repository.getRepositoryData(repositoryDataListener); + repositoryDataListener.whenComplete(repositoryData -> { + 
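// Once repository data is available, resolve the index as recorded in the snapshot, then restore purely
+ // from remote store state at the pinned timestamp: initialize a RemoteSegmentStoreDirectory against the
+ // segment metadata matching that timestamp, sync segments and translog from the (possibly overridden)
+ // remote repositories, and finally open the engine without another remote sync.
+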
IndexId indexId = repositoryData.resolveIndexId(recoverySource.index().getName()); + IndexMetadata prevIndexMetadata = repository.getSnapshotIndexMetaData( + repositoryData, + recoverySource.snapshot().getSnapshotId(), + indexId + ); + RemoteSegmentStoreDirectoryFactory directoryFactory = new RemoteSegmentStoreDirectoryFactory( + () -> repositoriesService, + threadPool, + segmentsPathFixedPrefix + ); + String remoteSegmentStoreRepository = ((SnapshotRecoverySource) indexShard.recoveryState().getRecoverySource()) + .sourceRemoteStoreRepository(); + if (remoteSegmentStoreRepository == null) { + remoteSegmentStoreRepository = IndexMetadata.INDEX_REMOTE_SEGMENT_STORE_REPOSITORY_SETTING.get( + prevIndexMetadata.getSettings() + ); + } + RemoteStorePathStrategy remoteStorePathStrategy = RemoteStoreUtils.determineRemoteStorePathStrategy(prevIndexMetadata); + RemoteSegmentStoreDirectory sourceRemoteDirectory = (RemoteSegmentStoreDirectory) directoryFactory.newDirectory( + remoteSegmentStoreRepository, + prevIndexMetadata.getIndexUUID(), + shardId, + remoteStorePathStrategy + ); + RemoteSegmentMetadata remoteSegmentMetadata = sourceRemoteDirectory.initializeToSpecificTimestamp( + recoverySource.pinnedTimestamp() + ); + + String remoteTranslogRepository = ((SnapshotRecoverySource) indexShard.recoveryState().getRecoverySource()) + .sourceRemoteTranslogRepository(); + if (remoteTranslogRepository == null) { + remoteTranslogRepository = IndexMetadata.INDEX_REMOTE_TRANSLOG_REPOSITORY_SETTING.get( + prevIndexMetadata.getSettings() + ); + } + + indexShard.syncSegmentsFromGivenRemoteSegmentStore(true, sourceRemoteDirectory, remoteSegmentMetadata, true); + indexShard.syncTranslogFilesFromGivenRemoteTranslog( + repositoriesService.repository(remoteTranslogRepository), + new ShardId(prevIndexMetadata.getIndex(), shardId.id()), + remoteStorePathStrategy, + RemoteStoreUtils.determineTranslogMetadataEnabled(prevIndexMetadata), + recoverySource.pinnedTimestamp() + ); + + assert indexShard.shardRouting.primary() : "only primary shards can recover from store"; + writeEmptyRetentionLeasesFile(indexShard); + indexShard.recoveryState().getIndex().setFileDetailsComplete(); + indexShard.openEngineAndRecoverFromTranslog(false); + indexShard.getEngine().fillSeqNoGaps(indexShard.getPendingPrimaryTerm()); + indexShard.finalizeRecovery(); + if (indexShard.isRemoteTranslogEnabled() && indexShard.shardRouting.primary()) { + indexShard.waitForRemoteStoreSync(); + } + indexShard.postRecovery("post recovery from remote_store"); + SegmentInfos committedSegmentInfos = indexShard.store().readLastCommittedSegmentsInfo(); + try { + indexShard.getEngine() + .translogManager() + .setMinSeqNoToKeep(Long.parseLong(committedSegmentInfos.getUserData().get(SequenceNumbers.MAX_SEQ_NO)) + 1); + } catch (IllegalArgumentException e) { + logger.warn("MinSeqNoToKeep is already past the maxSeqNo from commited segment infos"); + } + listener.onResponse(true); + }, listener::onFailure); + } + } catch (Exception e) { + listener.onFailure(e); + } + } + private boolean canRecover(IndexShard indexShard) { if (indexShard.state() == IndexShardState.CLOSED) { // got closed on us, just ignore this recovery diff --git a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java index 53b43bbfb3bba..72bf07d4b03b2 100644 --- a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java +++ 
b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java @@ -412,7 +412,7 @@ static long getGeneration(String[] filenameTokens) { public static long getTimestamp(String filename) { String[] filenameTokens = filename.split(SEPARATOR); - return RemoteStoreUtils.invertLong(filenameTokens[6]); + return RemoteStoreUtils.invertLong(filenameTokens[filenameTokens.length - 2]); } public static Tuple getNodeIdByPrimaryTermAndGen(String filename) { diff --git a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryFactory.java b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryFactory.java index 3f6f4eeeef87b..233665e65aed9 100644 --- a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryFactory.java +++ b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryFactory.java @@ -40,11 +40,17 @@ @PublicApi(since = "2.3.0") public class RemoteSegmentStoreDirectoryFactory implements IndexStorePlugin.DirectoryFactory { private final Supplier repositoriesService; + private final String segmentsPathFixedPrefix; private final ThreadPool threadPool; - public RemoteSegmentStoreDirectoryFactory(Supplier repositoriesService, ThreadPool threadPool) { + public RemoteSegmentStoreDirectoryFactory( + Supplier repositoriesService, + ThreadPool threadPool, + String segmentsPathFixedPrefix + ) { this.repositoriesService = repositoriesService; + this.segmentsPathFixedPrefix = segmentsPathFixedPrefix; this.threadPool = threadPool; } @@ -71,6 +77,7 @@ public Directory newDirectory(String repositoryName, String indexUUID, ShardId s .shardId(shardIdStr) .dataCategory(SEGMENTS) .dataType(DATA) + .fixedPrefix(segmentsPathFixedPrefix) .build(); // Derive the path for data directory of SEGMENTS BlobPath dataPath = pathStrategy.generatePath(dataPathInput); @@ -87,6 +94,7 @@ public Directory newDirectory(String repositoryName, String indexUUID, ShardId s .shardId(shardIdStr) .dataCategory(SEGMENTS) .dataType(METADATA) + .fixedPrefix(segmentsPathFixedPrefix) .build(); // Derive the path for metadata directory of SEGMENTS BlobPath mdPath = pathStrategy.generatePath(mdPathInput); @@ -98,7 +106,8 @@ public Directory newDirectory(String repositoryName, String indexUUID, ShardId s repositoryName, indexUUID, shardIdStr, - pathStrategy + pathStrategy, + segmentsPathFixedPrefix ); return new RemoteSegmentStoreDirectory(dataDirectory, metadataDirectory, mdLockManager, threadPool, shardId); @@ -107,4 +116,8 @@ public Directory newDirectory(String repositoryName, String indexUUID, ShardId s } } + public Supplier getRepositoriesService() { + return this.repositoriesService; + } + } diff --git a/server/src/main/java/org/opensearch/index/store/lockmanager/RemoteStoreLockManagerFactory.java b/server/src/main/java/org/opensearch/index/store/lockmanager/RemoteStoreLockManagerFactory.java index 993c1bbdf033f..40b540d30adc7 100644 --- a/server/src/main/java/org/opensearch/index/store/lockmanager/RemoteStoreLockManagerFactory.java +++ b/server/src/main/java/org/opensearch/index/store/lockmanager/RemoteStoreLockManagerFactory.java @@ -31,9 +31,11 @@ @PublicApi(since = "2.8.0") public class RemoteStoreLockManagerFactory { private final Supplier repositoriesService; + private final String segmentsPathFixedPrefix; - public RemoteStoreLockManagerFactory(Supplier repositoriesService) { + public RemoteStoreLockManagerFactory(Supplier repositoriesService, String segmentsPathFixedPrefix) { this.repositoriesService = repositoriesService; + 
this.segmentsPathFixedPrefix = segmentsPathFixedPrefix; } public RemoteStoreLockManager newLockManager( @@ -42,7 +44,7 @@ public RemoteStoreLockManager newLockManager( String shardId, RemoteStorePathStrategy pathStrategy ) { - return newLockManager(repositoriesService.get(), repositoryName, indexUUID, shardId, pathStrategy); + return newLockManager(repositoriesService.get(), repositoryName, indexUUID, shardId, pathStrategy, segmentsPathFixedPrefix); } public static RemoteStoreMetadataLockManager newLockManager( @@ -50,7 +52,8 @@ public static RemoteStoreMetadataLockManager newLockManager( String repositoryName, String indexUUID, String shardId, - RemoteStorePathStrategy pathStrategy + RemoteStorePathStrategy pathStrategy, + String segmentsPathFixedPrefix ) { try (Repository repository = repositoriesService.repository(repositoryName)) { assert repository instanceof BlobStoreRepository : "repository should be instance of BlobStoreRepository"; @@ -62,6 +65,7 @@ public static RemoteStoreMetadataLockManager newLockManager( .shardId(shardId) .dataCategory(SEGMENTS) .dataType(LOCK_FILES) + .fixedPrefix(segmentsPathFixedPrefix) .build(); BlobPath lockDirectoryPath = pathStrategy.generatePath(lockFilesPathInput); BlobContainer lockDirectoryBlobContainer = ((BlobStoreRepository) repository).blobStore().blobContainer(lockDirectoryPath); diff --git a/server/src/main/java/org/opensearch/index/store/remote/directory/RemoteSnapshotDirectoryFactory.java b/server/src/main/java/org/opensearch/index/store/remote/directory/RemoteSnapshotDirectoryFactory.java index 177f0526e7571..e027e8b7cb3b1 100644 --- a/server/src/main/java/org/opensearch/index/store/remote/directory/RemoteSnapshotDirectoryFactory.java +++ b/server/src/main/java/org/opensearch/index/store/remote/directory/RemoteSnapshotDirectoryFactory.java @@ -11,14 +11,15 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.opensearch.common.blobstore.BlobContainer; -import org.opensearch.common.blobstore.BlobPath; import org.opensearch.index.IndexSettings; +import org.opensearch.index.remote.RemoteStoreEnums.PathType; import org.opensearch.index.shard.ShardPath; import org.opensearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot; import org.opensearch.index.snapshots.blobstore.IndexShardSnapshot; import org.opensearch.index.store.remote.filecache.FileCache; import org.opensearch.index.store.remote.utils.TransferManager; import org.opensearch.plugins.IndexStorePlugin; +import org.opensearch.repositories.IndexId; import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.Repository; import org.opensearch.repositories.blobstore.BlobStoreRepository; @@ -74,10 +75,11 @@ private Future createRemoteSnapshotDirectoryFromSnapsho ShardPath localShardPath, BlobStoreRepository blobStoreRepository ) throws IOException { - final BlobPath blobPath = blobStoreRepository.basePath() - .add("indices") - .add(IndexSettings.SEARCHABLE_SNAPSHOT_INDEX_ID.get(indexSettings.getSettings())) - .add(Integer.toString(localShardPath.getShardId().getId())); + // The below information like the snapshot generated indexId, shard_path_type and shardId are used for + // creating the shard BlobContainer. This information has been updated as per the hashed_prefix snapshots. 
+ String indexId = IndexSettings.SEARCHABLE_SNAPSHOT_INDEX_ID.get(indexSettings.getSettings()); + PathType pathType = IndexSettings.SEARCHABLE_SNAPSHOT_SHARD_PATH_TYPE.get(indexSettings.getSettings()); + int shardId = localShardPath.getShardId().getId(); final SnapshotId snapshotId = new SnapshotId( IndexSettings.SEARCHABLE_SNAPSHOT_ID_NAME.get(indexSettings.getSettings()), IndexSettings.SEARCHABLE_SNAPSHOT_ID_UUID.get(indexSettings.getSettings()) @@ -89,7 +91,12 @@ private Future createRemoteSnapshotDirectoryFromSnapsho // this trick is needed to bypass assertions in BlobStoreRepository::assertAllowableThreadPools in case of node restart and a remote // index restore is invoked return threadPool.executor(ThreadPool.Names.SNAPSHOT).submit(() -> { - final BlobContainer blobContainer = blobStoreRepository.blobStore().blobContainer(blobPath); + // shardContainer(IndexId, shardId) method uses the id and pathType information to generate the blobPath and + // hence the blobContainer. We have used a dummy name as it plays no relevance in the blobPath generation. + final BlobContainer blobContainer = blobStoreRepository.shardContainer( + new IndexId("DUMMY", indexId, pathType.getCode()), + shardId + ); final IndexShardSnapshot indexShardSnapshot = blobStoreRepository.loadShardSnapshot(blobContainer, snapshotId); assert indexShardSnapshot instanceof BlobStoreIndexShardSnapshot : "indexShardSnapshot should be an instance of BlobStoreIndexShardSnapshot"; diff --git a/server/src/main/java/org/opensearch/index/store/remote/utils/TransferManager.java b/server/src/main/java/org/opensearch/index/store/remote/utils/TransferManager.java index f07c4832d982c..94c25202ac90c 100644 --- a/server/src/main/java/org/opensearch/index/store/remote/utils/TransferManager.java +++ b/server/src/main/java/org/opensearch/index/store/remote/utils/TransferManager.java @@ -95,6 +95,19 @@ public IndexInput fetchBlob(BlobFetchRequest blobFetchRequest) throws IOExceptio @SuppressWarnings("removal") private static FileCachedIndexInput createIndexInput(FileCache fileCache, StreamReader streamReader, BlobFetchRequest request) { try { + // This local file cache is ref counted and may not strictly enforce configured capacity. + // If we find available capacity is exceeded, deny further BlobFetchRequests. 
+ if (fileCache.capacity() < fileCache.usage().usage()) { + fileCache.prune(); + throw new IOException( + "Local file cache capacity (" + + fileCache.capacity() + + ") exceeded (" + + fileCache.usage().usage() + + ") - BlobFetchRequest failed: " + + request.getFilePath() + ); + } if (Files.exists(request.getFilePath()) == false) { logger.trace("Fetching from Remote in createIndexInput of Transfer Manager"); try ( diff --git a/server/src/main/java/org/opensearch/index/translog/RemoteBlobStoreInternalTranslogFactory.java b/server/src/main/java/org/opensearch/index/translog/RemoteBlobStoreInternalTranslogFactory.java index 4599aa32325c1..5dc2ad076d21c 100644 --- a/server/src/main/java/org/opensearch/index/translog/RemoteBlobStoreInternalTranslogFactory.java +++ b/server/src/main/java/org/opensearch/index/translog/RemoteBlobStoreInternalTranslogFactory.java @@ -69,19 +69,35 @@ public Translog newTranslog( assert repository instanceof BlobStoreRepository : "repository should be instance of BlobStoreRepository"; BlobStoreRepository blobStoreRepository = ((BlobStoreRepository) repository); - return new RemoteFsTranslog( - config, - translogUUID, - deletionPolicy, - globalCheckpointSupplier, - primaryTermSupplier, - persistedSequenceNumberConsumer, - blobStoreRepository, - threadPool, - startedPrimarySupplier, - remoteTranslogTransferTracker, - remoteStoreSettings - ); + if (RemoteStoreSettings.isPinnedTimestampsEnabled()) { + return new RemoteFsTimestampAwareTranslog( + config, + translogUUID, + deletionPolicy, + globalCheckpointSupplier, + primaryTermSupplier, + persistedSequenceNumberConsumer, + blobStoreRepository, + threadPool, + startedPrimarySupplier, + remoteTranslogTransferTracker, + remoteStoreSettings + ); + } else { + return new RemoteFsTranslog( + config, + translogUUID, + deletionPolicy, + globalCheckpointSupplier, + primaryTermSupplier, + persistedSequenceNumberConsumer, + blobStoreRepository, + threadPool, + startedPrimarySupplier, + remoteTranslogTransferTracker, + remoteStoreSettings + ); + } } public Repository getRepository() { diff --git a/server/src/main/java/org/opensearch/index/translog/RemoteFsTimestampAwareTranslog.java b/server/src/main/java/org/opensearch/index/translog/RemoteFsTimestampAwareTranslog.java new file mode 100644 index 0000000000000..27d34ec0d05af --- /dev/null +++ b/server/src/main/java/org/opensearch/index/translog/RemoteFsTimestampAwareTranslog.java @@ -0,0 +1,521 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.translog; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.blobstore.BlobMetadata; +import org.opensearch.common.collect.Tuple; +import org.opensearch.common.logging.Loggers; +import org.opensearch.core.action.ActionListener; +import org.opensearch.index.remote.RemoteStoreUtils; +import org.opensearch.index.remote.RemoteTranslogTransferTracker; +import org.opensearch.index.translog.transfer.TranslogTransferManager; +import org.opensearch.index.translog.transfer.TranslogTransferMetadata; +import org.opensearch.indices.RemoteStoreSettings; +import org.opensearch.node.remotestore.RemoteStorePinnedTimestampService; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.threadpool.ThreadPool; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.TreeSet; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.BooleanSupplier; +import java.util.function.LongConsumer; +import java.util.function.LongSupplier; +import java.util.stream.Collectors; +import java.util.stream.LongStream; + +import static org.opensearch.index.translog.transfer.TranslogTransferMetadata.METADATA_SEPARATOR; + +/** + * A Translog implementation which syncs local FS with a remote store + * The current impl uploads translog , ckp and metadata to remote store + * for every sync, post syncing to disk. Post that, a new generation is + * created. This implementation is also aware of pinned timestamp and makes + * sure data against pinned timestamp is retained. + * + * @opensearch.internal + */ +public class RemoteFsTimestampAwareTranslog extends RemoteFsTranslog { + + private static Logger staticLogger = LogManager.getLogger(RemoteFsTimestampAwareTranslog.class); + private final Logger logger; + private final Map metadataFilePinnedTimestampMap; + // For metadata files, with no min generation in the name, we cache generation data to avoid multiple reads. 
+ private final Map> oldFormatMetadataFileGenerationMap; + private final Map> oldFormatMetadataFilePrimaryTermMap; + private final AtomicLong minPrimaryTermInRemote = new AtomicLong(Long.MAX_VALUE); + + public RemoteFsTimestampAwareTranslog( + TranslogConfig config, + String translogUUID, + TranslogDeletionPolicy deletionPolicy, + LongSupplier globalCheckpointSupplier, + LongSupplier primaryTermSupplier, + LongConsumer persistedSequenceNumberConsumer, + BlobStoreRepository blobStoreRepository, + ThreadPool threadPool, + BooleanSupplier startedPrimarySupplier, + RemoteTranslogTransferTracker remoteTranslogTransferTracker, + RemoteStoreSettings remoteStoreSettings + ) throws IOException { + super( + config, + translogUUID, + deletionPolicy, + globalCheckpointSupplier, + primaryTermSupplier, + persistedSequenceNumberConsumer, + blobStoreRepository, + threadPool, + startedPrimarySupplier, + remoteTranslogTransferTracker, + remoteStoreSettings + ); + logger = Loggers.getLogger(getClass(), shardId); + this.metadataFilePinnedTimestampMap = new HashMap<>(); + this.oldFormatMetadataFileGenerationMap = new HashMap<>(); + this.oldFormatMetadataFilePrimaryTermMap = new HashMap<>(); + } + + @Override + protected void onDelete() { + ClusterService.assertClusterOrClusterManagerStateThread(); + // clean up all remote translog files + try { + trimUnreferencedReaders(true, false); + } catch (IOException e) { + logger.error("Exception while deleting translog files from remote store", e); + } + } + + @Override + public void trimUnreferencedReaders() throws IOException { + trimUnreferencedReaders(false, true); + } + + // Visible for testing + protected void trimUnreferencedReaders(boolean indexDeleted, boolean trimLocal) throws IOException { + if (trimLocal) { + // clean up local translog files and updates readers + super.trimUnreferencedReaders(); + } + + // Update file tracker to reflect local translog state + Optional minLiveGeneration = readers.stream().map(BaseTranslogReader::getGeneration).min(Long::compareTo); + if (minLiveGeneration.isPresent()) { + List staleFilesInTracker = new ArrayList<>(); + for (String file : fileTransferTracker.allUploaded()) { + if (file.endsWith(TRANSLOG_FILE_SUFFIX)) { + long generation = Translog.parseIdFromFileName(file); + if (generation < minLiveGeneration.get()) { + staleFilesInTracker.add(file); + staleFilesInTracker.add(Translog.getCommitCheckpointFileName(generation)); + } + } + fileTransferTracker.delete(staleFilesInTracker); + } + } + + // This is to ensure that after the permits are acquired during primary relocation, there are no further modification on remote + // store. + if (startedPrimarySupplier.getAsBoolean() == false || pauseSync.get()) { + return; + } + + // This is to fail fast and avoid listing md files un-necessarily. + if (indexDeleted == false && RemoteStoreUtils.isPinnedTimestampStateStale()) { + logger.warn("Skipping remote segment store garbage collection as last fetch of pinned timestamp is stale"); + return; + } + + // Since remote generation deletion is async, this ensures that only one generation deletion happens at a time. + // Remote generations involves 2 async operations - 1) Delete translog generation files 2) Delete metadata files + // We try to acquire 2 permits and if we can not, we return from here itself. 
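+ // The two permits map one-to-one onto the two async deletions below: one is released by the
+ // generation-deletion callback and the other by the metadata-deletion callback (both are released
+ // immediately on any early return), so a new round of remote deletion cannot start until the
+ // previous one has fully completed.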
+ if (remoteGenerationDeletionPermits.tryAcquire(REMOTE_DELETION_PERMITS) == false) { + return; + } + + ActionListener> listMetadataFilesListener = new ActionListener<>() { + @Override + public void onResponse(List blobMetadata) { + List metadataFiles = blobMetadata.stream().map(BlobMetadata::name).collect(Collectors.toList()); + + try { + if (metadataFiles.size() <= 1) { + logger.debug("No stale translog metadata files found"); + remoteGenerationDeletionPermits.release(REMOTE_DELETION_PERMITS); + return; + } + + // Check last fetch status of pinned timestamps. If stale, return. + if (indexDeleted == false && RemoteStoreUtils.isPinnedTimestampStateStale()) { + logger.warn("Skipping remote segment store garbage collection as last fetch of pinned timestamp is stale"); + remoteGenerationDeletionPermits.release(REMOTE_DELETION_PERMITS); + return; + } + + List metadataFilesToBeDeleted = getMetadataFilesToBeDeleted( + metadataFiles, + metadataFilePinnedTimestampMap, + logger + ); + + // If index is not deleted, make sure to keep latest metadata file + if (indexDeleted == false) { + metadataFilesToBeDeleted.remove(metadataFiles.get(0)); + } + + if (metadataFilesToBeDeleted.isEmpty()) { + logger.debug("No metadata files to delete"); + remoteGenerationDeletionPermits.release(REMOTE_DELETION_PERMITS); + return; + } + + logger.debug(() -> "metadataFilesToBeDeleted = " + metadataFilesToBeDeleted); + // For all the files that we are keeping, fetch min and max generations + List metadataFilesNotToBeDeleted = new ArrayList<>(metadataFiles); + metadataFilesNotToBeDeleted.removeAll(metadataFilesToBeDeleted); + + logger.debug(() -> "metadataFilesNotToBeDeleted = " + metadataFilesNotToBeDeleted); + Set generationsToBeDeleted = getGenerationsToBeDeleted( + metadataFilesNotToBeDeleted, + metadataFilesToBeDeleted, + indexDeleted + ); + + logger.debug(() -> "generationsToBeDeleted = " + generationsToBeDeleted); + if (generationsToBeDeleted.isEmpty() == false) { + // Delete stale generations + translogTransferManager.deleteGenerationAsync( + primaryTermSupplier.getAsLong(), + generationsToBeDeleted, + remoteGenerationDeletionPermits::release + ); + + // Delete stale metadata files + translogTransferManager.deleteMetadataFilesAsync( + metadataFilesToBeDeleted, + remoteGenerationDeletionPermits::release + ); + + // Update cache to keep only those metadata files that are not getting deleted + oldFormatMetadataFileGenerationMap.keySet().retainAll(metadataFilesNotToBeDeleted); + + // Delete stale primary terms + deleteStaleRemotePrimaryTerms(metadataFilesNotToBeDeleted); + } else { + remoteGenerationDeletionPermits.release(REMOTE_DELETION_PERMITS); + } + } catch (Exception e) { + remoteGenerationDeletionPermits.release(REMOTE_DELETION_PERMITS); + } + } + + @Override + public void onFailure(Exception e) { + remoteGenerationDeletionPermits.release(REMOTE_DELETION_PERMITS); + logger.error("Exception while listing translog metadata files", e); + } + }; + translogTransferManager.listTranslogMetadataFilesAsync(listMetadataFilesListener); + } + + // Visible for testing + protected Set getGenerationsToBeDeleted( + List metadataFilesNotToBeDeleted, + List metadataFilesToBeDeleted, + boolean indexDeleted + ) throws IOException { + long maxGenerationToBeDeleted = Long.MAX_VALUE; + + if (indexDeleted == false) { + maxGenerationToBeDeleted = minRemoteGenReferenced - 1 - indexSettings().getRemoteTranslogExtraKeep(); + } + + Set generationsFromMetadataFilesToBeDeleted = new HashSet<>(); + for (String mdFile : 
metadataFilesToBeDeleted) { + Tuple minMaxGen = getMinMaxTranslogGenerationFromMetadataFile(mdFile, translogTransferManager); + generationsFromMetadataFilesToBeDeleted.addAll( + LongStream.rangeClosed(minMaxGen.v1(), minMaxGen.v2()).boxed().collect(Collectors.toList()) + ); + } + + Map> metadataFileNotToBeDeletedGenerationMap = getGenerationForMetadataFiles(metadataFilesNotToBeDeleted); + TreeSet> pinnedGenerations = getOrderedPinnedMetadataGenerations(metadataFileNotToBeDeletedGenerationMap); + Set generationsToBeDeleted = new HashSet<>(); + for (long generation : generationsFromMetadataFilesToBeDeleted) { + // Check if the generation is not referred by metadata file matching pinned timestamps + if (generation <= maxGenerationToBeDeleted && isGenerationPinned(generation, pinnedGenerations) == false) { + generationsToBeDeleted.add(generation); + } + } + return generationsToBeDeleted; + } + + protected List getMetadataFilesToBeDeleted(List metadataFiles) { + return getMetadataFilesToBeDeleted(metadataFiles, metadataFilePinnedTimestampMap, logger); + } + + // Visible for testing + protected static List getMetadataFilesToBeDeleted( + List metadataFiles, + Map metadataFilePinnedTimestampMap, + Logger logger + ) { + Tuple> pinnedTimestampsState = RemoteStorePinnedTimestampService.getPinnedTimestamps(); + + // Keep files since last successful run of scheduler + List metadataFilesToBeDeleted = RemoteStoreUtils.filterOutMetadataFilesBasedOnAge( + metadataFiles, + file -> RemoteStoreUtils.invertLong(file.split(METADATA_SEPARATOR)[3]), + pinnedTimestampsState.v1() + ); + + logger.trace( + "metadataFiles.size = {}, metadataFilesToBeDeleted based on age based filtering = {}", + metadataFiles.size(), + metadataFilesToBeDeleted.size() + ); + + // Get md files matching pinned timestamps + Set implicitLockedFiles = RemoteStoreUtils.getPinnedTimestampLockedFiles( + metadataFilesToBeDeleted, + pinnedTimestampsState.v2(), + metadataFilePinnedTimestampMap, + file -> RemoteStoreUtils.invertLong(file.split(METADATA_SEPARATOR)[3]), + TranslogTransferMetadata::getNodeIdByPrimaryTermAndGen + ); + + // Filter out metadata files matching pinned timestamps + metadataFilesToBeDeleted.removeAll(implicitLockedFiles); + + logger.trace( + "implicitLockedFiles.size = {}, metadataFilesToBeDeleted based on pinned timestamp filtering = {}", + implicitLockedFiles.size(), + metadataFilesToBeDeleted.size() + ); + + return metadataFilesToBeDeleted; + } + + // Visible for testing + protected boolean isGenerationPinned(long generation, TreeSet> pinnedGenerations) { + Tuple ceilingGenerationRange = pinnedGenerations.ceiling(new Tuple<>(generation, generation)); + if (ceilingGenerationRange != null && generation >= ceilingGenerationRange.v1() && generation <= ceilingGenerationRange.v2()) { + return true; + } + Tuple floorGenerationRange = pinnedGenerations.floor(new Tuple<>(generation, generation)); + if (floorGenerationRange != null && generation >= floorGenerationRange.v1() && generation <= floorGenerationRange.v2()) { + return true; + } + return false; + } + + private TreeSet> getOrderedPinnedMetadataGenerations(Map> metadataFileGenerationMap) { + TreeSet> pinnedGenerations = new TreeSet<>((o1, o2) -> { + if (Objects.equals(o1.v1(), o2.v1()) == false) { + return o1.v1().compareTo(o2.v1()); + } else { + return o1.v2().compareTo(o2.v2()); + } + }); + pinnedGenerations.addAll(metadataFileGenerationMap.values()); + return pinnedGenerations; + } + + // Visible for testing + protected Map> getGenerationForMetadataFiles(List 
metadataFiles) throws IOException { + Map> metadataFileGenerationMap = new HashMap<>(); + for (String metadataFile : metadataFiles) { + metadataFileGenerationMap.put(metadataFile, getMinMaxTranslogGenerationFromMetadataFile(metadataFile, translogTransferManager)); + } + return metadataFileGenerationMap; + } + + // Visible for testing + protected Tuple getMinMaxTranslogGenerationFromMetadataFile( + String metadataFile, + TranslogTransferManager translogTransferManager + ) throws IOException { + Tuple minMaxGenerationFromFileName = TranslogTransferMetadata.getMinMaxTranslogGenerationFromFilename(metadataFile); + if (minMaxGenerationFromFileName != null) { + return minMaxGenerationFromFileName; + } else { + if (oldFormatMetadataFileGenerationMap.containsKey(metadataFile)) { + return oldFormatMetadataFileGenerationMap.get(metadataFile); + } else { + TranslogTransferMetadata metadata = translogTransferManager.readMetadata(metadataFile); + Tuple minMaxGenTuple = new Tuple<>(metadata.getMinTranslogGeneration(), metadata.getGeneration()); + oldFormatMetadataFileGenerationMap.put(metadataFile, minMaxGenTuple); + return minMaxGenTuple; + } + } + } + + private void deleteStaleRemotePrimaryTerms(List metadataFiles) { + deleteStaleRemotePrimaryTerms( + metadataFiles, + translogTransferManager, + oldFormatMetadataFilePrimaryTermMap, + minPrimaryTermInRemote, + logger + ); + } + + /** + * This method must be called only after there are valid generations to delete in trimUnreferencedReaders as it ensures + * implicitly that minimum primary term in latest translog metadata in remote store is the current primary term. + *
+ * This will also delete all stale translog metadata files from remote except the latest basis the metadata file comparator. + */ + protected static void deleteStaleRemotePrimaryTerms( + List metadataFiles, + TranslogTransferManager translogTransferManager, + Map> oldFormatMetadataFilePrimaryTermMap, + AtomicLong minPrimaryTermInRemoteAtomicLong, + Logger logger + ) { + // The deletion of older translog files in remote store is on best-effort basis, there is a possibility that there + // are older files that are no longer needed and should be cleaned up. In here, we delete all files that are part + // of older primary term. + if (metadataFiles.isEmpty()) { + logger.trace("No metadata is uploaded yet, returning from deleteStaleRemotePrimaryTerms"); + return; + } + Optional minPrimaryTermFromMetadataFiles = metadataFiles.stream().map(file -> { + try { + return getMinMaxPrimaryTermFromMetadataFile(file, translogTransferManager, oldFormatMetadataFilePrimaryTermMap).v1(); + } catch (IOException e) { + return Long.MAX_VALUE; + } + }).min(Long::compareTo); + // First we delete all stale primary terms folders from remote store + Long minPrimaryTermInRemote = getMinPrimaryTermInRemote(minPrimaryTermInRemoteAtomicLong, translogTransferManager, logger); + if (minPrimaryTermFromMetadataFiles.get() > minPrimaryTermInRemote) { + translogTransferManager.deletePrimaryTermsAsync(minPrimaryTermFromMetadataFiles.get()); + minPrimaryTermInRemoteAtomicLong.set(minPrimaryTermFromMetadataFiles.get()); + } else { + logger.debug( + "Skipping primary term cleanup. minimumReferencedPrimaryTerm = {}, minPrimaryTermInRemote = {}", + minPrimaryTermFromMetadataFiles.get(), + minPrimaryTermInRemote + ); + } + } + + private static Long getMinPrimaryTermInRemote( + AtomicLong minPrimaryTermInRemote, + TranslogTransferManager translogTransferManager, + Logger logger + ) { + if (minPrimaryTermInRemote.get() == Long.MAX_VALUE) { + try { + Set primaryTermsInRemote = translogTransferManager.listPrimaryTermsInRemote(); + if (primaryTermsInRemote.isEmpty() == false) { + Optional minPrimaryTerm = primaryTermsInRemote.stream().min(Long::compareTo); + minPrimaryTerm.ifPresent(minPrimaryTermInRemote::set); + } + } catch (IOException e) { + logger.error("Exception while listing primary terms in remote translog", e); + } + } + return minPrimaryTermInRemote.get(); + } + + protected static Tuple getMinMaxPrimaryTermFromMetadataFile( + String metadataFile, + TranslogTransferManager translogTransferManager, + Map> oldFormatMetadataFilePrimaryTermMap + ) throws IOException { + Tuple minMaxPrimaryTermFromFileName = TranslogTransferMetadata.getMinMaxPrimaryTermFromFilename(metadataFile); + if (minMaxPrimaryTermFromFileName != null) { + return minMaxPrimaryTermFromFileName; + } else { + if (oldFormatMetadataFilePrimaryTermMap.containsKey(metadataFile)) { + return oldFormatMetadataFilePrimaryTermMap.get(metadataFile); + } else { + TranslogTransferMetadata metadata = translogTransferManager.readMetadata(metadataFile); + long maxPrimaryTem = TranslogTransferMetadata.getPrimaryTermFromFileName(metadataFile); + long minPrimaryTem = -1; + if (metadata.getGenerationToPrimaryTermMapper() != null + && metadata.getGenerationToPrimaryTermMapper().values().isEmpty() == false) { + Optional primaryTerm = metadata.getGenerationToPrimaryTermMapper() + .values() + .stream() + .map(s -> Long.parseLong(s)) + .min(Long::compareTo); + if (primaryTerm.isPresent()) { + minPrimaryTem = primaryTerm.get(); + } + } + Tuple minMaxPrimaryTermTuple = new 
Tuple<>(minPrimaryTem, maxPrimaryTem); + oldFormatMetadataFilePrimaryTermMap.put(metadataFile, minMaxPrimaryTermTuple); + return minMaxPrimaryTermTuple; + } + } + } + + public static void cleanup(TranslogTransferManager translogTransferManager) throws IOException { + ActionListener> listMetadataFilesListener = new ActionListener<>() { + @Override + public void onResponse(List blobMetadata) { + List metadataFiles = blobMetadata.stream().map(BlobMetadata::name).collect(Collectors.toList()); + + try { + if (metadataFiles.isEmpty()) { + staticLogger.debug("No stale translog metadata files found"); + return; + } + List metadataFilesToBeDeleted = getMetadataFilesToBeDeleted(metadataFiles, new HashMap<>(), staticLogger); + if (metadataFilesToBeDeleted.isEmpty()) { + staticLogger.debug("No metadata files to delete"); + return; + } + staticLogger.debug(() -> "metadataFilesToBeDeleted = " + metadataFilesToBeDeleted); + + // For all the files that we are keeping, fetch min and max generations + List metadataFilesNotToBeDeleted = new ArrayList<>(metadataFiles); + metadataFilesNotToBeDeleted.removeAll(metadataFilesToBeDeleted); + staticLogger.debug(() -> "metadataFilesNotToBeDeleted = " + metadataFilesNotToBeDeleted); + + // Delete stale metadata files + translogTransferManager.deleteMetadataFilesAsync(metadataFilesToBeDeleted, () -> {}); + + // Delete stale primary terms + deleteStaleRemotePrimaryTerms( + metadataFilesNotToBeDeleted, + translogTransferManager, + new HashMap<>(), + new AtomicLong(Long.MAX_VALUE), + staticLogger + ); + } catch (Exception e) { + staticLogger.error("Exception while cleaning up metadata and primary terms", e); + } + } + + @Override + public void onFailure(Exception e) { + staticLogger.error("Exception while cleaning up metadata and primary terms", e); + } + }; + translogTransferManager.listTranslogMetadataFilesAsync(listMetadataFilesListener); + } +} diff --git a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java index c533a31c310c7..812852d107682 100644 --- a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java +++ b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java @@ -67,30 +67,30 @@ public class RemoteFsTranslog extends Translog { private final Logger logger; - private final TranslogTransferManager translogTransferManager; - private final FileTransferTracker fileTransferTracker; - private final BooleanSupplier startedPrimarySupplier; + protected final TranslogTransferManager translogTransferManager; + protected final FileTransferTracker fileTransferTracker; + protected final BooleanSupplier startedPrimarySupplier; private final RemoteTranslogTransferTracker remoteTranslogTransferTracker; private volatile long maxRemoteTranslogGenerationUploaded; private volatile long minSeqNoToKeep; // min generation referred by last uploaded translog - private volatile long minRemoteGenReferenced; + protected volatile long minRemoteGenReferenced; // clean up translog folder uploaded by previous primaries once - private final SetOnce olderPrimaryCleaned = new SetOnce<>(); + protected final SetOnce olderPrimaryCleaned = new SetOnce<>(); - private static final int REMOTE_DELETION_PERMITS = 2; + protected static final int REMOTE_DELETION_PERMITS = 2; private static final int DOWNLOAD_RETRIES = 2; // Semaphore used to allow only single remote generation to happen at a time - private final Semaphore remoteGenerationDeletionPermits = new 
Semaphore(REMOTE_DELETION_PERMITS); + protected final Semaphore remoteGenerationDeletionPermits = new Semaphore(REMOTE_DELETION_PERMITS); // These permits exist to allow any inflight background triggered upload. private static final int SYNC_PERMIT = 1; private final Semaphore syncPermit = new Semaphore(SYNC_PERMIT); - private final AtomicBoolean pauseSync = new AtomicBoolean(false); + protected final AtomicBoolean pauseSync = new AtomicBoolean(false); private final boolean isTranslogMetadataEnabled; public RemoteFsTranslog( @@ -123,7 +123,9 @@ public RemoteFsTranslog( isTranslogMetadataEnabled ); try { - download(translogTransferManager, location, logger, config.shouldSeedRemote()); + if (config.downloadRemoteTranslogOnInit()) { + download(translogTransferManager, location, logger, config.shouldSeedRemote(), 0); + } Checkpoint checkpoint = readCheckpoint(location); logger.info("Downloaded data from remote translog till maxSeqNo = {}", checkpoint.maxSeqNo); this.readers.addAll(recoverFromFiles(checkpoint)); @@ -132,6 +134,9 @@ public RemoteFsTranslog( logger.error(errorMsg); throw new IllegalStateException(errorMsg); } + if (config.downloadRemoteTranslogOnInit() == false) { + translogTransferManager.populateFileTrackerWithLocalState(this.readers); + } boolean success = false; current = null; try { @@ -173,7 +178,8 @@ public static void download( RemoteStoreSettings remoteStoreSettings, Logger logger, boolean seedRemote, - boolean isTranslogMetadataEnabled + boolean isTranslogMetadataEnabled, + long timestamp ) throws IOException { assert repository instanceof BlobStoreRepository : String.format( Locale.ROOT, @@ -195,11 +201,12 @@ public static void download( remoteStoreSettings, isTranslogMetadataEnabled ); - RemoteFsTranslog.download(translogTransferManager, location, logger, seedRemote); + RemoteFsTranslog.download(translogTransferManager, location, logger, seedRemote, timestamp); logger.trace(remoteTranslogTransferTracker.toString()); } - static void download(TranslogTransferManager translogTransferManager, Path location, Logger logger, boolean seedRemote) + // Visible for testing + static void download(TranslogTransferManager translogTransferManager, Path location, Logger logger, boolean seedRemote, long timestamp) throws IOException { /* In Primary to Primary relocation , there can be concurrent upload and download of translog. 
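Editorial note: the hunks around this point thread a new timestamp argument from RemoteFsTranslog.download(...) through downloadOnce(...) into TranslogTransferManager.readMetadata(long). A non-positive value preserves the old behaviour of reading the latest metadata file, while a positive value selects the single metadata file matching the pinned timestamp. The following is a minimal, self-contained sketch of that selection under simplified assumptions; the helper names and the Long.MAX_VALUE-minus-value inversion stand in for RemoteStoreUtils.invertLong and getPinnedTimestampLockedFiles and are not the actual OpenSearch API.

    import java.util.Comparator;
    import java.util.List;
    import java.util.Locale;
    import java.util.Optional;

    public final class PinnedTranslogMetadataSelector {

        // Assumption: invertLong stores (Long.MAX_VALUE - value), zero-padded, so that ascending
        // lexicographic order of filenames is newest-first.
        static String invert(long value) {
            return String.format(Locale.ROOT, "%019d", Long.MAX_VALUE - value);
        }

        static long uninvert(String token) {
            return Long.MAX_VALUE - Long.parseLong(token);
        }

        /**
         * pinnedTimestamp <= 0: behave like the old readMetadata() and pick the newest metadata file.
         * pinnedTimestamp > 0: pick the newest metadata file created at or before the pinned timestamp,
         * a simplification of the matching done against pinned timestamps in the production code.
         */
        static Optional<String> selectMetadataFile(List<String> metadataFiles, long pinnedTimestamp, int createdAtToken) {
            if (pinnedTimestamp <= 0) {
                // inverted timestamps: the lexicographically smallest name is the newest file
                return metadataFiles.stream().min(Comparator.naturalOrder());
            }
            return metadataFiles.stream()
                .filter(name -> uninvert(name.split("__")[createdAtToken]) <= pinnedTimestamp)
                .max(Comparator.comparingLong(name -> uninvert(name.split("__")[createdAtToken])));
        }

        public static void main(String[] args) {
            List<String> files = List.of(
                "metadata__" + invert(1_000L),
                "metadata__" + invert(2_000L),
                "metadata__" + invert(3_000L)
            );
            System.out.println(selectMetadataFile(files, 0L, 1));      // newest file (createdAt = 3000)
            System.out.println(selectMetadataFile(files, 2_500L, 1));  // file created at 2000
        }
    }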
@@ -213,7 +220,7 @@ static void download(TranslogTransferManager translogTransferManager, Path locat boolean success = false; long startTimeMs = System.currentTimeMillis(); try { - downloadOnce(translogTransferManager, location, logger, seedRemote); + downloadOnce(translogTransferManager, location, logger, seedRemote, timestamp); success = true; return; } catch (FileNotFoundException | NoSuchFileException e) { @@ -227,13 +234,18 @@ static void download(TranslogTransferManager translogTransferManager, Path locat throw ex; } - private static void downloadOnce(TranslogTransferManager translogTransferManager, Path location, Logger logger, boolean seedRemote) - throws IOException { + private static void downloadOnce( + TranslogTransferManager translogTransferManager, + Path location, + Logger logger, + boolean seedRemote, + long timestamp + ) throws IOException { logger.debug("Downloading translog files from remote"); RemoteTranslogTransferTracker statsTracker = translogTransferManager.getRemoteTranslogTransferTracker(); long prevDownloadBytesSucceeded = statsTracker.getDownloadBytesSucceeded(); long prevDownloadTimeInMillis = statsTracker.getTotalDownloadTimeInMillis(); - TranslogTransferMetadata translogMetadata = translogTransferManager.readMetadata(); + TranslogTransferMetadata translogMetadata = translogTransferManager.readMetadata(timestamp); if (translogMetadata != null) { if (Files.notExists(location)) { Files.createDirectories(location); @@ -310,6 +322,7 @@ public static TranslogTransferManager buildTranslogTransferManager( .shardId(shardIdStr) .dataCategory(TRANSLOG) .dataType(DATA) + .fixedPrefix(remoteStoreSettings.getTranslogPathFixedPrefix()) .build(); BlobPath dataPath = pathStrategy.generatePath(dataPathInput); RemoteStorePathStrategy.ShardDataPathInput mdPathInput = RemoteStorePathStrategy.ShardDataPathInput.builder() @@ -318,6 +331,7 @@ public static TranslogTransferManager buildTranslogTransferManager( .shardId(shardIdStr) .dataCategory(TRANSLOG) .dataType(METADATA) + .fixedPrefix(remoteStoreSettings.getTranslogPathFixedPrefix()) .build(); BlobPath mdPath = pathStrategy.generatePath(mdPathInput); BlobStoreTransferService transferService = new BlobStoreTransferService(blobStoreRepository.blobStore(), threadPool); diff --git a/server/src/main/java/org/opensearch/index/translog/Translog.java b/server/src/main/java/org/opensearch/index/translog/Translog.java index 87e0c21b8203c..4b4ceb7444471 100644 --- a/server/src/main/java/org/opensearch/index/translog/Translog.java +++ b/server/src/main/java/org/opensearch/index/translog/Translog.java @@ -317,6 +317,10 @@ TranslogReader openReader(Path path, Checkpoint checkpoint) throws IOException { */ public static long parseIdFromFileName(Path translogFile) { final String fileName = translogFile.getFileName().toString(); + return parseIdFromFileName(fileName); + } + + public static long parseIdFromFileName(String fileName) { final Matcher matcher = PARSE_STRICT_ID_PATTERN.matcher(fileName); if (matcher.matches()) { try { diff --git a/server/src/main/java/org/opensearch/index/translog/TranslogConfig.java b/server/src/main/java/org/opensearch/index/translog/TranslogConfig.java index f720f041b287c..52e20d9838fca 100644 --- a/server/src/main/java/org/opensearch/index/translog/TranslogConfig.java +++ b/server/src/main/java/org/opensearch/index/translog/TranslogConfig.java @@ -60,6 +60,7 @@ public final class TranslogConfig { private final ByteSizeValue bufferSize; private final String nodeId; private final boolean seedRemote; + private boolean 
downloadRemoteTranslogOnInit = true; /** * Creates a new TranslogConfig instance @@ -140,4 +141,12 @@ public String getNodeId() { public boolean shouldSeedRemote() { return seedRemote; } + + public boolean downloadRemoteTranslogOnInit() { + return downloadRemoteTranslogOnInit; + } + + public void setDownloadRemoteTranslogOnInit(boolean downloadRemoteTranslogOnInit) { + this.downloadRemoteTranslogOnInit = downloadRemoteTranslogOnInit; + } } diff --git a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java index 1cc39cdf442e2..291218ea47499 100644 --- a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java +++ b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java @@ -28,6 +28,7 @@ import org.opensearch.index.remote.RemoteStoreUtils; import org.opensearch.index.remote.RemoteTranslogTransferTracker; import org.opensearch.index.translog.Translog; +import org.opensearch.index.translog.TranslogReader; import org.opensearch.index.translog.transfer.listener.TranslogTransferListener; import org.opensearch.indices.RemoteStoreSettings; import org.opensearch.threadpool.ThreadPool; @@ -45,10 +46,12 @@ import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; +import java.util.function.Function; import java.util.stream.Collectors; import static org.opensearch.index.translog.transfer.FileSnapshot.TransferFileSnapshot; import static org.opensearch.index.translog.transfer.FileSnapshot.TranslogFileSnapshot; +import static org.opensearch.index.translog.transfer.TranslogTransferMetadata.METADATA_SEPARATOR; /** * The class responsible for orchestrating the transfer of a {@link TransferSnapshot} via a {@link TransferService} @@ -337,35 +340,54 @@ private void deleteFileIfExists(Path filePath) throws IOException { } } + public TranslogTransferMetadata readMetadata(long pinnedTimestamp) throws IOException { + if (pinnedTimestamp <= 0) { + return readMetadata(); + } + return readMetadata((blobMetadataList) -> { + List metadataFiles = blobMetadataList.stream().map(BlobMetadata::name).collect(Collectors.toList()); + Set metadataFilesMatchingTimestamp = RemoteStoreUtils.getPinnedTimestampLockedFiles( + metadataFiles, + Set.of(pinnedTimestamp), + file -> RemoteStoreUtils.invertLong(file.split(METADATA_SEPARATOR)[3]), + TranslogTransferMetadata::getNodeIdByPrimaryTermAndGen, + true + ); + if (metadataFilesMatchingTimestamp.isEmpty()) { + return null; + } + assert metadataFilesMatchingTimestamp.size() == 1 : "There should be only 1 metadata file matching given timestamp"; + return metadataFilesMatchingTimestamp.stream().findFirst().get(); + }, Integer.MAX_VALUE); + } + public TranslogTransferMetadata readMetadata() throws IOException { + return readMetadata((blobMetadataList) -> { + RemoteStoreUtils.verifyNoMultipleWriters( + blobMetadataList.stream().map(BlobMetadata::name).collect(Collectors.toList()), + TranslogTransferMetadata::getNodeIdByPrimaryTermAndGen + ); + return blobMetadataList.get(0).name(); + }, METADATA_FILES_TO_FETCH); + } + + private TranslogTransferMetadata readMetadata(Function, String> getMetadataFileToRead, int numberOfFilesToFetch) + throws IOException { SetOnce metadataSetOnce = new SetOnce<>(); SetOnce exceptionSetOnce = new SetOnce<>(); final CountDownLatch latch = new CountDownLatch(1); LatchedActionListener> latchedActionListener = new 
LatchedActionListener<>( ActionListener.wrap(blobMetadataList -> { if (blobMetadataList.isEmpty()) return; - RemoteStoreUtils.verifyNoMultipleWriters( - blobMetadataList.stream().map(BlobMetadata::name).collect(Collectors.toList()), - TranslogTransferMetadata::getNodeIdByPrimaryTermAndGen - ); - String filename = blobMetadataList.get(0).name(); - boolean downloadStatus = false; - long downloadStartTime = System.nanoTime(), bytesToRead = 0; - try (InputStream inputStream = transferService.downloadBlob(remoteMetadataTransferPath, filename)) { - // Capture number of bytes for stats before reading - bytesToRead = inputStream.available(); - IndexInput indexInput = new ByteArrayIndexInput("metadata file", inputStream.readAllBytes()); - metadataSetOnce.set(metadataStreamWrapper.readStream(indexInput)); - downloadStatus = true; + String filename = getMetadataFileToRead.apply(blobMetadataList); + if (filename == null) { + return; + } + try { + metadataSetOnce.set(readMetadata(filename)); } catch (IOException e) { logger.error(() -> new ParameterizedMessage("Exception while reading metadata file: {}", filename), e); exceptionSetOnce.set(e); - } finally { - remoteTranslogTransferTracker.addDownloadTimeInMillis((System.nanoTime() - downloadStartTime) / 1_000_000L); - logger.debug("translogMetadataDownloadStatus={}", downloadStatus); - if (downloadStatus) { - remoteTranslogTransferTracker.addDownloadBytesSucceeded(bytesToRead); - } } }, e -> { if (e instanceof RuntimeException) { @@ -381,12 +403,14 @@ public TranslogTransferMetadata readMetadata() throws IOException { transferService.listAllInSortedOrder( remoteMetadataTransferPath, TranslogTransferMetadata.METADATA_PREFIX, - METADATA_FILES_TO_FETCH, + numberOfFilesToFetch, latchedActionListener ); - latch.await(); + if (latch.await(remoteStoreSettings.getClusterRemoteTranslogTransferTimeout().millis(), TimeUnit.MILLISECONDS) == false) { + throw new RuntimeException("Timed out reading metadata file"); + } } catch (InterruptedException e) { - throw new IOException("Exception while reading/downloading metadafile", e); + throw new IOException("Exception while reading/downloading metadata file", e); } if (exceptionSetOnce.get() != null) { @@ -396,6 +420,26 @@ public TranslogTransferMetadata readMetadata() throws IOException { return metadataSetOnce.get(); } + public TranslogTransferMetadata readMetadata(String metadataFilename) throws IOException { + boolean downloadStatus = false; + TranslogTransferMetadata translogTransferMetadata = null; + long downloadStartTime = System.nanoTime(), bytesToRead = 0; + try (InputStream inputStream = transferService.downloadBlob(remoteMetadataTransferPath, metadataFilename)) { + // Capture number of bytes for stats before reading + bytesToRead = inputStream.available(); + IndexInput indexInput = new ByteArrayIndexInput("metadata file", inputStream.readAllBytes()); + translogTransferMetadata = metadataStreamWrapper.readStream(indexInput); + downloadStatus = true; + } finally { + remoteTranslogTransferTracker.addDownloadTimeInMillis((System.nanoTime() - downloadStartTime) / 1_000_000L); + logger.debug("translogMetadataDownloadStatus={}", downloadStatus); + if (downloadStatus) { + remoteTranslogTransferTracker.addDownloadBytesSucceeded(bytesToRead); + } + } + return translogTransferMetadata; + } + private TransferFileSnapshot prepareMetadata(TransferSnapshot transferSnapshot) throws IOException { Map generationPrimaryTermMap = transferSnapshot.getTranslogFileSnapshots().stream().map(s -> { assert s instanceof 
TranslogFileSnapshot; @@ -501,6 +545,14 @@ public void onFailure(Exception e) { }); } + public Set listPrimaryTermsInRemote() throws IOException { + Set primaryTermsStr = transferService.listFolders(remoteDataTransferPath); + if (primaryTermsStr != null) { + return primaryTermsStr.stream().map(Long::parseLong).collect(Collectors.toSet()); + } + return new HashSet<>(); + } + /** * Handles deletion of all translog files associated with a primary term. * @@ -549,6 +601,16 @@ public void onFailure(Exception e) { }); } + public void listTranslogMetadataFilesAsync(ActionListener> listener) { + transferService.listAllInSortedOrderAsync( + ThreadPool.Names.REMOTE_PURGE, + remoteMetadataTransferPath, + TranslogTransferMetadata.METADATA_PREFIX, + Integer.MAX_VALUE, + listener + ); + } + public void deleteStaleTranslogMetadataFilesAsync(Runnable onCompletion) { try { transferService.listAllInSortedOrderAsync( @@ -635,7 +697,7 @@ public void onFailure(Exception e) { * @param files list of metadata files to be deleted. * @param onCompletion runnable to run on completion of deletion regardless of success/failure. */ - private void deleteMetadataFilesAsync(List files, Runnable onCompletion) { + public void deleteMetadataFilesAsync(List files, Runnable onCompletion) { try { transferService.deleteBlobsAsync(ThreadPool.Names.REMOTE_PURGE, remoteMetadataTransferPath, files, new ActionListener<>() { @Override @@ -659,4 +721,23 @@ public void onFailure(Exception e) { public int getMaxRemoteTranslogReadersSettings() { return this.remoteStoreSettings.getMaxRemoteTranslogReaders(); } + + public void populateFileTrackerWithLocalState(List readers) { + if (readers == null) { + return; + } + for (TranslogReader reader : readers) { + long generation = reader.getGeneration(); + String tlogFilename = Translog.getFilename(generation); + fileTransferTracker.add(tlogFilename, true); + if (isTranslogMetadataEnabled) { + String ckpFilename = Translog.getCommitCheckpointFileName(generation); + fileTransferTracker.add(ckpFilename, true); + } + } + } + + protected FileTransferTracker getFileTransferTracker() { + return fileTransferTracker; + } } diff --git a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferMetadata.java b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferMetadata.java index acd7574e648a3..3b8885055e8f7 100644 --- a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferMetadata.java +++ b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferMetadata.java @@ -8,6 +8,10 @@ package org.opensearch.index.translog.transfer; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.message.ParameterizedMessage; +import org.opensearch.Version; import org.opensearch.common.SetOnce; import org.opensearch.common.collect.Tuple; import org.opensearch.index.remote.RemoteStoreUtils; @@ -15,6 +19,7 @@ import java.util.Arrays; import java.util.Map; import java.util.Objects; +import java.util.Optional; /** * The metadata associated with every transfer {@link TransferSnapshot}. 
The metadata is uploaded at the end of the @@ -25,6 +30,8 @@ */ public class TranslogTransferMetadata { + public static final Logger logger = LogManager.getLogger(TranslogTransferMetadata.class); + private final long primaryTerm; private final long generation; @@ -102,11 +109,28 @@ public String getFileName() { RemoteStoreUtils.invertLong(createdAt), String.valueOf(Objects.hash(nodeId)), RemoteStoreUtils.invertLong(minTranslogGeneration), + String.valueOf(getMinPrimaryTermReferred()), String.valueOf(CURRENT_VERSION) ) ); } + private long getMinPrimaryTermReferred() { + if (generationToPrimaryTermMapper.get() == null || generationToPrimaryTermMapper.get().values().isEmpty()) { + return -1; + } + Optional minPrimaryTerm = generationToPrimaryTermMapper.get() + .values() + .stream() + .map(s -> Long.parseLong(s)) + .min(Long::compareTo); + if (minPrimaryTerm.isPresent()) { + return minPrimaryTerm.get(); + } else { + return -1; + } + } + public static Tuple, String> getNodeIdByPrimaryTermAndGeneration(String filename) { String[] tokens = filename.split(METADATA_SEPARATOR); if (tokens.length < 6) { @@ -128,6 +152,52 @@ public static Tuple getNodeIdByPrimaryTermAndGen(String filename return new Tuple<>(primaryTermAndGen, nodeId); } + public static Tuple getMinMaxTranslogGenerationFromFilename(String filename) { + String[] tokens = filename.split(METADATA_SEPARATOR); + if (tokens.length < 7) { + // For versions < 2.17, we don't have min translog generation. + return null; + } + assert Version.CURRENT.onOrAfter(Version.V_2_17_0); + try { + // instead of direct index, we go backwards to avoid running into same separator in nodeId + String minGeneration = tokens[tokens.length - 3]; + String maxGeneration = tokens[2]; + return new Tuple<>(RemoteStoreUtils.invertLong(minGeneration), RemoteStoreUtils.invertLong(maxGeneration)); + } catch (Exception e) { + logger.error(() -> new ParameterizedMessage("Exception while getting min and max translog generation from: {}", filename), e); + return null; + } + } + + public static Tuple getMinMaxPrimaryTermFromFilename(String filename) { + String[] tokens = filename.split(METADATA_SEPARATOR); + if (tokens.length < 7) { + // For versions < 2.17, we don't have min primary term. 
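// ---- Illustrative sketch (not the OpenSearch implementation) ----
// The metadata filename changes above append the minimum referenced primary term and read fields
// back by splitting on METADATA_SEPARATOR, indexing the newer tokens from the end so that the
// separator appearing inside the node-id hash cannot shift positions. The token layout and the
// invert() helper below (Long.MAX_VALUE minus the value, zero padded) are assumptions made for
// this example; only the parsing pattern mirrors the change.
import java.util.Locale;

public final class TranslogMetadataFilenameSketch {
    static final String SEPARATOR = "__";

    // Assumed stand-in for RemoteStoreUtils.invertLong: reverse-lexicographic encoding of a long.
    static String invert(long value) {
        return String.format(Locale.ROOT, "%019d", Long.MAX_VALUE - value);
    }

    static long invert(String value) {
        return Long.MAX_VALUE - Long.parseLong(value);
    }

    public static void main(String[] args) {
        // Assumed layout:
        // metadata__<invPrimaryTerm>__<invGeneration>__<invCreatedAt>__<nodeIdHash>__<invMinGeneration>__<minPrimaryTerm>__<version>
        String filename = String.join(
            SEPARATOR,
            "metadata",
            invert(5),                   // primary term, inverted
            invert(42),                  // generation, inverted
            invert(1_724_000_000_000L),  // created-at millis, inverted
            "12345",                     // Objects.hash(nodeId)
            invert(40),                  // min translog generation, inverted
            "3",                         // min primary term referred (the field added by this change)
            "2"                          // metadata version
        );

        String[] tokens = filename.split(SEPARATOR);
        // Index from the end for the newer fields, mirroring the "go backwards to avoid running
        // into same separator in nodeId" comment above.
        long minPrimaryTerm = Long.parseLong(tokens[tokens.length - 2]);
        long maxPrimaryTerm = invert(tokens[1]);
        long minGeneration = invert(tokens[tokens.length - 3]);
        long maxGeneration = invert(tokens[2]);
        System.out.println("primary term range: " + minPrimaryTerm + ".." + maxPrimaryTerm); // 3..5
        System.out.println("generation range:   " + minGeneration + ".." + maxGeneration);   // 40..42
    }
}
// ---- end of illustrative sketch ----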
+ return null; + } + assert Version.CURRENT.onOrAfter(Version.V_2_17_0); + try { + // instead of direct index, we go backwards to avoid running into same separator in nodeId + String minPrimaryTerm = tokens[tokens.length - 2]; + String maxPrimaryTerm = tokens[1]; + return new Tuple<>(Long.parseLong(minPrimaryTerm), RemoteStoreUtils.invertLong(maxPrimaryTerm)); + } catch (Exception e) { + logger.error(() -> new ParameterizedMessage("Exception while getting min and max primary term from: {}", filename), e); + return null; + } + } + + public static long getPrimaryTermFromFileName(String filename) { + String[] tokens = filename.split(METADATA_SEPARATOR); + try { + return RemoteStoreUtils.invertLong(tokens[1]); + } catch (Exception e) { + logger.error(() -> new ParameterizedMessage("Exception while getting max primary term from: {}", filename), e); + return -1; + } + } + @Override public int hashCode() { return Objects.hash(primaryTerm, generation); diff --git a/server/src/main/java/org/opensearch/indices/IndicesService.java b/server/src/main/java/org/opensearch/indices/IndicesService.java index be16d4ea184fa..4593aedfe1f83 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesService.java +++ b/server/src/main/java/org/opensearch/indices/IndicesService.java @@ -359,6 +359,7 @@ public class IndicesService extends AbstractLifecycleComponent private final SearchRequestStats searchRequestStats; private final FileCache fileCache; private final CompositeIndexSettings compositeIndexSettings; + private final Consumer replicator; @Override protected void doStart() { @@ -395,7 +396,8 @@ public IndicesService( CacheService cacheService, RemoteStoreSettings remoteStoreSettings, FileCache fileCache, - CompositeIndexSettings compositeIndexSettings + CompositeIndexSettings compositeIndexSettings, + Consumer replicator ) { this.settings = settings; this.threadPool = threadPool; @@ -504,6 +506,7 @@ protected void closeInternal() { this.remoteStoreSettings = remoteStoreSettings; this.compositeIndexSettings = compositeIndexSettings; this.fileCache = fileCache; + this.replicator = replicator; } public IndicesService( @@ -564,6 +567,7 @@ public IndicesService( cacheService, remoteStoreSettings, null, + null, null ); } @@ -980,7 +984,8 @@ private synchronized IndexService createIndexService( translogFactorySupplier, this::getClusterDefaultRefreshInterval, this.recoverySettings, - this.remoteStoreSettings + this.remoteStoreSettings, + replicator ); } diff --git a/server/src/main/java/org/opensearch/indices/InvalidIndexContextException.java b/server/src/main/java/org/opensearch/indices/InvalidIndexContextException.java new file mode 100644 index 0000000000000..40e9d25bf95c3 --- /dev/null +++ b/server/src/main/java/org/opensearch/indices/InvalidIndexContextException.java @@ -0,0 +1,40 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.indices; + +import org.opensearch.OpenSearchException; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.rest.RestStatus; + +import java.io.IOException; + +/** + * Exception when the context provided in the creation of an index is invalid. 
+ */ +public class InvalidIndexContextException extends OpenSearchException { + + /** + * + * @param indexName name of the index + * @param name context name provided + * @param description error message + */ + public InvalidIndexContextException(String indexName, String name, String description) { + super("Invalid context name [{}] provide for index: {}, [{}]", name, indexName, description); + } + + public InvalidIndexContextException(StreamInput in) throws IOException { + super(in); + } + + @Override + public RestStatus status() { + return RestStatus.BAD_REQUEST; + } +} diff --git a/server/src/main/java/org/opensearch/indices/NodeIndicesStats.java b/server/src/main/java/org/opensearch/indices/NodeIndicesStats.java index 83a759cdb71c5..4c28c08d8061b 100644 --- a/server/src/main/java/org/opensearch/indices/NodeIndicesStats.java +++ b/server/src/main/java/org/opensearch/indices/NodeIndicesStats.java @@ -83,7 +83,7 @@ public class NodeIndicesStats implements Writeable, ToXContentFragment { public NodeIndicesStats(StreamInput in) throws IOException { stats = new CommonStats(in); - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + if (in.getVersion().onOrAfter(Version.V_2_17_0)) { // contains statsByIndex if (in.readBoolean()) { statsByIndex = readStatsByIndex(in); @@ -284,7 +284,7 @@ public RecoveryStats getRecoveryStats() { public void writeTo(StreamOutput out) throws IOException { stats.writeTo(out); - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (out.getVersion().onOrAfter(Version.V_2_17_0)) { out.writeBoolean(statsByIndex != null); if (statsByIndex != null) { writeStatsByIndex(out); diff --git a/server/src/main/java/org/opensearch/indices/RemoteStoreSettings.java b/server/src/main/java/org/opensearch/indices/RemoteStoreSettings.java index 55280ca5c96d6..4bd0caa709436 100644 --- a/server/src/main/java/org/opensearch/indices/RemoteStoreSettings.java +++ b/server/src/main/java/org/opensearch/indices/RemoteStoreSettings.java @@ -164,6 +164,26 @@ public class RemoteStoreSettings { Setting.Property.NodeScope ); + /** + * Controls the fixed prefix for the translog path on remote store. + */ + public static final Setting CLUSTER_REMOTE_STORE_TRANSLOG_PATH_PREFIX = Setting.simpleString( + "cluster.remote_store.translog.path.prefix", + "", + Property.NodeScope, + Property.Final + ); + + /** + * Controls the fixed prefix for the segments path on remote store. 
+ */ + public static final Setting CLUSTER_REMOTE_STORE_SEGMENTS_PATH_PREFIX = Setting.simpleString( + "cluster.remote_store.segments.path.prefix", + "", + Property.NodeScope, + Property.Final + ); + private volatile TimeValue clusterRemoteTranslogBufferInterval; private volatile int minRemoteSegmentMetadataFiles; private volatile TimeValue clusterRemoteTranslogTransferTimeout; @@ -175,6 +195,8 @@ public class RemoteStoreSettings { private static volatile boolean isPinnedTimestampsEnabled; private static volatile TimeValue pinnedTimestampsSchedulerInterval; private static volatile TimeValue pinnedTimestampsLookbackInterval; + private final String translogPathFixedPrefix; + private final String segmentsPathFixedPrefix; public RemoteStoreSettings(Settings settings, ClusterSettings clusterSettings) { clusterRemoteTranslogBufferInterval = CLUSTER_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING.get(settings); @@ -216,6 +238,9 @@ public RemoteStoreSettings(Settings settings, ClusterSettings clusterSettings) { pinnedTimestampsSchedulerInterval = CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_SCHEDULER_INTERVAL.get(settings); pinnedTimestampsLookbackInterval = CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_LOOKBACK_INTERVAL.get(settings); isPinnedTimestampsEnabled = CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED.get(settings); + + translogPathFixedPrefix = CLUSTER_REMOTE_STORE_TRANSLOG_PATH_PREFIX.get(settings); + segmentsPathFixedPrefix = CLUSTER_REMOTE_STORE_SEGMENTS_PATH_PREFIX.get(settings); } public TimeValue getClusterRemoteTranslogBufferInterval() { @@ -292,7 +317,20 @@ public static TimeValue getPinnedTimestampsLookbackInterval() { return pinnedTimestampsLookbackInterval; } + // Visible for testing + public static void setPinnedTimestampsLookbackInterval(TimeValue pinnedTimestampsLookbackInterval) { + RemoteStoreSettings.pinnedTimestampsLookbackInterval = pinnedTimestampsLookbackInterval; + } + public static boolean isPinnedTimestampsEnabled() { return isPinnedTimestampsEnabled; } + + public String getTranslogPathFixedPrefix() { + return translogPathFixedPrefix; + } + + public String getSegmentsPathFixedPrefix() { + return segmentsPathFixedPrefix; + } } diff --git a/server/src/main/java/org/opensearch/indices/recovery/LocalStorePeerRecoverySourceHandler.java b/server/src/main/java/org/opensearch/indices/recovery/LocalStorePeerRecoverySourceHandler.java index ac6b2e6b77d18..234e9ba66f340 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/LocalStorePeerRecoverySourceHandler.java +++ b/server/src/main/java/org/opensearch/indices/recovery/LocalStorePeerRecoverySourceHandler.java @@ -12,15 +12,12 @@ import org.opensearch.action.StepListener; import org.opensearch.action.support.ThreadedActionListener; import org.opensearch.action.support.replication.ReplicationResponse; -import org.opensearch.cluster.routing.IndexShardRoutingTable; -import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.common.SetOnce; import org.opensearch.common.concurrent.GatedCloseable; import org.opensearch.common.lease.Releasable; import org.opensearch.common.unit.TimeValue; import org.opensearch.core.action.ActionListener; import org.opensearch.index.engine.RecoveryEngineException; -import org.opensearch.index.seqno.ReplicationTracker; import org.opensearch.index.seqno.RetentionLease; import org.opensearch.index.seqno.RetentionLeaseNotFoundException; import org.opensearch.index.seqno.RetentionLeases; @@ -58,21 +55,7 @@ public LocalStorePeerRecoverySourceHandler( @Override protected void 
innerRecoveryToTarget(ActionListener listener, Consumer onFailure) throws IOException { final SetOnce retentionLeaseRef = new SetOnce<>(); - - RunUnderPrimaryPermit.run(() -> { - final IndexShardRoutingTable routingTable = shard.getReplicationGroup().getRoutingTable(); - ShardRouting targetShardRouting = routingTable.getByAllocationId(request.targetAllocationId()); - if (targetShardRouting == null) { - logger.debug( - "delaying recovery of {} as it is not listed as assigned to target node {}", - request.shardId(), - request.targetNode() - ); - throw new DelayRecoveryException("source node does not have the shard listed in its state as allocated on the node"); - } - assert targetShardRouting.initializing() : "expected recovery target to be initializing but was " + targetShardRouting; - retentionLeaseRef.set(shard.getRetentionLeases().get(ReplicationTracker.getPeerRecoveryRetentionLeaseId(targetShardRouting))); - }, shardId + " validating recovery target [" + request.targetAllocationId() + "] registered ", shard, cancellableThreads, logger); + waitForAssignmentPropagate(retentionLeaseRef); final Closeable retentionLock = shard.acquireHistoryRetentionLock(); resources.add(retentionLock); final long startingSeqNo; diff --git a/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java b/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java index 6279a8ec3646c..d746aaa2a0783 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java +++ b/server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java @@ -294,6 +294,7 @@ private void doRecovery(final long recoveryId, final StartRecoveryRequest preExi logger.debug("{} reestablishing recovery from {}", startRequest.shardId(), startRequest.sourceNode()); } } + transportService.sendRequest( startRequest.sourceNode(), actionName, diff --git a/server/src/main/java/org/opensearch/indices/recovery/RecoverySourceHandler.java b/server/src/main/java/org/opensearch/indices/recovery/RecoverySourceHandler.java index 8966516d3ef7f..7aa90d2f9fa3e 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RecoverySourceHandler.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RecoverySourceHandler.java @@ -41,10 +41,14 @@ import org.apache.lucene.util.ArrayUtil; import org.opensearch.action.ActionRunnable; import org.opensearch.action.StepListener; +import org.opensearch.action.bulk.BackoffPolicy; import org.opensearch.action.support.PlainActionFuture; import org.opensearch.action.support.ThreadedActionListener; import org.opensearch.action.support.replication.ReplicationResponse; +import org.opensearch.cluster.routing.IndexShardRoutingTable; +import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.common.CheckedRunnable; +import org.opensearch.common.SetOnce; import org.opensearch.common.StopWatch; import org.opensearch.common.concurrent.GatedCloseable; import org.opensearch.common.lease.Releasable; @@ -59,6 +63,7 @@ import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.unit.ByteSizeValue; import org.opensearch.index.engine.RecoveryEngineException; +import org.opensearch.index.seqno.ReplicationTracker; import org.opensearch.index.seqno.RetentionLease; import org.opensearch.index.seqno.RetentionLeaseNotFoundException; import org.opensearch.index.seqno.RetentionLeases; @@ -79,12 +84,14 @@ import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; +import 
java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; import java.util.function.Consumer; import java.util.function.IntSupplier; import java.util.stream.StreamSupport; @@ -191,6 +198,50 @@ public void recoverToTarget(ActionListener listener) { protected abstract void innerRecoveryToTarget(ActionListener listener, Consumer onFailure) throws IOException; + /* + Waits for cluster state propagation of assignment of replica on the target node + */ + void waitForAssignmentPropagate(SetOnce retentionLeaseRef) { + BackoffPolicy EXPONENTIAL_BACKOFF_POLICY = BackoffPolicy.exponentialBackoff(TimeValue.timeValueMillis(1000), 5); + AtomicReference targetShardRouting = new AtomicReference<>(); + Iterator backoffDelayIterator = EXPONENTIAL_BACKOFF_POLICY.iterator(); + while (backoffDelayIterator.hasNext()) { + RunUnderPrimaryPermit.run(() -> { + final IndexShardRoutingTable routingTable = shard.getReplicationGroup().getRoutingTable(); + targetShardRouting.set(routingTable.getByAllocationId(request.targetAllocationId())); + if (targetShardRouting.get() == null) { + logger.info( + "delaying recovery of {} as it is not listed as assigned to target node {}", + request.shardId(), + request.targetNode() + ); + Thread.sleep(backoffDelayIterator.next().millis()); + } + if (targetShardRouting.get() != null) { + assert targetShardRouting.get().initializing() : "expected recovery target to be initializing but was " + + targetShardRouting; + retentionLeaseRef.set( + shard.getRetentionLeases().get(ReplicationTracker.getPeerRecoveryRetentionLeaseId(targetShardRouting.get())) + ); + } + + }, + shardId + " validating recovery target [" + request.targetAllocationId() + "] registered ", + shard, + cancellableThreads, + logger + ); + + if (targetShardRouting.get() != null) { + return; + } + } + if (targetShardRouting.get() != null) { + return; + } + throw new DelayRecoveryException("source node does not have the shard listed in its state as allocated on the node"); + } + protected void finalizeStepAndCompleteFuture( long startingSeqNo, StepListener sendSnapshotStep, diff --git a/server/src/main/java/org/opensearch/indices/recovery/RemoteStorePeerRecoverySourceHandler.java b/server/src/main/java/org/opensearch/indices/recovery/RemoteStorePeerRecoverySourceHandler.java index 66c7a3b48f28f..383ed92314165 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RemoteStorePeerRecoverySourceHandler.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RemoteStorePeerRecoverySourceHandler.java @@ -10,11 +10,13 @@ import org.apache.lucene.index.IndexCommit; import org.opensearch.action.StepListener; +import org.opensearch.common.SetOnce; import org.opensearch.common.concurrent.GatedCloseable; import org.opensearch.common.lease.Releasable; import org.opensearch.common.unit.TimeValue; import org.opensearch.core.action.ActionListener; import org.opensearch.index.engine.RecoveryEngineException; +import org.opensearch.index.seqno.RetentionLease; import org.opensearch.index.seqno.SequenceNumbers; import org.opensearch.index.shard.IndexShard; import org.opensearch.indices.RunUnderPrimaryPermit; @@ -48,7 +50,8 @@ protected void innerRecoveryToTarget(ActionListener listener, // A replica of an index with remote translog does not require the translogs locally 
and keeps receiving the // updated segments file on refresh, flushes, and merges. In recovery, here, only file-based recovery is performed // and there is no translog replay done. - + final SetOnce retentionLeaseRef = new SetOnce<>(); + waitForAssignmentPropagate(retentionLeaseRef); final StepListener sendFileStep = new StepListener<>(); final StepListener prepareEngineStep = new StepListener<>(); final StepListener sendSnapshotStep = new StepListener<>(); @@ -102,4 +105,5 @@ protected void innerRecoveryToTarget(ActionListener listener, finalizeStepAndCompleteFuture(startingSeqNo, sendSnapshotStep, sendFileStep, prepareEngineStep, onFailure); } + } diff --git a/server/src/main/java/org/opensearch/indices/recovery/StartRecoveryRequest.java b/server/src/main/java/org/opensearch/indices/recovery/StartRecoveryRequest.java index 1df0d3861f686..7309712314acd 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/StartRecoveryRequest.java +++ b/server/src/main/java/org/opensearch/indices/recovery/StartRecoveryRequest.java @@ -144,8 +144,8 @@ public void writeTo(StreamOutput out) throws IOException { out.writeLong(recoveryId); shardId.writeTo(out); out.writeString(targetAllocationId); - sourceNode.writeTo(out); - targetNode.writeTo(out); + sourceNode.writeToWithAttribute(out); + targetNode.writeToWithAttribute(out); metadataSnapshot.writeTo(out); out.writeBoolean(primaryRelocation); out.writeLong(startingSeqNo); diff --git a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTargetService.java b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTargetService.java index f6ed113019897..8fee3f671ecc9 100644 --- a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTargetService.java +++ b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTargetService.java @@ -11,9 +11,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; -import org.apache.lucene.index.CorruptIndexException; import org.opensearch.ExceptionsHelper; -import org.opensearch.OpenSearchCorruptionException; import org.opensearch.action.support.ChannelActionListener; import org.opensearch.cluster.ClusterChangedEvent; import org.opensearch.cluster.ClusterStateListener; @@ -24,7 +22,6 @@ import org.opensearch.common.lifecycle.AbstractLifecycleComponent; import org.opensearch.common.settings.Settings; import org.opensearch.common.util.CancellableThreads; -import org.opensearch.common.util.concurrent.AbstractRunnable; import org.opensearch.common.util.concurrent.ConcurrentCollections; import org.opensearch.core.action.ActionListener; import org.opensearch.core.index.shard.ShardId; @@ -33,7 +30,6 @@ import org.opensearch.index.shard.IndexEventListener; import org.opensearch.index.shard.IndexShard; import org.opensearch.index.shard.IndexShardState; -import org.opensearch.index.store.Store; import org.opensearch.indices.IndicesService; import org.opensearch.indices.recovery.FileChunkRequest; import org.opensearch.indices.recovery.ForceSyncRequest; @@ -61,7 +57,7 @@ import static org.opensearch.indices.replication.SegmentReplicationSourceService.Actions.UPDATE_VISIBLE_CHECKPOINT; /** - * Service class that orchestrates replication events on replicas. + * Service class that handles incoming checkpoints to initiate replication events on replicas. 
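// ---- Illustrative sketch (not the OpenSearch implementation) ----
// waitForAssignmentPropagate above retries the routing-table lookup with exponential backoff
// instead of failing the recovery on the first miss. The JDK-only helper below shows the same
// poll-then-back-off pattern; the BackoffPolicy.exponentialBackoff(1s, 5) policy is only loosely
// mirrored, and the supplier, delays, and exception type are assumptions for this example.
import java.util.Optional;
import java.util.function.Supplier;

public final class BackoffWaitSketch {

    static <T> T waitWithBackoff(Supplier<Optional<T>> lookup, long baseDelayMillis, int maxRounds)
        throws InterruptedException {
        long delay = baseDelayMillis;
        for (int round = 0; round < maxRounds; round++) {
            Optional<T> result = lookup.get();
            if (result.isPresent()) {
                return result.get();   // assignment became visible, stop polling
            }
            Thread.sleep(delay);       // back off before re-reading the routing table
            delay *= 2;                // exponential growth between attempts
        }
        // Mirrors throwing DelayRecoveryException once all rounds are exhausted.
        throw new IllegalStateException("target shard not assigned after " + maxRounds + " attempts");
    }

    public static void main(String[] args) throws InterruptedException {
        long start = System.nanoTime();
        // Pretend the routing entry only becomes visible after ~150 ms.
        String routing = waitWithBackoff(
            () -> (System.nanoTime() - start) > 150_000_000L ? Optional.of("replica-routing") : Optional.<String>empty(),
            50,
            5
        );
        System.out.println(routing);
    }
}
// ---- end of illustrative sketch ----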
* * @opensearch.internal */ @@ -72,10 +68,6 @@ public class SegmentReplicationTargetService extends AbstractLifecycleComponent private final ThreadPool threadPool; private final RecoverySettings recoverySettings; - private final ReplicationCollection onGoingReplications; - - private final Map completedReplications = ConcurrentCollections.newConcurrentMap(); - private final SegmentReplicationSourceFactory sourceFactory; protected final Map latestReceivedCheckpoint = ConcurrentCollections.newConcurrentMap(); @@ -83,6 +75,7 @@ public class SegmentReplicationTargetService extends AbstractLifecycleComponent private final IndicesService indicesService; private final ClusterService clusterService; private final TransportService transportService; + private final SegmentReplicator replicator; /** * The internal actions @@ -94,6 +87,7 @@ public static class Actions { public static final String FORCE_SYNC = "internal:index/shard/replication/segments_sync"; } + @Deprecated public SegmentReplicationTargetService( final ThreadPool threadPool, final RecoverySettings recoverySettings, @@ -113,6 +107,7 @@ public SegmentReplicationTargetService( ); } + @Deprecated public SegmentReplicationTargetService( final ThreadPool threadPool, final RecoverySettings recoverySettings, @@ -121,14 +116,34 @@ public SegmentReplicationTargetService( final IndicesService indicesService, final ClusterService clusterService, final ReplicationCollection ongoingSegmentReplications + ) { + this( + threadPool, + recoverySettings, + transportService, + sourceFactory, + indicesService, + clusterService, + new SegmentReplicator(threadPool) + ); + } + + public SegmentReplicationTargetService( + final ThreadPool threadPool, + final RecoverySettings recoverySettings, + final TransportService transportService, + final SegmentReplicationSourceFactory sourceFactory, + final IndicesService indicesService, + final ClusterService clusterService, + final SegmentReplicator replicator ) { this.threadPool = threadPool; this.recoverySettings = recoverySettings; - this.onGoingReplications = ongoingSegmentReplications; this.sourceFactory = sourceFactory; this.indicesService = indicesService; this.clusterService = clusterService; this.transportService = transportService; + this.replicator = replicator; transportService.registerRequestHandler( Actions.FILE_CHUNK, @@ -142,6 +157,7 @@ public SegmentReplicationTargetService( ForceSyncRequest::new, new ForceSyncTransportRequestHandler() ); + replicator.setSourceFactory(sourceFactory); } @Override @@ -154,7 +170,7 @@ protected void doStart() { @Override protected void doStop() { if (DiscoveryNode.isDataNode(clusterService.getSettings())) { - assert onGoingReplications.size() == 0 : "Replication collection should be empty on shutdown"; + assert replicator.size() == 0 : "Replication collection should be empty on shutdown"; clusterService.removeListener(this); } } @@ -171,7 +187,7 @@ public void clusterChanged(ClusterChangedEvent event) { if (indexService.getIndexSettings().isSegRepEnabledOrRemoteNode() && event.indexRoutingTableChanged(indexService.index().getName())) { for (IndexShard shard : indexService) { - if (shard.routingEntry().primary() == false) { + if (shard.routingEntry().primary() == false && shard.routingEntry().isSearchOnly() == false) { // for this shard look up its primary routing, if it has completed a relocation trigger replication final String previousNode = event.previousState() .routingTable() @@ -199,7 +215,7 @@ public void clusterChanged(ClusterChangedEvent event) { @Override public 
void beforeIndexShardClosed(ShardId shardId, @Nullable IndexShard indexShard, Settings indexSettings) { if (indexShard != null && indexShard.indexSettings().isSegRepEnabledOrRemoteNode()) { - onGoingReplications.cancelForShard(indexShard.shardId(), "Shard closing"); + replicator.cancel(indexShard.shardId(), "Shard closing"); latestReceivedCheckpoint.remove(shardId); } } @@ -224,7 +240,7 @@ public void shardRoutingChanged(IndexShard indexShard, @Nullable ShardRouting ol && indexShard.indexSettings().isSegRepEnabledOrRemoteNode() && oldRouting.primary() == false && newRouting.primary()) { - onGoingReplications.cancelForShard(indexShard.shardId(), "Shard has been promoted to primary"); + replicator.cancel(indexShard.shardId(), "Shard has been promoted to primary"); latestReceivedCheckpoint.remove(indexShard.shardId()); } } @@ -234,9 +250,7 @@ public void shardRoutingChanged(IndexShard indexShard, @Nullable ShardRouting ol */ @Nullable public SegmentReplicationState getOngoingEventSegmentReplicationState(ShardId shardId) { - return Optional.ofNullable(onGoingReplications.getOngoingReplicationTarget(shardId)) - .map(SegmentReplicationTarget::state) - .orElse(null); + return Optional.ofNullable(replicator.get(shardId)).map(SegmentReplicationTarget::state).orElse(null); } /** @@ -244,7 +258,7 @@ public SegmentReplicationState getOngoingEventSegmentReplicationState(ShardId sh */ @Nullable public SegmentReplicationState getlatestCompletedEventSegmentReplicationState(ShardId shardId) { - return completedReplications.get(shardId); + return replicator.getCompleted(shardId); } /** @@ -257,11 +271,11 @@ public SegmentReplicationState getSegmentReplicationState(ShardId shardId) { } public ReplicationRef get(long replicationId) { - return onGoingReplications.get(replicationId); + return replicator.get(replicationId); } public SegmentReplicationTarget get(ShardId shardId) { - return onGoingReplications.getOngoingReplicationTarget(shardId); + return replicator.get(shardId); } /** @@ -285,7 +299,7 @@ public synchronized void onNewCheckpoint(final ReplicationCheckpoint receivedChe // checkpoint to be replayed once the shard is Active. if (replicaShard.state().equals(IndexShardState.STARTED) == true) { // Checks if received checkpoint is already present and ahead then it replaces old received checkpoint - SegmentReplicationTarget ongoingReplicationTarget = onGoingReplications.getOngoingReplicationTarget(replicaShard.shardId()); + SegmentReplicationTarget ongoingReplicationTarget = replicator.get(replicaShard.shardId()); if (ongoingReplicationTarget != null) { if (ongoingReplicationTarget.getCheckpoint().getPrimaryTerm() < receivedCheckpoint.getPrimaryTerm()) { logger.debug( @@ -504,28 +518,12 @@ public SegmentReplicationTarget startReplication( final ReplicationCheckpoint checkpoint, final SegmentReplicationListener listener ) { - final SegmentReplicationTarget target = new SegmentReplicationTarget( - indexShard, - checkpoint, - sourceFactory.get(indexShard), - listener - ); - startReplication(target); - return target; + return replicator.startReplication(indexShard, checkpoint, sourceFactory.get(indexShard), listener); } // pkg-private for integration tests void startReplication(final SegmentReplicationTarget target) { - final long replicationId; - try { - replicationId = onGoingReplications.startSafe(target, recoverySettings.activityTimeout()); - } catch (ReplicationFailedException e) { - // replication already running for shard. 
- target.fail(e, false); - return; - } - logger.trace(() -> new ParameterizedMessage("Added new replication to collection {}", target.description())); - threadPool.generic().execute(new ReplicationRunner(replicationId)); + replicator.startReplication(target, recoverySettings.activityTimeout()); } /** @@ -550,81 +548,6 @@ default void onFailure(ReplicationState state, ReplicationFailedException e, boo void onReplicationFailure(SegmentReplicationState state, ReplicationFailedException e, boolean sendShardFailure); } - /** - * Runnable implementation to trigger a replication event. - */ - private class ReplicationRunner extends AbstractRunnable { - - final long replicationId; - - public ReplicationRunner(long replicationId) { - this.replicationId = replicationId; - } - - @Override - public void onFailure(Exception e) { - onGoingReplications.fail(replicationId, new ReplicationFailedException("Unexpected Error during replication", e), false); - } - - @Override - public void doRun() { - start(replicationId); - } - } - - private void start(final long replicationId) { - final SegmentReplicationTarget target; - try (ReplicationRef replicationRef = onGoingReplications.get(replicationId)) { - // This check is for handling edge cases where the reference is removed before the ReplicationRunner is started by the - // threadpool. - if (replicationRef == null) { - return; - } - target = replicationRef.get(); - } - target.startReplication(new ActionListener<>() { - @Override - public void onResponse(Void o) { - logger.debug(() -> new ParameterizedMessage("Finished replicating {} marking as done.", target.description())); - onGoingReplications.markAsDone(replicationId); - if (target.state().getIndex().recoveredFileCount() != 0 && target.state().getIndex().recoveredBytes() != 0) { - completedReplications.put(target.shardId(), target.state()); - } - } - - @Override - public void onFailure(Exception e) { - logger.debug("Replication failed {}", target.description()); - if (isStoreCorrupt(target) || e instanceof CorruptIndexException || e instanceof OpenSearchCorruptionException) { - onGoingReplications.fail(replicationId, new ReplicationFailedException("Store corruption during replication", e), true); - return; - } - onGoingReplications.fail(replicationId, new ReplicationFailedException("Segment Replication failed", e), false); - } - }); - } - - private boolean isStoreCorrupt(SegmentReplicationTarget target) { - // ensure target is not already closed. In that case - // we can assume the store is not corrupt and that the replication - // event completed successfully. - if (target.refCount() > 0) { - final Store store = target.store(); - if (store.tryIncRef()) { - try { - return store.isMarkedCorrupted(); - } catch (IOException ex) { - logger.warn("Unable to determine if store is corrupt", ex); - return false; - } finally { - store.decRef(); - } - } - } - // store already closed. 
- return false; - } - private class FileChunkTransportRequestHandler implements TransportRequestHandler { // How many bytes we've copied since we last called RateLimiter.pause @@ -632,7 +555,7 @@ private class FileChunkTransportRequestHandler implements TransportRequestHandle @Override public void messageReceived(final FileChunkRequest request, TransportChannel channel, Task task) throws Exception { - try (ReplicationRef ref = onGoingReplications.getSafe(request.recoveryId(), request.shardId())) { + try (ReplicationRef ref = replicator.get(request.recoveryId(), request.shardId())) { final SegmentReplicationTarget target = ref.get(); final ActionListener listener = target.createOrFinishListener(channel, Actions.FILE_CHUNK, request); target.handleFileChunk(request, target, bytesSinceLastPause, recoverySettings.replicationRateLimiter(), listener); diff --git a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicator.java b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicator.java new file mode 100644 index 0000000000000..ad3bc1933208c --- /dev/null +++ b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicator.java @@ -0,0 +1,217 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.indices.replication; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.message.ParameterizedMessage; +import org.apache.lucene.index.CorruptIndexException; +import org.opensearch.OpenSearchCorruptionException; +import org.opensearch.common.SetOnce; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.concurrent.AbstractRunnable; +import org.opensearch.common.util.concurrent.ConcurrentCollections; +import org.opensearch.core.action.ActionListener; +import org.opensearch.core.index.shard.ShardId; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.index.store.Store; +import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; +import org.opensearch.indices.replication.common.ReplicationCollection; +import org.opensearch.indices.replication.common.ReplicationFailedException; +import org.opensearch.indices.replication.common.ReplicationListener; +import org.opensearch.threadpool.ThreadPool; + +import java.io.IOException; +import java.util.Map; + +/** + * This class is responsible for managing segment replication events on replicas. + * It uses a {@link ReplicationCollection} to track ongoing replication events and + * manages the state of each replication event. + * + * @opensearch.internal + */ +public class SegmentReplicator { + + private static final Logger logger = LogManager.getLogger(SegmentReplicator.class); + + private final ReplicationCollection onGoingReplications; + private final Map completedReplications = ConcurrentCollections.newConcurrentMap(); + private final ThreadPool threadPool; + + private final SetOnce sourceFactory; + + public SegmentReplicator(ThreadPool threadPool) { + this.onGoingReplications = new ReplicationCollection<>(logger, threadPool); + this.threadPool = threadPool; + this.sourceFactory = new SetOnce<>(); + } + + /** + * Starts a replication event for the given shard. 
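// ---- Illustrative sketch (not the OpenSearch implementation) ----
// SegmentReplicator above is constructed before a SegmentReplicationSourceFactory exists, so the
// factory sits in a SetOnce and startReplication(shard) simply returns until setSourceFactory has
// been called. The JDK-only sketch below shows that deferred wiring; all names are hypothetical.
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Function;

public final class DeferredWiringSketch {

    // Write-once holder in the spirit of org.opensearch.common.SetOnce.
    static final class WriteOnce<T> {
        private final AtomicReference<T> ref = new AtomicReference<>();

        void set(T value) {
            if (ref.compareAndSet(null, value) == false) {
                throw new IllegalStateException("already set");
            }
        }

        T get() {
            return ref.get();
        }
    }

    static final class Replicator {
        private final WriteOnce<Function<String, String>> sourceFactory = new WriteOnce<>();

        void setSourceFactory(Function<String, String> factory) {
            sourceFactory.set(factory);
        }

        void startReplication(String shardId) {
            if (sourceFactory.get() == null) {
                return; // not wired yet; mirrors the early return in SegmentReplicator
            }
            System.out.println("replicating " + shardId + " from " + sourceFactory.get().apply(shardId));
        }
    }

    public static void main(String[] args) {
        Replicator replicator = new Replicator();
        replicator.startReplication("shard-0");                   // no-op: factory not set yet
        replicator.setSourceFactory(shard -> "source-for-" + shard);
        replicator.startReplication("shard-0");                   // now runs
    }
}
// ---- end of illustrative sketch ----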
+ * @param shard - {@link IndexShard} replica shard + */ + public void startReplication(IndexShard shard) { + if (sourceFactory.get() == null) return; + startReplication( + shard, + shard.getLatestReplicationCheckpoint(), + sourceFactory.get().get(shard), + new SegmentReplicationTargetService.SegmentReplicationListener() { + @Override + public void onReplicationDone(SegmentReplicationState state) { + logger.trace("Completed replication for {}", shard.shardId()); + } + + @Override + public void onReplicationFailure(SegmentReplicationState state, ReplicationFailedException e, boolean sendShardFailure) { + logger.error(() -> new ParameterizedMessage("Failed segment replication for {}", shard.shardId()), e); + if (sendShardFailure) { + shard.failShard("unrecoverable replication failure", e); + } + } + } + ); + } + + void setSourceFactory(SegmentReplicationSourceFactory sourceFactory) { + this.sourceFactory.set(sourceFactory); + } + + /** + * Start a round of replication and sync to at least the given checkpoint. + * @param indexShard - {@link IndexShard} replica shard + * @param checkpoint - {@link ReplicationCheckpoint} checkpoint to sync to + * @param listener - {@link ReplicationListener} + * @return {@link SegmentReplicationTarget} target event orchestrating the event. + */ + SegmentReplicationTarget startReplication( + final IndexShard indexShard, + final ReplicationCheckpoint checkpoint, + final SegmentReplicationSource source, + final SegmentReplicationTargetService.SegmentReplicationListener listener + ) { + final SegmentReplicationTarget target = new SegmentReplicationTarget(indexShard, checkpoint, source, listener); + startReplication(target, indexShard.getRecoverySettings().activityTimeout()); + return target; + } + + /** + * Runnable implementation to trigger a replication event. + */ + private class ReplicationRunner extends AbstractRunnable { + + final long replicationId; + + public ReplicationRunner(long replicationId) { + this.replicationId = replicationId; + } + + @Override + public void onFailure(Exception e) { + onGoingReplications.fail(replicationId, new ReplicationFailedException("Unexpected Error during replication", e), false); + } + + @Override + public void doRun() { + start(replicationId); + } + } + + private void start(final long replicationId) { + final SegmentReplicationTarget target; + try (ReplicationCollection.ReplicationRef replicationRef = onGoingReplications.get(replicationId)) { + // This check is for handling edge cases where the reference is removed before the ReplicationRunner is started by the + // threadpool. 
+ if (replicationRef == null) { + return; + } + target = replicationRef.get(); + } + target.startReplication(new ActionListener<>() { + @Override + public void onResponse(Void o) { + logger.debug(() -> new ParameterizedMessage("Finished replicating {} marking as done.", target.description())); + onGoingReplications.markAsDone(replicationId); + if (target.state().getIndex().recoveredFileCount() != 0 && target.state().getIndex().recoveredBytes() != 0) { + completedReplications.put(target.shardId(), target.state()); + } + } + + @Override + public void onFailure(Exception e) { + logger.debug("Replication failed {}", target.description()); + if (isStoreCorrupt(target) || e instanceof CorruptIndexException || e instanceof OpenSearchCorruptionException) { + onGoingReplications.fail(replicationId, new ReplicationFailedException("Store corruption during replication", e), true); + return; + } + onGoingReplications.fail(replicationId, new ReplicationFailedException("Segment Replication failed", e), false); + } + }); + } + + // pkg-private for integration tests + void startReplication(final SegmentReplicationTarget target, TimeValue timeout) { + final long replicationId; + try { + replicationId = onGoingReplications.startSafe(target, timeout); + } catch (ReplicationFailedException e) { + // replication already running for shard. + target.fail(e, false); + return; + } + logger.trace(() -> new ParameterizedMessage("Added new replication to collection {}", target.description())); + threadPool.generic().execute(new ReplicationRunner(replicationId)); + } + + private boolean isStoreCorrupt(SegmentReplicationTarget target) { + // ensure target is not already closed. In that case + // we can assume the store is not corrupt and that the replication + // event completed successfully. + if (target.refCount() > 0) { + final Store store = target.store(); + if (store.tryIncRef()) { + try { + return store.isMarkedCorrupted(); + } catch (IOException ex) { + logger.warn("Unable to determine if store is corrupt", ex); + return false; + } finally { + store.decRef(); + } + } + } + // store already closed. 
+ return false; + } + + int size() { + return onGoingReplications.size(); + } + + void cancel(ShardId shardId, String reason) { + onGoingReplications.cancelForShard(shardId, reason); + } + + SegmentReplicationTarget get(ShardId shardId) { + return onGoingReplications.getOngoingReplicationTarget(shardId); + } + + ReplicationCollection.ReplicationRef get(long id) { + return onGoingReplications.get(id); + } + + SegmentReplicationState getCompleted(ShardId shardId) { + return completedReplications.get(shardId); + } + + ReplicationCollection.ReplicationRef get(long id, ShardId shardId) { + return onGoingReplications.getSafe(id, shardId); + } +} diff --git a/server/src/main/java/org/opensearch/indices/store/TransportNodesListShardStoreMetadata.java b/server/src/main/java/org/opensearch/indices/store/TransportNodesListShardStoreMetadata.java index eeee5d8a409aa..2f666f67280bd 100644 --- a/server/src/main/java/org/opensearch/indices/store/TransportNodesListShardStoreMetadata.java +++ b/server/src/main/java/org/opensearch/indices/store/TransportNodesListShardStoreMetadata.java @@ -176,7 +176,7 @@ public Request(StreamInput in) throws IOException { } public Request(ShardId shardId, String customDataPath, DiscoveryNode[] nodes) { - super(nodes); + super(false, nodes); this.shardId = Objects.requireNonNull(shardId); this.customDataPath = Objects.requireNonNull(customDataPath); } diff --git a/server/src/main/java/org/opensearch/indices/store/TransportNodesListShardStoreMetadataBatch.java b/server/src/main/java/org/opensearch/indices/store/TransportNodesListShardStoreMetadataBatch.java index 22b03539cca74..2c84ec31b987d 100644 --- a/server/src/main/java/org/opensearch/indices/store/TransportNodesListShardStoreMetadataBatch.java +++ b/server/src/main/java/org/opensearch/indices/store/TransportNodesListShardStoreMetadataBatch.java @@ -188,7 +188,7 @@ public Request(StreamInput in) throws IOException { } public Request(Map shardAttributes, DiscoveryNode[] nodes) { - super(nodes); + super(false, nodes); this.shardAttributes = Objects.requireNonNull(shardAttributes); } diff --git a/server/src/main/java/org/opensearch/ingest/IngestService.java b/server/src/main/java/org/opensearch/ingest/IngestService.java index 938ca7493926e..0315a960dae92 100644 --- a/server/src/main/java/org/opensearch/ingest/IngestService.java +++ b/server/src/main/java/org/opensearch/ingest/IngestService.java @@ -62,6 +62,7 @@ import org.opensearch.common.collect.Tuple; import org.opensearch.common.metrics.OperationMetrics; import org.opensearch.common.regex.Regex; +import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.concurrent.AbstractRunnable; @@ -107,6 +108,18 @@ public class IngestService implements ClusterStateApplier, ReportingService MAX_NUMBER_OF_INGEST_PROCESSORS = Setting.intSetting( + "cluster.ingest.max_number_processors", + Integer.MAX_VALUE, + 1, + Integer.MAX_VALUE, + Setting.Property.NodeScope, + Setting.Property.Dynamic + ); + private static final Logger logger = LogManager.getLogger(IngestService.class); private final ClusterService clusterService; @@ -123,6 +136,7 @@ public class IngestService implements ClusterStateApplier, ReportingService processorFactories(List ingestPlugins, Processor.Parameters parameters) { @@ -494,6 +514,9 @@ void validatePipeline(Map ingestInfos, PutPipelineReq Map pipelineConfig = XContentHelper.convertToMap(request.getSource(), false, request.getMediaType()).v2(); 
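// ---- Illustrative sketch (not the OpenSearch implementation) ----
// The SegmentReplicator introduced above centralizes bookkeeping that used to live in
// SegmentReplicationTargetService: ongoing replications are tracked so they can be cancelled per
// shard, work runs on a thread pool, and the last completed state per shard is retained for stats.
// The JDK-only sketch below shows that shape with made-up types and a String in place of the state.
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;

public final class ReplicatorBookkeepingSketch {
    private final ExecutorService executor = Executors.newFixedThreadPool(2);
    private final AtomicLong ids = new AtomicLong();
    private final Map<Long, Future<?>> ongoing = new ConcurrentHashMap<>();
    private final Map<String, Long> ongoingByShard = new ConcurrentHashMap<>();
    private final Map<String, String> completedByShard = new ConcurrentHashMap<>();

    long start(String shardId, Runnable replication) {
        long id = ids.incrementAndGet();
        ongoingByShard.put(shardId, id);
        ongoing.put(id, executor.submit(() -> {
            try {
                replication.run();
                completedByShard.put(shardId, "done");    // record last completed state per shard
            } finally {
                ongoing.remove(id);
                ongoingByShard.remove(shardId, id);
            }
        }));
        return id;
    }

    void cancelForShard(String shardId, String reason) {
        Long id = ongoingByShard.get(shardId);
        if (id != null) {
            Future<?> future = ongoing.remove(id);
            if (future != null) {
                future.cancel(true);                      // e.g. "Shard closing", "promoted to primary"
            }
        }
    }

    public static void main(String[] args) throws Exception {
        ReplicatorBookkeepingSketch replicator = new ReplicatorBookkeepingSketch();
        replicator.start("shard-0", () -> System.out.println("sync to latest checkpoint"));
        replicator.executor.shutdown();
        replicator.executor.awaitTermination(5, TimeUnit.SECONDS);
        System.out.println("completed: " + replicator.completedByShard);
    }
}
// ---- end of illustrative sketch ----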
Pipeline pipeline = Pipeline.create(request.getId(), pipelineConfig, processorFactories, scriptService); + + validateProcessorCountForIngestPipeline(pipeline); + List exceptions = new ArrayList<>(); for (Processor processor : pipeline.flattenAllProcessors()) { for (Map.Entry entry : ingestInfos.entrySet()) { @@ -507,6 +530,20 @@ void validatePipeline(Map ingestInfos, PutPipelineReq ExceptionsHelper.rethrowAndSuppress(exceptions); } + public void validateProcessorCountForIngestPipeline(Pipeline pipeline) { + List processors = pipeline.flattenAllProcessors(); + + if (processors.size() > maxIngestProcessorCount) { + throw new IllegalStateException( + "Cannot use more than the maximum processors allowed. Number of processors being configured is [" + + processors.size() + + "] which exceeds the maximum allowed configuration of [" + + maxIngestProcessorCount + + "] processors." + ); + } + } + public void executeBulkRequest( int numberOfActionRequests, Iterable> actionRequests, diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index ea656af6110e5..a8d4ebcf23dab 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -178,6 +178,7 @@ import org.opensearch.indices.replication.SegmentReplicationSourceFactory; import org.opensearch.indices.replication.SegmentReplicationSourceService; import org.opensearch.indices.replication.SegmentReplicationTargetService; +import org.opensearch.indices.replication.SegmentReplicator; import org.opensearch.indices.store.IndicesStore; import org.opensearch.ingest.IngestService; import org.opensearch.monitor.MonitorService; @@ -234,6 +235,7 @@ import org.opensearch.search.aggregations.support.AggregationUsageService; import org.opensearch.search.backpressure.SearchBackpressureService; import org.opensearch.search.backpressure.settings.SearchBackpressureSettings; +import org.opensearch.search.deciders.ConcurrentSearchRequestDecider; import org.opensearch.search.fetch.FetchPhase; import org.opensearch.search.pipeline.SearchPipelineService; import org.opensearch.search.query.QueryPhase; @@ -269,7 +271,7 @@ import org.opensearch.watcher.ResourceWatcherService; import org.opensearch.wlm.QueryGroupService; import org.opensearch.wlm.WorkloadManagementTransportInterceptor; -import org.opensearch.wlm.listeners.QueryGroupRequestRejectionOperationListener; +import org.opensearch.wlm.listeners.QueryGroupRequestOperationListener; import javax.net.ssl.SNIHostName; @@ -790,6 +792,7 @@ protected Node( clusterService.getClusterSettings(), threadPool::relativeTimeInMillis ); + final RemoteStoreSettings remoteStoreSettings = new RemoteStoreSettings(settings, settingsModule.getClusterSettings()); final RemoteClusterStateService remoteClusterStateService; final RemoteClusterStateCleanupManager remoteClusterStateCleanupManager; final RemoteIndexPathUploader remoteIndexPathUploader; @@ -798,7 +801,8 @@ protected Node( threadPool, settings, repositoriesServiceReference::get, - clusterService.getClusterSettings() + clusterService.getClusterSettings(), + remoteStoreSettings ); remoteClusterStateService = new RemoteClusterStateService( nodeEnvironment.nodeId(), @@ -867,15 +871,16 @@ protected Node( final RerouteService rerouteService = new BatchedRerouteService(clusterService, clusterModule.getAllocationService()::reroute); rerouteServiceReference.set(rerouteService); clusterService.setRerouteService(rerouteService); + 
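// ---- Illustrative sketch (not the OpenSearch implementation) ----
// The IngestService change above introduces a dynamic cluster.ingest.max_number_processors setting
// and rejects pipelines whose flattened processor count exceeds it. The sketch below applies the
// same check to a hypothetical processor tree; the Processor type and flatten() are stand-ins in
// the spirit of Pipeline.flattenAllProcessors().
import java.util.ArrayList;
import java.util.List;

public final class ProcessorCountSketch {

    static final class Processor {
        final String type;
        final List<Processor> onFailure;

        Processor(String type, List<Processor> onFailure) {
            this.type = type;
            this.onFailure = onFailure;
        }
    }

    // Flatten the tree so every configured processor, including nested on_failure ones, is counted.
    static List<Processor> flatten(List<Processor> processors) {
        List<Processor> all = new ArrayList<>();
        for (Processor processor : processors) {
            all.add(processor);
            all.addAll(flatten(processor.onFailure));
        }
        return all;
    }

    static void validateProcessorCount(List<Processor> pipelineProcessors, int maxProcessors) {
        int count = flatten(pipelineProcessors).size();
        if (count > maxProcessors) {
            throw new IllegalStateException(
                "Cannot use more than the maximum processors allowed. Configured ["
                    + count + "], maximum allowed [" + maxProcessors + "]");
        }
    }

    public static void main(String[] args) {
        List<Processor> pipeline = List.of(
            new Processor("set", List.of(new Processor("fail", List.of()))),
            new Processor("rename", List.of())
        );
        validateProcessorCount(pipeline, 10);  // passes: 3 <= 10
        validateProcessorCount(pipeline, 2);   // throws IllegalStateException
    }
}
// ---- end of illustrative sketch ----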
clusterModule.setRerouteServiceForAllocator(rerouteService); final RecoverySettings recoverySettings = new RecoverySettings(settings, settingsModule.getClusterSettings()); - final RemoteStoreSettings remoteStoreSettings = new RemoteStoreSettings(settings, settingsModule.getClusterSettings()); final CompositeIndexSettings compositeIndexSettings = new CompositeIndexSettings(settings, settingsModule.getClusterSettings()); final IndexStorePlugin.DirectoryFactory remoteDirectoryFactory = new RemoteSegmentStoreDirectoryFactory( repositoriesServiceReference::get, - threadPool + threadPool, + remoteStoreSettings.getSegmentsPathFixedPrefix() ); final TaskResourceTrackingService taskResourceTrackingService = new TaskResourceTrackingService( @@ -893,6 +898,7 @@ protected Node( remoteStoreStatsTrackerFactory = new RemoteStoreStatsTrackerFactory(clusterService, settings); CacheModule cacheModule = new CacheModule(pluginsService.filterPlugins(CachePlugin.class), settings); CacheService cacheService = cacheModule.getCacheService(); + final SegmentReplicator segmentReplicator = new SegmentReplicator(threadPool); final IndicesService indicesService = new IndicesService( settings, pluginsService, @@ -922,7 +928,8 @@ protected Node( cacheService, remoteStoreSettings, fileCache, - compositeIndexSettings + compositeIndexSettings, + segmentReplicator::startReplication ); final IngestService ingestService = new IngestService( @@ -1019,11 +1026,12 @@ protected Node( List identityAwarePlugins = pluginsService.filterPlugins(IdentityAwarePlugin.class); identityService.initializeIdentityAwarePlugins(identityAwarePlugins); - final QueryGroupRequestRejectionOperationListener queryGroupRequestRejectionListener = - new QueryGroupRequestRejectionOperationListener( - new QueryGroupService(), // We will need to replace this with actual instance of the queryGroupService - threadPool - ); + final QueryGroupService queryGroupService = new QueryGroupService(); // We will need to replace this with actual instance of the + // queryGroupService + final QueryGroupRequestOperationListener queryGroupRequestOperationListener = new QueryGroupRequestOperationListener( + queryGroupService, + threadPool + ); // register all standard SearchRequestOperationsCompositeListenerFactory to the SearchRequestOperationsCompositeListenerFactory final SearchRequestOperationsCompositeListenerFactory searchRequestOperationsCompositeListenerFactory = @@ -1033,7 +1041,7 @@ protected Node( searchRequestStats, searchRequestSlowLog, searchTaskRequestOperationsListener, - queryGroupRequestRejectionListener + queryGroupRequestOperationListener ), pluginComponents.stream() .filter(p -> p instanceof SearchRequestOperationsListener) @@ -1207,7 +1215,8 @@ protected Node( repositoryService, transportService, actionModule.getActionFilters(), - remoteStorePinnedTimestampService + remoteStorePinnedTimestampService, + remoteStoreSettings ); SnapshotShardsService snapshotShardsService = new SnapshotShardsService( settings, @@ -1334,7 +1343,8 @@ protected Node( responseCollectorService, circuitBreakerService, searchModule.getIndexSearcherExecutor(threadPool), - taskResourceTrackingService + taskResourceTrackingService, + searchModule.getConcurrentSearchRequestDeciderFactories() ); final List> tasksExecutors = pluginsService.filterPlugins(PersistentTaskPlugin.class) @@ -1417,6 +1427,7 @@ protected Node( b.bind(SnapshotsInfoService.class).toInstance(snapshotsInfoService); b.bind(GatewayMetaState.class).toInstance(gatewayMetaState); 
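// ---- Illustrative sketch (not the OpenSearch implementation) ----
// Node now hands IndicesService a plain Consumer (segmentReplicator::startReplication) rather than
// the replication service itself, so the index layer can trigger a round of replication without a
// hard dependency on the replication machinery. A tiny sketch of that callback wiring with
// made-up types:
import java.util.function.Consumer;

public final class ReplicatorCallbackWiringSketch {

    static final class Shard {
        final String id;

        Shard(String id) {
            this.id = id;
        }
    }

    // Stands in for IndicesService: it only knows "something to call when a shard needs a sync".
    static final class IndicesServiceLike {
        private final Consumer<Shard> replicator;

        IndicesServiceLike(Consumer<Shard> replicator) {
            this.replicator = replicator;
        }

        void onShardNeedsSync(Shard shard) {
            replicator.accept(shard);
        }
    }

    // Stands in for SegmentReplicator.
    static final class ReplicatorLike {
        void startReplication(Shard shard) {
            System.out.println("start replication for " + shard.id);
        }
    }

    public static void main(String[] args) {
        ReplicatorLike replicator = new ReplicatorLike();
        IndicesServiceLike indicesService = new IndicesServiceLike(replicator::startReplication);
        indicesService.onShardNeedsSync(new Shard("index-0[0]"));
    }
}
// ---- end of illustrative sketch ----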
b.bind(Discovery.class).toInstance(discoveryModule.getDiscovery()); + b.bind(RemoteStoreSettings.class).toInstance(remoteStoreSettings); { b.bind(PeerRecoverySourceService.class) .toInstance(new PeerRecoverySourceService(transportService, indicesService, recoverySettings)); @@ -1430,7 +1441,8 @@ protected Node( transportService, new SegmentReplicationSourceFactory(transportService, recoverySettings, clusterService), indicesService, - clusterService + clusterService, + segmentReplicator ) ); b.bind(SegmentReplicationSourceService.class) @@ -1465,6 +1477,7 @@ protected Node( b.bind(PersistedStateRegistry.class).toInstance(persistedStateRegistry); b.bind(SegmentReplicationStatsTracker.class).toInstance(segmentReplicationStatsTracker); b.bind(SearchRequestOperationsCompositeListenerFactory.class).toInstance(searchRequestOperationsCompositeListenerFactory); + b.bind(SegmentReplicator.class).toInstance(segmentReplicator); taskManagerClientOptional.ifPresent(value -> b.bind(TaskManagerClient.class).toInstance(value)); }); @@ -1990,7 +2003,8 @@ protected SearchService newSearchService( ResponseCollectorService responseCollectorService, CircuitBreakerService circuitBreakerService, Executor indexSearcherExecutor, - TaskResourceTrackingService taskResourceTrackingService + TaskResourceTrackingService taskResourceTrackingService, + Collection concurrentSearchDeciderFactories ) { return new SearchService( clusterService, @@ -2003,7 +2017,8 @@ protected SearchService newSearchService( responseCollectorService, circuitBreakerService, indexSearcherExecutor, - taskResourceTrackingService + taskResourceTrackingService, + concurrentSearchDeciderFactories ); } diff --git a/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java b/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java index a0f745a4270c4..55971398634c5 100644 --- a/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java +++ b/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java @@ -13,7 +13,6 @@ import org.opensearch.cluster.metadata.RepositoryMetadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.common.settings.Settings; -import org.opensearch.common.util.FeatureFlags; import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.node.Node; import org.opensearch.repositories.blobstore.BlobStoreRepository; @@ -29,8 +28,6 @@ import java.util.Set; import java.util.stream.Collectors; -import static org.opensearch.common.util.FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL; - /** * This is an abstraction for validating and storing information specific to remote backed storage nodes. 
* @@ -58,6 +55,11 @@ public class RemoteStoreNodeAttribute { REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY ); + public static List REMOTE_CLUSTER_PUBLICATION_REPO_NAME_ATTRIBUTES = List.of( + REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, + REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY + ); + /** * Creates a new {@link RemoteStoreNodeAttribute} */ @@ -202,7 +204,7 @@ private static boolean isRemoteRoutingTableAttributePresent(Settings settings) { } public static boolean isRemoteRoutingTableEnabled(Settings settings) { - return FeatureFlags.isEnabled(REMOTE_PUBLICATION_EXPERIMENTAL) && isRemoteRoutingTableAttributePresent(settings); + return isRemoteRoutingTableAttributePresent(settings); } public RepositoriesMetadata getRepositoriesMetadata() { @@ -264,6 +266,14 @@ public boolean equalsWithRepoSkip(Object o, List reposToSkip) { return this.getRepositoriesMetadata().equalsIgnoreGenerationsWithRepoSkip(that.getRepositoriesMetadata(), reposToSkip); } + public boolean equalsForRepositories(Object otherNode, List repositoryToValidate) { + if (this == otherNode) return true; + if (otherNode == null || getClass() != otherNode.getClass()) return false; + + RemoteStoreNodeAttribute other = (RemoteStoreNodeAttribute) otherNode; + return this.getRepositoriesMetadata().equalsIgnoreGenerationsForRepo(other.repositoriesMetadata, repositoryToValidate); + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/server/src/main/java/org/opensearch/node/remotestore/RemoteStorePinnedTimestampService.java b/server/src/main/java/org/opensearch/node/remotestore/RemoteStorePinnedTimestampService.java index f7b262664d147..71133615ed056 100644 --- a/server/src/main/java/org/opensearch/node/remotestore/RemoteStorePinnedTimestampService.java +++ b/server/src/main/java/org/opensearch/node/remotestore/RemoteStorePinnedTimestampService.java @@ -10,19 +10,16 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.opensearch.cluster.ClusterName; +import org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.blobstore.BlobContainer; import org.opensearch.common.blobstore.BlobMetadata; -import org.opensearch.common.blobstore.BlobPath; import org.opensearch.common.collect.Tuple; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.concurrent.AbstractAsyncTask; import org.opensearch.core.action.ActionListener; -import org.opensearch.gateway.remote.model.RemotePinnedTimestamps; -import org.opensearch.gateway.remote.model.RemotePinnedTimestamps.PinnedTimestamps; -import org.opensearch.gateway.remote.model.RemoteStorePinnedTimestampsBlobStore; -import org.opensearch.index.translog.transfer.BlobStoreTransferService; import org.opensearch.indices.RemoteStoreSettings; import org.opensearch.node.Node; import org.opensearch.repositories.RepositoriesService; @@ -30,15 +27,15 @@ import org.opensearch.repositories.blobstore.BlobStoreRepository; import org.opensearch.threadpool.ThreadPool; +import java.io.ByteArrayInputStream; import java.io.Closeable; import java.io.IOException; import java.util.HashMap; +import java.util.HashSet; import java.util.List; +import java.util.Locale; +import java.util.Map; import java.util.Set; -import java.util.concurrent.Semaphore; -import java.util.concurrent.TimeUnit; -import 
java.util.concurrent.TimeoutException; -import java.util.function.Consumer; import java.util.function.Supplier; import java.util.stream.Collectors; @@ -48,20 +45,19 @@ * * @opensearch.internal */ +@ExperimentalApi public class RemoteStorePinnedTimestampService implements Closeable { private static final Logger logger = LogManager.getLogger(RemoteStorePinnedTimestampService.class); private static Tuple> pinnedTimestampsSet = new Tuple<>(-1L, Set.of()); - public static final int PINNED_TIMESTAMP_FILES_TO_KEEP = 5; + public static final String PINNED_TIMESTAMPS_PATH_TOKEN = "pinned_timestamps"; + public static final String PINNED_TIMESTAMPS_FILENAME_SEPARATOR = "__"; private final Supplier repositoriesService; private final Settings settings; private final ThreadPool threadPool; private final ClusterService clusterService; - private BlobStoreRepository blobStoreRepository; - private BlobStoreTransferService blobStoreTransferService; - private RemoteStorePinnedTimestampsBlobStore pinnedTimestampsBlobStore; + private BlobContainer blobContainer; private AsyncUpdatePinnedTimestampTask asyncUpdatePinnedTimestampTask; - private final Semaphore updateTimetampPinningSemaphore = new Semaphore(1); public RemoteStorePinnedTimestampService( Supplier repositoriesService, @@ -81,37 +77,46 @@ public RemoteStorePinnedTimestampService( * and starts the asynchronous update task. */ public void start() { - validateRemoteStoreConfiguration(); - initializeComponents(); + blobContainer = validateAndCreateBlobContainer(settings, repositoriesService.get()); startAsyncUpdateTask(RemoteStoreSettings.getPinnedTimestampsSchedulerInterval()); } - private void validateRemoteStoreConfiguration() { + private static BlobContainer validateAndCreateBlobContainer(Settings settings, RepositoriesService repositoriesService) { final String remoteStoreRepo = settings.get( Node.NODE_ATTRIBUTES.getKey() + RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY ); assert remoteStoreRepo != null : "Remote Segment Store repository is not configured"; - final Repository repository = repositoriesService.get().repository(remoteStoreRepo); + final Repository repository = repositoriesService.repository(remoteStoreRepo); assert repository instanceof BlobStoreRepository : "Repository should be instance of BlobStoreRepository"; - blobStoreRepository = (BlobStoreRepository) repository; - } - - private void initializeComponents() { - String clusterName = ClusterName.CLUSTER_NAME_SETTING.get(settings).value(); - blobStoreTransferService = new BlobStoreTransferService(blobStoreRepository.blobStore(), this.threadPool); - pinnedTimestampsBlobStore = new RemoteStorePinnedTimestampsBlobStore( - blobStoreTransferService, - blobStoreRepository, - clusterName, - this.threadPool, - ThreadPool.Names.REMOTE_STATE_READ - ); + BlobStoreRepository blobStoreRepository = (BlobStoreRepository) repository; + return blobStoreRepository.blobStore().blobContainer(blobStoreRepository.basePath().add(PINNED_TIMESTAMPS_PATH_TOKEN)); } private void startAsyncUpdateTask(TimeValue pinnedTimestampsSchedulerInterval) { asyncUpdatePinnedTimestampTask = new AsyncUpdatePinnedTimestampTask(logger, threadPool, pinnedTimestampsSchedulerInterval, true); } + public static Map> fetchPinnedTimestamps(Settings settings, RepositoriesService repositoriesService) + throws IOException { + BlobContainer blobContainer = validateAndCreateBlobContainer(settings, repositoriesService); + Set pinnedTimestamps = blobContainer.listBlobs().keySet(); + Map> 
pinningEntityTimestampMap = new HashMap<>(); + for (String pinnedTimestamp : pinnedTimestamps) { + try { + String[] tokens = pinnedTimestamp.split(PINNED_TIMESTAMPS_FILENAME_SEPARATOR); + Long timestamp = Long.parseLong(tokens[tokens.length - 1]); + String pinningEntity = pinnedTimestamp.substring(0, pinnedTimestamp.lastIndexOf(PINNED_TIMESTAMPS_FILENAME_SEPARATOR)); + if (pinningEntityTimestampMap.containsKey(pinningEntity) == false) { + pinningEntityTimestampMap.put(pinningEntity, new HashSet<>()); + } + pinningEntityTimestampMap.get(pinningEntity).add(timestamp); + } catch (NumberFormatException e) { + logger.error("Exception while parsing pinned timestamp from {}, skipping this entry", pinnedTimestamp); + } + } + return pinningEntityTimestampMap; + } + /** * Pins a timestamp in the remote store. * @@ -129,101 +134,109 @@ public void pinTimestamp(long timestamp, String pinningEntity, ActionListener pinnedTimestamps.pin(timestamp, pinningEntity), listener); + long startTime = System.nanoTime(); + try { + logger.debug("Pinning timestamp = {} against entity = {}", timestamp, pinningEntity); + blobContainer.writeBlob(getBlobName(timestamp, pinningEntity), new ByteArrayInputStream(new byte[0]), 0, true); + long elapsedTime = System.nanoTime() - startTime; + if (elapsedTime > RemoteStoreSettings.getPinnedTimestampsLookbackInterval().nanos()) { + String errorMessage = String.format( + Locale.ROOT, + "Timestamp pinning took %s nanoseconds which is more than limit of %s nanoseconds, failing the operation", + elapsedTime, + RemoteStoreSettings.getPinnedTimestampsLookbackInterval().nanos() + ); + logger.error(errorMessage); + unpinTimestamp(timestamp, pinningEntity, ActionListener.wrap(() -> listener.onFailure(new RuntimeException(errorMessage)))); + } else { + listener.onResponse(null); + } + } catch (IOException e) { + listener.onFailure(e); + } } /** - * Unpins a timestamp from the remote store. + * Clones a timestamp by creating a new pinning entity for an existing timestamp. * - * @param timestamp The timestamp to be unpinned - * @param pinningEntity The entity responsible for unpinning the timestamp - * @param listener A listener to be notified when the unpinning operation completes + * This method attempts to create a new pinning entity for a given timestamp that is already + * associated with an existing pinning entity. If the timestamp exists for the existing entity, + * a new blob is created for the new pinning entity. If the timestamp doesn't exist for the + * existing entity, the operation fails with an IllegalArgumentException. + * + * @param timestamp The timestamp to be cloned. + * @param existingPinningEntity The name of the existing entity that has pinned the timestamp. + * @param newPinningEntity The name of the new entity to pin the timestamp to. + * @param listener An ActionListener that will be notified of the operation's success or failure. + * On success, onResponse will be called with null. On failure, onFailure will + * be called with the appropriate exception. 
*/ - public void unpinTimestamp(long timestamp, String pinningEntity, ActionListener listener) { - updatePinning(pinnedTimestamps -> pinnedTimestamps.unpin(timestamp, pinningEntity), listener); - } - - private void updatePinning(Consumer updateConsumer, ActionListener listener) { - RemotePinnedTimestamps remotePinnedTimestamps = new RemotePinnedTimestamps( - clusterService.state().metadata().clusterUUID(), - blobStoreRepository.getCompressor() - ); - BlobPath path = pinnedTimestampsBlobStore.getBlobPathForUpload(remotePinnedTimestamps); + public void cloneTimestamp(long timestamp, String existingPinningEntity, String newPinningEntity, ActionListener listener) { try { - if (updateTimetampPinningSemaphore.tryAcquire(10, TimeUnit.MINUTES)) { - ActionListener semaphoreAwareListener = ActionListener.runBefore(listener, updateTimetampPinningSemaphore::release); - ActionListener> listCallResponseListener = getListenerForListCallResponse( - remotePinnedTimestamps, - updateConsumer, - semaphoreAwareListener - ); - blobStoreTransferService.listAllInSortedOrder( - path, - remotePinnedTimestamps.getType(), - Integer.MAX_VALUE, - listCallResponseListener - ); + logger.debug( + "cloning timestamp = {} with existing pinningEntity = {} with new pinningEntity = {}", + timestamp, + existingPinningEntity, + newPinningEntity + ); + String blobName = getBlobName(timestamp, existingPinningEntity); + if (blobContainer.blobExists(blobName)) { + logger.debug("Pinning timestamp = {} against entity = {}", timestamp, newPinningEntity); + blobContainer.writeBlob(getBlobName(timestamp, newPinningEntity), new ByteArrayInputStream(new byte[0]), 0, true); + listener.onResponse(null); } else { - throw new TimeoutException("Timed out while waiting to acquire lock in updatePinning"); + String errorMessage = String.format( + Locale.ROOT, + "Timestamp: %s is not pinned by existing entity: %s", + timestamp, + existingPinningEntity + ); + logger.error(errorMessage); + listener.onFailure(new IllegalArgumentException(errorMessage)); } - } catch (InterruptedException | TimeoutException e) { + } catch (IOException e) { listener.onFailure(e); } } - private ActionListener> getListenerForListCallResponse( - RemotePinnedTimestamps remotePinnedTimestamps, - Consumer updateConsumer, - ActionListener listener - ) { - return ActionListener.wrap(blobMetadata -> { - PinnedTimestamps pinnedTimestamps = new PinnedTimestamps(new HashMap<>()); - if (blobMetadata.isEmpty() == false) { - pinnedTimestamps = readExistingPinnedTimestamps(blobMetadata.get(0).name(), remotePinnedTimestamps); - } - updateConsumer.accept(pinnedTimestamps); - remotePinnedTimestamps.setPinnedTimestamps(pinnedTimestamps); - ActionListener writeCallResponseListener = getListenerForWriteCallResponse( - remotePinnedTimestamps, - blobMetadata, - listener - ); - pinnedTimestampsBlobStore.writeAsync(remotePinnedTimestamps, writeCallResponseListener); - }, listener::onFailure); + private String getBlobName(long timestamp, String pinningEntity) { + return String.join(PINNED_TIMESTAMPS_FILENAME_SEPARATOR, pinningEntity, String.valueOf(timestamp)); } - private ActionListener getListenerForWriteCallResponse( - RemotePinnedTimestamps remotePinnedTimestamps, - List blobMetadata, - ActionListener listener - ) { - return ActionListener.wrap(unused -> { - // Delete older pinnedTimestamp files - if (blobMetadata.size() > PINNED_TIMESTAMP_FILES_TO_KEEP) { - List oldFilesToBeDeleted = blobMetadata.subList(PINNED_TIMESTAMP_FILES_TO_KEEP, blobMetadata.size()) - .stream() - 
.map(BlobMetadata::name) - .collect(Collectors.toList()); - try { - blobStoreTransferService.deleteBlobs( - pinnedTimestampsBlobStore.getBlobPathForUpload(remotePinnedTimestamps), - oldFilesToBeDeleted - ); - } catch (IOException e) { - logger.error("Exception while deleting stale pinned timestamps", e); - } - } - listener.onResponse(null); - }, listener::onFailure); + private long getTimestampFromBlobName(String blobName) { + String[] blobNameTokens = blobName.split(PINNED_TIMESTAMPS_FILENAME_SEPARATOR); + if (blobNameTokens.length < 2) { + logger.error("Pinned timestamps blob name contains invalid format: {}", blobName); + } + try { + return Long.parseLong(blobNameTokens[blobNameTokens.length - 1]); + } catch (NumberFormatException e) { + logger.error(() -> new ParameterizedMessage("Pinned timestamps blob name contains invalid format: {}", blobName), e); + } + return -1; } - private PinnedTimestamps readExistingPinnedTimestamps(String blobFilename, RemotePinnedTimestamps remotePinnedTimestamps) { - remotePinnedTimestamps.setBlobFileName(blobFilename); - remotePinnedTimestamps.setFullBlobName(pinnedTimestampsBlobStore().getBlobPathForUpload(remotePinnedTimestamps)); + /** + * Unpins a timestamp from the remote store. + * + * @param timestamp The timestamp to be unpinned + * @param pinningEntity The entity responsible for unpinning the timestamp + * @param listener A listener to be notified when the unpinning operation completes + */ + public void unpinTimestamp(long timestamp, String pinningEntity, ActionListener listener) { try { - return pinnedTimestampsBlobStore().read(remotePinnedTimestamps); + logger.debug("Unpinning timestamp = {} against entity = {}", timestamp, pinningEntity); + String blobName = getBlobName(timestamp, pinningEntity); + if (blobContainer.blobExists(blobName)) { + blobContainer.deleteBlobsIgnoringIfNotExists(List.of(blobName)); + listener.onResponse(null); + } else { + String errorMessage = String.format(Locale.ROOT, "Timestamp: %s is not pinned by entity: %s", timestamp, pinningEntity); + logger.error(errorMessage); + listener.onFailure(new IllegalArgumentException(errorMessage)); + } } catch (IOException e) { - throw new RuntimeException("Failed to read existing pinned timestamps", e); + listener.onFailure(e); } } @@ -245,14 +258,6 @@ public static Tuple> getPinnedTimestamps() { return pinnedTimestampsSet; } - public RemoteStorePinnedTimestampsBlobStore pinnedTimestampsBlobStore() { - return pinnedTimestampsBlobStore; - } - - public BlobStoreTransferService blobStoreTransferService() { - return blobStoreTransferService; - } - /** * Inner class for asynchronously updating the pinned timestamp set. 
*/ @@ -270,32 +275,22 @@ protected boolean mustReschedule() { @Override protected void runInternal() { long triggerTimestamp = System.currentTimeMillis(); - RemotePinnedTimestamps remotePinnedTimestamps = new RemotePinnedTimestamps( - clusterService.state().metadata().clusterUUID(), - blobStoreRepository.getCompressor() - ); - BlobPath path = pinnedTimestampsBlobStore().getBlobPathForUpload(remotePinnedTimestamps); - blobStoreTransferService().listAllInSortedOrder(path, remotePinnedTimestamps.getType(), 1, new ActionListener<>() { - @Override - public void onResponse(List blobMetadata) { - if (blobMetadata.isEmpty()) { - pinnedTimestampsSet = new Tuple<>(triggerTimestamp, Set.of()); - return; - } - PinnedTimestamps pinnedTimestamps = readExistingPinnedTimestamps(blobMetadata.get(0).name(), remotePinnedTimestamps); - logger.debug( - "Fetched pinned timestamps from remote store: {} - {}", - triggerTimestamp, - pinnedTimestamps.getPinnedTimestampPinningEntityMap().keySet() - ); - pinnedTimestampsSet = new Tuple<>(triggerTimestamp, pinnedTimestamps.getPinnedTimestampPinningEntityMap().keySet()); + try { + Map pinnedTimestampList = blobContainer.listBlobs(); + if (pinnedTimestampList.isEmpty()) { + pinnedTimestampsSet = new Tuple<>(triggerTimestamp, Set.of()); + return; } - - @Override - public void onFailure(Exception e) { - logger.error("Exception while listing pinned timestamp files", e); - } - }); + Set pinnedTimestamps = pinnedTimestampList.keySet() + .stream() + .map(RemoteStorePinnedTimestampService.this::getTimestampFromBlobName) + .filter(timestamp -> timestamp != -1) + .collect(Collectors.toSet()); + logger.debug("Fetched pinned timestamps from remote store: {} - {}", triggerTimestamp, pinnedTimestamps); + pinnedTimestampsSet = new Tuple<>(triggerTimestamp, pinnedTimestamps); + } catch (Throwable t) { + logger.error("Exception while fetching pinned timestamp details", t); + } } } } diff --git a/server/src/main/java/org/opensearch/plugins/SearchPlugin.java b/server/src/main/java/org/opensearch/plugins/SearchPlugin.java index 40b4f97cd1897..710dd32371f83 100644 --- a/server/src/main/java/org/opensearch/plugins/SearchPlugin.java +++ b/server/src/main/java/org/opensearch/plugins/SearchPlugin.java @@ -36,6 +36,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.Sort; import org.opensearch.common.CheckedFunction; +import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.common.lucene.search.function.ScoreFunction; import org.opensearch.common.settings.Settings; import org.opensearch.core.ParseField; @@ -64,6 +65,7 @@ import org.opensearch.search.aggregations.pipeline.MovAvgPipelineAggregator; import org.opensearch.search.aggregations.pipeline.PipelineAggregator; import org.opensearch.search.aggregations.support.ValuesSourceRegistry; +import org.opensearch.search.deciders.ConcurrentSearchRequestDecider; import org.opensearch.search.fetch.FetchSubPhase; import org.opensearch.search.fetch.subphase.highlight.Highlighter; import org.opensearch.search.query.QueryPhaseSearcher; @@ -138,6 +140,15 @@ default Map getHighlighters() { return emptyMap(); } + /** + * Allows plugins to register a factory to create custom decider for concurrent search + * @return A {@link ConcurrentSearchRequestDecider.Factory} + */ + @ExperimentalApi + default Optional getConcurrentSearchRequestDeciderFactory() { + return Optional.empty(); + } + /** * The new {@link Suggester}s defined by this plugin. 
*/ diff --git a/server/src/main/java/org/opensearch/repositories/FilterRepository.java b/server/src/main/java/org/opensearch/repositories/FilterRepository.java index 114cd0260fcca..188d61e00f206 100644 --- a/server/src/main/java/org/opensearch/repositories/FilterRepository.java +++ b/server/src/main/java/org/opensearch/repositories/FilterRepository.java @@ -97,6 +97,27 @@ public void getRepositoryData(ActionListener listener) { in.getRepositoryData(listener); } + @Override + public void finalizeSnapshot( + ShardGenerations shardGenerations, + long repositoryStateId, + Metadata clusterMetadata, + SnapshotInfo snapshotInfo, + Version repositoryMetaVersion, + Function stateTransformer, + ActionListener listener + ) { + in.finalizeSnapshot( + shardGenerations, + repositoryStateId, + clusterMetadata, + snapshotInfo, + repositoryMetaVersion, + stateTransformer, + listener + ); + } + @Override public void finalizeSnapshot( ShardGenerations shardGenerations, diff --git a/server/src/main/java/org/opensearch/repositories/IndexId.java b/server/src/main/java/org/opensearch/repositories/IndexId.java index 87a0063e8c21b..5a9a757e31cb1 100644 --- a/server/src/main/java/org/opensearch/repositories/IndexId.java +++ b/server/src/main/java/org/opensearch/repositories/IndexId.java @@ -32,6 +32,7 @@ package org.opensearch.repositories; +import org.opensearch.Version; import org.opensearch.common.annotation.PublicApi; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; @@ -40,6 +41,7 @@ import org.opensearch.core.xcontent.ToXContent; import org.opensearch.core.xcontent.ToXContentObject; import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.remote.RemoteStoreEnums; import java.io.IOException; import java.util.Objects; @@ -51,23 +53,36 @@ */ @PublicApi(since = "1.0.0") public final class IndexId implements Writeable, ToXContentObject { - protected static final String NAME = "name"; - protected static final String ID = "id"; + static final String NAME = "name"; + static final String ID = "id"; + static final String SHARD_PATH_TYPE = "shard_path_type"; + public static final int DEFAULT_SHARD_PATH_TYPE = RemoteStoreEnums.PathType.FIXED.getCode(); private final String name; private final String id; + private final int shardPathType; private final int hashCode; + // Used for testing only public IndexId(final String name, final String id) { + this(name, id, DEFAULT_SHARD_PATH_TYPE); + } + + public IndexId(String name, String id, int shardPathType) { this.name = name; this.id = id; + this.shardPathType = shardPathType; this.hashCode = computeHashCode(); - } public IndexId(final StreamInput in) throws IOException { this.name = in.readString(); this.id = in.readString(); + if (in.getVersion().onOrAfter(Version.V_2_17_0)) { + this.shardPathType = in.readVInt(); + } else { + this.shardPathType = DEFAULT_SHARD_PATH_TYPE; + } this.hashCode = computeHashCode(); } @@ -93,9 +108,16 @@ public String getId() { return id; } + /** + * The storage path type in remote store for the indexes having the underlying index ids. 
+ */ + public int getShardPathType() { + return shardPathType; + } + @Override public String toString() { - return "[" + name + "/" + id + "]"; + return "[" + name + "/" + id + "/" + shardPathType + "]"; } @Override @@ -107,7 +129,7 @@ public boolean equals(Object o) { return false; } IndexId that = (IndexId) o; - return Objects.equals(name, that.name) && Objects.equals(id, that.id); + return Objects.equals(name, that.name) && Objects.equals(id, that.id) && Objects.equals(this.shardPathType, that.shardPathType); } @Override @@ -116,13 +138,16 @@ public int hashCode() { } private int computeHashCode() { - return Objects.hash(name, id); + return Objects.hash(name, id, shardPathType); } @Override public void writeTo(final StreamOutput out) throws IOException { out.writeString(name); out.writeString(id); + if (out.getVersion().onOrAfter(Version.V_2_17_0)) { + out.writeVInt(shardPathType); + } } @Override @@ -130,6 +155,7 @@ public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params par builder.startObject(); builder.field(NAME, name); builder.field(ID, id); + builder.field(SHARD_PATH_TYPE, shardPathType); builder.endObject(); return builder; } diff --git a/server/src/main/java/org/opensearch/repositories/RepositoriesService.java b/server/src/main/java/org/opensearch/repositories/RepositoriesService.java index 68669feb16abc..7da52147661dc 100644 --- a/server/src/main/java/org/opensearch/repositories/RepositoriesService.java +++ b/server/src/main/java/org/opensearch/repositories/RepositoriesService.java @@ -68,7 +68,9 @@ import org.opensearch.common.util.io.IOUtils; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.Strings; +import org.opensearch.node.remotestore.RemoteStorePinnedTimestampService; import org.opensearch.repositories.blobstore.MeteredBlobStoreRepository; +import org.opensearch.snapshots.SnapshotsService; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportService; @@ -84,6 +86,7 @@ import java.util.stream.Stream; import static org.opensearch.repositories.blobstore.BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY; +import static org.opensearch.repositories.blobstore.BlobStoreRepository.SHALLOW_SNAPSHOT_V2; import static org.opensearch.repositories.blobstore.BlobStoreRepository.SYSTEM_REPOSITORY_SETTING; /** @@ -123,6 +126,7 @@ public class RepositoriesService extends AbstractLifecycleComponent implements C private final RepositoriesStatsArchive repositoriesStatsArchive; private final ClusterManagerTaskThrottler.ThrottlingKey putRepositoryTaskKey; private final ClusterManagerTaskThrottler.ThrottlingKey deleteRepositoryTaskKey; + private final Settings settings; public RepositoriesService( Settings settings, @@ -132,6 +136,7 @@ public RepositoriesService( Map internalTypesRegistry, ThreadPool threadPool ) { + this.settings = settings; this.typesRegistry = typesRegistry; this.internalTypesRegistry = internalTypesRegistry; this.clusterService = clusterService; @@ -173,7 +178,7 @@ public void registerOrUpdateRepository(final PutRepositoryRequest request, final CryptoMetadata.fromRequest(request.cryptoSettings()) ); validate(request.name()); - validateRepositoryMetadataSettings(clusterService, request.name(), request.settings()); + validateRepositoryMetadataSettings(clusterService, request.name(), request.settings(), repositories, settings, this); if (newRepositoryMetadata.cryptoMetadata() != null) { validate(newRepositoryMetadata.cryptoMetadata().keyProviderName()); } @@ -684,7 +689,10 @@ public 
static void validate(final String identifier) { public static void validateRepositoryMetadataSettings( ClusterService clusterService, final String repositoryName, - final Settings repositoryMetadataSettings + final Settings repositoryMetadataSettings, + Map repositories, + Settings settings, + RepositoriesService repositoriesService ) { // We can add more validations here for repository settings in the future. Version minVersionInCluster = clusterService.state().getNodes().getMinNodeVersion(); @@ -699,6 +707,51 @@ public static void validateRepositoryMetadataSettings( + minVersionInCluster ); } + if (SHALLOW_SNAPSHOT_V2.get(repositoryMetadataSettings)) { + if (minVersionInCluster.onOrAfter(Version.V_2_17_0) == false) { + throw new RepositoryException( + repositoryName, + "setting " + + SHALLOW_SNAPSHOT_V2.getKey() + + " cannot be enabled as some of the nodes in cluster are on version older than " + + Version.V_2_17_0 + + ". Minimum node version in cluster is: " + + minVersionInCluster + ); + } + if (repositoryName.contains(SnapshotsService.SNAPSHOT_PINNED_TIMESTAMP_DELIMITER)) { + throw new RepositoryException( + repositoryName, + "setting " + + SHALLOW_SNAPSHOT_V2.getKey() + + " cannot be enabled for repository with " + + SnapshotsService.SNAPSHOT_PINNED_TIMESTAMP_DELIMITER + + " in the name as this delimiter is used to create pinning entity" + ); + } + if (repositoryWithShallowV2Exists(repositories)) { + throw new RepositoryException( + repositoryName, + "setting " + + SHALLOW_SNAPSHOT_V2.getKey() + + " cannot be enabled as this setting can be enabled only on one repository " + + " and one or more repositories in the cluster have the setting as enabled" + ); + } + try { + if (pinnedTimestampExistsWithDifferentRepository(repositoryName, settings, repositoriesService)) { + throw new RepositoryException( + repositoryName, + "setting " + + SHALLOW_SNAPSHOT_V2.getKey() + + " cannot be enabled if there are existing snapshots created with shallow V2 " + + "setting using different repository." + ); + } + } catch (IOException e) { + throw new RepositoryException(repositoryName, "Exception while fetching pinned timestamp details"); + } + } // Validation to not allow users to create system repository via put repository call. 
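A rough client-side sketch of a registration request that would exercise the shallow_snapshot_v2 guards above; the repository name, type, bucket, and injected client are illustrative assumptions, not part of this change:

    // Illustrative only: registering a repository with shallow snapshot v2 enabled.
    PutRepositoryRequest request = new PutRepositoryRequest("snapshot-v2-repo").type("s3")
        .settings(
            Settings.builder()
                .put("bucket", "my-snapshot-bucket")               // illustrative bucket
                .put("remote_store_index_shallow_copy", true)
                .put("shallow_snapshot_v2", true)                  // triggers the validation above
                .build()
        );
    // The request is rejected with a RepositoryException if any node is older than 2.17.0, if the
    // repository name contains the pinned-timestamp delimiter, if another repository already has
    // shallow_snapshot_v2 enabled, or if shallow-v2 pinned timestamps exist under a different repository.
    client.admin().cluster().putRepository(request, ActionListener.wrap(ack -> {}, e -> logger.error("registration rejected", e)));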
if (isSystemRepositorySettingPresent(repositoryMetadataSettings)) { throw new RepositoryException( @@ -710,6 +763,28 @@ public static void validateRepositoryMetadataSettings( } } + private static boolean repositoryWithShallowV2Exists(Map repositories) { + return repositories.values().stream().anyMatch(repo -> SHALLOW_SNAPSHOT_V2.get(repo.getMetadata().settings())); + } + + private static boolean pinnedTimestampExistsWithDifferentRepository( + String newRepoName, + Settings settings, + RepositoriesService repositoriesService + ) throws IOException { + Map> pinningEntityTimestampMap = RemoteStorePinnedTimestampService.fetchPinnedTimestamps( + settings, + repositoriesService + ); + for (String pinningEntity : pinningEntityTimestampMap.keySet()) { + String repoNameWithPinnedTimestamps = pinningEntity.split(SnapshotsService.SNAPSHOT_PINNED_TIMESTAMP_DELIMITER)[0]; + if (repoNameWithPinnedTimestamps.equals(newRepoName) == false) { + return true; + } + } + return false; + } + private static void ensureRepositoryNotInUse(ClusterState clusterState, String repository) { if (isRepositoryInUse(clusterState, repository)) { throw new IllegalStateException("trying to modify or unregister repository that is currently used"); diff --git a/server/src/main/java/org/opensearch/repositories/Repository.java b/server/src/main/java/org/opensearch/repositories/Repository.java index 637503d3f54df..d4520beb5b570 100644 --- a/server/src/main/java/org/opensearch/repositories/Repository.java +++ b/server/src/main/java/org/opensearch/repositories/Repository.java @@ -50,9 +50,11 @@ import org.opensearch.index.mapper.MapperService; import org.opensearch.index.snapshots.IndexShardSnapshotStatus; import org.opensearch.index.snapshots.blobstore.RemoteStoreShardShallowCopySnapshot; +import org.opensearch.index.store.RemoteSegmentStoreDirectoryFactory; import org.opensearch.index.store.Store; import org.opensearch.index.store.lockmanager.RemoteStoreLockManagerFactory; import org.opensearch.indices.recovery.RecoveryState; +import org.opensearch.node.remotestore.RemoteStorePinnedTimestampService; import org.opensearch.snapshots.SnapshotId; import org.opensearch.snapshots.SnapshotInfo; @@ -139,6 +141,30 @@ default Repository create(RepositoryMetadata metadata, Function listener); + /** + * Finalizes snapshotting process + *

+ * This method is called on cluster-manager after all shards are snapshotted. + * + * @param shardGenerations updated shard generations + * @param repositoryStateId the unique id identifying the state of the repository when the snapshot began + * @param clusterMetadata cluster metadata + * @param snapshotInfo SnapshotInfo instance to write for this snapshot + * @param repositoryMetaVersion version of the updated repository metadata to write + * @param stateTransformer a function that filters the last cluster state update that the snapshot finalization will execute and + * is used to remove any state tracked for the in-progress snapshot from the cluster state + * @param listener listener to be invoked with the new {@link RepositoryData} after completing the snapshot + */ + void finalizeSnapshot( + ShardGenerations shardGenerations, + long repositoryStateId, + Metadata clusterMetadata, + SnapshotInfo snapshotInfo, + Version repositoryMetaVersion, + Function stateTransformer, + ActionListener listener + ); + /** * Finalizes snapshotting process *

@@ -183,11 +209,59 @@ void deleteSnapshots( /** * Deletes snapshots and releases respective lock files from remote store repository. * - * @param snapshotIds snapshot ids - * @param repositoryStateId the unique id identifying the state of the repository when the snapshot deletion began - * @param repositoryMetaVersion version of the updated repository metadata to write - * @param remoteStoreLockManagerFactory RemoteStoreLockManagerFactory to be used for cleaning up remote store lock files - * @param listener completion listener + * @param snapshotIds snapshot ids + * @param repositoryStateId the unique id identifying the state of the repository when the snapshot deletion began + * @param repositoryMetaVersion version of the updated repository metadata to write + * @param remoteStoreLockManagerFactory RemoteStoreLockManagerFactory to be used for cleaning up remote store lock files + * @param remoteSegmentStoreDirectoryFactory RemoteSegmentStoreDirectoryFactory to be used for cleaning up remote store segment files + * @param remoteStorePinnedTimestampService service for pinning and unpinning of the timestamp + * @param snapshotIdsPinnedTimestampMap map of snapshots ids and the pinned timestamp + * @param isShallowSnapshotV2 true for shallow snapshots v2 + * @param listener completion listener + */ + default void deleteSnapshotsInternal( + Collection snapshotIds, + long repositoryStateId, + Version repositoryMetaVersion, + RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, + RemoteSegmentStoreDirectoryFactory remoteSegmentStoreDirectoryFactory, + RemoteStorePinnedTimestampService remoteStorePinnedTimestampService, + Map snapshotIdsPinnedTimestampMap, + boolean isShallowSnapshotV2, + ActionListener listener + ) { + throw new UnsupportedOperationException(); + } + + /** + * Deletes snapshots and unpin the snapshot timestamp using remoteStorePinnedTimestampService + * + * @param snapshotsWithPinnedTimestamp map of snapshot ids and the pinned timestamps + * @param repositoryStateId the unique id identifying the state of the repository when the snapshot deletion began + * @param repositoryMetaVersion version of the updated repository metadata to write + * @param remoteSegmentStoreDirectoryFactory RemoteSegmentStoreDirectoryFactory to be used for cleaning up remote store segment files + * @param remoteStorePinnedTimestampService service for pinning and unpinning of the timestamp + * @param listener completion listener + */ + default void deleteSnapshotsWithPinnedTimestamp( + Map snapshotsWithPinnedTimestamp, + long repositoryStateId, + Version repositoryMetaVersion, + RemoteSegmentStoreDirectoryFactory remoteSegmentStoreDirectoryFactory, + RemoteStorePinnedTimestampService remoteStorePinnedTimestampService, + ActionListener listener + ) { + throw new UnsupportedOperationException(); + } + + /** + * Deletes snapshots and releases respective lock files from remote store repository + * + * @param snapshotIds + * @param repositoryStateId + * @param repositoryMetaVersion + * @param remoteStoreLockManagerFactory + * @param listener */ default void deleteSnapshotsAndReleaseLockFiles( Collection snapshotIds, @@ -394,6 +468,18 @@ default RemoteStoreShardShallowCopySnapshot getRemoteStoreShallowCopyShardMetada */ IndexShardSnapshotStatus getShardSnapshotStatus(SnapshotId snapshotId, IndexId indexId, ShardId shardId); + /** + * Retrieve shard snapshot status for the stored snapshot + * + * @param snapshotInfo snapshot info + * @param indexId the snapshotted index id for the shard to get status for + 
* @param shardId shard id + * @return snapshot status + */ + default IndexShardSnapshotStatus getShardSnapshotStatus(SnapshotInfo snapshotInfo, IndexId indexId, ShardId shardId) { + return getShardSnapshotStatus(snapshotInfo.snapshotId(), indexId, shardId); + } + /** * Update the repository with the incoming cluster state. This method is invoked from {@link RepositoriesService#applyClusterState} and * thus the same semantics as with {@link org.opensearch.cluster.ClusterStateApplier#applyClusterState} apply for the diff --git a/server/src/main/java/org/opensearch/repositories/RepositoryData.java b/server/src/main/java/org/opensearch/repositories/RepositoryData.java index ea48d9b1a49fe..f52f1fa76f6bf 100644 --- a/server/src/main/java/org/opensearch/repositories/RepositoryData.java +++ b/server/src/main/java/org/opensearch/repositories/RepositoryData.java @@ -111,6 +111,11 @@ public final class RepositoryData { * The indices found in the repository across all snapshots, as a name to {@link IndexId} mapping */ private final Map indices; + + public Map> getIndexSnapshots() { + return indexSnapshots; + } + /** * The snapshots that each index belongs to. */ @@ -517,7 +522,7 @@ public List resolveIndices(final List indices) { * @param indicesToResolve names of indices to resolve * @param inFlightIds name to index mapping for currently in-flight snapshots not yet in the repository data to fall back to */ - public List resolveNewIndices(List indicesToResolve, Map inFlightIds) { + public List resolveNewIndices(List indicesToResolve, Map inFlightIds, int pathType) { List snapshotIndices = new ArrayList<>(); for (String index : indicesToResolve) { IndexId indexId = indices.get(index); @@ -525,7 +530,7 @@ public List resolveNewIndices(List indicesToResolve, Map resolveNewIndices(List indicesToResolve, Map snapshotIds = indexSnapshots.get(indexId); assert snapshotIds != null; @@ -765,14 +779,20 @@ private static void parseIndices( final List snapshotIds = new ArrayList<>(); final List gens = new ArrayList<>(); + String id = null; + int pathType = IndexId.DEFAULT_SHARD_PATH_TYPE; IndexId indexId = null; + XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.nextToken(), parser); while (parser.nextToken() != XContentParser.Token.END_OBJECT) { final String indexMetaFieldName = parser.currentName(); final XContentParser.Token currentToken = parser.nextToken(); switch (indexMetaFieldName) { case INDEX_ID: - indexId = new IndexId(indexName, parser.text()); + id = parser.text(); + break; + case IndexId.SHARD_PATH_TYPE: + pathType = parser.intValue(); break; case SNAPSHOTS: XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_ARRAY, currentToken, parser); @@ -795,7 +815,7 @@ private static void parseIndices( // different versions create or delete snapshot in the same repository. 
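As a side note, a minimal round-trip sketch (test-style, illustrative names, assuming the usual stream and UUID imports) of how the new shard path type is version-gated on the wire by IndexId; pre-2.17.0 streams simply omit it:

    // Illustrative only: shard_path_type is written on 2.17.0+ streams and read back on the other side.
    IndexId original = new IndexId("logs-000001", UUIDs.randomBase64UUID(), RemoteStoreEnums.PathType.HASHED_PREFIX.getCode());

    BytesStreamOutput out = new BytesStreamOutput();
    out.setVersion(Version.V_2_17_0);          // 2.17.0+ streams carry the path type
    original.writeTo(out);

    StreamInput in = out.bytes().streamInput();
    in.setVersion(Version.V_2_17_0);
    IndexId read = new IndexId(in);
    assert read.getShardPathType() == RemoteStoreEnums.PathType.HASHED_PREFIX.getCode();
    // On a pre-2.17.0 stream the field is not written, so the reader falls back to DEFAULT_SHARD_PATH_TYPE (FIXED).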
throw new OpenSearchParseException( "Detected a corrupted repository, index " - + indexId + + new IndexId(indexName, id, pathType) + " references an unknown snapshot uuid [" + uuid + "]" @@ -812,9 +832,10 @@ private static void parseIndices( break; } } - assert indexId != null; + assert id != null; + indexId = new IndexId(indexName, id, pathType); indexSnapshots.put(indexId, Collections.unmodifiableList(snapshotIds)); - indexLookup.put(indexId.getId(), indexId); + indexLookup.put(id, indexId); for (int i = 0; i < gens.size(); i++) { String parsedGen = gens.get(i); if (parsedGen != null) { diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java index e18706824d39d..b954560c1bc94 100644 --- a/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java @@ -66,6 +66,7 @@ import org.opensearch.common.Nullable; import org.opensearch.common.Numbers; import org.opensearch.common.Priority; +import org.opensearch.common.Randomness; import org.opensearch.common.SetOnce; import org.opensearch.common.UUIDs; import org.opensearch.common.blobstore.BlobContainer; @@ -109,8 +110,13 @@ import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.remote.RemoteStoreEnums.PathHashAlgorithm; +import org.opensearch.index.remote.RemoteStoreEnums.PathType; import org.opensearch.index.remote.RemoteStorePathStrategy; import org.opensearch.index.remote.RemoteStorePathStrategy.PathInput; +import org.opensearch.index.remote.RemoteStorePathStrategy.SnapshotShardPathInput; +import org.opensearch.index.remote.RemoteStoreUtils; +import org.opensearch.index.remote.RemoteTranslogTransferTracker; import org.opensearch.index.snapshots.IndexShardRestoreFailedException; import org.opensearch.index.snapshots.IndexShardSnapshotStatus; import org.opensearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot; @@ -127,8 +133,14 @@ import org.opensearch.index.store.lockmanager.FileLockInfo; import org.opensearch.index.store.lockmanager.RemoteStoreLockManager; import org.opensearch.index.store.lockmanager.RemoteStoreLockManagerFactory; +import org.opensearch.index.translog.RemoteFsTimestampAwareTranslog; +import org.opensearch.index.translog.RemoteFsTranslog; +import org.opensearch.index.translog.transfer.FileTransferTracker; +import org.opensearch.index.translog.transfer.TranslogTransferManager; +import org.opensearch.indices.RemoteStoreSettings; import org.opensearch.indices.recovery.RecoverySettings; import org.opensearch.indices.recovery.RecoveryState; +import org.opensearch.node.remotestore.RemoteStorePinnedTimestampService; import org.opensearch.repositories.IndexId; import org.opensearch.repositories.IndexMetaDataGenerations; import org.opensearch.repositories.Repository; @@ -145,6 +157,8 @@ import org.opensearch.snapshots.SnapshotId; import org.opensearch.snapshots.SnapshotInfo; import org.opensearch.snapshots.SnapshotMissingException; +import org.opensearch.snapshots.SnapshotShardPaths; +import org.opensearch.snapshots.SnapshotShardPaths.ShardInfo; import org.opensearch.snapshots.SnapshotsService; import org.opensearch.threadpool.ThreadPool; @@ -156,6 +170,8 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import 
java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Locale; import java.util.Map; @@ -178,9 +194,9 @@ import java.util.stream.Stream; import static org.opensearch.index.remote.RemoteStoreEnums.PathHashAlgorithm.FNV_1A_COMPOSITE_1; -import static org.opensearch.index.remote.RemoteStoreEnums.PathType.HASHED_PREFIX; import static org.opensearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot.FileInfo.canonicalName; import static org.opensearch.repositories.blobstore.ChecksumBlobStoreFormat.SNAPSHOT_ONLY_FORMAT_PARAMS; +import static org.opensearch.snapshots.SnapshotsService.SNAPSHOT_PINNED_TIMESTAMP_DELIMITER; /** * BlobStore - based implementation of Snapshot Repository @@ -224,6 +240,8 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp private static final String UPLOADED_DATA_BLOB_PREFIX = "__"; + public static final String INDICES_DIR = "indices"; + /** * Prefix used for the identifiers of data blobs that were not actually written to the repository physically because their contents are * already stored in the metadata referencing them, i.e. in {@link BlobStoreIndexShardSnapshot} and @@ -269,6 +287,12 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp public static final Setting SHALLOW_SNAPSHOT_V2 = Setting.boolSetting("shallow_snapshot_v2", false); + public static final Setting SHARD_PATH_TYPE = new Setting<>( + "shard_path_type", + PathType.FIXED.toString(), + PathType::parseString + ); + /** * Setting to set batch size of stale snapshot shard blobs that will be deleted by snapshot workers as part of snapshot deletion. * For optimal performance the value of the setting should be equal to or close to repository's max # of keys that can be deleted in single operation @@ -319,6 +343,16 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp Setting.Property.NodeScope ); + /** + * Controls the fixed prefix for the snapshot shard blob path. 
+ */ + public static final Setting SNAPSHOT_SHARD_PATH_PREFIX_SETTING = Setting.simpleString( + "cluster.snapshot.shard.path.prefix", + "", + Setting.Property.NodeScope, + Setting.Property.Final + ); + protected volatile boolean supportURLRepo; private volatile int maxShardBlobDeleteBatch; @@ -382,6 +416,10 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp BlobStoreIndexShardSnapshots::fromXContent ); + public static final ConfigBlobStoreFormat SNAPSHOT_SHARD_PATHS_FORMAT = new ConfigBlobStoreFormat<>( + SnapshotShardPaths.FILE_NAME_FORMAT + ); + private volatile boolean readOnly; private final boolean isSystemRepository; @@ -392,14 +430,22 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp private final SetOnce blobContainer = new SetOnce<>(); + private final SetOnce rootBlobContainer = new SetOnce<>(); + + private final SetOnce snapshotShardPathBlobContainer = new SetOnce<>(); + private final SetOnce blobStore = new SetOnce<>(); protected final ClusterService clusterService; private final RecoverySettings recoverySettings; + private final RemoteStoreSettings remoteStoreSettings; + private final NamedXContentRegistry namedXContentRegistry; + private final String snapshotShardPathPrefix; + /** * Flag that is set to {@code true} if this instance is started with {@link #metadata} that has a higher value for * {@link RepositoryMetadata#pendingGeneration()} than for {@link RepositoryMetadata#generation()} indicating a full cluster restart @@ -450,6 +496,8 @@ protected BlobStoreRepository( this.threadPool = clusterService.getClusterApplierService().threadPool(); this.clusterService = clusterService; this.recoverySettings = recoverySettings; + this.remoteStoreSettings = new RemoteStoreSettings(clusterService.getSettings(), clusterService.getClusterSettings()); + this.snapshotShardPathPrefix = SNAPSHOT_SHARD_PATH_PREFIX_SETTING.get(clusterService.getSettings()); } @Override @@ -782,6 +830,16 @@ BlobContainer getBlobContainer() { return blobContainer.get(); } + // package private, only use for testing + BlobContainer getRootBlobContainer() { + return rootBlobContainer.get(); + } + + // package private, only use for testing + public SetOnce getSnapshotShardPathBlobContainer() { + return snapshotShardPathBlobContainer; + } + // for test purposes only protected BlobStore getBlobStore() { return blobStore.get(); @@ -807,10 +865,47 @@ protected BlobContainer blobContainer() { } } } - return blobContainer; } + /** + * maintains single lazy instance of {@link BlobContainer} + */ + protected BlobContainer rootBlobContainer() { + assertSnapshotOrGenericThread(); + + BlobContainer rootBlobContainer = this.rootBlobContainer.get(); + if (rootBlobContainer == null) { + synchronized (lock) { + rootBlobContainer = this.rootBlobContainer.get(); + if (rootBlobContainer == null) { + rootBlobContainer = blobStore().blobContainer(BlobPath.cleanPath()); + this.rootBlobContainer.set(rootBlobContainer); + } + } + } + return rootBlobContainer; + } + + /** + * maintains single lazy instance of {@link BlobContainer} + */ + protected BlobContainer snapshotShardPathBlobContainer() { + assertSnapshotOrGenericThread(); + + BlobContainer snapshotShardPathBlobContainer = this.snapshotShardPathBlobContainer.get(); + if (snapshotShardPathBlobContainer == null) { + synchronized (lock) { + snapshotShardPathBlobContainer = this.snapshotShardPathBlobContainer.get(); + if (snapshotShardPathBlobContainer == null) { + snapshotShardPathBlobContainer = 
blobStore().blobContainer(basePath().add(SnapshotShardPaths.DIR)); + this.snapshotShardPathBlobContainer.set(snapshotShardPathBlobContainer); + } + } + } + return snapshotShardPathBlobContainer; + } + /** * Maintains single lazy instance of {@link BlobStore}. * Public for testing. @@ -895,11 +990,15 @@ public RepositoryStats stats() { return new RepositoryStats(store.stats()); } - public void deleteSnapshotsAndReleaseLockFiles( + public void deleteSnapshotsInternal( Collection snapshotIds, long repositoryStateId, Version repositoryMetaVersion, RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, + RemoteSegmentStoreDirectoryFactory remoteSegmentStoreDirectoryFactory, + RemoteStorePinnedTimestampService remoteStorePinnedTimestampService, + Map snapshotIdsPinnedTimestampMap, + boolean isShallowSnapshotV2, ActionListener listener ) { if (isReadOnly()) { @@ -921,6 +1020,10 @@ protected void doRun() throws Exception { repositoryData, repositoryMetaVersion, remoteStoreLockManagerFactory, + remoteSegmentStoreDirectoryFactory, + remoteStorePinnedTimestampService, + snapshotIdsPinnedTimestampMap, + isShallowSnapshotV2, listener ); } @@ -933,6 +1036,49 @@ public void onFailure(Exception e) { } } + @Override + public void deleteSnapshotsWithPinnedTimestamp( + Map snapshotIdPinnedTimestampMap, + long repositoryStateId, + Version repositoryMetaVersion, + RemoteSegmentStoreDirectoryFactory remoteSegmentStoreDirectoryFactory, + RemoteStorePinnedTimestampService remoteStorePinnedTimestampService, + ActionListener listener + ) { + deleteSnapshotsInternal( + snapshotIdPinnedTimestampMap.keySet(), + repositoryStateId, + repositoryMetaVersion, + null, // Passing null since no remote store lock files need to be cleaned up. + remoteSegmentStoreDirectoryFactory, + remoteStorePinnedTimestampService, + snapshotIdPinnedTimestampMap, + true, // true only for shallow snapshot v2 + listener + ); + } + + @Override + public void deleteSnapshotsAndReleaseLockFiles( + Collection snapshotIds, + long repositoryStateId, + Version repositoryMetaVersion, + RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, + ActionListener listener + ) { + deleteSnapshotsInternal( + snapshotIds, + repositoryStateId, + repositoryMetaVersion, + remoteStoreLockManagerFactory, + null, + null, + Collections.emptyMap(), + false, + listener + ); + } + @Override public void deleteSnapshots( Collection snapshotIds, @@ -940,11 +1086,15 @@ public void deleteSnapshots( Version repositoryMetaVersion, ActionListener listener ) { - deleteSnapshotsAndReleaseLockFiles( + deleteSnapshotsInternal( snapshotIds, repositoryStateId, repositoryMetaVersion, null, // Passing null since no remote store lock files need to be cleaned up. 
+ null, // Passing null since no remote store segment files need to be cleaned up + null, + Collections.emptyMap(), + false, listener ); } @@ -1009,6 +1159,10 @@ private RepositoryData safeRepositoryData(long repositoryStateId, Map snapshotIdPinnedTimestampMap, + boolean isShallowSnapshotV2, ActionListener listener ) { // First write the new shard state metadata (with the removed snapshot) and compute deletion targets @@ -1054,45 +1212,188 @@ private void doDeleteShardSnapshots( ); }, listener::onFailure); // Once we have updated the repository, run the clean-ups + final StepListener pinnedTimestampListener = new StepListener<>(); writeUpdatedRepoDataStep.whenComplete(updatedRepoData -> { + if (snapshotIdPinnedTimestampMap == null || snapshotIdPinnedTimestampMap.isEmpty()) { + pinnedTimestampListener.onResponse(updatedRepoData); + } else { + removeSnapshotsPinnedTimestamp( + snapshotIdPinnedTimestampMap, + this, + updatedRepoData, + remoteStorePinnedTimestampService, + pinnedTimestampListener + ); + } + }, listener::onFailure); + + pinnedTimestampListener.whenComplete(updatedRepoData -> { + // Run unreferenced blobs cleanup in parallel to shard-level snapshot deletion final ActionListener afterCleanupsListener = new GroupedActionListener<>( ActionListener.wrap(() -> listener.onResponse(updatedRepoData)), 2 ); + + // We can create map of indexId to ShardInfo based on the old repository data. This is later used in cleanup + // of stale indexes in combination with Snapshot Shard Paths file + Map idToShardInfoMap = repositoryData.getIndices() + .values() + .stream() + .collect( + Collectors.toMap( + IndexId::getId, + indexId -> new ShardInfo(indexId, repositoryData.shardGenerations().getGens(indexId).size()) + ) + ); + cleanupUnlinkedRootAndIndicesBlobs( snapshotIds, foundIndices, rootBlobs, updatedRepoData, - remoteStoreLockManagerFactory, - afterCleanupsListener - ); - asyncCleanupUnlinkedShardLevelBlobs( repositoryData, - snapshotIds, - writeShardMetaDataAndComputeDeletesStep.result(), remoteStoreLockManagerFactory, - afterCleanupsListener + remoteSegmentStoreDirectoryFactory, + afterCleanupsListener, + idToShardInfoMap ); + if (isShallowSnapshotV2) { + cleanUpRemoteStoreFilesForDeletedIndicesV2( + repositoryData, + snapshotIds, + writeShardMetaDataAndComputeDeletesStep.result(), + remoteSegmentStoreDirectoryFactory, + afterCleanupsListener + ); + } else { + asyncCleanupUnlinkedShardLevelBlobs( + repositoryData, + snapshotIds, + writeShardMetaDataAndComputeDeletesStep.result(), + remoteStoreLockManagerFactory, + afterCleanupsListener + ); + } }, listener::onFailure); } + private void cleanUpRemoteStoreFilesForDeletedIndicesV2( + RepositoryData repositoryData, + Collection snapshotIds, + Collection result, + RemoteSegmentStoreDirectoryFactory remoteSegmentStoreDirectoryFactory, + ActionListener afterCleanupsListener + ) { + try { + Set uniqueIndexIds = new HashSet<>(); + for (ShardSnapshotMetaDeleteResult shardSnapshotMetaDeleteResult : result) { + uniqueIndexIds.add(shardSnapshotMetaDeleteResult.indexId.getId()); + } + // iterate through all the indices and trigger remote store directory cleanup for deleted index segments + for (String indexId : uniqueIndexIds) { + cleanRemoteStoreDirectoryIfNeeded(snapshotIds, indexId, repositoryData, remoteSegmentStoreDirectoryFactory); + } + afterCleanupsListener.onResponse(null); + } catch (Exception e) { + logger.warn("Exception during cleanup of remote directory files for snapshot v2", e); + afterCleanupsListener.onFailure(e); + } + + } + + private 
void removeSnapshotsPinnedTimestamp(
+        Map<SnapshotId, Long> snapshotsWithPinnedTimestamp,
+        Repository repository,
+        RepositoryData repositoryData,
+        RemoteStorePinnedTimestampService remoteStorePinnedTimestampService,
+        ActionListener<RepositoryData> pinnedTimestampListener
+    ) {
+        // Create a GroupedActionListener to aggregate the results of all unpin operations
+        GroupedActionListener<Void> groupedListener = new GroupedActionListener<>(
+            ActionListener.wrap(
+                // This is called once all operations have succeeded
+                ignored -> pinnedTimestampListener.onResponse(repositoryData),
+                // This is called if any operation fails
+                pinnedTimestampListener::onFailure
+            ),
+            snapshotsWithPinnedTimestamp.size()
+        );
+
+        snapshotsWithPinnedTimestamp.forEach((snapshotId, pinnedTimestamp) -> {
+            removeSnapshotPinnedTimestamp(
+                remoteStorePinnedTimestampService,
+                snapshotId,
+                repository.getMetadata().name(),
+                pinnedTimestamp,
+                groupedListener
+            );
+        });
+    }
+
+    private void removeSnapshotPinnedTimestamp(
+        RemoteStorePinnedTimestampService remoteStorePinnedTimestampService,
+        SnapshotId snapshotId,
+        String repository,
+        long timestampToUnpin,
+        ActionListener<Void> listener
+    ) {
+        remoteStorePinnedTimestampService.unpinTimestamp(
+            timestampToUnpin,
+            repository + SNAPSHOT_PINNED_TIMESTAMP_DELIMITER + snapshotId.getUUID(),
+            new ActionListener<Void>() {
+                @Override
+                public void onResponse(Void unused) {
+                    logger.debug("Timestamp {} unpinned successfully for snapshot {}", timestampToUnpin, snapshotId.getName());
+                    listener.onResponse(null);
+                }
+
+                @Override
+                public void onFailure(Exception e) {
+                    logger.error(
+                        "Failed to unpin timestamp {} for snapshot {} with exception {}",
+                        timestampToUnpin,
+                        snapshotId.getName(),
+                        e
+                    );
+                    listener.onFailure(e);
+                }
+            }
+        );
+    }
+
+    /**
+     * Cleans up the indices and data corresponding to all its shards.
+ * + * @param deletedSnapshots list of snapshots being deleted + * @param foundIndices indices that are found at [base_path]/indices + * @param rootBlobs the blobs at the [base_path] + * @param updatedRepoData the new repository data after the deletion + * @param remoteStoreLockManagerFactory remote store lock manager factory used for shallow snapshots + * @param listener listener on deletion of the stale indices + * @param idToShardInfoMap map of indexId to ShardInfo + */ private void cleanupUnlinkedRootAndIndicesBlobs( Collection deletedSnapshots, Map foundIndices, Map rootBlobs, RepositoryData updatedRepoData, + RepositoryData oldRepoData, RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, - ActionListener listener + RemoteSegmentStoreDirectoryFactory remoteSegmentStoreDirectoryFactory, + ActionListener listener, + Map idToShardInfoMap ) { cleanupStaleBlobs( deletedSnapshots, foundIndices, rootBlobs, updatedRepoData, + oldRepoData, remoteStoreLockManagerFactory, - ActionListener.map(listener, ignored -> null) + remoteSegmentStoreDirectoryFactory, + ActionListener.map(listener, ignored -> null), + idToShardInfoMap ); } @@ -1103,7 +1404,11 @@ private void asyncCleanupUnlinkedShardLevelBlobs( RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, ActionListener listener ) { - final List filesToDelete = resolveFilesToDelete(oldRepositoryData, snapshotIds, deleteResults); + final List> filesToDelete = resolveFilesToDelete(oldRepositoryData, snapshotIds, deleteResults); + long startTimeNs = System.nanoTime(); + Randomness.shuffle(filesToDelete); + logger.debug("[{}] shuffled the filesToDelete with timeElapsedNs={}", metadata.name(), (System.nanoTime() - startTimeNs)); + if (filesToDelete.isEmpty()) { listener.onResponse(null); return; @@ -1111,18 +1416,18 @@ private void asyncCleanupUnlinkedShardLevelBlobs( try { AtomicInteger counter = new AtomicInteger(); - Collection> subList = filesToDelete.stream() + Collection>> subList = filesToDelete.stream() .collect(Collectors.groupingBy(it -> counter.getAndIncrement() / maxShardBlobDeleteBatch)) .values(); - final BlockingQueue> staleFilesToDeleteInBatch = new LinkedBlockingQueue<>(subList); + final BlockingQueue>> staleFilesToDeleteInBatch = new LinkedBlockingQueue<>(subList); final GroupedActionListener groupedListener = new GroupedActionListener<>( ActionListener.wrap(r -> { listener.onResponse(null); }, listener::onFailure), staleFilesToDeleteInBatch.size() ); - // Start as many workers as fit into the snapshot pool at once at the most - final int workers = Math.min(threadPool.info(ThreadPool.Names.SNAPSHOT).getMax(), staleFilesToDeleteInBatch.size()); + // Start as many workers as fit into the snapshot_deletion pool at once at the most + final int workers = Math.min(threadPool.info(ThreadPool.Names.SNAPSHOT_DELETION).getMax(), staleFilesToDeleteInBatch.size()); for (int i = 0; i < workers; ++i) { executeStaleShardDelete(staleFilesToDeleteInBatch, remoteStoreLockManagerFactory, groupedListener); } @@ -1200,7 +1505,8 @@ protected void releaseRemoteStoreLockAndCleanup( // related issue: https://github.com/opensearch-project/OpenSearch/issues/8469 RemoteSegmentStoreDirectoryFactory remoteDirectoryFactory = new RemoteSegmentStoreDirectoryFactory( remoteStoreLockManagerFactory.getRepositoriesService(), - threadPool + threadPool, + remoteStoreSettings.getSegmentsPathFixedPrefix() ); remoteDirectoryCleanupAsync( remoteDirectoryFactory, @@ -1217,57 +1523,67 @@ protected void releaseRemoteStoreLockAndCleanup( // When 
remoteStoreLockManagerFactory is non-null, while deleting the files, lock files are also released before deletion of respective // shallow-snap-UUID files. And if it is null, we just delete the stale shard blobs. private void executeStaleShardDelete( - BlockingQueue> staleFilesToDeleteInBatch, + BlockingQueue>> staleFilesToDeleteInBatch, RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, GroupedActionListener listener ) throws InterruptedException { - List filesToDelete = staleFilesToDeleteInBatch.poll(0L, TimeUnit.MILLISECONDS); - if (filesToDelete != null) { - threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(ActionRunnable.wrap(listener, l -> { - try { - // filtering files for which remote store lock release and cleanup succeeded, - // remaining files for which it failed will be retried in next snapshot delete run. - List eligibleFilesToDelete = new ArrayList<>(); - for (String fileToDelete : filesToDelete) { - if (fileToDelete.contains(SHALLOW_SNAPSHOT_PREFIX)) { - String[] fileToDeletePath = fileToDelete.split("/"); - String indexId = fileToDeletePath[1]; - String shardId = fileToDeletePath[2]; - String shallowSnapBlob = fileToDeletePath[3]; - String snapshotUUID = extractShallowSnapshotUUID(shallowSnapBlob).orElseThrow(); - BlobContainer shardContainer = blobStore().blobContainer(indicesPath().add(indexId).add(shardId)); - try { - releaseRemoteStoreLockAndCleanup(shardId, snapshotUUID, shardContainer, remoteStoreLockManagerFactory); - eligibleFilesToDelete.add(fileToDelete); - } catch (Exception e) { - logger.error( - "Failed to release lock or cleanup shard for indexID {}, shardID {} " + "and snapshot {}", - indexId, - shardId, - snapshotUUID - ); - } - } else { - eligibleFilesToDelete.add(fileToDelete); + List> filesToDelete = staleFilesToDeleteInBatch.poll(0L, TimeUnit.MILLISECONDS); + if (filesToDelete == null) { + return; + } + threadPool.executor(ThreadPool.Names.SNAPSHOT_DELETION).execute(ActionRunnable.wrap(listener, l -> { + try { + // filtering files for which remote store lock release and cleanup succeeded, + // remaining files for which it failed will be retried in next snapshot delete run. 
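To make the change of delete targets concrete: because hashed path types can place shard blobs above basePath(), deletes are now issued against the repository root container using the full path string. A small sketch with assumed values (not taken from this change):

    // Illustrative only: building an absolute delete key from the resolved (BlobPath, blobName) tuple.
    BlobPath shardPath = BlobPath.cleanPath().add("A1b2C3").add("base").add("indices").add("indexUUID").add("0");
    String blobName = "snap-uuid.dat";                              // illustrative blob name
    String deleteKey = shardPath.buildAsString() + blobName;        // "A1b2C3/base/indices/indexUUID/0/snap-uuid.dat"
    // deleteFromContainer(rootBlobContainer(), List.of(deleteKey));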
+ List eligibleFilesToDelete = new ArrayList<>(); + for (Tuple fileToDelete : filesToDelete) { + BlobPath blobPath = fileToDelete.v1(); + String blobName = fileToDelete.v2(); + boolean deleteBlob = false; + if (blobName.startsWith(SHALLOW_SNAPSHOT_PREFIX)) { + String snapshotUUID = extractShallowSnapshotUUID(blobName).orElseThrow(); + String[] parts = blobPath.toArray(); + // For fixed, the parts would look like [,"indices","",""] + // For hashed_prefix, the parts would look like ["j01010001010",,"indices","",""] + // For hashed_infix, the parts would look like [,"j01010001010","indices","",""] + int partLength = parts.length; + String indexId = parts[partLength - 2]; + String shardId = parts[partLength - 1]; + BlobContainer shardContainer = blobStore().blobContainer(blobPath); + try { + releaseRemoteStoreLockAndCleanup(shardId, snapshotUUID, shardContainer, remoteStoreLockManagerFactory); + deleteBlob = true; + } catch (Exception e) { + logger.error( + "Failed to release lock or cleanup shard for indexID {}, shardID {} and snapshot {}", + indexId, + shardId, + snapshotUUID + ); } + } else { + deleteBlob = true; + } + if (deleteBlob) { + eligibleFilesToDelete.add(blobPath.buildAsString() + blobName); } - // Deleting the shard blobs - deleteFromContainer(blobContainer(), eligibleFilesToDelete); - l.onResponse(null); - } catch (Exception e) { - logger.warn( - () -> new ParameterizedMessage( - "[{}] Failed to delete following blobs during snapshot delete : {}", - metadata.name(), - filesToDelete - ), - e - ); - l.onFailure(e); } - executeStaleShardDelete(staleFilesToDeleteInBatch, remoteStoreLockManagerFactory, listener); - })); - } + // Deleting the shard blobs + deleteFromContainer(rootBlobContainer(), eligibleFilesToDelete); + l.onResponse(null); + } catch (Exception e) { + logger.warn( + () -> new ParameterizedMessage( + "[{}] Failed to delete following blobs during snapshot delete : {}", + metadata.name(), + filesToDelete + ), + e + ); + l.onFailure(e); + } + executeStaleShardDelete(staleFilesToDeleteInBatch, remoteStoreLockManagerFactory, listener); + })); } // updates the shard state metadata for shards of a snapshot that is to be deleted. Also computes the files to be cleaned up. @@ -1279,7 +1595,7 @@ private void writeUpdatedShardMetaDataAndComputeDeletes( ActionListener> onAllShardsCompleted ) { - final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT); + final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT_DELETION); final List indices = oldRepositoryData.indicesToUpdateAfterRemovingSnapshot(snapshotIds); if (indices.isEmpty()) { @@ -1409,26 +1725,30 @@ public void onFailure(Exception ex) { } } - private List resolveFilesToDelete( + /** + * Resolves the list of files that should be deleted during a snapshot deletion operation. + * This method combines files to be deleted from shard-level metadata and index-level metadata. 
+ * + * @param oldRepositoryData The repository data before the snapshot deletion + * @param snapshotIds The IDs of the snapshots being deleted + * @param deleteResults The results of removing snapshots from shard-level metadata + * @return A list of tuples, each containing a blob path and the name of a blob to be deleted + */ + private List> resolveFilesToDelete( RepositoryData oldRepositoryData, Collection snapshotIds, Collection deleteResults ) { - final String basePath = basePath().buildAsString(); - final int basePathLen = basePath.length(); final Map> indexMetaGenerations = oldRepositoryData.indexMetaDataToRemoveAfterRemovingSnapshots( snapshotIds ); return Stream.concat(deleteResults.stream().flatMap(shardResult -> { - final String shardPath = shardContainer(shardResult.indexId, shardResult.shardId).path().buildAsString(); - return shardResult.blobsToDelete.stream().map(blob -> shardPath + blob); + final BlobPath shardPath = shardPath(shardResult.indexId, shardResult.shardId); + return shardResult.blobsToDelete.stream().map(blob -> Tuple.tuple(shardPath, blob)); }), indexMetaGenerations.entrySet().stream().flatMap(entry -> { - final String indexContainerPath = indexContainer(entry.getKey()).path().buildAsString(); - return entry.getValue().stream().map(id -> indexContainerPath + INDEX_METADATA_FORMAT.blobName(id)); - })).map(absolutePath -> { - assert absolutePath.startsWith(basePath); - return absolutePath.substring(basePathLen); - }).collect(Collectors.toList()); + final BlobPath indexPath = indexPath(entry.getKey()); + return entry.getValue().stream().map(id -> Tuple.tuple(indexPath, INDEX_METADATA_FORMAT.blobName(id))); + })).collect(Collectors.toList()); } /** @@ -1443,6 +1763,7 @@ private List resolveFilesToDelete( * @param rootBlobs all blobs found directly under the repository root * @param newRepoData new repository data that was just written * @param remoteStoreLockManagerFactory RemoteStoreLockManagerFactory to be used for cleaning up remote store lock files. 
+ * @param idToShardInfoMap map of indexId to ShardInfo * @param listener listener to invoke with the combined {@link DeleteResult} of all blobs removed in this operation */ private void cleanupStaleBlobs( @@ -1450,8 +1771,11 @@ private void cleanupStaleBlobs( Map foundIndices, Map rootBlobs, RepositoryData newRepoData, + RepositoryData oldRepoData, RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, - ActionListener listener + RemoteSegmentStoreDirectoryFactory remoteSegmentStoreDirectoryFactory, + ActionListener listener, + Map idToShardInfoMap ) { final GroupedActionListener groupedListener = new GroupedActionListener<>(ActionListener.wrap(deleteResults -> { DeleteResult deleteResult = DeleteResult.ZERO; @@ -1461,7 +1785,7 @@ private void cleanupStaleBlobs( listener.onResponse(deleteResult); }, listener::onFailure), 2); - final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT); + final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT_DELETION); final List staleRootBlobs = staleRootBlobs(newRepoData, rootBlobs.keySet()); if (staleRootBlobs.isEmpty()) { groupedListener.onResponse(DeleteResult.ZERO); @@ -1476,8 +1800,28 @@ private void cleanupStaleBlobs( if (foundIndices.keySet().equals(survivingIndexIds)) { groupedListener.onResponse(DeleteResult.ZERO); } else { - cleanupStaleIndices(foundIndices, survivingIndexIds, remoteStoreLockManagerFactory, groupedListener); + Map snapshotShardPaths = getSnapshotShardPaths(); + cleanupStaleIndices( + deletedSnapshots, + foundIndices, + survivingIndexIds, + remoteStoreLockManagerFactory, + remoteSegmentStoreDirectoryFactory, + oldRepoData, + groupedListener, + snapshotShardPaths, + idToShardInfoMap + ); + } + } + + private Map getSnapshotShardPaths() { + try { + return snapshotShardPathBlobContainer().listBlobs(); + } catch (IOException ex) { + logger.warn(new ParameterizedMessage("Repository [{}] Failed to get the snapshot shard paths", metadata.name()), ex); } + return Collections.emptyMap(); } /** @@ -1493,12 +1837,14 @@ private void cleanupStaleBlobs( * @param repositoryStateId Current repository state id * @param repositoryMetaVersion version of the updated repository metadata to write * @param remoteStoreLockManagerFactory RemoteStoreLockManagerFactory to be used for cleaning up remote store lock files. + * @param remoteSegmentStoreDirectoryFactory RemoteSegmentStoreDirectoryFactory to be used for cleaning up remote store segments. 
* @param listener Listener to complete when done */ public void cleanup( long repositoryStateId, Version repositoryMetaVersion, RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, + RemoteSegmentStoreDirectoryFactory remoteSegmentStoreDirectoryFactory, ActionListener listener ) { try { @@ -1531,8 +1877,11 @@ public void cleanup( foundIndices, rootBlobs, repositoryData, + repositoryData, remoteStoreLockManagerFactory, - ActionListener.map(listener, RepositoryCleanupResult::new) + remoteSegmentStoreDirectoryFactory, + ActionListener.map(listener, RepositoryCleanupResult::new), + Collections.emptyMap() ), listener::onFailure ) @@ -1620,11 +1969,16 @@ private List cleanupStaleRootFiles( return Collections.emptyList(); } - private void cleanupStaleIndices( + void cleanupStaleIndices( + Collection deletedSnapshots, Map foundIndices, Set survivingIndexIds, RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, - GroupedActionListener listener + RemoteSegmentStoreDirectoryFactory remoteSegmentStoreDirectoryFactory, + RepositoryData oldRepoData, + GroupedActionListener listener, + Map snapshotShardPaths, + Map idToShardInfoMap ) { final GroupedActionListener groupedListener = new GroupedActionListener<>(ActionListener.wrap(deleteResults -> { DeleteResult deleteResult = DeleteResult.ZERO; @@ -1644,11 +1998,20 @@ private void cleanupStaleIndices( // Start as many workers as fit into the snapshot pool at once at the most final int workers = Math.min( - threadPool.info(ThreadPool.Names.SNAPSHOT).getMax(), + threadPool.info(ThreadPool.Names.SNAPSHOT_DELETION).getMax(), foundIndices.size() - survivingIndexIds.size() ); for (int i = 0; i < workers; ++i) { - executeOneStaleIndexDelete(staleIndicesToDelete, remoteStoreLockManagerFactory, groupedListener); + executeOneStaleIndexDelete( + deletedSnapshots, + staleIndicesToDelete, + remoteStoreLockManagerFactory, + remoteSegmentStoreDirectoryFactory, + oldRepoData, + groupedListener, + snapshotShardPaths, + idToShardInfoMap + ); } } catch (Exception e) { // TODO: We shouldn't be blanket catching and suppressing all exceptions here and instead handle them safely upstream. @@ -1668,58 +2031,372 @@ private static boolean isIndexPresent(ClusterService clusterService, String inde return false; } + /** + * Executes the deletion of a single stale index. 
+ * + * @param staleIndicesToDelete Queue of stale indices to delete + * @param remoteStoreLockManagerFactory Factory for creating remote store lock managers + * @param listener Listener for grouped delete actions + * @param snapshotShardPaths Map of snapshot shard paths and their metadata + * @param idToShardInfoMap Map of indexId to ShardInfo + * @throws InterruptedException if the thread is interrupted while waiting + */ private void executeOneStaleIndexDelete( + Collection deletedSnapshots, BlockingQueue> staleIndicesToDelete, RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, - GroupedActionListener listener + RemoteSegmentStoreDirectoryFactory remoteSegmentStoreDirectoryFactory, + RepositoryData oldRepoData, + GroupedActionListener listener, + Map snapshotShardPaths, + Map idToShardInfoMap ) throws InterruptedException { Map.Entry indexEntry = staleIndicesToDelete.poll(0L, TimeUnit.MILLISECONDS); - if (indexEntry != null) { - final String indexSnId = indexEntry.getKey(); - threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(ActionRunnable.supply(listener, () -> { - DeleteResult deleteResult = DeleteResult.ZERO; + if (indexEntry == null) { + return; + } + final String indexSnId = indexEntry.getKey(); + threadPool.executor(ThreadPool.Names.SNAPSHOT_DELETION).execute(ActionRunnable.supply(listener, () -> { + try { + logger.debug("[{}] Found stale index [{}]. Cleaning it up", metadata.name(), indexSnId); + List matchingShardPaths = findMatchingShardPaths(indexSnId, snapshotShardPaths); + Optional highestGenShardPaths = findHighestGenerationShardPaths(matchingShardPaths); + + // The shardInfo can be null for 1) snapshots that pre-dates the hashed prefix snapshots. + // 2) Snapshot shard paths file upload failed + // In such cases, we fallback to fixed_path for cleanup of the data. + ShardInfo shardInfo = getShardInfo(highestGenShardPaths, idToShardInfoMap, indexSnId); + + if (remoteStoreLockManagerFactory != null) { + cleanupRemoteStoreLocks(indexEntry, shardInfo, remoteStoreLockManagerFactory); + } + + // Deletes the shard level data for the underlying index based on the shardInfo that was obtained above. + DeleteResult deleteResult = deleteShardData(shardInfo); + + // If there are matchingShardPaths, then we delete them after we have deleted the shard data. 
+ deleteResult = deleteResult.add(cleanUpStaleSnapshotShardPathsFile(matchingShardPaths, snapshotShardPaths)); + + if (remoteSegmentStoreDirectoryFactory != null) { + cleanRemoteStoreDirectoryIfNeeded(deletedSnapshots, indexSnId, oldRepoData, remoteSegmentStoreDirectoryFactory); + } + + // Finally, we delete the [base_path]/indexId folder + deleteResult = deleteResult.add(indexEntry.getValue().delete()); // Deleting the index folder + logger.debug("[{}] Cleaned up stale index [{}]", metadata.name(), indexSnId); + return deleteResult; + } catch (IOException e) { + logger.warn( + () -> new ParameterizedMessage( + "[{}] index {} is no longer part of any snapshots in the repository, " + + "but failed to clean up their index folders", + metadata.name(), + indexSnId + ), + e + ); + return DeleteResult.ZERO; + } catch (Exception e) { + assert false : e; + logger.warn(new ParameterizedMessage("[{}] Exception during single stale index delete", metadata.name()), e); + return DeleteResult.ZERO; + } finally { + executeOneStaleIndexDelete( + deletedSnapshots, + staleIndicesToDelete, + remoteStoreLockManagerFactory, + remoteSegmentStoreDirectoryFactory, + oldRepoData, + listener, + snapshotShardPaths, + idToShardInfoMap + ); + } + })); + } + + /** + * Cleans up the remote store directory if needed. + *

This method cleans up segments in the remote store directory for deleted indices. + * This cleanup flow is executed only for v2 snapshots. For v1 snapshots, + * the cleanup is done per shard after releasing the lock files. + * + * Since this method requires old repository data to fetch index metadata of the deleted index, + * the cleanup won't happen on retries in case of failures. This is because subsequent retries may + * not have access to the older repository data.
+ * + * @param indexSnId The snapshot index id of the index to be cleaned up + * @param oldRepoData The old repository metadata used to fetch the index metadata. + * @param remoteSegmentStoreDirectoryFactory RemoteSegmentStoreDirectoryFactory to be used for cleaning up remote + * store segments + */ + private void cleanRemoteStoreDirectoryIfNeeded( + Collection deletedSnapshots, + String indexSnId, + RepositoryData oldRepoData, + RemoteSegmentStoreDirectoryFactory remoteSegmentStoreDirectoryFactory + ) { + assert (indexSnId != null); + + IndexId indexId = null; + List snapshotIds = Collections.emptyList(); + try { + for (Map.Entry> entry : oldRepoData.getIndexSnapshots().entrySet()) { + indexId = entry.getKey(); + if (indexId != null && indexId.getId().equals(indexSnId)) { + snapshotIds = entry.getValue(); + break; + } + } + if (snapshotIds.isEmpty()) { + logger.info("No snapshots found for indexSnId: {}", indexSnId); + return; + } + for (SnapshotId snapshotId : snapshotIds) { try { - logger.debug("[{}] Found stale index [{}]. Cleaning it up", metadata.name(), indexSnId); - if (remoteStoreLockManagerFactory != null) { - final Map shardBlobs = indexEntry.getValue().children(); - for (Map.Entry shardBlob : shardBlobs.entrySet()) { - for (String blob : shardBlob.getValue().listBlobs().keySet()) { - final Optional snapshotUUID = extractShallowSnapshotUUID(blob); - if (snapshotUUID.isPresent()) { - releaseRemoteStoreLockAndCleanup( - shardBlob.getKey(), - snapshotUUID.get(), - shardBlob.getValue(), - remoteStoreLockManagerFactory - ); - } - } + // skip cleanup for snapshot not present in deleted snapshots list + if (!deletedSnapshots.contains(snapshotId)) { + continue; + } + IndexMetadata prevIndexMetadata = this.getSnapshotIndexMetaData(oldRepoData, snapshotId, indexId); + if (prevIndexMetadata != null && !isIndexPresent(clusterService, prevIndexMetadata.getIndexUUID())) { + String remoteStoreRepository = IndexMetadata.INDEX_REMOTE_SEGMENT_STORE_REPOSITORY_SETTING.get( + prevIndexMetadata.getSettings() + ); + assert (remoteStoreRepository != null); + + String remoteTranslogRepositoryName = IndexMetadata.INDEX_REMOTE_TRANSLOG_REPOSITORY_SETTING.get( + prevIndexMetadata.getSettings() + ); + assert (remoteTranslogRepositoryName != null); + Repository remoteTranslogRepository = remoteSegmentStoreDirectoryFactory.getRepositoriesService() + .get() + .repository(remoteTranslogRepositoryName); + + RemoteStorePathStrategy remoteStorePathStrategy = RemoteStoreUtils.determineRemoteStorePathStrategy( + prevIndexMetadata + ); + + for (int shardId = 0; shardId < prevIndexMetadata.getNumberOfShards(); shardId++) { + ShardId shard = new ShardId(Index.UNKNOWN_INDEX_NAME, prevIndexMetadata.getIndexUUID(), shardId); + remoteDirectoryCleanupAsync( + remoteSegmentStoreDirectoryFactory, + threadPool, + remoteStoreRepository, + prevIndexMetadata.getIndexUUID(), + shard, + ThreadPool.Names.REMOTE_PURGE, + remoteStorePathStrategy + ); + remoteTranslogCleanupAsync(remoteTranslogRepository, shard, remoteStorePathStrategy, prevIndexMetadata); } } - // Deleting the index folder - deleteResult = indexEntry.getValue().delete(); - logger.debug("[{}] Cleaned up stale index [{}]", metadata.name(), indexSnId); - } catch (IOException e) { + } catch (Exception e) { logger.warn( - () -> new ParameterizedMessage( - "[{}] index {} is no longer part of any snapshots in the repository, " - + "but failed to clean up their index folders", - metadata.name(), + new ParameterizedMessage( + "Exception during cleanup of remote directory for 
snapshot [{}] deleted index [{}]", + snapshotId, + indexSnId + ), + e + ); + } + } + } catch (Exception e) { + logger.error(new ParameterizedMessage("Exception during the remote directory cleanup for indexSnId [{}]", indexSnId), e); + } - executeOneStaleIndexDelete(staleIndicesToDelete, remoteStoreLockManagerFactory, listener); - return deleteResult; - })); + } + + private void remoteTranslogCleanupAsync( + Repository remoteTranslogRepository, + ShardId shardId, + RemoteStorePathStrategy remoteStorePathStrategy, + IndexMetadata prevIndexMetadata + ) { + assert remoteTranslogRepository instanceof BlobStoreRepository; + boolean indexMetadataEnabled = RemoteStoreUtils.determineTranslogMetadataEnabled(prevIndexMetadata); + RemoteTranslogTransferTracker remoteTranslogTransferTracker = new RemoteTranslogTransferTracker(shardId, 1000); + FileTransferTracker fileTransferTracker = new FileTransferTracker(shardId, remoteTranslogTransferTracker); + TranslogTransferManager translogTransferManager = RemoteFsTranslog.buildTranslogTransferManager( + (BlobStoreRepository) remoteTranslogRepository, + threadPool, + shardId, + fileTransferTracker, + remoteTranslogTransferTracker, + remoteStorePathStrategy, + remoteStoreSettings, + indexMetadataEnabled + ); + try { + RemoteFsTimestampAwareTranslog.cleanup(translogTransferManager); + } catch (IOException e) { + logger.error("Exception while cleaning up remote translog for shard: " + shardId, e); + } + } + + /** + * Finds and returns a list of shard paths that match the given index ID. + * + * @param indexId The ID of the index to match + * @param snapshotShardPaths Map of snapshot shard paths and their metadata + * @return List of matching shard paths + */ + private List findMatchingShardPaths(String indexId, Map snapshotShardPaths) { + return snapshotShardPaths.keySet().stream().filter(s -> s.startsWith(indexId)).collect(Collectors.toList()); + } + + /** + * Finds the shard path with the highest generation number from the given list of matching shard paths. + * + * @param matchingShardPaths List of shard paths that match a specific criterion + * @return An Optional containing the shard path with the highest generation number, or empty if the list is empty + */ + private Optional findHighestGenerationShardPaths(List matchingShardPaths) { + return matchingShardPaths.stream() + .map(s -> s.split("\\" + SnapshotShardPaths.DELIMITER)) + .sorted((a, b) -> Integer.parseInt(b[2]) - Integer.parseInt(a[2])) + .map(parts -> String.join(SnapshotShardPaths.DELIMITER, parts)) + .findFirst(); + } + + /** + * Cleans up remote store locks for a given index entry. 
+ * + * @param indexEntry The index entry containing the blob container + * @param shardInfo ShardInfo for the IndexId being cleaned up + * @param remoteStoreLockManagerFactory Factory for creating remote store lock managers + * @throws IOException If an I/O error occurs during the cleanup process + */ + private void cleanupRemoteStoreLocks( + Map.Entry indexEntry, + ShardInfo shardInfo, + RemoteStoreLockManagerFactory remoteStoreLockManagerFactory + ) throws IOException { + if (shardInfo == null) { + releaseRemoteStoreLocksAndCleanup(indexEntry.getValue().children(), remoteStoreLockManagerFactory); + } else { + Map shardContainers = new HashMap<>(shardInfo.getShardCount()); + for (int i = 0; i < shardInfo.getShardCount(); i++) { + shardContainers.put(String.valueOf(i), shardContainer(shardInfo.getIndexId(), i)); + } + releaseRemoteStoreLocksAndCleanup(shardContainers, remoteStoreLockManagerFactory); + } + } + + /** + * Releases remote store locks and performs cleanup for each shard blob. + * + * @param shardBlobs Map of shard IDs to their corresponding BlobContainers + * @param remoteStoreLockManagerFactory Factory for creating remote store lock managers + * @throws IOException If an I/O error occurs during the release and cleanup process + */ + void releaseRemoteStoreLocksAndCleanup( + Map shardBlobs, + RemoteStoreLockManagerFactory remoteStoreLockManagerFactory + ) throws IOException { + for (Map.Entry shardBlob : shardBlobs.entrySet()) { + for (String blob : shardBlob.getValue().listBlobs().keySet()) { + final Optional snapshotUUID = extractShallowSnapshotUUID(blob); + if (snapshotUUID.isPresent()) { + releaseRemoteStoreLockAndCleanup( + shardBlob.getKey(), + snapshotUUID.get(), + shardBlob.getValue(), + remoteStoreLockManagerFactory + ); + } + } } } + /** + * Deletes shard data for the provided ShardInfo object. + * + * @param shardInfo The ShardInfo object containing information about the shards to be deleted. + * @return A DeleteResult object representing the result of the deletion operation. + * @throws IOException If an I/O error occurs during the deletion process. + */ + private DeleteResult deleteShardData(ShardInfo shardInfo) throws IOException { + // If the provided ShardInfo is null, return a zero DeleteResult + if (shardInfo == null) { + return DeleteResult.ZERO; + } + + // Initialize the DeleteResult with zero values + DeleteResult deleteResult = DeleteResult.ZERO; + + // Iterate over the shards and delete each shard's data + for (int i = 0; i < shardInfo.getShardCount(); i++) { + // Call the delete method on the shardContainer and accumulate the result + deleteResult = deleteResult.add(shardContainer(shardInfo.getIndexId(), i).delete()); + } + + // Return the accumulated DeleteResult + return deleteResult; + } + + /** + * Retrieves the ShardInfo object based on the provided highest generation shard paths, + * index ID, and the mapping of index IDs to ShardInfo objects. + * + * @param highestGenShardPaths The optional highest generation shard path. + * @param idToShardInfoMap A map containing index IDs and their corresponding ShardInfo objects. + * @param indexId The index ID for which the ShardInfo object is needed. + * @return The ShardInfo object with the highest shard count, or null if no ShardInfo is available. 
+ */ + private ShardInfo getShardInfo(Optional highestGenShardPaths, Map idToShardInfoMap, String indexId) { + // Extract the ShardInfo object from the highest generation shard path, if present + ShardInfo shardInfoFromPath = highestGenShardPaths.map(SnapshotShardPaths::parseShardPath).orElse(null); + + // Retrieve the ShardInfo object from the idToShardInfoMap using the indexId + ShardInfo shardInfoFromMap = idToShardInfoMap.get(indexId); + + // If shardInfoFromPath is null, return shardInfoFromMap (which could also be null) + if (shardInfoFromPath == null) { + return shardInfoFromMap; + } + + // If shardInfoFromMap is null, return shardInfoFromPath (which could also be null) + if (shardInfoFromMap == null) { + return shardInfoFromPath; + } + + // If both shardInfoFromPath and shardInfoFromMap are non-null, + // return the ShardInfo object with the higher shard count + return shardInfoFromPath.getShardCount() >= shardInfoFromMap.getShardCount() ? shardInfoFromPath : shardInfoFromMap; + } + + private DeleteResult cleanUpStaleSnapshotShardPathsFile(List matchingShardPaths, Map snapshotShardPaths) + throws IOException { + deleteFromContainer(snapshotShardPathBlobContainer(), matchingShardPaths); + long totalBytes = matchingShardPaths.stream().mapToLong(s -> snapshotShardPaths.get(s).length()).sum(); + return new DeleteResult(matchingShardPaths.size(), totalBytes); + } + + @Override + public void finalizeSnapshot( + final ShardGenerations shardGenerations, + final long repositoryStateId, + final Metadata clusterMetadata, + SnapshotInfo snapshotInfo, + Version repositoryMetaVersion, + Function stateTransformer, + final ActionListener listener + ) { + finalizeSnapshot( + shardGenerations, + repositoryStateId, + clusterMetadata, + snapshotInfo, + repositoryMetaVersion, + stateTransformer, + Priority.NORMAL, + listener + ); + } + @Override public void finalizeSnapshot( final ShardGenerations shardGenerations, @@ -1760,6 +2437,10 @@ public void finalizeSnapshot( indexMetas, indexMetaIdentifiers ); + // The snapshot shards path would be uploaded for new index ids or index ids where the shard gen count (a.k.a + // number_of_shards) has increased. + Set updatedIndexIds = writeNewIndexShardPaths(existingRepositoryData, updatedRepositoryData, snapshotId); + cleanupRedundantSnapshotShardPaths(updatedIndexIds); writeIndexGen( updatedRepositoryData, repositoryStateId, @@ -1767,8 +2448,7 @@ public void finalizeSnapshot( stateTransformer, repositoryUpdatePriority, ActionListener.wrap(newRepoData -> { - cleanupOldShardGens(existingRepositoryData, updatedRepositoryData); - listener.onResponse(newRepoData); + cleanupOldShardGens(existingRepositoryData, updatedRepositoryData, newRepoData, listener); }, onUpdateFailure) ); }, onUpdateFailure), 2 + indices.size()); @@ -1811,23 +2491,189 @@ public void finalizeSnapshot( }, onUpdateFailure); } + /** + * This method cleans up the redundant snapshot shard paths file for index ids where the number of shards has increased + * on account of new indexes by same index name being snapshotted that exists already in the repository's snapshots. 
+ */ + private void cleanupRedundantSnapshotShardPaths(Set updatedShardPathsIndexIds) { + Set updatedIndexIds = updatedShardPathsIndexIds.stream() + .map(s -> s.split("\\" + SnapshotShardPaths.DELIMITER)[0]) + .collect(Collectors.toSet()); + Set indexIdShardPaths = getSnapshotShardPaths().keySet(); + List staleShardPaths = indexIdShardPaths.stream().filter(s -> updatedShardPathsIndexIds.contains(s) == false).filter(s -> { + String indexId = s.split("\\" + SnapshotShardPaths.DELIMITER)[0]; + return updatedIndexIds.contains(indexId); + }).collect(Collectors.toList()); + try { + deleteFromContainer(snapshotShardPathBlobContainer(), staleShardPaths); + } catch (IOException e) { + logger.warn( + new ParameterizedMessage( + "Repository [{}] Exception during snapshot stale index deletion {}", + metadata.name(), + staleShardPaths + ), + e + ); + } + } + + private Set writeNewIndexShardPaths( + RepositoryData existingRepositoryData, + RepositoryData updatedRepositoryData, + SnapshotId snapshotId + ) { + Set updatedIndexIds = new HashSet<>(); + Set indicesToUpdate = new HashSet<>(updatedRepositoryData.getIndices().values()); + for (IndexId indexId : indicesToUpdate) { + if (indexId.getShardPathType() == PathType.FIXED.getCode()) { + continue; + } + int oldShardCount = existingRepositoryData.shardGenerations().getGens(indexId).size(); + int newShardCount = updatedRepositoryData.shardGenerations().getGens(indexId).size(); + if (newShardCount > oldShardCount) { + String shardPathsBlobName = writeIndexShardPaths(indexId, snapshotId, newShardCount); + if (Objects.nonNull(shardPathsBlobName)) { + updatedIndexIds.add(shardPathsBlobName); + } + } + } + return updatedIndexIds; + } + + String writeIndexShardPaths(IndexId indexId, SnapshotId snapshotId, int shardCount) { + try { + List paths = getShardPaths(indexId, shardCount); + int pathType = indexId.getShardPathType(); + int pathHashAlgorithm = FNV_1A_COMPOSITE_1.getCode(); + String blobName = String.join( + SnapshotShardPaths.DELIMITER, + indexId.getId(), + indexId.getName(), + String.valueOf(shardCount), + String.valueOf(pathType), + String.valueOf(pathHashAlgorithm) + ); + SnapshotShardPaths shardPaths = new SnapshotShardPaths( + paths, + indexId.getId(), + indexId.getName(), + shardCount, + PathType.fromCode(pathType), + PathHashAlgorithm.fromCode(pathHashAlgorithm) + ); + SNAPSHOT_SHARD_PATHS_FORMAT.write(shardPaths, snapshotShardPathBlobContainer(), blobName); + logShardPathsOperationSuccess(indexId, snapshotId); + return blobName; + } catch (IOException e) { + logShardPathsOperationWarning(indexId, snapshotId, e); + } + return null; + } + + private List getShardPaths(IndexId indexId, int shardCount) { + List paths = new ArrayList<>(); + for (int shardId = 0; shardId < shardCount; shardId++) { + BlobPath shardPath = shardPath(indexId, shardId); + paths.add(shardPath.buildAsString()); + } + return paths; + } + + private void logShardPathsOperationSuccess(IndexId indexId, SnapshotId snapshotId) { + logger.trace( + () -> new ParameterizedMessage( + "Repository [{}] successfully wrote shard paths for index [{}] in snapshot [{}]", + metadata.name(), + indexId.getName(), + snapshotId.getName() + ) + ); + } + + private void logShardPathsOperationWarning(IndexId indexId, SnapshotId snapshotId, @Nullable Exception e) { + logger.warn( + () -> new ParameterizedMessage( + "Repository [{}] Failed to write shard paths for index [{}] in snapshot [{}]", + metadata.name(), + indexId.getName(), + snapshotId.getName() + ), + e + ); + } + // Delete all old shard gen 
blobs that aren't referenced any longer as a result from moving to updated repository data - private void cleanupOldShardGens(RepositoryData existingRepositoryData, RepositoryData updatedRepositoryData) { + private void cleanupOldShardGens( + RepositoryData existingRepositoryData, + RepositoryData updatedRepositoryData, + RepositoryData newRepositoryData, + ActionListener listener + ) { final List toDelete = new ArrayList<>(); - final int prefixPathLen = basePath().buildAsString().length(); updatedRepositoryData.shardGenerations() .obsoleteShardGenerations(existingRepositoryData.shardGenerations()) .forEach( (indexId, gens) -> gens.forEach( - (shardId, oldGen) -> toDelete.add( - shardContainer(indexId, shardId).path().buildAsString().substring(prefixPathLen) + INDEX_FILE_PREFIX + oldGen - ) + (shardId, oldGen) -> toDelete.add(shardPath(indexId, shardId).buildAsString() + INDEX_FILE_PREFIX + oldGen) ) ); + if (toDelete.isEmpty()) { + listener.onResponse(newRepositoryData); + return; + } try { - deleteFromContainer(blobContainer(), toDelete); + AtomicInteger counter = new AtomicInteger(); + Collection> subList = toDelete.stream() + .collect(Collectors.groupingBy(it -> counter.getAndIncrement() / maxShardBlobDeleteBatch)) + .values(); + final BlockingQueue> staleFilesToDeleteInBatch = new LinkedBlockingQueue<>(subList); + logger.info( + "[{}] cleanupOldShardGens toDeleteSize={} groupSize={}", + metadata.name(), + toDelete.size(), + staleFilesToDeleteInBatch.size() + ); + final GroupedActionListener groupedListener = new GroupedActionListener<>(ActionListener.wrap(r -> { + logger.info("[{}] completed cleanupOldShardGens", metadata.name()); + listener.onResponse(newRepositoryData); + }, ex -> { + logger.error(new ParameterizedMessage("[{}] exception in cleanupOldShardGens", metadata.name()), ex); + listener.onResponse(newRepositoryData); + }), staleFilesToDeleteInBatch.size()); + + // Start as many workers as fit into the snapshot pool at once at the most + final int workers = Math.min(threadPool.info(ThreadPool.Names.SNAPSHOT_DELETION).getMax(), staleFilesToDeleteInBatch.size()); + for (int i = 0; i < workers; ++i) { + executeOldShardGensCleanup(staleFilesToDeleteInBatch, groupedListener); + } } catch (Exception e) { - logger.warn("Failed to clean up old shard generation blobs", e); + logger.warn(new ParameterizedMessage(" [{}] Failed to clean up old shard generation blobs", metadata.name()), e); + listener.onResponse(newRepositoryData); + } + } + + private void executeOldShardGensCleanup(BlockingQueue> staleFilesToDeleteInBatch, GroupedActionListener listener) + throws InterruptedException { + List filesToDelete = staleFilesToDeleteInBatch.poll(0L, TimeUnit.MILLISECONDS); + if (filesToDelete != null) { + threadPool.executor(ThreadPool.Names.SNAPSHOT_DELETION).execute(ActionRunnable.wrap(listener, l -> { + try { + deleteFromContainer(rootBlobContainer(), filesToDelete); + l.onResponse(null); + } catch (Exception e) { + logger.warn( + () -> new ParameterizedMessage( + "[{}] Failed to delete following blobs during cleanupOldFiles : {}", + metadata.name(), + filesToDelete + ), + e + ); + l.onFailure(e); + } + executeOldShardGensCleanup(staleFilesToDeleteInBatch, listener); + })); } } @@ -1872,11 +2718,15 @@ private void deleteFromContainer(BlobContainer container, List blobs) th } private BlobPath indicesPath() { - return basePath().add("indices"); + return basePath().add(INDICES_DIR); } private BlobContainer indexContainer(IndexId indexId) { - return 
blobStore().blobContainer(indicesPath().add(indexId.getId())); + return blobStore().blobContainer(indexPath(indexId)); + } + + private BlobPath indexPath(IndexId indexId) { + return indicesPath().add(indexId.getId()); } private BlobContainer shardContainer(IndexId indexId, ShardId shardId) { @@ -1884,7 +2734,18 @@ private BlobContainer shardContainer(IndexId indexId, ShardId shardId) { } public BlobContainer shardContainer(IndexId indexId, int shardId) { - return blobStore().blobContainer(indicesPath().add(indexId.getId()).add(Integer.toString(shardId))); + return blobStore().blobContainer(shardPath(indexId, shardId)); + } + + private BlobPath shardPath(IndexId indexId, int shardId) { + PathType pathType = PathType.fromCode(indexId.getShardPathType()); + SnapshotShardPathInput shardPathInput = new SnapshotShardPathInput.Builder().basePath(basePath()) + .indexUUID(indexId.getId()) + .shardId(String.valueOf(shardId)) + .fixedPrefix(snapshotShardPathPrefix) + .build(); + PathHashAlgorithm pathHashAlgorithm = pathType != PathType.FIXED ? FNV_1A_COMPOSITE_1 : null; + return pathType.path(shardPathInput, pathHashAlgorithm); } /** @@ -1930,10 +2791,11 @@ public long getRemoteDownloadThrottleTimeInNanos() { } protected void assertSnapshotOrGenericThread() { - assert Thread.currentThread().getName().contains('[' + ThreadPool.Names.SNAPSHOT + ']') + assert Thread.currentThread().getName().contains('[' + ThreadPool.Names.SNAPSHOT_DELETION + ']') + || Thread.currentThread().getName().contains('[' + ThreadPool.Names.SNAPSHOT + ']') || Thread.currentThread().getName().contains('[' + ThreadPool.Names.GENERIC + ']') : "Expected current thread [" + Thread.currentThread() - + "] to be the snapshot or generic thread."; + + "] to be the snapshot_deletion or snapshot or generic thread."; } @Override @@ -1971,7 +2833,7 @@ private BlobContainer testContainer(String seed) { BlobPath testBlobPath; if (prefixModeVerification == true) { PathInput pathInput = PathInput.builder().basePath(basePath()).indexUUID(seed).build(); - testBlobPath = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE_1); + testBlobPath = PathType.HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE_1); } else { testBlobPath = basePath(); } @@ -2059,11 +2921,12 @@ private void doGetRepositoryData(ActionListener listener) { loaded = repositoryDataFromCachedEntry(cached); } else { loaded = getRepositoryData(genToLoad); + Version minNodeVersion = clusterService.state().nodes().getMinNodeVersion(); // We can cache serialized in the most recent version here without regard to the actual repository metadata version // since we're only caching the information that we just wrote and thus won't accidentally cache any information that // isn't safe cacheRepositoryData( - BytesReference.bytes(loaded.snapshotsToXContent(XContentFactory.jsonBuilder(), Version.CURRENT)), + BytesReference.bytes(loaded.snapshotsToXContent(XContentFactory.jsonBuilder(), Version.CURRENT, minNodeVersion)), genToLoad ); } @@ -2453,8 +3316,9 @@ public void onFailure(Exception e) { } final String indexBlob = INDEX_FILE_PREFIX + Long.toString(newGen); logger.debug("Repository [{}] writing new index generational blob [{}]", metadata.name(), indexBlob); + Version minNodeVersion = clusterService.state().nodes().getMinNodeVersion(); final BytesReference serializedRepoData = BytesReference.bytes( - newRepositoryData.snapshotsToXContent(XContentFactory.jsonBuilder(), version) + newRepositoryData.snapshotsToXContent(XContentFactory.jsonBuilder(), version, minNodeVersion) ); 
writeAtomic(blobContainer(), indexBlob, serializedRepoData, true); maybeWriteIndexLatest(newGen); @@ -3294,6 +4158,11 @@ public IndexShardSnapshotStatus getShardSnapshotStatus(SnapshotId snapshotId, In return snapshot.getIndexShardSnapshotStatus(); } + public IndexShardSnapshotStatus getShardSnapshotStatus(SnapshotInfo snapshotInfo, IndexId indexId, ShardId shardId) { + IndexShardSnapshot snapshot = loadShardSnapshot(shardContainer(indexId, shardId), snapshotInfo); + return snapshot.getIndexShardSnapshotStatus(); + } + @Override public void verify(String seed, DiscoveryNode localNode) { if (isSystemRepository == false) { @@ -3502,6 +4371,38 @@ public IndexShardSnapshot loadShardSnapshot(BlobContainer shardContainer, Snapsh } } + public IndexShardSnapshot loadShardSnapshot(BlobContainer shardContainer, SnapshotInfo snapshotInfo) { + try { + SnapshotId snapshotId = snapshotInfo.snapshotId(); + if (snapshotInfo.getPinnedTimestamp() != 0) { + return () -> IndexShardSnapshotStatus.newDone(0L, 0L, 0, 0, 0, 0, "1"); + } else if (snapshotInfo.isRemoteStoreIndexShallowCopyEnabled()) { + if (shardContainer.blobExists(REMOTE_STORE_SHARD_SHALLOW_COPY_SNAPSHOT_FORMAT.blobName(snapshotId.getUUID()))) { + return REMOTE_STORE_SHARD_SHALLOW_COPY_SNAPSHOT_FORMAT.read( + shardContainer, + snapshotId.getUUID(), + namedXContentRegistry + ); + } else { + throw new SnapshotMissingException(metadata.name(), snapshotId.getName()); + } + } else { + if (shardContainer.blobExists(INDEX_SHARD_SNAPSHOT_FORMAT.blobName(snapshotId.getUUID()))) { + return INDEX_SHARD_SNAPSHOT_FORMAT.read(shardContainer, snapshotId.getUUID(), namedXContentRegistry); + } else { + throw new SnapshotMissingException(metadata.name(), snapshotId.getName()); + } + } + } catch (IOException ex) { + throw new SnapshotException( + metadata.name(), + snapshotInfo.snapshotId(), + "failed to read shard snapshot file for [" + shardContainer.path() + ']', + ex + ); + } + } + /** * Loads all available snapshots in the repository using the given {@code generation} or falling back to trying to determine it from * the given list of blobs in the shard container. 
diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumWritableBlobStoreFormat.java b/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumWritableBlobStoreFormat.java index 0add86ab88a16..88672995f4fd6 100644 --- a/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumWritableBlobStoreFormat.java +++ b/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumWritableBlobStoreFormat.java @@ -28,6 +28,7 @@ import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.common.io.stream.Writeable.Writer; import org.opensearch.core.compress.Compressor; import org.opensearch.core.compress.CompressorRegistry; import org.opensearch.gateway.CorruptStateException; @@ -56,6 +57,10 @@ public ChecksumWritableBlobStoreFormat(String codec, CheckedFunction unSerializedObj.writeTo(out), obj, blobName, compressor); + } + + public BytesReference serialize(final Writer writer, T obj, final String blobName, final Compressor compressor) throws IOException { try (BytesStreamOutput outputStream = new BytesStreamOutput()) { try ( OutputStreamIndexOutput indexOutput = new OutputStreamIndexOutput( @@ -76,7 +81,7 @@ public void close() throws IOException { }; StreamOutput stream = new OutputStreamStreamOutput(compressor.threadLocalOutputStream(indexOutputOutputStream));) { // TODO The stream version should be configurable stream.setVersion(Version.CURRENT); - obj.writeTo(stream); + writer.write(stream, obj); } CodecUtil.writeFooter(indexOutput); } diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/ConfigBlobStoreFormat.java b/server/src/main/java/org/opensearch/repositories/blobstore/ConfigBlobStoreFormat.java index 8127bf8c2a2a2..630048c61785d 100644 --- a/server/src/main/java/org/opensearch/repositories/blobstore/ConfigBlobStoreFormat.java +++ b/server/src/main/java/org/opensearch/repositories/blobstore/ConfigBlobStoreFormat.java @@ -76,4 +76,8 @@ public void writeAsyncWithUrgentPriority(T obj, BlobContainer blobContainer, Str ((AsyncMultiStreamBlobContainer) blobContainer).asyncBlobUpload(remoteTransferContainer.createWriteContext(), listener); } } + + public void write(T obj, BlobContainer blobContainer, String name) throws IOException { + write(obj, blobContainer, name, new NoneCompressor(), ToXContent.EMPTY_PARAMS, XContentType.JSON, null, null); + } } diff --git a/server/src/main/java/org/opensearch/rest/RestController.java b/server/src/main/java/org/opensearch/rest/RestController.java index 9889f5d67e966..0d6f965c7033f 100644 --- a/server/src/main/java/org/opensearch/rest/RestController.java +++ b/server/src/main/java/org/opensearch/rest/RestController.java @@ -326,8 +326,8 @@ public void dispatchBadRequest(final RestChannel channel, final ThreadContext th private void dispatchRequest(RestRequest request, RestChannel channel, RestHandler handler) throws Exception { final int contentLength = request.content().length(); + final MediaType mediaType = request.getMediaType(); if (contentLength > 0) { - final MediaType mediaType = request.getMediaType(); if (mediaType == null) { sendContentTypeErrorMessage(request.getAllHeaderValues("Content-Type"), channel); return; @@ -343,6 +343,7 @@ private void dispatchRequest(RestRequest request, RestChannel channel, RestHandl return; } } + RestChannel responseChannel = channel; try { if (handler.canTripCircuitBreaker()) { @@ -364,6 +365,11 @@ private void 
dispatchRequest(RestRequest request, RestChannel channel, RestHandl + "]" ); } + + if (mediaType == null) { + sendContentTypeErrorMessage(request.getAllHeaderValues("Content-Type"), responseChannel); + return; + } } else { // if we could reserve bytes for the request we need to send the response also over this channel responseChannel = new ResourceHandlingHttpChannel(channel, circuitBreakerService, contentLength); diff --git a/server/src/main/java/org/opensearch/rest/action/admin/cluster/ClusterAdminTask.java b/server/src/main/java/org/opensearch/rest/action/admin/cluster/ClusterAdminTask.java new file mode 100644 index 0000000000000..5361b05ebecc3 --- /dev/null +++ b/server/src/main/java/org/opensearch/rest/action/admin/cluster/ClusterAdminTask.java @@ -0,0 +1,47 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.rest.action.admin.cluster; + +import org.opensearch.common.annotation.PublicApi; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.tasks.TaskId; +import org.opensearch.tasks.CancellableTask; + +import java.util.Map; + +import static org.opensearch.search.SearchService.NO_TIMEOUT; + +/** + * Task storing information about a currently running ClusterRequest. + * + * @opensearch.api + */ +@PublicApi(since = "2.17.0") +public class ClusterAdminTask extends CancellableTask { + + public ClusterAdminTask(long id, String type, String action, TaskId parentTaskId, Map headers) { + this(id, type, action, parentTaskId, headers, NO_TIMEOUT); + } + + public ClusterAdminTask( + long id, + String type, + String action, + TaskId parentTaskId, + Map headers, + TimeValue cancelAfterTimeInterval + ) { + super(id, type, action, null, parentTaskId, headers, cancelAfterTimeInterval); + } + + @Override + public boolean shouldCancelChildrenOnCancellation() { + return true; + } +} diff --git a/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestClusterHealthAction.java b/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestClusterHealthAction.java index f4d7676456826..59aef510ec1c4 100644 --- a/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestClusterHealthAction.java +++ b/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestClusterHealthAction.java @@ -105,6 +105,7 @@ public static ClusterHealthRequest fromRequest(final RestRequest request) { if (level != null) { clusterHealthRequest.setLevel(level); } + clusterHealthRequest.setApplyLevelAtTransportLayer(true); String waitForStatus = request.param("wait_for_status"); if (waitForStatus != null) { clusterHealthRequest.waitForStatus(ClusterHealthStatus.valueOf(waitForStatus.toUpperCase(Locale.ROOT))); diff --git a/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestClusterStatsAction.java b/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestClusterStatsAction.java index d4426a004af8e..ee33bd18db05d 100644 --- a/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestClusterStatsAction.java +++ b/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestClusterStatsAction.java @@ -66,7 +66,6 @@ public String getName() { public RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) throws IOException { ClusterStatsRequest clusterStatsRequest = new ClusterStatsRequest().nodesIds(request.paramAsStringArray("nodeId", 
null)); clusterStatsRequest.timeout(request.param("timeout")); - clusterStatsRequest.setIncludeDiscoveryNodes(false); clusterStatsRequest.useAggregatedNodeLevelResponses(true); return channel -> client.admin().cluster().clusterStats(clusterStatsRequest, new NodesResponseRestListener<>(channel)); } diff --git a/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestNodesInfoAction.java b/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestNodesInfoAction.java index 4ac51933ea382..37e4c8783d0df 100644 --- a/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestNodesInfoAction.java +++ b/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestNodesInfoAction.java @@ -88,7 +88,6 @@ public RestChannelConsumer prepareRequest(final RestRequest request, final NodeC final NodesInfoRequest nodesInfoRequest = prepareRequest(request); nodesInfoRequest.timeout(request.param("timeout")); settingsFilter.addFilterSettingParams(request); - nodesInfoRequest.setIncludeDiscoveryNodes(false); return channel -> client.admin().cluster().nodesInfo(nodesInfoRequest, new NodesResponseRestListener<>(channel)); } diff --git a/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestNodesStatsAction.java b/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestNodesStatsAction.java index 0119731e4a0d7..0a0ee84b1999f 100644 --- a/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestNodesStatsAction.java +++ b/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestNodesStatsAction.java @@ -232,7 +232,6 @@ public RestChannelConsumer prepareRequest(final RestRequest request, final NodeC // If no levels are passed in this results in an empty array. String[] levels = Strings.splitStringByCommaToArray(request.param("level")); nodesStatsRequest.indices().setLevels(levels); - nodesStatsRequest.setIncludeDiscoveryNodes(false); nodesStatsRequest.indices().setIncludeIndicesStatsByLevel(true); return channel -> client.admin().cluster().nodesStats(nodesStatsRequest, new NodesResponseRestListener<>(channel)); diff --git a/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestSnapshotsStatusAction.java b/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestSnapshotsStatusAction.java index 2b4b188ab0acd..502be16f2fa8e 100644 --- a/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestSnapshotsStatusAction.java +++ b/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestSnapshotsStatusAction.java @@ -61,6 +61,7 @@ public class RestSnapshotsStatusAction extends BaseRestHandler { public List routes() { return unmodifiableList( asList( + new Route(GET, "/_snapshot/{repository}/{snapshot}/{index}/_status"), new Route(GET, "/_snapshot/{repository}/{snapshot}/_status"), new Route(GET, "/_snapshot/{repository}/_status"), new Route(GET, "/_snapshot/_status") @@ -80,7 +81,8 @@ public RestChannelConsumer prepareRequest(final RestRequest request, final NodeC if (snapshots.length == 1 && "_all".equalsIgnoreCase(snapshots[0])) { snapshots = Strings.EMPTY_ARRAY; } - SnapshotsStatusRequest snapshotsStatusRequest = snapshotsStatusRequest(repository).snapshots(snapshots); + String[] indices = request.paramAsStringArray("index", Strings.EMPTY_ARRAY); + SnapshotsStatusRequest snapshotsStatusRequest = snapshotsStatusRequest(repository).snapshots(snapshots).indices(indices); snapshotsStatusRequest.ignoreUnavailable(request.paramAsBoolean("ignore_unavailable", snapshotsStatusRequest.ignoreUnavailable())); 
snapshotsStatusRequest.clusterManagerNodeTimeout( diff --git a/server/src/main/java/org/opensearch/rest/action/cat/RestAllocationAction.java b/server/src/main/java/org/opensearch/rest/action/cat/RestAllocationAction.java index 07b0fbbe4a911..912e5173f8e01 100644 --- a/server/src/main/java/org/opensearch/rest/action/cat/RestAllocationAction.java +++ b/server/src/main/java/org/opensearch/rest/action/cat/RestAllocationAction.java @@ -101,6 +101,7 @@ public void processResponse(final ClusterStateResponse state) { statsRequest.clear() .addMetric(NodesStatsRequest.Metric.FS.metricName()) .indices(new CommonStatsFlags(CommonStatsFlags.Flag.Store)); + statsRequest.indices().setIncludeIndicesStatsByLevel(true); client.admin().cluster().nodesStats(statsRequest, new RestResponseListener(channel) { @Override diff --git a/server/src/main/java/org/opensearch/rest/action/cat/RestNodesAction.java b/server/src/main/java/org/opensearch/rest/action/cat/RestNodesAction.java index 1aa40b50290cd..26d6b91882cbd 100644 --- a/server/src/main/java/org/opensearch/rest/action/cat/RestNodesAction.java +++ b/server/src/main/java/org/opensearch/rest/action/cat/RestNodesAction.java @@ -125,7 +125,6 @@ public RestChannelConsumer doCatRequest(final RestRequest request, final NodeCli public void processResponse(final ClusterStateResponse clusterStateResponse) { NodesInfoRequest nodesInfoRequest = new NodesInfoRequest(); nodesInfoRequest.timeout(request.param("timeout")); - nodesInfoRequest.setIncludeDiscoveryNodes(false); nodesInfoRequest.clear() .addMetrics( NodesInfoRequest.Metric.JVM.metricName(), @@ -138,7 +137,6 @@ public void processResponse(final ClusterStateResponse clusterStateResponse) { public void processResponse(final NodesInfoResponse nodesInfoResponse) { NodesStatsRequest nodesStatsRequest = new NodesStatsRequest(); nodesStatsRequest.timeout(request.param("timeout")); - nodesStatsRequest.setIncludeDiscoveryNodes(false); nodesStatsRequest.clear() .indices(true) .addMetrics( diff --git a/server/src/main/java/org/opensearch/rest/action/cat/RestShardsAction.java b/server/src/main/java/org/opensearch/rest/action/cat/RestShardsAction.java index 4413c8eb370be..a7ad5fe6c14a3 100644 --- a/server/src/main/java/org/opensearch/rest/action/cat/RestShardsAction.java +++ b/server/src/main/java/org/opensearch/rest/action/cat/RestShardsAction.java @@ -32,10 +32,11 @@ package org.opensearch.rest.action.cat; -import org.opensearch.action.admin.cluster.state.ClusterStateRequest; +import org.opensearch.action.admin.cluster.shards.CatShardsAction; +import org.opensearch.action.admin.cluster.shards.CatShardsRequest; +import org.opensearch.action.admin.cluster.shards.CatShardsResponse; import org.opensearch.action.admin.cluster.state.ClusterStateResponse; import org.opensearch.action.admin.indices.stats.CommonStats; -import org.opensearch.action.admin.indices.stats.IndicesStatsRequest; import org.opensearch.action.admin.indices.stats.IndicesStatsResponse; import org.opensearch.action.admin.indices.stats.ShardStats; import org.opensearch.client.node.NodeClient; @@ -61,7 +62,6 @@ import org.opensearch.index.warmer.WarmerStats; import org.opensearch.rest.RestRequest; import org.opensearch.rest.RestResponse; -import org.opensearch.rest.action.RestActionListener; import org.opensearch.rest.action.RestResponseListener; import org.opensearch.search.suggest.completion.CompletionStats; @@ -73,6 +73,7 @@ import static java.util.Arrays.asList; import static java.util.Collections.unmodifiableList; import static 
org.opensearch.rest.RestRequest.Method.GET; +import static org.opensearch.search.SearchService.NO_TIMEOUT; /** * _cat API action to get shard information @@ -107,25 +108,18 @@ protected void documentation(StringBuilder sb) { @Override public RestChannelConsumer doCatRequest(final RestRequest request, final NodeClient client) { final String[] indices = Strings.splitStringByCommaToArray(request.param("index")); - final ClusterStateRequest clusterStateRequest = new ClusterStateRequest(); - clusterStateRequest.local(request.paramAsBoolean("local", clusterStateRequest.local())); - clusterStateRequest.clusterManagerNodeTimeout( - request.paramAsTime("cluster_manager_timeout", clusterStateRequest.clusterManagerNodeTimeout()) - ); - parseDeprecatedMasterTimeoutParameter(clusterStateRequest, request, deprecationLogger, getName()); - clusterStateRequest.clear().nodes(true).routingTable(true).indices(indices); - return channel -> client.admin().cluster().state(clusterStateRequest, new RestActionListener(channel) { + final CatShardsRequest shardsRequest = new CatShardsRequest(); + shardsRequest.local(request.paramAsBoolean("local", shardsRequest.local())); + shardsRequest.clusterManagerNodeTimeout(request.paramAsTime("cluster_manager_timeout", shardsRequest.clusterManagerNodeTimeout())); + shardsRequest.setCancelAfterTimeInterval(request.paramAsTime("cancel_after_time_interval", NO_TIMEOUT)); + shardsRequest.setIndices(indices); + parseDeprecatedMasterTimeoutParameter(shardsRequest, request, deprecationLogger, getName()); + return channel -> client.execute(CatShardsAction.INSTANCE, shardsRequest, new RestResponseListener(channel) { @Override - public void processResponse(final ClusterStateResponse clusterStateResponse) { - IndicesStatsRequest indicesStatsRequest = new IndicesStatsRequest(); - indicesStatsRequest.all(); - indicesStatsRequest.indices(indices); - client.admin().indices().stats(indicesStatsRequest, new RestResponseListener(channel) { - @Override - public RestResponse buildResponse(IndicesStatsResponse indicesStatsResponse) throws Exception { - return RestTable.buildResponse(buildTable(request, clusterStateResponse, indicesStatsResponse), channel); - } - }); + public RestResponse buildResponse(CatShardsResponse catShardsResponse) throws Exception { + ClusterStateResponse clusterStateResponse = catShardsResponse.getClusterStateResponse(); + IndicesStatsResponse indicesStatsResponse = catShardsResponse.getIndicesStatsResponse(); + return RestTable.buildResponse(buildTable(request, clusterStateResponse, indicesStatsResponse), channel); } }); } @@ -321,7 +315,11 @@ Table buildTable(RestRequest request, ClusterStateResponse state, IndicesStatsRe if (shard.primary()) { table.addCell("p"); } else { - table.addCell("r"); + if (shard.isSearchOnly()) { + table.addCell("s"); + } else { + table.addCell("r"); + } } table.addCell(shard.state()); table.addCell(getOrNull(commonStats, CommonStats::getDocs, DocsStats::getCount)); diff --git a/server/src/main/java/org/opensearch/search/DefaultSearchContext.java b/server/src/main/java/org/opensearch/search/DefaultSearchContext.java index 1401c6b89d38a..74a7482d975df 100644 --- a/server/src/main/java/org/opensearch/search/DefaultSearchContext.java +++ b/server/src/main/java/org/opensearch/search/DefaultSearchContext.java @@ -32,6 +32,8 @@ package org.opensearch.search; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import 
org.apache.lucene.search.BoostQuery; @@ -70,6 +72,9 @@ import org.opensearch.search.aggregations.SearchContextAggregations; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.search.collapse.CollapseContext; +import org.opensearch.search.deciders.ConcurrentSearchDecision; +import org.opensearch.search.deciders.ConcurrentSearchRequestDecider; +import org.opensearch.search.deciders.ConcurrentSearchVisitor; import org.opensearch.search.dfs.DfsSearchResult; import org.opensearch.search.fetch.FetchPhase; import org.opensearch.search.fetch.FetchSearchResult; @@ -98,16 +103,25 @@ import java.io.IOException; import java.io.UncheckedIOException; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Optional; +import java.util.Set; import java.util.concurrent.Executor; import java.util.function.Function; import java.util.function.LongSupplier; import static org.opensearch.search.SearchService.CARDINALITY_AGGREGATION_PRUNING_THRESHOLD; +import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE; import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_ALL; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_AUTO; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_NONE; +import static org.opensearch.search.SearchService.KEYWORD_INDEX_OR_DOC_VALUES_ENABLED; import static org.opensearch.search.SearchService.MAX_AGGREGATION_REWRITE_FILTERS; /** @@ -117,11 +131,14 @@ */ final class DefaultSearchContext extends SearchContext { + private static final Logger logger = LogManager.getLogger(DefaultSearchContext.class); + private final ReaderContext readerContext; private final Engine.Searcher engineSearcher; private final ShardSearchRequest request; private final SearchShardTarget shardTarget; private final LongSupplier relativeTimeSupplier; + private final Collection concurrentSearchDeciderFactories; private SearchType searchType; private final BigArrays bigArrays; private final IndexShard indexShard; @@ -187,10 +204,11 @@ final class DefaultSearchContext extends SearchContext { private final QueryShardContext queryShardContext; private final FetchPhase fetchPhase; private final Function requestToAggReduceContextBuilder; - private final boolean concurrentSearchSettingsEnabled; + private final String concurrentSearchMode; private final SetOnce requestShouldUseConcurrentSearch = new SetOnce<>(); private final int maxAggRewriteFilters; private final int cardinalityAggregationPruningThreshold; + private final boolean keywordIndexOrDocValuesEnabled; DefaultSearchContext( ReaderContext readerContext, @@ -205,7 +223,8 @@ final class DefaultSearchContext extends SearchContext { Version minNodeVersion, boolean validate, Executor executor, - Function requestToAggReduceContextBuilder + Function requestToAggReduceContextBuilder, + Collection concurrentSearchDeciderFactories ) throws IOException { this.readerContext = readerContext; this.request = request; @@ -221,14 +240,15 @@ final class DefaultSearchContext extends SearchContext { this.indexShard = readerContext.indexShard(); this.clusterService = clusterService; this.engineSearcher = readerContext.acquireSearcher("search"); - this.concurrentSearchSettingsEnabled = 
evaluateConcurrentSegmentSearchSettings(executor); + this.concurrentSearchMode = evaluateConcurrentSearchMode(executor); this.searcher = new ContextIndexSearcher( engineSearcher.getIndexReader(), engineSearcher.getSimilarity(), engineSearcher.getQueryCache(), engineSearcher.getQueryCachingPolicy(), lowLevelCancellation, - concurrentSearchSettingsEnabled ? executor : null, + concurrentSearchMode.equals(CONCURRENT_SEGMENT_SEARCH_MODE_AUTO) + || concurrentSearchMode.equals(CONCURRENT_SEGMENT_SEARCH_MODE_ALL) ? executor : null, this ); this.relativeTimeSupplier = relativeTimeSupplier; @@ -239,7 +259,8 @@ final class DefaultSearchContext extends SearchContext { this.searcher, request::nowInMillis, shardTarget.getClusterAlias(), - validate + validate, + evaluateKeywordIndexOrDocValuesEnabled() ); queryBoost = request.indexBoost(); this.lowLevelCancellation = lowLevelCancellation; @@ -247,6 +268,8 @@ final class DefaultSearchContext extends SearchContext { this.maxAggRewriteFilters = evaluateFilterRewriteSetting(); this.cardinalityAggregationPruningThreshold = evaluateCardinalityAggregationPruningThreshold(); + this.concurrentSearchDeciderFactories = concurrentSearchDeciderFactories; + this.keywordIndexOrDocValuesEnabled = evaluateKeywordIndexOrDocValuesEnabled(); } @Override @@ -902,11 +925,75 @@ public Profilers getProfilers() { @Override public boolean shouldUseConcurrentSearch() { assert requestShouldUseConcurrentSearch.get() != null : "requestShouldUseConcurrentSearch must be set"; - return concurrentSearchSettingsEnabled && Boolean.TRUE.equals(requestShouldUseConcurrentSearch.get()); + assert concurrentSearchMode != null : "concurrentSearchMode must be set"; + return (concurrentSearchMode.equals(CONCURRENT_SEGMENT_SEARCH_MODE_AUTO) + || concurrentSearchMode.equals(CONCURRENT_SEGMENT_SEARCH_MODE_ALL)) + && Boolean.TRUE.equals(requestShouldUseConcurrentSearch.get()); + } + + private boolean evaluateAutoMode() { + + final Set concurrentSearchRequestDeciders = new HashSet<>(); + + // create the ConcurrentSearchRequestDeciders using registered factories + for (ConcurrentSearchRequestDecider.Factory deciderFactory : concurrentSearchDeciderFactories) { + final Optional concurrentSearchRequestDecider = deciderFactory.create( + indexService.getIndexSettings() + ); + concurrentSearchRequestDecider.ifPresent(concurrentSearchRequestDeciders::add); + + } + + // evaluate based on concurrent search query visitor + if (concurrentSearchRequestDeciders.size() > 0) { + ConcurrentSearchVisitor concurrentSearchVisitor = new ConcurrentSearchVisitor( + concurrentSearchRequestDeciders, + indexService.getIndexSettings() + ); + if (request().source() != null && request().source().query() != null) { + QueryBuilder queryBuilder = request().source().query(); + queryBuilder.visit(concurrentSearchVisitor); + } + } + + final List decisions = new ArrayList<>(); + for (ConcurrentSearchRequestDecider decider : concurrentSearchRequestDeciders) { + ConcurrentSearchDecision decision = decider.getConcurrentSearchDecision(); + if (decision != null) { + if (logger.isDebugEnabled()) { + logger.debug("concurrent search decision from plugin decider [{}]", decision.toString()); + } + decisions.add(decision); + } + } + + final ConcurrentSearchDecision pluginDecision = ConcurrentSearchDecision.getCompositeDecision(decisions); + if (pluginDecision.getDecisionStatus().equals(ConcurrentSearchDecision.DecisionStatus.NO_OP)) { + // plugins don't have preference, decide based on whether request has aggregations or not. 
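        // Note on the composite semantics used just below (see ConcurrentSearchDecision.getCompositeDecision):
        // a single NO from any decider wins immediately, otherwise any YES wins, and all-NO_OP means the
        // deciders expressed no preference, which is why only then does the aggregation check decide.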
+ if (aggregations() != null) { + if (logger.isDebugEnabled()) { + logger.debug("request has supported aggregations, using concurrent search"); + } + return true; + + } else { + if (logger.isDebugEnabled()) { + logger.debug("request does not have aggregations, not using concurrent search"); + } + return false; + } + + } else { + if (logger.isDebugEnabled()) { + logger.debug("concurrent search decision from plugins [{}]", pluginDecision.toString()); + } + return pluginDecision.getDecisionStatus() == ConcurrentSearchDecision.DecisionStatus.YES; + } + } /** - * Evaluate if parsed request supports concurrent segment search + * Evaluate if request should use concurrent search based on request and concurrent search deciders */ public void evaluateRequestShouldUseConcurrentSearch() { if (sort != null && sort.isSortOnTimeSeriesField()) { @@ -917,6 +1004,8 @@ && aggregations().factories() != null requestShouldUseConcurrentSearch.set(false); } else if (terminateAfter != DEFAULT_TERMINATE_AFTER) { requestShouldUseConcurrentSearch.set(false); + } else if (concurrentSearchMode.equals(CONCURRENT_SEGMENT_SEARCH_MODE_AUTO)) { + requestShouldUseConcurrentSearch.set(evaluateAutoMode()); } else { requestShouldUseConcurrentSearch.set(true); } @@ -964,26 +1053,40 @@ public BucketCollectorProcessor bucketCollectorProcessor() { } /** - * Evaluate based on cluster and index settings if concurrent segment search should be used for this request context - * @return true: use concurrent search - * false: otherwise + * Evaluate the concurrentSearchMode based on cluster and index settings if concurrent segment search + * should be used for this request context + * If the cluster.search.concurrent_segment_search.mode setting + * is not explicitly set, the evaluation falls back to the + * cluster.search.concurrent_segment_search.enabled boolean setting + * which will evaluate to true or false. This is then evaluated to "all" or "none" respectively + * @return one of "none", "auto", "all" */ - private boolean evaluateConcurrentSegmentSearchSettings(Executor concurrentSearchExecutor) { + private String evaluateConcurrentSearchMode(Executor concurrentSearchExecutor) { // Do not use concurrent segment search for system indices or throttled requests. See: // https://github.com/opensearch-project/OpenSearch/issues/12951 if (indexShard.isSystem() || indexShard.indexSettings().isSearchThrottled()) { - return false; + return CONCURRENT_SEGMENT_SEARCH_MODE_NONE; } + if ((clusterService != null) && concurrentSearchExecutor != null) { + String concurrentSearchMode = indexService.getIndexSettings() + .getSettings() + .get( + IndexSettings.INDEX_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), + clusterService.getClusterSettings().getOrNull(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE) + ); + if (concurrentSearchMode != null) { + return concurrentSearchMode; + } - if ((clusterService != null) && (concurrentSearchExecutor != null)) { + // mode setting not set, fallback to concurrent_segment_search.enabled setting return indexService.getIndexSettings() .getSettings() .getAsBoolean( IndexSettings.INDEX_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), clusterService.getClusterSettings().get(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING) - ); + ) ? 
CONCURRENT_SEGMENT_SEARCH_MODE_ALL : CONCURRENT_SEGMENT_SEARCH_MODE_NONE; } - return false; + return CONCURRENT_SEGMENT_SEARCH_MODE_NONE; } @Override @@ -1026,10 +1129,22 @@ public int cardinalityAggregationPruningThreshold() { return cardinalityAggregationPruningThreshold; } + @Override + public boolean keywordIndexOrDocValuesEnabled() { + return keywordIndexOrDocValuesEnabled; + } + private int evaluateCardinalityAggregationPruningThreshold() { if (clusterService != null) { return clusterService.getClusterSettings().get(CARDINALITY_AGGREGATION_PRUNING_THRESHOLD); } return 0; } + + public boolean evaluateKeywordIndexOrDocValuesEnabled() { + if (clusterService != null) { + return clusterService.getClusterSettings().get(KEYWORD_INDEX_OR_DOC_VALUES_ENABLED); + } + return false; + } } diff --git a/server/src/main/java/org/opensearch/search/SearchModule.java b/server/src/main/java/org/opensearch/search/SearchModule.java index b463458847a88..3a746259af3b5 100644 --- a/server/src/main/java/org/opensearch/search/SearchModule.java +++ b/server/src/main/java/org/opensearch/search/SearchModule.java @@ -239,6 +239,7 @@ import org.opensearch.search.aggregations.pipeline.StatsBucketPipelineAggregationBuilder; import org.opensearch.search.aggregations.pipeline.SumBucketPipelineAggregationBuilder; import org.opensearch.search.aggregations.support.ValuesSourceRegistry; +import org.opensearch.search.deciders.ConcurrentSearchRequestDecider; import org.opensearch.search.fetch.FetchPhase; import org.opensearch.search.fetch.FetchSubPhase; import org.opensearch.search.fetch.subphase.ExplainPhase; @@ -282,6 +283,7 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.List; import java.util.Map; import java.util.Optional; @@ -316,6 +318,8 @@ public class SearchModule { private final QueryPhaseSearcher queryPhaseSearcher; private final SearchPlugin.ExecutorServiceProvider indexSearcherExecutorProvider; + private final Collection concurrentSearchDeciderFactories; + /** * Constructs a new SearchModule object *
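The mode resolution above prefers the index-level setting and then the cluster-level search.concurrent_segment_search.mode setting (declared in SearchService further down in this change), and falls back to the deprecated boolean search.concurrent_segment_search.enabled when neither is set. A minimal sketch of flipping the cluster-level mode from client code; the helper name is made up and the client plumbing is assumed, only the setting key and its accepted values come from this change:

    import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest;
    import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsResponse;
    import org.opensearch.client.Client;
    import org.opensearch.common.settings.Settings;
    import org.opensearch.core.action.ActionListener;

    static void enableAutoConcurrentSearch(Client client, ActionListener<ClusterUpdateSettingsResponse> listener) {
        // Accepted values are "all", "none" and "auto"; anything else is rejected by the setting's validator.
        ClusterUpdateSettingsRequest updateRequest = new ClusterUpdateSettingsRequest();
        updateRequest.transientSettings(Settings.builder().put("search.concurrent_segment_search.mode", "auto"));
        client.admin().cluster().updateSettings(updateRequest, listener);
    }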

@@ -344,6 +348,23 @@ public SearchModule(Settings settings, List plugins) { queryPhaseSearcher = registerQueryPhaseSearcher(plugins); indexSearcherExecutorProvider = registerIndexSearcherExecutorProvider(plugins); namedWriteables.addAll(SortValue.namedWriteables()); + concurrentSearchDeciderFactories = registerConcurrentSearchDeciderFactories(plugins); + } + + private Collection registerConcurrentSearchDeciderFactories(List plugins) { + List concurrentSearchDeciderFactories = new ArrayList<>(); + for (SearchPlugin plugin : plugins) { + final Optional deciderFactory = plugin.getConcurrentSearchRequestDeciderFactory(); + deciderFactory.ifPresent(concurrentSearchDeciderFactories::add); + } + return concurrentSearchDeciderFactories; + } + + /** + * Returns the concurrent search decider factories that the plugins have registered + */ + public Collection getConcurrentSearchRequestDeciderFactories() { + return concurrentSearchDeciderFactories; } public List getNamedWriteables() { diff --git a/server/src/main/java/org/opensearch/search/SearchService.java b/server/src/main/java/org/opensearch/search/SearchService.java index a53a7198c366f..40afdbfbdaa9e 100644 --- a/server/src/main/java/org/opensearch/search/SearchService.java +++ b/server/src/main/java/org/opensearch/search/SearchService.java @@ -104,6 +104,7 @@ import org.opensearch.search.aggregations.pipeline.PipelineAggregator.PipelineTree; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.search.collapse.CollapseContext; +import org.opensearch.search.deciders.ConcurrentSearchRequestDecider; import org.opensearch.search.dfs.DfsPhase; import org.opensearch.search.dfs.DfsSearchResult; import org.opensearch.search.fetch.FetchPhase; @@ -146,6 +147,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -256,6 +258,34 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv "search.concurrent_segment_search.enabled", false, Property.Dynamic, + Property.NodeScope, + Property.Deprecated + ); + + // Allow concurrent segment search for all requests + public static final String CONCURRENT_SEGMENT_SEARCH_MODE_ALL = "all"; + + // Disallow concurrent search for all requests + public static final String CONCURRENT_SEGMENT_SEARCH_MODE_NONE = "none"; + + // Make decision for concurrent search based on concurrent search deciders + public static final String CONCURRENT_SEGMENT_SEARCH_MODE_AUTO = "auto"; + + public static final Setting CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE = Setting.simpleString( + "search.concurrent_segment_search.mode", + CONCURRENT_SEGMENT_SEARCH_MODE_NONE, + value -> { + switch (value) { + case CONCURRENT_SEGMENT_SEARCH_MODE_ALL: + case CONCURRENT_SEGMENT_SEARCH_MODE_NONE: + case CONCURRENT_SEGMENT_SEARCH_MODE_AUTO: + // valid setting + break; + default: + throw new IllegalArgumentException("Setting value must be one of [all, none, auto]"); + } + }, + Property.Dynamic, Property.NodeScope ); @@ -307,6 +337,13 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv Property.NodeScope ); + public static final Setting KEYWORD_INDEX_OR_DOC_VALUES_ENABLED = Setting.boolSetting( + "search.keyword_index_or_doc_values_enabled", + false, + Property.Dynamic, + Property.NodeScope + ); + public static final int DEFAULT_SIZE = 10; public static final int DEFAULT_FROM = 0; @@ -327,6 +364,7 @@ public class SearchService extends AbstractLifecycleComponent 
implements IndexEv private final QueryPhase queryPhase; private final FetchPhase fetchPhase; + private final Collection concurrentSearchDeciderFactories; private volatile long defaultKeepAlive; @@ -371,7 +409,8 @@ public SearchService( ResponseCollectorService responseCollectorService, CircuitBreakerService circuitBreakerService, Executor indexSearcherExecutor, - TaskResourceTrackingService taskResourceTrackingService + TaskResourceTrackingService taskResourceTrackingService, + Collection concurrentSearchDeciderFactories ) { Settings settings = clusterService.getSettings(); this.threadPool = threadPool; @@ -426,6 +465,8 @@ public SearchService( allowDerivedField = CLUSTER_ALLOW_DERIVED_FIELD_SETTING.get(settings); clusterService.getClusterSettings().addSettingsUpdateConsumer(CLUSTER_ALLOW_DERIVED_FIELD_SETTING, this::setAllowDerivedField); + + this.concurrentSearchDeciderFactories = concurrentSearchDeciderFactories; } private void validateKeepAlives(TimeValue defaultKeepAlive, TimeValue maxKeepAlive) { @@ -1125,7 +1166,8 @@ private DefaultSearchContext createSearchContext(ReaderContext reader, ShardSear clusterService.state().nodes().getMinNodeVersion(), validate, indexSearcherExecutor, - this::aggReduceContextBuilder + this::aggReduceContextBuilder, + concurrentSearchDeciderFactories ); // we clone the query shard context here just for rewriting otherwise we // might end up with incorrect state since we are using now() or script services @@ -1138,6 +1180,7 @@ private DefaultSearchContext createSearchContext(ReaderContext reader, ShardSear context.getIndexSettings().isDerivedFieldAllowed() && allowDerivedField ); context.setDerivedFieldResolver(derivedFieldResolver); + context.setKeywordFieldIndexOrDocValuesEnabled(searchContext.keywordIndexOrDocValuesEnabled()); searchContext.getQueryShardContext().setDerivedFieldResolver(derivedFieldResolver); Rewriteable.rewrite(request.getRewriteable(), context, true); assert searchContext.getQueryShardContext().isCacheable(); diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/Helper.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/Helper.java index 7493754d8efa2..17da7e5712be8 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/Helper.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/Helper.java @@ -23,6 +23,7 @@ import org.opensearch.common.lucene.search.function.FunctionScoreQuery; import org.opensearch.index.mapper.DateFieldMapper; import org.opensearch.index.query.DateRangeIncludingNowQuery; +import org.opensearch.search.approximate.ApproximateIndexOrDocValuesQuery; import org.opensearch.search.internal.SearchContext; import java.io.IOException; @@ -54,6 +55,7 @@ private Helper() {} queryWrappers.put(FunctionScoreQuery.class, q -> ((FunctionScoreQuery) q).getSubQuery()); queryWrappers.put(DateRangeIncludingNowQuery.class, q -> ((DateRangeIncludingNowQuery) q).getQuery()); queryWrappers.put(IndexOrDocValuesQuery.class, q -> ((IndexOrDocValuesQuery) q).getIndexQuery()); + queryWrappers.put(ApproximateIndexOrDocValuesQuery.class, q -> ((ApproximateIndexOrDocValuesQuery) q).getOriginalQuery()); } /** diff --git a/server/src/main/java/org/opensearch/search/approximate/ApproximateIndexOrDocValuesQuery.java b/server/src/main/java/org/opensearch/search/approximate/ApproximateIndexOrDocValuesQuery.java new file mode 100644 index 0000000000000..b99e0a0cbf808 --- /dev/null +++ 
b/server/src/main/java/org/opensearch/search/approximate/ApproximateIndexOrDocValuesQuery.java @@ -0,0 +1,62 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.approximate; + +import org.apache.lucene.search.IndexOrDocValuesQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryVisitor; + +/** + * A wrapper around {@link IndexOrDocValuesQuery} that can be used to run approximate queries. + * It delegates to either {@link ApproximateQuery} or {@link IndexOrDocValuesQuery} based on whether the query can be approximated or not. + * @see ApproximateQuery + */ +public final class ApproximateIndexOrDocValuesQuery extends ApproximateScoreQuery { + + private final ApproximateQuery approximateIndexQuery; + private final IndexOrDocValuesQuery indexOrDocValuesQuery; + + public ApproximateIndexOrDocValuesQuery(Query indexQuery, ApproximateQuery approximateIndexQuery, Query dvQuery) { + super(new IndexOrDocValuesQuery(indexQuery, dvQuery), approximateIndexQuery); + this.approximateIndexQuery = approximateIndexQuery; + this.indexOrDocValuesQuery = new IndexOrDocValuesQuery(indexQuery, dvQuery); + } + + @Override + public String toString(String field) { + return "ApproximateIndexOrDocValuesQuery(indexQuery=" + + indexOrDocValuesQuery.getIndexQuery().toString(field) + + ", approximateIndexQuery=" + + approximateIndexQuery.toString(field) + + ", dvQuery=" + + indexOrDocValuesQuery.getRandomAccessQuery().toString(field) + + ")"; + } + + @Override + public void visit(QueryVisitor visitor) { + indexOrDocValuesQuery.visit(visitor); + } + + @Override + public boolean equals(Object obj) { + if (sameClassAs(obj) == false) { + return false; + } + return true; + } + + @Override + public int hashCode() { + int h = classHash(); + h = 31 * h + indexOrDocValuesQuery.getIndexQuery().hashCode(); + h = 31 * h + indexOrDocValuesQuery.getRandomAccessQuery().hashCode(); + return h; + } +} diff --git a/server/src/main/java/org/opensearch/search/approximate/ApproximatePointRangeQuery.java b/server/src/main/java/org/opensearch/search/approximate/ApproximatePointRangeQuery.java new file mode 100644 index 0000000000000..8076da6ab970b --- /dev/null +++ b/server/src/main/java/org/opensearch/search/approximate/ApproximatePointRangeQuery.java @@ -0,0 +1,520 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.search.approximate; + +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PointValues; +import org.apache.lucene.search.ConstantScoreScorer; +import org.apache.lucene.search.ConstantScoreWeight; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.PointRangeQuery; +import org.apache.lucene.search.QueryVisitor; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.ScorerSupplier; +import org.apache.lucene.search.Weight; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.DocIdSetBuilder; +import org.apache.lucene.util.IntsRef; +import org.opensearch.index.query.RangeQueryBuilder; +import org.opensearch.search.internal.SearchContext; +import org.opensearch.search.sort.FieldSortBuilder; +import org.opensearch.search.sort.SortOrder; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Objects; + +/** + * An approximate-able version of {@link PointRangeQuery}. It creates an instance of {@link PointRangeQuery} but short-circuits the intersect logic + * after {@code size} is hit + */ +public abstract class ApproximatePointRangeQuery extends ApproximateQuery { + private int size; + + private SortOrder sortOrder; + + public final PointRangeQuery pointRangeQuery; + + protected ApproximatePointRangeQuery(String field, byte[] lowerPoint, byte[] upperPoint, int numDims) { + this(field, lowerPoint, upperPoint, numDims, 10_000, null); + } + + protected ApproximatePointRangeQuery(String field, byte[] lowerPoint, byte[] upperPoint, int numDims, int size) { + this(field, lowerPoint, upperPoint, numDims, size, null); + } + + protected ApproximatePointRangeQuery(String field, byte[] lowerPoint, byte[] upperPoint, int numDims, int size, SortOrder sortOrder) { + this.size = size; + this.sortOrder = sortOrder; + this.pointRangeQuery = new PointRangeQuery(field, lowerPoint, upperPoint, numDims) { + @Override + protected String toString(int dimension, byte[] value) { + return super.toString(field); + } + }; + } + + public int getSize() { + return this.size; + } + + public void setSize(int size) { + this.size = size; + } + + public SortOrder getSortOrder() { + return this.sortOrder; + } + + public void setSortOrder(SortOrder sortOrder) { + this.sortOrder = sortOrder; + } + + @Override + public void visit(QueryVisitor visitor) { + pointRangeQuery.visit(visitor); + } + + @Override + public final ConstantScoreWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + Weight pointRangeQueryWeight = pointRangeQuery.createWeight(searcher, scoreMode, boost); + + return new ConstantScoreWeight(this, boost) { + + private final ArrayUtil.ByteArrayComparator comparator = ArrayUtil.getUnsignedComparator(pointRangeQuery.getBytesPerDim()); + + // we pull this from PointRangeQuery since it is final + private boolean matches(byte[] packedValue) { + for (int dim = 0; dim < pointRangeQuery.getNumDims(); dim++) { + int offset = dim * pointRangeQuery.getBytesPerDim(); + if (comparator.compare(packedValue, offset, pointRangeQuery.getLowerPoint(), offset) < 0) { + // Doc's value is too low, in this dimension + return false; + } + if (comparator.compare(packedValue, offset, pointRangeQuery.getUpperPoint(), offset) > 0) { + // Doc's value is too high, in this dimension + return false; + } + } + return true; + } 
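            // The helpers below mirror PointRangeQuery, which is final and so cannot be extended directly.
            // The intersect visitors stop adding documents once docCount[0] reaches size, and scorerSupplier
            // walks the point tree from the left (ascending) or the right (descending primary sort) edge,
            // falling back to the exact PointRangeQuery weight whenever the requested size exceeds the
            // segment's point count.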
+ + // we pull this from PointRangeQuery since it is final + private PointValues.Relation relate(byte[] minPackedValue, byte[] maxPackedValue) { + + boolean crosses = false; + + for (int dim = 0; dim < pointRangeQuery.getNumDims(); dim++) { + int offset = dim * pointRangeQuery.getBytesPerDim(); + + if (comparator.compare(minPackedValue, offset, pointRangeQuery.getUpperPoint(), offset) > 0 + || comparator.compare(maxPackedValue, offset, pointRangeQuery.getLowerPoint(), offset) < 0) { + return PointValues.Relation.CELL_OUTSIDE_QUERY; + } + + crosses |= comparator.compare(minPackedValue, offset, pointRangeQuery.getLowerPoint(), offset) < 0 + || comparator.compare(maxPackedValue, offset, pointRangeQuery.getUpperPoint(), offset) > 0; + } + + if (crosses) { + return PointValues.Relation.CELL_CROSSES_QUERY; + } else { + return PointValues.Relation.CELL_INSIDE_QUERY; + } + } + + public PointValues.IntersectVisitor getIntersectVisitor(DocIdSetBuilder result, long[] docCount) { + return new PointValues.IntersectVisitor() { + + DocIdSetBuilder.BulkAdder adder; + + @Override + public void grow(int count) { + adder = result.grow(count); + } + + @Override + public void visit(int docID) { + // it is possible that size < 1024 and docCount < size but we will continue to count through all the 1024 docs + // and collect less, but it won't hurt performance + if (docCount[0] >= size) { + return; + } + adder.add(docID); + docCount[0]++; + } + + @Override + public void visit(DocIdSetIterator iterator) throws IOException { + adder.add(iterator); + } + + @Override + public void visit(IntsRef ref) { + for (int i = 0; i < ref.length; i++) { + adder.add(ref.ints[ref.offset + i]); + } + } + + @Override + public void visit(int docID, byte[] packedValue) { + if (matches(packedValue)) { + visit(docID); + } + } + + @Override + public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException { + if (matches(packedValue)) { + adder.add(iterator); + } + } + + @Override + public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { + return relate(minPackedValue, maxPackedValue); + } + }; + } + + // we pull this from PointRangeQuery since it is final + private boolean checkValidPointValues(PointValues values) throws IOException { + if (values == null) { + // No docs in this segment/field indexed any points + return false; + } + + if (values.getNumIndexDimensions() != pointRangeQuery.getNumDims()) { + throw new IllegalArgumentException( + "field=\"" + + pointRangeQuery.getField() + + "\" was indexed with numIndexDimensions=" + + values.getNumIndexDimensions() + + " but this query has numDims=" + + pointRangeQuery.getNumDims() + ); + } + if (pointRangeQuery.getBytesPerDim() != values.getBytesPerDimension()) { + throw new IllegalArgumentException( + "field=\"" + + pointRangeQuery.getField() + + "\" was indexed with bytesPerDim=" + + values.getBytesPerDimension() + + " but this query has bytesPerDim=" + + pointRangeQuery.getBytesPerDim() + ); + } + return true; + } + + private void intersectLeft(PointValues.PointTree pointTree, PointValues.IntersectVisitor visitor, long[] docCount) + throws IOException { + intersectLeft(visitor, pointTree, docCount); + assert pointTree.moveToParent() == false; + } + + private void intersectRight(PointValues.PointTree pointTree, PointValues.IntersectVisitor visitor, long[] docCount) + throws IOException { + intersectRight(visitor, pointTree, docCount); + assert pointTree.moveToParent() == false; + } + + // custom intersect visitor to walk the left of the 
tree + public void intersectLeft(PointValues.IntersectVisitor visitor, PointValues.PointTree pointTree, long[] docCount) + throws IOException { + PointValues.Relation r = visitor.compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue()); + if (docCount[0] >= size) { + return; + } + switch (r) { + case CELL_OUTSIDE_QUERY: + // This cell is fully outside the query shape: stop recursing + break; + case CELL_INSIDE_QUERY: + // If the cell is fully inside, we keep moving to child until we reach a point where we can no longer move or when + // we have sufficient doc count. We first move down and then move to the left child + if (pointTree.moveToChild() && docCount[0] < size) { + do { + intersectLeft(visitor, pointTree, docCount); + } while (pointTree.moveToSibling() && docCount[0] < size); + pointTree.moveToParent(); + } else { + // we're at the leaf node, if we're under the size, visit all the docIds in this node. + if (docCount[0] < size) { + pointTree.visitDocIDs(visitor); + } + } + break; + case CELL_CROSSES_QUERY: + // The cell crosses the shape boundary, or the cell fully contains the query, so we fall + // through and do full filtering: + if (pointTree.moveToChild() && docCount[0] < size) { + do { + intersectLeft(visitor, pointTree, docCount); + } while (pointTree.moveToSibling() && docCount[0] < size); + pointTree.moveToParent(); + } else { + // TODO: we can assert that the first value here in fact matches what the pointTree + // claimed? + // Leaf node; scan and filter all points in this block: + if (docCount[0] < size) { + pointTree.visitDocValues(visitor); + } + } + break; + default: + throw new IllegalArgumentException("Unreachable code"); + } + } + + // custom intersect visitor to walk the right of tree + public void intersectRight(PointValues.IntersectVisitor visitor, PointValues.PointTree pointTree, long[] docCount) + throws IOException { + PointValues.Relation r = visitor.compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue()); + if (docCount[0] >= size) { + return; + } + switch (r) { + case CELL_OUTSIDE_QUERY: + // This cell is fully outside the query shape: stop recursing + break; + + case CELL_INSIDE_QUERY: + // If the cell is fully inside, we keep moving right as long as the point tree size is over our size requirement + if (pointTree.size() > size && docCount[0] < size && moveRight(pointTree)) { + intersectRight(visitor, pointTree, docCount); + pointTree.moveToParent(); + } + // if point tree size is no longer over, we have to go back one level where it still was over and the intersect left + else if (pointTree.size() <= size && docCount[0] < size) { + pointTree.moveToParent(); + intersectLeft(visitor, pointTree, docCount); + } + // if we've reached leaf, it means out size is under the size of the leaf, we can just collect all docIDs + else { + // Leaf node; scan and filter all points in this block: + if (docCount[0] < size) { + pointTree.visitDocIDs(visitor); + } + } + break; + case CELL_CROSSES_QUERY: + // If the cell is fully inside, we keep moving right as long as the point tree size is over our size requirement + if (pointTree.size() > size && docCount[0] < size && moveRight(pointTree)) { + intersectRight(visitor, pointTree, docCount); + pointTree.moveToParent(); + } + // if point tree size is no longer over, we have to go back one level where it still was over and the intersect left + else if (pointTree.size() <= size && docCount[0] < size) { + pointTree.moveToParent(); + intersectLeft(visitor, pointTree, docCount); + } + // if we've reached 
leaf, it means out size is under the size of the leaf, we can just collect all doc values + else { + // Leaf node; scan and filter all points in this block: + if (docCount[0] < size) { + pointTree.visitDocValues(visitor); + } + } + break; + default: + throw new IllegalArgumentException("Unreachable code"); + } + } + + public boolean moveRight(PointValues.PointTree pointTree) throws IOException { + return pointTree.moveToChild() && pointTree.moveToSibling(); + } + + @Override + public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { + LeafReader reader = context.reader(); + long[] docCount = { 0 }; + + PointValues values = reader.getPointValues(pointRangeQuery.getField()); + if (checkValidPointValues(values) == false) { + return null; + } + final Weight weight = this; + if (size > values.size()) { + return pointRangeQueryWeight.scorerSupplier(context); + } else { + if (sortOrder == null || sortOrder.equals(SortOrder.ASC)) { + return new ScorerSupplier() { + + final DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, pointRangeQuery.getField()); + final PointValues.IntersectVisitor visitor = getIntersectVisitor(result, docCount); + long cost = -1; + + @Override + public Scorer get(long leadCost) throws IOException { + intersectLeft(values.getPointTree(), visitor, docCount); + DocIdSetIterator iterator = result.build().iterator(); + return new ConstantScoreScorer(weight, score(), scoreMode, iterator); + } + + @Override + public long cost() { + if (cost == -1) { + // Computing the cost may be expensive, so only do it if necessary + cost = values.estimateDocCount(visitor); + assert cost >= 0; + } + return cost; + } + }; + } else { + // we need to fetch size + deleted docs since the collector will prune away deleted docs resulting in fewer results + // than expected + final int deletedDocs = reader.numDeletedDocs(); + size += deletedDocs; + return new ScorerSupplier() { + + final DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, pointRangeQuery.getField()); + final PointValues.IntersectVisitor visitor = getIntersectVisitor(result, docCount); + long cost = -1; + + @Override + public Scorer get(long leadCost) throws IOException { + intersectRight(values.getPointTree(), visitor, docCount); + DocIdSetIterator iterator = result.build().iterator(); + return new ConstantScoreScorer(weight, score(), scoreMode, iterator); + } + + @Override + public long cost() { + if (cost == -1) { + // Computing the cost may be expensive, so only do it if necessary + cost = values.estimateDocCount(visitor); + assert cost >= 0; + } + return cost; + } + }; + } + } + } + + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + ScorerSupplier scorerSupplier = scorerSupplier(context); + if (scorerSupplier == null) { + return null; + } + return scorerSupplier.get(Long.MAX_VALUE); + } + + @Override + public int count(LeafReaderContext context) throws IOException { + return pointRangeQueryWeight.count(context); + } + + @Override + public boolean isCacheable(LeafReaderContext ctx) { + return false; + } + }; + } + + @Override + public boolean canApproximate(SearchContext context) { + if (context == null) { + return false; + } + if (context.aggregations() != null) { + return false; + } + if (!(context.query() instanceof ApproximateIndexOrDocValuesQuery)) { + return false; + } + // size 0 could be set for caching + if (context.from() + context.size() == 0) { + this.setSize(10_000); + } + this.setSize(Math.max(context.from() + 
context.size(), context.trackTotalHitsUpTo())); + if (context.request() != null && context.request().source() != null) { + FieldSortBuilder primarySortField = FieldSortBuilder.getPrimaryFieldSortOrNull(context.request().source()); + if (primarySortField != null + && primarySortField.missing() == null + && primarySortField.getFieldName().equals(((RangeQueryBuilder) context.request().source().query()).fieldName())) { + if (primarySortField.order() == SortOrder.DESC) { + this.setSortOrder(SortOrder.DESC); + } + } + } + return true; + } + + @Override + public final int hashCode() { + return pointRangeQuery.hashCode(); + } + + @Override + public final boolean equals(Object o) { + return sameClassAs(o) && equalsTo(getClass().cast(o)); + } + + private boolean equalsTo(ApproximatePointRangeQuery other) { + return Objects.equals(pointRangeQuery.getField(), other.pointRangeQuery.getField()) + && pointRangeQuery.getNumDims() == other.pointRangeQuery.getNumDims() + && pointRangeQuery.getBytesPerDim() == other.pointRangeQuery.getBytesPerDim() + && Arrays.equals(pointRangeQuery.getLowerPoint(), other.pointRangeQuery.getLowerPoint()) + && Arrays.equals(pointRangeQuery.getUpperPoint(), other.pointRangeQuery.getUpperPoint()); + } + + @Override + public final String toString(String field) { + final StringBuilder sb = new StringBuilder(); + if (pointRangeQuery.getField().equals(field) == false) { + sb.append(pointRangeQuery.getField()); + sb.append(':'); + } + + // print ourselves as "range per dimension" + for (int i = 0; i < pointRangeQuery.getNumDims(); i++) { + if (i > 0) { + sb.append(','); + } + + int startOffset = pointRangeQuery.getBytesPerDim() * i; + + sb.append('['); + sb.append( + toString( + i, + ArrayUtil.copyOfSubArray(pointRangeQuery.getLowerPoint(), startOffset, startOffset + pointRangeQuery.getBytesPerDim()) + ) + ); + sb.append(" TO "); + sb.append( + toString( + i, + ArrayUtil.copyOfSubArray(pointRangeQuery.getUpperPoint(), startOffset, startOffset + pointRangeQuery.getBytesPerDim()) + ) + ); + sb.append(']'); + } + + return sb.toString(); + } + + /** + * Returns a string of a single value in a human-readable format for debugging. This is used by + * {@link #toString()}. + * + * @param dimension dimension of the particular value + * @param value single value, never null + * @return human readable value for debugging + */ + protected abstract String toString(int dimension, byte[] value); +} diff --git a/server/src/main/java/org/opensearch/search/approximate/ApproximateQuery.java b/server/src/main/java/org/opensearch/search/approximate/ApproximateQuery.java new file mode 100644 index 0000000000000..0e6faf396b671 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/approximate/ApproximateQuery.java @@ -0,0 +1,21 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.approximate; + +import org.apache.lucene.search.Query; +import org.opensearch.search.internal.SearchContext; + +/** + * Abstract class that can be inherited by queries that can be approximated. 
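As an illustration of how these pieces fit together, a minimal sketch of wiring a long-valued range field through the approximation framework; the field name, bounds, and helper method are hypothetical, while the constructors and the toString override come from the classes added in this change:

    import org.apache.lucene.document.LongPoint;
    import org.apache.lucene.document.SortedNumericDocValuesField;
    import org.apache.lucene.search.Query;
    import org.opensearch.search.approximate.ApproximateIndexOrDocValuesQuery;
    import org.opensearch.search.approximate.ApproximatePointRangeQuery;

    static Query approximateLongRange(String field, long lower, long upper) {
        // Exact variants: the BKD-backed point query and the doc-values fallback.
        Query indexQuery = LongPoint.newRangeQuery(field, lower, upper);
        Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery(field, lower, upper);
        // Approximate variant: stops collecting once enough documents for the request have been seen.
        ApproximatePointRangeQuery approxQuery = new ApproximatePointRangeQuery(
            field,
            LongPoint.pack(lower).bytes,
            LongPoint.pack(upper).bytes,
            1 /* numDims */
        ) {
            @Override
            protected String toString(int dimension, byte[] value) {
                return Long.toString(LongPoint.decodeDimension(value, 0));
            }
        };
        // ContextIndexSearcher sets the SearchContext on this query at createWeight time, which resolves it
        // to either the approximate path or the exact IndexOrDocValuesQuery path for the request.
        return new ApproximateIndexOrDocValuesQuery(indexQuery, approxQuery, dvQuery);
    }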
Queries should implement {@link #canApproximate(SearchContext)} to specify conditions on when they can be approximated +*/ +public abstract class ApproximateQuery extends Query { + + protected abstract boolean canApproximate(SearchContext context); + +} diff --git a/server/src/main/java/org/opensearch/search/approximate/ApproximateScoreQuery.java b/server/src/main/java/org/opensearch/search/approximate/ApproximateScoreQuery.java new file mode 100644 index 0000000000000..d1dd32b239f28 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/approximate/ApproximateScoreQuery.java @@ -0,0 +1,90 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.approximate; + +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryVisitor; +import org.apache.lucene.search.Weight; +import org.opensearch.search.internal.SearchContext; + +import java.io.IOException; + +/** + * Entry-point for the approximation framework. + * This class is heavily inspired by {@link org.apache.lucene.search.IndexOrDocValuesQuery}. It acts as a wrapper that consumes two queries, a regular query and an approximate version of the same. By default, it executes the regular query and returns {@link Weight#scorer} for the original query. At run-time, depending on certain constraints, we can re-write the {@code Weight} to use the approximate weight instead. + */ +public class ApproximateScoreQuery extends Query { + + private final Query originalQuery; + private final ApproximateQuery approximationQuery; + + protected Query resolvedQuery; + + public ApproximateScoreQuery(Query originalQuery, ApproximateQuery approximationQuery) { + this.originalQuery = originalQuery; + this.approximationQuery = approximationQuery; + } + + public Query getOriginalQuery() { + return originalQuery; + } + + public ApproximateQuery getApproximationQuery() { + return approximationQuery; + } + + @Override + public final Query rewrite(IndexSearcher indexSearcher) throws IOException { + if (resolvedQuery == null) { + throw new IllegalStateException("Cannot rewrite resolved query without setContext being called"); + } + return resolvedQuery.rewrite(indexSearcher); + } + + public void setContext(SearchContext context) { + if (resolvedQuery != null) { + throw new IllegalStateException("Query already resolved, duplicate call to setContext"); + } + resolvedQuery = approximationQuery.canApproximate(context) ? 
approximationQuery : originalQuery; + }; + + @Override + public String toString(String s) { + return "ApproximateScoreQuery(originalQuery=" + + originalQuery.toString() + + ", approximationQuery=" + + approximationQuery.toString() + + ")"; + } + + @Override + public void visit(QueryVisitor queryVisitor) { + QueryVisitor v = queryVisitor.getSubVisitor(BooleanClause.Occur.MUST, this); + originalQuery.visit(v); + approximationQuery.visit(v); + } + + @Override + public boolean equals(Object o) { + if (!sameClassAs(o)) { + return false; + } + return true; + } + + @Override + public int hashCode() { + int h = classHash(); + h = 31 * h + originalQuery.hashCode(); + h = 31 * h + approximationQuery.hashCode(); + return h; + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/numerictype/package-info.java b/server/src/main/java/org/opensearch/search/approximate/package-info.java similarity index 56% rename from server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/numerictype/package-info.java rename to server/src/main/java/org/opensearch/search/approximate/package-info.java index fe5c2a7ceb254..1a09183c7d9fa 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/numerictype/package-info.java +++ b/server/src/main/java/org/opensearch/search/approximate/package-info.java @@ -7,8 +7,6 @@ */ /** - * Numeric Types for Composite Index Star Tree - * - * @opensearch.experimental + * Approximation query framework to approximate commonly used queries */ -package org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype; +package org.opensearch.search.approximate; diff --git a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java index f563cad947354..e98046ba1dede 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java +++ b/server/src/main/java/org/opensearch/search/backpressure/SearchBackpressureService.java @@ -80,7 +80,6 @@ public class SearchBackpressureService extends AbstractLifecycleComponent implem private final SearchBackpressureSettings settings; private final TaskResourceTrackingService taskResourceTrackingService; private final ThreadPool threadPool; - private final LongSupplier timeNanosSupplier; private final NodeDuressTrackers nodeDuressTrackers; private final Map, TaskResourceUsageTrackers> taskTrackers; @@ -150,7 +149,6 @@ public SearchBackpressureService( this.taskResourceTrackingService = taskResourceTrackingService; this.taskResourceTrackingService.addTaskCompletionListener(this); this.threadPool = threadPool; - this.timeNanosSupplier = timeNanosSupplier; this.nodeDuressTrackers = nodeDuressTrackers; this.taskManager = taskManager; diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java index 8c680a09a73da..38213506c55b7 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchShardTaskSettings.java @@ -26,12 +26,8 @@ */ public class SearchShardTaskSettings { private final List listeners = new ArrayList<>(); - private final ClusterSettings clusterSettings; private static class Defaults { - private static 
final double CANCELLATION_RATIO = 0.1; - private static final double CANCELLATION_RATE = 0.003; - private static final double CANCELLATION_BURST = 10.0; private static final double TOTAL_HEAP_PERCENT_THRESHOLD = 0.05; private static final long CPU_TIME_MILLIS_THRESHOLD = 15000; private static final long ELAPSED_TIME_MILLIS_THRESHOLD = 30000; @@ -175,7 +171,6 @@ public SearchShardTaskSettings(Settings settings, ClusterSettings clusterSetting this.cancellationRatio = SETTING_CANCELLATION_RATIO.get(settings); this.cancellationRate = SETTING_CANCELLATION_RATE.get(settings); this.cancellationBurst = SETTING_CANCELLATION_BURST.get(settings); - this.clusterSettings = clusterSettings; clusterSettings.addSettingsUpdateConsumer(SETTING_TOTAL_HEAP_PERCENT_THRESHOLD, this::setTotalHeapPercentThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_CPU_TIME_MILLIS_THRESHOLD, this::setCpuTimeMillisThreshold); diff --git a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java index 4b22b25facb4f..f9af7f9b59fdb 100644 --- a/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java +++ b/server/src/main/java/org/opensearch/search/backpressure/settings/SearchTaskSettings.java @@ -27,7 +27,6 @@ public class SearchTaskSettings { private final List listeners = new ArrayList<>(); - private final ClusterSettings clusterSettings; private static class Defaults { private static final double CANCELLATION_RATIO = 0.1; @@ -176,7 +175,6 @@ public SearchTaskSettings(Settings settings, ClusterSettings clusterSettings) { this.cancellationRatio = SETTING_CANCELLATION_RATIO.get(settings); this.cancellationRate = SETTING_CANCELLATION_RATE.get(settings); this.cancellationBurst = SETTING_CANCELLATION_BURST.get(settings); - this.clusterSettings = clusterSettings; clusterSettings.addSettingsUpdateConsumer(SETTING_TOTAL_HEAP_PERCENT_THRESHOLD, this::setTotalHeapPercentThreshold); clusterSettings.addSettingsUpdateConsumer(SETTING_CPU_TIME_MILLIS_THRESHOLD, this::setCpuTimeMillisThreshold); diff --git a/server/src/main/java/org/opensearch/search/deciders/ConcurrentSearchDecision.java b/server/src/main/java/org/opensearch/search/deciders/ConcurrentSearchDecision.java new file mode 100644 index 0000000000000..4ac47221856d1 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/deciders/ConcurrentSearchDecision.java @@ -0,0 +1,88 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.deciders; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.util.Collection; + +/** + * This Class defines the decisions that a {@link ConcurrentSearchRequestDecider#getConcurrentSearchDecision} can return. + * + */ +@ExperimentalApi +public class ConcurrentSearchDecision { + + final private DecisionStatus decisionStatus; + final private String decisionReason; + + public ConcurrentSearchDecision(DecisionStatus decisionStatus, String decisionReason) { + this.decisionStatus = decisionStatus; + this.decisionReason = decisionReason; + } + + public DecisionStatus getDecisionStatus() { + return decisionStatus; + } + + public String getDecisionReason() { + return decisionReason; + } + + /** + * This enum contains the decision status for concurrent search. 
+ */ + @ExperimentalApi + public enum DecisionStatus { + YES(0), // use concurrent search + NO(1), // don't use concurrent search + NO_OP(2); // no preference + + private final int id; + + DecisionStatus(int id) { + this.id = id; + } + } + + @Override + public String toString() { + return "ConcurrentSearchDecision{" + "decisionStatus=" + decisionStatus + ", decisionReason='" + decisionReason + '\'' + '}'; + } + + /** + * Combine a collection of {@link ConcurrentSearchDecision} to return final {@link ConcurrentSearchDecision} + * The decisions are combined as: + * NO_OP AND NO_OP results in NO_OP + * NO_OP AND YES results in YES + * NO_OP AND NO results in NO + */ + public static ConcurrentSearchDecision getCompositeDecision(Collection allDecisions) { + + DecisionStatus finalDecisionStatus = DecisionStatus.NO_OP; + for (ConcurrentSearchDecision decision : allDecisions) { + switch (decision.decisionStatus) { + case YES: + finalDecisionStatus = DecisionStatus.YES; + break; + case NO: + finalDecisionStatus = DecisionStatus.NO; + return new ConcurrentSearchDecision( + finalDecisionStatus, + "composite decision evaluated to false due to " + decision.decisionReason + ); + case NO_OP: + // NOOP doesn't change the final decision + break; + } + } + return new ConcurrentSearchDecision(finalDecisionStatus, "composite decision result"); + } + +} diff --git a/server/src/main/java/org/opensearch/search/deciders/ConcurrentSearchRequestDecider.java b/server/src/main/java/org/opensearch/search/deciders/ConcurrentSearchRequestDecider.java new file mode 100644 index 0000000000000..ec40527314454 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/deciders/ConcurrentSearchRequestDecider.java @@ -0,0 +1,58 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.deciders; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.IndexSettings; +import org.opensearch.index.query.QueryBuilder; + +import java.util.Optional; + +/** + * {@link ConcurrentSearchRequestDecider} allows pluggable way to evaluate if a query in the search request + * can use concurrent segment search using the passed in queryBuilders from query tree and index settings + * on a per shard request basis. + * Implementations will need to implement the Factory interface that can be used to create the ConcurrentSearchRequestDecider + * This factory will be called on each shard search request to create the ConcurrentSearchRequestDecider and get the + * concurrent search decision from the created decider on a per-request basis. + * For all the deciders the evaluateForQuery method will be called for each node in the query tree. + * After traversing of the query tree is completed, the final decision from the deciders will be + * obtained using {@link ConcurrentSearchRequestDecider#getConcurrentSearchDecision} + */ +@ExperimentalApi +public abstract class ConcurrentSearchRequestDecider { + + /** + * Evaluate for the passed in queryBuilder node in the query tree of the search request + * if concurrent segment search can be used. + * This method will be called for each of the query builder node in the query tree of the request. 
+ */ + public abstract void evaluateForQuery(QueryBuilder queryBuilder, IndexSettings indexSettings); + + /** + * Provide the final decision for concurrent search based on all evaluations + * Plugins may need to maintain internal state of evaluations to provide a final decision + * If decision is null, then it is ignored + * @return ConcurrentSearchDecision + */ + public abstract ConcurrentSearchDecision getConcurrentSearchDecision(); + + /** + * Factory interface that can be implemented to create the ConcurrentSearchRequestDecider object. + * Implementations can use the passed in indexSettings to decide whether to create the decider object or + * return {@link Optional#empty()}. + */ + @ExperimentalApi + public interface Factory { + default Optional create(IndexSettings indexSettings) { + return Optional.empty(); + } + } + +} diff --git a/server/src/main/java/org/opensearch/search/deciders/ConcurrentSearchVisitor.java b/server/src/main/java/org/opensearch/search/deciders/ConcurrentSearchVisitor.java new file mode 100644 index 0000000000000..d1a4fa982dc7e --- /dev/null +++ b/server/src/main/java/org/opensearch/search/deciders/ConcurrentSearchVisitor.java @@ -0,0 +1,46 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.deciders; + +import org.apache.lucene.search.BooleanClause; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.IndexSettings; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilderVisitor; + +import java.util.Objects; +import java.util.Set; + +/** + * Class to traverse the QueryBuilder tree and invoke the + * {@link ConcurrentSearchRequestDecider#evaluateForQuery} at each node of the query tree + */ +@ExperimentalApi +public class ConcurrentSearchVisitor implements QueryBuilderVisitor { + + private final Set deciders; + private final IndexSettings indexSettings; + + public ConcurrentSearchVisitor(Set concurrentSearchVisitorDeciders, IndexSettings idxSettings) { + Objects.requireNonNull(concurrentSearchVisitorDeciders, "Concurrent search deciders cannot be null"); + deciders = concurrentSearchVisitorDeciders; + indexSettings = idxSettings; + } + + @Override + public void accept(QueryBuilder qb) { + // for each of the deciders, invoke evaluateForQuery using the current query builder and index settings. + deciders.forEach(concurrentSearchDecider -> { concurrentSearchDecider.evaluateForQuery(qb, indexSettings); }); + } + + @Override + public QueryBuilderVisitor getChildVisitor(BooleanClause.Occur occur) { + return this; + } +} diff --git a/server/src/main/java/org/opensearch/search/deciders/package-info.java b/server/src/main/java/org/opensearch/search/deciders/package-info.java new file mode 100644 index 0000000000000..4ef26f7be9cfc --- /dev/null +++ b/server/src/main/java/org/opensearch/search/deciders/package-info.java @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * This package contains classes that assist in deciding + * whether to run a search request using concurrent search or not. 
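As a sketch of how a plugin could hook into this extension point; the decider below and its trigger condition are hypothetical, only the ConcurrentSearchRequestDecider, Factory, and ConcurrentSearchDecision APIs come from this change, and the factory is assumed to be exposed through SearchPlugin#getConcurrentSearchRequestDeciderFactory() as registered in SearchModule above:

    import java.util.Optional;

    import org.opensearch.index.IndexSettings;
    import org.opensearch.index.query.QueryBuilder;
    import org.opensearch.index.query.TermQueryBuilder;
    import org.opensearch.search.deciders.ConcurrentSearchDecision;
    import org.opensearch.search.deciders.ConcurrentSearchRequestDecider;

    // Hypothetical decider: opt out of concurrent segment search whenever the query tree contains a term query.
    class TermQueryDecider extends ConcurrentSearchRequestDecider {
        private boolean sawTermQuery = false;

        @Override
        public void evaluateForQuery(QueryBuilder queryBuilder, IndexSettings indexSettings) {
            if (queryBuilder instanceof TermQueryBuilder) {
                sawTermQuery = true;
            }
        }

        @Override
        public ConcurrentSearchDecision getConcurrentSearchDecision() {
            return sawTermQuery
                ? new ConcurrentSearchDecision(ConcurrentSearchDecision.DecisionStatus.NO, "term query present")
                : new ConcurrentSearchDecision(ConcurrentSearchDecision.DecisionStatus.NO_OP, "no preference");
        }

        // Created per shard search request; assumes Factory#create returns Optional<ConcurrentSearchRequestDecider>.
        static class Factory implements ConcurrentSearchRequestDecider.Factory {
            @Override
            public Optional<ConcurrentSearchRequestDecider> create(IndexSettings indexSettings) {
                return Optional.of(new TermQueryDecider());
            }
        }
    }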
+ */ + +package org.opensearch.search.deciders; diff --git a/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java b/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java index fa00ace378df1..f118e4106db83 100644 --- a/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java +++ b/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java @@ -69,6 +69,7 @@ import org.opensearch.common.lucene.search.TopDocsAndMaxScore; import org.opensearch.search.DocValueFormat; import org.opensearch.search.SearchService; +import org.opensearch.search.approximate.ApproximateScoreQuery; import org.opensearch.search.dfs.AggregatedDfs; import org.opensearch.search.profile.ContextualProfileBreakdown; import org.opensearch.search.profile.Timer; @@ -218,6 +219,9 @@ public Weight createWeight(Query query, ScoreMode scoreMode, float boost) throws profiler.pollLastElement(); } return new ProfileWeight(query, weight, profile); + } else if (query instanceof ApproximateScoreQuery) { + ((ApproximateScoreQuery) query).setContext(searchContext); + return super.createWeight(query, scoreMode, boost); } else { return super.createWeight(query, scoreMode, boost); } diff --git a/server/src/main/java/org/opensearch/search/internal/SearchContext.java b/server/src/main/java/org/opensearch/search/internal/SearchContext.java index bc4b7058651dd..5357206e8c117 100644 --- a/server/src/main/java/org/opensearch/search/internal/SearchContext.java +++ b/server/src/main/java/org/opensearch/search/internal/SearchContext.java @@ -526,4 +526,9 @@ public int maxAggRewriteFilters() { public int cardinalityAggregationPruningThreshold() { return 0; } + + public boolean keywordIndexOrDocValuesEnabled() { + return false; + } + } diff --git a/server/src/main/java/org/opensearch/snapshots/InternalSnapshotsInfoService.java b/server/src/main/java/org/opensearch/snapshots/InternalSnapshotsInfoService.java index 797a58f3b0d9b..e300f845e6f58 100644 --- a/server/src/main/java/org/opensearch/snapshots/InternalSnapshotsInfoService.java +++ b/server/src/main/java/org/opensearch/snapshots/InternalSnapshotsInfoService.java @@ -238,14 +238,18 @@ protected void doRun() throws Exception { final Repository repository = repositories.repository(snapshotShard.snapshot.getRepository()); logger.debug("fetching snapshot shard size for {}", snapshotShard); - final long snapshotShardSize = repository.getShardSnapshotStatus( - snapshotShard.snapshot().getSnapshotId(), - snapshotShard.index(), - snapshotShard.shardId() - ).asCopy().getTotalSize(); + long snapshotShardSize; + if (snapshotShard.pinnedTimestamp > 0) { + snapshotShardSize = 0; + } else { + snapshotShardSize = repository.getShardSnapshotStatus( + snapshotShard.snapshot().getSnapshotId(), + snapshotShard.index(), + snapshotShard.shardId() + ).asCopy().getTotalSize(); + } logger.debug("snapshot shard size for {}: {} bytes", snapshotShard, snapshotShardSize); - boolean updated = false; synchronized (mutex) { removed = unknownSnapshotShards.remove(snapshotShard); @@ -354,7 +358,8 @@ private static Set listOfSnapshotShards(final ClusterState state) final SnapshotShard snapshotShard = new SnapshotShard( snapshotRecoverySource.snapshot(), snapshotRecoverySource.index(), - shardRouting.shardId() + shardRouting.shardId(), + snapshotRecoverySource.pinnedTimestamp() ); snapshotShards.add(snapshotShard); } @@ -374,10 +379,17 @@ public static class SnapshotShard { private final IndexId index; private final ShardId shardId; + private long 
pinnedTimestamp; + public SnapshotShard(Snapshot snapshot, IndexId index, ShardId shardId) { + this(snapshot, index, shardId, 0L); + } + + public SnapshotShard(Snapshot snapshot, IndexId index, ShardId shardId, long pinnedTimestamp) { this.snapshot = snapshot; this.index = index; this.shardId = shardId; + this.pinnedTimestamp = pinnedTimestamp; } public Snapshot snapshot() { diff --git a/server/src/main/java/org/opensearch/snapshots/RestoreService.java b/server/src/main/java/org/opensearch/snapshots/RestoreService.java index e464b71debf81..79a70d835f773 100644 --- a/server/src/main/java/org/opensearch/snapshots/RestoreService.java +++ b/server/src/main/java/org/opensearch/snapshots/RestoreService.java @@ -86,6 +86,7 @@ import org.opensearch.core.index.shard.ShardId; import org.opensearch.index.IndexModule; import org.opensearch.index.IndexSettings; +import org.opensearch.index.remote.RemoteStoreEnums.PathType; import org.opensearch.index.shard.IndexShard; import org.opensearch.index.snapshots.IndexShardSnapshotStatus; import org.opensearch.index.store.remote.filecache.FileCacheStats; @@ -427,7 +428,9 @@ public ClusterState execute(ClusterState currentState) { snapshotIndexId, isSearchableSnapshot, isRemoteStoreShallowCopy, - request.getSourceRemoteStoreRepository() + request.getSourceRemoteStoreRepository(), + request.getSourceRemoteTranslogRepository(), + snapshotInfo.getPinnedTimestamp() ); final Version minIndexCompatibilityVersion; if (isSearchableSnapshot && isSearchableSnapshotsExtendedCompatibilityEnabled()) { @@ -550,7 +553,7 @@ public ClusterState execute(ClusterState currentState) { for (int shard = 0; shard < snapshotIndexMetadata.getNumberOfShards(); shard++) { if (isRemoteSnapshot) { IndexShardSnapshotStatus.Copy shardStatus = repository.getShardSnapshotStatus( - snapshotInfo.snapshotId(), + snapshotInfo, snapshotIndexId, new ShardId(metadata.index(index).getIndex(), shard) ).asCopy(); @@ -1328,6 +1331,7 @@ private static IndexMetadata addSnapshotToIndexSettings(IndexMetadata metadata, .put(IndexSettings.SEARCHABLE_SNAPSHOT_ID_UUID.getKey(), snapshot.getSnapshotId().getUUID()) .put(IndexSettings.SEARCHABLE_SNAPSHOT_ID_NAME.getKey(), snapshot.getSnapshotId().getName()) .put(IndexSettings.SEARCHABLE_SNAPSHOT_INDEX_ID.getKey(), indexId.getId()) + .put(IndexSettings.SEARCHABLE_SNAPSHOT_SHARD_PATH_TYPE.getKey(), PathType.fromCode(indexId.getShardPathType())) .build(); return IndexMetadata.builder(metadata).settings(newSettings).build(); } diff --git a/server/src/main/java/org/opensearch/snapshots/SnapshotInfo.java b/server/src/main/java/org/opensearch/snapshots/SnapshotInfo.java index 7558c4456109e..08433dc6f2e0b 100644 --- a/server/src/main/java/org/opensearch/snapshots/SnapshotInfo.java +++ b/server/src/main/java/org/opensearch/snapshots/SnapshotInfo.java @@ -354,6 +354,38 @@ public SnapshotInfo(SnapshotsInProgress.Entry entry) { ); } + public SnapshotInfo( + SnapshotId snapshotId, + List indices, + List dataStreams, + long startTime, + String reason, + long endTime, + int totalShards, + List shardFailures, + Boolean includeGlobalState, + Map userMetadata, + Boolean remoteStoreIndexShallowCopy + ) { + this( + snapshotId, + indices, + dataStreams, + snapshotState(reason, shardFailures), + reason, + Version.CURRENT, + startTime, + endTime, + totalShards, + totalShards - shardFailures.size(), + shardFailures, + includeGlobalState, + userMetadata, + remoteStoreIndexShallowCopy, + 0 + ); + } + public SnapshotInfo( SnapshotId snapshotId, List indices, @@ -441,7 +473,7 @@ public 
SnapshotInfo(final StreamInput in) throws IOException { if (in.getVersion().onOrAfter(Version.V_2_9_0)) { remoteStoreIndexShallowCopy = in.readOptionalBoolean(); } - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + if (in.getVersion().onOrAfter(Version.V_2_17_0)) { pinnedTimestamp = in.readVLong(); } } @@ -908,7 +940,7 @@ public void writeTo(final StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(Version.V_2_9_0)) { out.writeOptionalBoolean(remoteStoreIndexShallowCopy); } - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (out.getVersion().onOrAfter(Version.V_2_17_0)) { out.writeVLong(pinnedTimestamp); } } diff --git a/server/src/main/java/org/opensearch/snapshots/SnapshotShardPaths.java b/server/src/main/java/org/opensearch/snapshots/SnapshotShardPaths.java new file mode 100644 index 0000000000000..88af14e2232f9 --- /dev/null +++ b/server/src/main/java/org/opensearch/snapshots/SnapshotShardPaths.java @@ -0,0 +1,142 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.snapshots; + +import org.opensearch.core.xcontent.ToXContent; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.index.remote.RemoteStoreEnums.PathHashAlgorithm; +import org.opensearch.index.remote.RemoteStoreEnums.PathType; +import org.opensearch.repositories.IndexId; + +import java.io.IOException; +import java.util.List; + +/** + * Snapshot Shard path information. + * + * @opensearch.internal + */ +public class SnapshotShardPaths implements ToXContent { + + public static final String DIR = "snapshot_shard_paths"; + + public static final String DELIMITER = "."; + + public static final String FILE_NAME_FORMAT = "%s"; + + private static final String PATHS_FIELD = "paths"; + private static final String INDEX_ID_FIELD = "indexId"; + private static final String INDEX_NAME_FIELD = "indexName"; + private static final String NUMBER_OF_SHARDS_FIELD = "number_of_shards"; + private static final String SHARD_PATH_TYPE_FIELD = "shard_path_type"; + private static final String SHARD_PATH_HASH_ALGORITHM_FIELD = "shard_path_hash_algorithm"; + + private final List paths; + private final String indexId; + private final String indexName; + private final int numberOfShards; + private final PathType shardPathType; + private final PathHashAlgorithm shardPathHashAlgorithm; + + public SnapshotShardPaths( + List paths, + String indexId, + String indexName, + int numberOfShards, + PathType shardPathType, + PathHashAlgorithm shardPathHashAlgorithm + ) { + assert !paths.isEmpty() : "paths must not be empty"; + assert indexId != null && !indexId.isEmpty() : "indexId must not be empty"; + assert indexName != null && !indexName.isEmpty() : "indexName must not be empty"; + assert numberOfShards > 0 : "numberOfShards must be > 0"; + assert shardPathType != null : "shardPathType must not be null"; + assert shardPathHashAlgorithm != null : "shardPathHashAlgorithm must not be null"; + + this.paths = paths; + this.indexId = indexId; + this.indexName = indexName; + this.numberOfShards = numberOfShards; + this.shardPathType = shardPathType; + this.shardPathHashAlgorithm = shardPathHashAlgorithm; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.field(INDEX_ID_FIELD, indexId); + 
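// Illustration only (hypothetical values): the shard-paths document serialized by this method takes the form
// {"indexId":"uuid-1","indexName":"my-index","number_of_shards":3,"shard_path_type":1,"shard_path_hash_algorithm":1,"paths":["path-a","path-b","path-c"]}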
builder.field(INDEX_NAME_FIELD, indexName); + builder.field(NUMBER_OF_SHARDS_FIELD, numberOfShards); + builder.field(SHARD_PATH_TYPE_FIELD, shardPathType.getCode()); + builder.field(SHARD_PATH_HASH_ALGORITHM_FIELD, shardPathHashAlgorithm.getCode()); + builder.startArray(PATHS_FIELD); + for (String path : paths) { + builder.value(path); + } + builder.endArray(); + return builder; + } + + public static SnapshotShardPaths fromXContent(XContentParser ignored) { + throw new UnsupportedOperationException("SnapshotShardPaths.fromXContent() is not supported"); + } + + /** + * Parses a shard path string and extracts relevant shard information. + * + * @param shardPath The shard path string to parse. Expected format is: + * [index_id]#[index_name]#[shard_count]#[path_type_code]#[path_hash_algorithm_code] + * @return A {@link ShardInfo} object containing the parsed index ID and shard count. + * @throws IllegalArgumentException if the shard path format is invalid or cannot be parsed. + */ + public static ShardInfo parseShardPath(String shardPath) { + String[] parts = shardPath.split("\\" + SnapshotShardPaths.DELIMITER); + if (parts.length != 5) { + throw new IllegalArgumentException("Invalid shard path format: " + shardPath); + } + try { + IndexId indexId = new IndexId(parts[1], parts[0], Integer.parseInt(parts[3])); + int shardCount = Integer.parseInt(parts[2]); + return new ShardInfo(indexId, shardCount); + } catch (NumberFormatException e) { + throw new IllegalArgumentException("Invalid shard path format: " + shardPath, e); + } + } + + /** + * Represents parsed information from a shard path. + * This class encapsulates the index ID and shard count extracted from a shard path string. + */ + public static class ShardInfo { + /** The ID of the index associated with this shard. */ + private final IndexId indexId; + + /** The total number of shards for this index. */ + private final int shardCount; + + /** + * Constructs a new ShardInfo instance. + * + * @param indexId The ID of the index associated with this shard. + * @param shardCount The total number of shards for this index. 
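 * Illustrative example (hypothetical file name): with the '.' delimiter, a shard path file named
 * "uuid-1.my-index.3.1.1" is parsed by parseShardPath into a ShardInfo whose IndexId carries name "my-index",
 * id "uuid-1" and shard path type code 1, with a shardCount of 3.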
+ */ + public ShardInfo(IndexId indexId, int shardCount) { + this.indexId = indexId; + this.shardCount = shardCount; + } + + public IndexId getIndexId() { + return indexId; + } + + public int getShardCount() { + return shardCount; + } + } +} diff --git a/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java b/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java index b7fea116a12b7..f6e550525a3e5 100644 --- a/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java @@ -91,7 +91,9 @@ import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.index.Index; import org.opensearch.core.index.shard.ShardId; +import org.opensearch.index.store.RemoteSegmentStoreDirectoryFactory; import org.opensearch.index.store.lockmanager.RemoteStoreLockManagerFactory; +import org.opensearch.indices.RemoteStoreSettings; import org.opensearch.node.remotestore.RemoteStorePinnedTimestampService; import org.opensearch.repositories.IndexId; import org.opensearch.repositories.RepositoriesService; @@ -123,6 +125,7 @@ import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.Executor; import java.util.function.Consumer; import java.util.function.Function; @@ -137,6 +140,7 @@ import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; import static org.opensearch.repositories.blobstore.BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY; import static org.opensearch.repositories.blobstore.BlobStoreRepository.SHALLOW_SNAPSHOT_V2; +import static org.opensearch.repositories.blobstore.BlobStoreRepository.SHARD_PATH_TYPE; import static org.opensearch.snapshots.SnapshotUtils.validateSnapshotsBackingAnyIndex; /** @@ -160,6 +164,8 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus private final RemoteStoreLockManagerFactory remoteStoreLockManagerFactory; + private final RemoteSegmentStoreDirectoryFactory remoteSegmentStoreDirectoryFactory; + private final ThreadPool threadPool; private final Map>>> snapshotCompletionListeners = @@ -203,7 +209,18 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus Setting.Property.Dynamic ); - private static final String SNAPSHOT_PINNED_TIMESTAMP_DELIMITER = ":"; + public static final String SNAPSHOT_PINNED_TIMESTAMP_DELIMITER = "__"; + /** + * Setting to specify the maximum number of shards that can be included in the result for the snapshot status + * API call. Note that it does not apply to V2-shallow snapshots. 
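 * Being a dynamic setting, it can be adjusted at runtime; for illustration (hypothetical value):
 * PUT _cluster/settings {"persistent": {"snapshot.max_shards_allowed_in_status_api": 500000}}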
+ */ + public static final Setting MAX_SHARDS_ALLOWED_IN_STATUS_API = Setting.intSetting( + "snapshot.max_shards_allowed_in_status_api", + 200000, + 1, + Setting.Property.NodeScope, + Setting.Property.Dynamic + ); private volatile int maxConcurrentOperations; public SnapshotsService( @@ -213,13 +230,22 @@ public SnapshotsService( RepositoriesService repositoriesService, TransportService transportService, ActionFilters actionFilters, - @Nullable RemoteStorePinnedTimestampService remoteStorePinnedTimestampService + @Nullable RemoteStorePinnedTimestampService remoteStorePinnedTimestampService, + RemoteStoreSettings remoteStoreSettings ) { this.clusterService = clusterService; this.indexNameExpressionResolver = indexNameExpressionResolver; this.repositoriesService = repositoriesService; - this.remoteStoreLockManagerFactory = new RemoteStoreLockManagerFactory(() -> repositoriesService); + this.remoteStoreLockManagerFactory = new RemoteStoreLockManagerFactory( + () -> repositoriesService, + remoteStoreSettings.getSegmentsPathFixedPrefix() + ); this.threadPool = transportService.getThreadPool(); + this.remoteSegmentStoreDirectoryFactory = new RemoteSegmentStoreDirectoryFactory( + () -> repositoriesService, + threadPool, + remoteStoreSettings.getSegmentsPathFixedPrefix() + ); this.transportService = transportService; this.remoteStorePinnedTimestampService = remoteStorePinnedTimestampService; @@ -262,7 +288,7 @@ public void executeSnapshot(final CreateSnapshotRequest request, final ActionLis if (remoteStoreIndexShallowCopy && isSnapshotV2 && request.indices().length == 0 - && clusterService.state().nodes().getMinNodeVersion().onOrAfter(Version.CURRENT)) { + && clusterService.state().nodes().getMinNodeVersion().onOrAfter(Version.V_2_17_0)) { createSnapshotV2(request, listener); } else { createSnapshot( @@ -335,9 +361,13 @@ public ClusterState execute(ClusterState currentState) { logger.trace("[{}][{}] creating snapshot for indices [{}]", repositoryName, snapshotName, indices); + int pathType = clusterService.state().nodes().getMinNodeVersion().onOrAfter(Version.V_2_17_0) + ? 
SHARD_PATH_TYPE.get(repository.getMetadata().settings()).getCode() + : IndexId.DEFAULT_SHARD_PATH_TYPE; final List indexIds = repositoryData.resolveNewIndices( indices, - getInFlightIndexIds(runningSnapshots, repositoryName) + getInFlightIndexIds(runningSnapshots, repositoryName), + pathType ); final Version version = minCompatibleVersion(currentState.nodes().getMinNodeVersion(), repositoryData, null); final Map shards = shards( @@ -475,7 +505,8 @@ public void createSnapshotV2(final CreateSnapshotRequest request, final ActionLi final List indexIds = repositoryData.resolveNewIndices( indices, - getInFlightIndexIds(runningSnapshots, repositoryName) + getInFlightIndexIds(runningSnapshots, repositoryName), + IndexId.DEFAULT_SHARD_PATH_TYPE ); final Version version = minCompatibleVersion(currentState.nodes().getMinNodeVersion(), repositoryData, null); final ShardGenerations shardGenerations = buildShardsGenerationFromRepositoryData( @@ -600,6 +631,34 @@ public void onFailure(Exception e) { ); } + private void cloneSnapshotPinnedTimestamp( + RepositoryData repositoryData, + SnapshotId sourceSnapshot, + Snapshot snapshot, + long timestampToPin, + ActionListener listener + ) { + remoteStorePinnedTimestampService.cloneTimestamp( + timestampToPin, + snapshot.getRepository() + SNAPSHOT_PINNED_TIMESTAMP_DELIMITER + sourceSnapshot.getUUID(), + snapshot.getRepository() + SNAPSHOT_PINNED_TIMESTAMP_DELIMITER + snapshot.getSnapshotId().getUUID(), + new ActionListener() { + @Override + public void onResponse(Void unused) { + logger.debug("Timestamp pinned successfully for clone snapshot {}", snapshot.getSnapshotId().getName()); + listener.onResponse(repositoryData); + } + + @Override + public void onFailure(Exception e) { + logger.error("Failed to pin timestamp for clone snapshot {} with exception {}", snapshot.getSnapshotId().getName(), e); + listener.onFailure(e); + + } + } + ); + } + private static void ensureSnapshotNameNotRunning( List runningSnapshots, String repositoryName, @@ -621,9 +680,14 @@ private static Map getInFlightIndexIds(List listener) { + /** + * This method does some pre-validation, checks for the presence of source snapshot in repository data. + * For shallow snapshot v2 clone, it checks the pinned timestamp to be greater than zero in the source snapshot. 
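 * Illustration: a clone of a shallow-v2 source snapshot (pinned timestamp greater than zero) is accepted only when
 * the request's indices contain the wildcard pattern "*"; otherwise the clone fails with a SnapshotException.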
+ * + * @param request snapshot request + * @param listener snapshot completion listener + */ + public void executeClone(CloneSnapshotRequest request, ActionListener listener) { final String repositoryName = request.repository(); Repository repository = repositoriesService.repository(repositoryName); if (repository.isReadOnly()) { @@ -632,10 +696,230 @@ public void cloneSnapshot(CloneSnapshotRequest request, ActionListener lis } final String snapshotName = indexNameExpressionResolver.resolveDateMathExpression(request.target()); validate(repositoryName, snapshotName); - // TODO: create snapshot UUID in CloneSnapshotRequest and make this operation idempotent to cleanly deal with transport layer - // retries final SnapshotId snapshotId = new SnapshotId(snapshotName, UUIDs.randomBase64UUID()); final Snapshot snapshot = new Snapshot(repositoryName, snapshotId); + try { + final StepListener repositoryDataListener = new StepListener<>(); + repositoriesService.getRepositoryData(repositoryName, repositoryDataListener); + repositoryDataListener.whenComplete(repositoryData -> { + final SnapshotId sourceSnapshotId = repositoryData.getSnapshotIds() + .stream() + .filter(src -> src.getName().equals(request.source())) + .findAny() + .orElseThrow(() -> new SnapshotMissingException(repositoryName, request.source())); + final StepListener snapshotInfoListener = new StepListener<>(); + final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT); + + executor.execute(ActionRunnable.supply(snapshotInfoListener, () -> repository.getSnapshotInfo(sourceSnapshotId))); + + snapshotInfoListener.whenComplete(sourceSnapshotInfo -> { + if (sourceSnapshotInfo.getPinnedTimestamp() > 0) { + if (hasWildCardPatterForCloneSnapshotV2(request.indices()) == false) { + throw new SnapshotException( + repositoryName, + snapshotName, + "Aborting clone for Snapshot-v2, only wildcard pattern '*' is supported for indices" + ); + } + cloneSnapshotV2(request, snapshot, repositoryName, repository, listener); + } else { + cloneSnapshot(request, snapshot, repositoryName, repository, listener); + } + }, e -> listener.onFailure(e)); + }, e -> listener.onFailure(e)); + + } catch (Exception e) { + assert false : new AssertionError(e); + logger.error("Snapshot {} clone failed with exception {}", snapshot.getSnapshotId().getName(), e); + listener.onFailure(e); + } + } + + /** + * This method is responsible for creating a clone of the shallow snapshot v2. + * It pins the same timestamp that is pinned by the source snapshot. + * + * Unlike traditional snapshot operations, this method performs a synchronous clone execution and doesn't + * upload any shard metadata to the snapshot repository. + * The pinned timestamp is later reconciled with remote store segment and translog metadata files during the restore + * operation. 
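 * For example, the clone re-pins the source snapshot's timestamp under a new pinning entity named
 * <repository>__<target snapshot UUID> (see SNAPSHOT_PINNED_TIMESTAMP_DELIMITER and cloneSnapshotPinnedTimestamp).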
+ * + * @param request snapshot request + * @param snapshot clone snapshot + * @param repositoryName snapshot repository name + * @param repository snapshot repository + * @param listener completion listener + */ + public void cloneSnapshotV2( + CloneSnapshotRequest request, + Snapshot snapshot, + String repositoryName, + Repository repository, + ActionListener listener + ) { + + long startTime = System.currentTimeMillis(); + ClusterState currentState = clusterService.state(); + String snapshotName = snapshot.getSnapshotId().getName(); + repository.executeConsistentStateUpdate(repositoryData -> new ClusterStateUpdateTask(Priority.URGENT) { + private SnapshotsInProgress.Entry newEntry; + private SnapshotId sourceSnapshotId; + private List indicesForSnapshot; + + @Override + public ClusterState execute(ClusterState currentState) { + createSnapshotPreValidations(currentState, repositoryData, repositoryName, snapshotName); + final SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY); + final List runningSnapshots = snapshots.entries(); + + sourceSnapshotId = repositoryData.getSnapshotIds() + .stream() + .filter(src -> src.getName().equals(request.source())) + .findAny() + .orElseThrow(() -> new SnapshotMissingException(repositoryName, request.source())); + + final SnapshotDeletionsInProgress deletionsInProgress = currentState.custom( + SnapshotDeletionsInProgress.TYPE, + SnapshotDeletionsInProgress.EMPTY + ); + if (deletionsInProgress.getEntries().stream().anyMatch(entry -> entry.getSnapshots().contains(sourceSnapshotId))) { + throw new ConcurrentSnapshotExecutionException( + repositoryName, + sourceSnapshotId.getName(), + "cannot clone from snapshot that is being deleted" + ); + } + indicesForSnapshot = new ArrayList<>(); + for (IndexId indexId : repositoryData.getIndices().values()) { + if (repositoryData.getSnapshots(indexId).contains(sourceSnapshotId)) { + indicesForSnapshot.add(indexId.getName()); + } + } + + newEntry = SnapshotsInProgress.startClone( + snapshot, + sourceSnapshotId, + repositoryData.resolveIndices(indicesForSnapshot), + threadPool.absoluteTimeInMillis(), + repositoryData.getGenId(), + minCompatibleVersion(currentState.nodes().getMinNodeVersion(), repositoryData, null) + ); + final List newEntries = new ArrayList<>(runningSnapshots); + newEntries.add(newEntry); + return ClusterState.builder(currentState).putCustom(SnapshotsInProgress.TYPE, SnapshotsInProgress.of(newEntries)).build(); + } + + @Override + public void onFailure(String source, Exception e) { + logger.warn(() -> new ParameterizedMessage("[{}][{}] failed to clone snapshot-v2", repositoryName, snapshotName), e); + listener.onFailure(e); + } + + @Override + public void clusterStateProcessed(String source, ClusterState oldState, final ClusterState newState) { + logger.info("snapshot-v2 clone [{}] started", snapshot); + final StepListener snapshotInfoListener = new StepListener<>(); + final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT); + + executor.execute(ActionRunnable.supply(snapshotInfoListener, () -> repository.getSnapshotInfo(sourceSnapshotId))); + final ShardGenerations shardGenerations = repositoryData.shardGenerations(); + + snapshotInfoListener.whenComplete(snapshotInfo -> { + final SnapshotInfo cloneSnapshotInfo = new SnapshotInfo( + snapshot.getSnapshotId(), + indicesForSnapshot, + newEntry.dataStreams(), + startTime, + null, + System.currentTimeMillis(), + snapshotInfo.totalShards(), + Collections.emptyList(), + 
newEntry.includeGlobalState(), + newEntry.userMetadata(), + true, + snapshotInfo.getPinnedTimestamp() + ); + if (!clusterService.state().nodes().isLocalNodeElectedClusterManager()) { + throw new SnapshotException(repositoryName, snapshotName, "Aborting snapshot-v2 clone, no longer cluster manager"); + } + final StepListener pinnedTimestampListener = new StepListener<>(); + pinnedTimestampListener.whenComplete(repoData -> { + logger.info("snapshot-v2 clone [{}] completed successfully", snapshot); + listener.onResponse(null); + }, listener::onFailure); + repository.finalizeSnapshot( + shardGenerations, + repositoryData.getGenId(), + metadataForSnapshot( + currentState.metadata(), + newEntry.includeGlobalState(), + false, + newEntry.dataStreams(), + newEntry.indices() + ), + cloneSnapshotInfo, + repositoryData.getVersion(sourceSnapshotId), + state -> stateWithoutSnapshot(state, snapshot), + Priority.IMMEDIATE, + new ActionListener() { + @Override + public void onResponse(RepositoryData repositoryData) { + if (!clusterService.state().nodes().isLocalNodeElectedClusterManager()) { + failSnapshotCompletionListeners( + snapshot, + new SnapshotException(snapshot, "Aborting Snapshot-v2 clone, no longer cluster manager") + ); + listener.onFailure( + new SnapshotException( + repositoryName, + snapshotName, + "Aborting Snapshot-v2 clone, no longer cluster manager" + ) + ); + return; + } + cloneSnapshotPinnedTimestamp( + repositoryData, + sourceSnapshotId, + snapshot, + snapshotInfo.getPinnedTimestamp(), + pinnedTimestampListener + ); + } + + @Override + public void onFailure(Exception e) { + logger.error( + "Failed to upload files to snapshot repo {} for clone snapshot-v2 {} ", + repositoryName, + snapshotName + ); + listener.onFailure(e); + } + } + ); + + }, listener::onFailure); + } + + @Override + public TimeValue timeout() { + return request.clusterManagerNodeTimeout(); + } + }, "clone_snapshot_v2 [" + request.source() + "][" + snapshotName + ']', listener::onFailure); + } + + // TODO: It is worth revisiting the design choice of creating a placeholder entry in snapshots-in-progress here once we have a cache + // for repository metadata and loading it has predictable performance + public void cloneSnapshot( + CloneSnapshotRequest request, + Snapshot snapshot, + String repositoryName, + Repository repository, + ActionListener listener + ) { + String snapshotName = snapshot.getSnapshotId().getName(); + initializingClones.add(snapshot); repository.executeConsistentStateUpdate(repositoryData -> new ClusterStateUpdateTask() { @@ -2446,18 +2730,67 @@ private void deleteSnapshotsFromRepository( // the flag. This can be improved by having the info whether there ever were any shallow snapshot present in this repository // or not in RepositoryData. 
// SEE https://github.com/opensearch-project/OpenSearch/issues/8610 - final boolean cleanupRemoteStoreLockFiles = REMOTE_STORE_INDEX_SHALLOW_COPY.get(repository.getMetadata().settings()); - if (cleanupRemoteStoreLockFiles) { - repository.deleteSnapshotsAndReleaseLockFiles( - snapshotIds, - repositoryData.getGenId(), - minCompatibleVersion(minNodeVersion, repositoryData, snapshotIds), - remoteStoreLockManagerFactory, - ActionListener.wrap(updatedRepoData -> { - logger.info("snapshots {} deleted", snapshotIds); - removeSnapshotDeletionFromClusterState(deleteEntry, null, updatedRepoData); - }, ex -> removeSnapshotDeletionFromClusterState(deleteEntry, ex, repositoryData)) - ); + final boolean remoteStoreShallowCopyEnabled = REMOTE_STORE_INDEX_SHALLOW_COPY.get(repository.getMetadata().settings()); + if (remoteStoreShallowCopyEnabled) { + Map snapshotsWithPinnedTimestamp = new ConcurrentHashMap<>(); + List snapshotsWithLockFiles = Collections.synchronizedList(new ArrayList<>()); + + CountDownLatch latch = new CountDownLatch(1); + + threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(() -> { + try { + for (SnapshotId snapshotId : snapshotIds) { + try { + SnapshotInfo snapshotInfo = repository.getSnapshotInfo(snapshotId); + if (snapshotInfo.getPinnedTimestamp() > 0) { + snapshotsWithPinnedTimestamp.put(snapshotId, snapshotInfo.getPinnedTimestamp()); + } else { + snapshotsWithLockFiles.add(snapshotId); + } + } catch (Exception e) { + logger.warn("Failed to get snapshot info for {} with exception {}", snapshotId, e); + removeSnapshotDeletionFromClusterState(deleteEntry, e, repositoryData); + } + } + } finally { + latch.countDown(); + } + }); + try { + latch.await(); + if (snapshotsWithLockFiles.size() > 0) { + repository.deleteSnapshotsAndReleaseLockFiles( + snapshotsWithLockFiles, + repositoryData.getGenId(), + minCompatibleVersion(minNodeVersion, repositoryData, snapshotsWithLockFiles), + remoteStoreLockManagerFactory, + ActionListener.wrap(updatedRepoData -> { + logger.info("snapshots {} deleted", snapshotsWithLockFiles); + removeSnapshotDeletionFromClusterState(deleteEntry, null, updatedRepoData); + }, ex -> removeSnapshotDeletionFromClusterState(deleteEntry, ex, repositoryData)) + ); + } + if (snapshotsWithPinnedTimestamp.size() > 0) { + + repository.deleteSnapshotsWithPinnedTimestamp( + snapshotsWithPinnedTimestamp, + repositoryData.getGenId(), + minCompatibleVersion(minNodeVersion, repositoryData, snapshotsWithPinnedTimestamp.keySet()), + remoteSegmentStoreDirectoryFactory, + remoteStorePinnedTimestampService, + ActionListener.wrap(updatedRepoData -> { + logger.info("snapshots {} deleted", snapshotsWithPinnedTimestamp); + removeSnapshotDeletionFromClusterState(deleteEntry, null, updatedRepoData); + }, ex -> removeSnapshotDeletionFromClusterState(deleteEntry, ex, repositoryData)) + ); + } + + } catch (InterruptedException e) { + logger.error("Interrupted while waiting for snapshot info processing", e); + Thread.currentThread().interrupt(); + removeSnapshotDeletionFromClusterState(deleteEntry, e, repositoryData); + } + } else { repository.deleteSnapshots( snapshotIds, @@ -3419,6 +3752,15 @@ private void startExecutableClones(SnapshotsInProgress snapshotsInProgress, @Nul } } + private boolean hasWildCardPatterForCloneSnapshotV2(String[] indices) { + for (String index : indices) { + if ("*".equals(index)) { + return true; + } + } + return false; + } + private class UpdateSnapshotStatusAction extends TransportClusterManagerNodeAction< UpdateIndexShardSnapshotStatusRequest, 
UpdateIndexShardSnapshotStatusResponse> { diff --git a/server/src/main/java/org/opensearch/snapshots/TooManyShardsInSnapshotsStatusException.java b/server/src/main/java/org/opensearch/snapshots/TooManyShardsInSnapshotsStatusException.java new file mode 100644 index 0000000000000..1689b3e4941ec --- /dev/null +++ b/server/src/main/java/org/opensearch/snapshots/TooManyShardsInSnapshotsStatusException.java @@ -0,0 +1,69 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +package org.opensearch.snapshots; + +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.rest.RestStatus; + +import java.io.IOException; + +/** + * Thrown if the number of shards across the requested resources (snapshot(s) or the index/indices of a particular snapshot) + * breaches the limit of snapshot.max_shards_allowed_in_status_api cluster setting + * + * @opensearch.internal + */ +public class TooManyShardsInSnapshotsStatusException extends SnapshotException { + + public TooManyShardsInSnapshotsStatusException( + final String repositoryName, + final SnapshotId snapshotId, + final String message, + final Throwable cause + ) { + super(repositoryName, snapshotId, message, cause); + } + + public TooManyShardsInSnapshotsStatusException(final String repositoryName, final String message, String... 
snapshotName) { + super(repositoryName, String.join(", ", snapshotName), message); + } + + public TooManyShardsInSnapshotsStatusException(StreamInput in) throws IOException { + super(in); + } + + @Override + public RestStatus status() { + return RestStatus.REQUEST_ENTITY_TOO_LARGE; + } +} diff --git a/server/src/main/java/org/opensearch/tasks/TaskCancellationMonitoringService.java b/server/src/main/java/org/opensearch/tasks/TaskCancellationMonitoringService.java index 343d4571593a7..2040703d88c38 100644 --- a/server/src/main/java/org/opensearch/tasks/TaskCancellationMonitoringService.java +++ b/server/src/main/java/org/opensearch/tasks/TaskCancellationMonitoringService.java @@ -125,7 +125,7 @@ public void onTaskCompleted(Task task) { if (!TASKS_TO_TRACK.contains(task.getClass())) { return; } - this.cancelledTaskTracker.entrySet().removeIf(entry -> entry.getKey() == task.getId()); + this.cancelledTaskTracker.remove(task.getId()); } /** diff --git a/server/src/main/java/org/opensearch/threadpool/ThreadPool.java b/server/src/main/java/org/opensearch/threadpool/ThreadPool.java index 056ef0fac0153..81220ab171b34 100644 --- a/server/src/main/java/org/opensearch/threadpool/ThreadPool.java +++ b/server/src/main/java/org/opensearch/threadpool/ThreadPool.java @@ -105,6 +105,7 @@ public static class Names { public static final String REFRESH = "refresh"; public static final String WARMER = "warmer"; public static final String SNAPSHOT = "snapshot"; + public static final String SNAPSHOT_DELETION = "snapshot_deletion"; public static final String FORCE_MERGE = "force_merge"; public static final String FETCH_SHARD_STARTED = "fetch_shard_started"; public static final String FETCH_SHARD_STORE = "fetch_shard_store"; @@ -176,6 +177,7 @@ public static ThreadPoolType fromType(String type) { map.put(Names.REFRESH, ThreadPoolType.SCALING); map.put(Names.WARMER, ThreadPoolType.SCALING); map.put(Names.SNAPSHOT, ThreadPoolType.SCALING); + map.put(Names.SNAPSHOT_DELETION, ThreadPoolType.SCALING); map.put(Names.FORCE_MERGE, ThreadPoolType.FIXED); map.put(Names.FETCH_SHARD_STARTED, ThreadPoolType.SCALING); map.put(Names.FETCH_SHARD_STORE, ThreadPoolType.SCALING); @@ -234,6 +236,7 @@ public ThreadPool( final int halfProcMaxAt5 = halfAllocatedProcessorsMaxFive(allocatedProcessors); final int halfProcMaxAt10 = halfAllocatedProcessorsMaxTen(allocatedProcessors); final int genericThreadPoolMax = boundedBy(4 * allocatedProcessors, 128, 512); + final int snapshotDeletionPoolMax = boundedBy(4 * allocatedProcessors, 64, 256); builders.put(Names.GENERIC, new ScalingExecutorBuilder(Names.GENERIC, 4, genericThreadPoolMax, TimeValue.timeValueSeconds(30))); builders.put(Names.WRITE, new FixedExecutorBuilder(settings, Names.WRITE, allocatedProcessors, 10000)); builders.put(Names.GET, new FixedExecutorBuilder(settings, Names.GET, allocatedProcessors, 1000)); @@ -251,6 +254,10 @@ public ThreadPool( builders.put(Names.REFRESH, new ScalingExecutorBuilder(Names.REFRESH, 1, halfProcMaxAt10, TimeValue.timeValueMinutes(5))); builders.put(Names.WARMER, new ScalingExecutorBuilder(Names.WARMER, 1, halfProcMaxAt5, TimeValue.timeValueMinutes(5))); builders.put(Names.SNAPSHOT, new ScalingExecutorBuilder(Names.SNAPSHOT, 1, halfProcMaxAt5, TimeValue.timeValueMinutes(5))); + builders.put( + Names.SNAPSHOT_DELETION, + new ScalingExecutorBuilder(Names.SNAPSHOT_DELETION, 1, snapshotDeletionPoolMax, TimeValue.timeValueMinutes(5)) + ); builders.put( Names.FETCH_SHARD_STARTED, new ScalingExecutorBuilder(Names.FETCH_SHARD_STARTED, 1, 2 * 
allocatedProcessors, TimeValue.timeValueMinutes(5)) diff --git a/server/src/main/java/org/opensearch/transport/TransportService.java b/server/src/main/java/org/opensearch/transport/TransportService.java index d08b28730d417..fff6d82b23c7e 100644 --- a/server/src/main/java/org/opensearch/transport/TransportService.java +++ b/server/src/main/java/org/opensearch/transport/TransportService.java @@ -752,7 +752,7 @@ public HandshakeResponse(StreamInput in) throws IOException { @Override public void writeTo(StreamOutput out) throws IOException { - out.writeOptionalWriteable(discoveryNode); + out.writeOptionalWriteable((stream, node) -> node.writeToWithAttribute(stream), discoveryNode); clusterName.writeTo(out); out.writeVersion(version); } diff --git a/server/src/main/java/org/opensearch/wlm/MutableQueryGroupFragment.java b/server/src/main/java/org/opensearch/wlm/MutableQueryGroupFragment.java new file mode 100644 index 0000000000000..8ea240132fea2 --- /dev/null +++ b/server/src/main/java/org/opensearch/wlm/MutableQueryGroupFragment.java @@ -0,0 +1,225 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.wlm; + +import org.opensearch.cluster.AbstractDiffable; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.core.xcontent.XContentParser; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.function.Function; + +/** + * Class to hold the fields that can be updated in a QueryGroup. + */ +@ExperimentalApi +public class MutableQueryGroupFragment extends AbstractDiffable { + + public static final String RESILIENCY_MODE_STRING = "resiliency_mode"; + public static final String RESOURCE_LIMITS_STRING = "resource_limits"; + private ResiliencyMode resiliencyMode; + private Map resourceLimits; + + public static final List acceptedFieldNames = List.of(RESILIENCY_MODE_STRING, RESOURCE_LIMITS_STRING); + + public MutableQueryGroupFragment() {} + + public MutableQueryGroupFragment(ResiliencyMode resiliencyMode, Map resourceLimits) { + validateResourceLimits(resourceLimits); + this.resiliencyMode = resiliencyMode; + this.resourceLimits = resourceLimits; + } + + public MutableQueryGroupFragment(StreamInput in) throws IOException { + if (in.readBoolean()) { + resourceLimits = in.readMap((i) -> ResourceType.fromName(i.readString()), StreamInput::readDouble); + } else { + resourceLimits = new HashMap<>(); + } + String updatedResiliencyMode = in.readOptionalString(); + resiliencyMode = updatedResiliencyMode == null ? 
null : ResiliencyMode.fromName(updatedResiliencyMode); + } + + interface FieldParser { + T parseField(XContentParser parser) throws IOException; + } + + static class ResiliencyModeParser implements FieldParser { + public ResiliencyMode parseField(XContentParser parser) throws IOException { + return ResiliencyMode.fromName(parser.text()); + } + } + + static class ResourceLimitsParser implements FieldParser> { + public Map parseField(XContentParser parser) throws IOException { + String fieldName = ""; + XContentParser.Token token; + final Map resourceLimits = new HashMap<>(); + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + fieldName = parser.currentName(); + } else { + resourceLimits.put(ResourceType.fromName(fieldName), parser.doubleValue()); + } + } + return resourceLimits; + } + } + + static class FieldParserFactory { + static Optional> fieldParserFor(String fieldName) { + if (fieldName.equals(RESOURCE_LIMITS_STRING)) { + return Optional.of(new ResourceLimitsParser()); + } else if (fieldName.equals(RESILIENCY_MODE_STRING)) { + return Optional.of(new ResiliencyModeParser()); + } + return Optional.empty(); + } + } + + private final Map> toXContentMap = Map.of(RESILIENCY_MODE_STRING, (builder) -> { + try { + builder.field(RESILIENCY_MODE_STRING, resiliencyMode.getName()); + return null; + } catch (IOException e) { + throw new IllegalStateException("writing error encountered for the field " + RESILIENCY_MODE_STRING); + } + }, RESOURCE_LIMITS_STRING, (builder) -> { + try { + builder.startObject(RESOURCE_LIMITS_STRING); + for (ResourceType resourceType : ResourceType.values()) { + if (resourceLimits.containsKey(resourceType)) { + builder.field(resourceType.getName(), resourceLimits.get(resourceType)); + } + } + builder.endObject(); + return null; + } catch (IOException e) { + throw new IllegalStateException("writing error encountered for the field " + RESOURCE_LIMITS_STRING); + } + }); + + public static boolean shouldParse(String field) { + return FieldParserFactory.fieldParserFor(field).isPresent(); + } + + public void parseField(XContentParser parser, String field) { + FieldParserFactory.fieldParserFor(field).ifPresent(fieldParser -> { + try { + Object value = fieldParser.parseField(parser); + if (field.equals(RESILIENCY_MODE_STRING)) { + setResiliencyMode((ResiliencyMode) value); + } else if (field.equals(RESOURCE_LIMITS_STRING)) { + setResourceLimits((Map) value); + } + } catch (IOException e) { + throw new IllegalArgumentException("parsing error encountered for the field " + field); + } + }); + } + + public void writeField(XContentBuilder builder, String field) { + toXContentMap.get(field).apply(builder); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + if (resourceLimits == null || resourceLimits.isEmpty()) { + out.writeBoolean(false); + } else { + out.writeBoolean(true); + out.writeMap(resourceLimits, ResourceType::writeTo, StreamOutput::writeDouble); + } + out.writeOptionalString(resiliencyMode == null ? 
null : resiliencyMode.getName()); + } + + public static void validateResourceLimits(Map resourceLimits) { + if (resourceLimits == null) { + return; + } + for (Map.Entry resource : resourceLimits.entrySet()) { + Double threshold = resource.getValue(); + Objects.requireNonNull(resource.getKey(), "resourceName can't be null"); + Objects.requireNonNull(threshold, "resource limit threshold for" + resource.getKey().getName() + " : can't be null"); + + if (Double.compare(threshold, 0.0) <= 0 || Double.compare(threshold, 1.0) > 0) { + throw new IllegalArgumentException("resource value should be greater than 0 and less or equal to 1.0"); + } + } + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + MutableQueryGroupFragment that = (MutableQueryGroupFragment) o; + return Objects.equals(resiliencyMode, that.resiliencyMode) && Objects.equals(resourceLimits, that.resourceLimits); + } + + @Override + public int hashCode() { + return Objects.hash(resiliencyMode, resourceLimits); + } + + public ResiliencyMode getResiliencyMode() { + return resiliencyMode; + } + + public Map getResourceLimits() { + return resourceLimits; + } + + /** + * This enum models the different QueryGroup resiliency modes + * SOFT - means that this query group can consume more than query group resource limits if node is not in duress + * ENFORCED - means that it will never breach the assigned limits and will cancel as soon as the limits are breached + * MONITOR - it will not cause any cancellation but just log the eligible task cancellations + */ + @ExperimentalApi + public enum ResiliencyMode { + SOFT("soft"), + ENFORCED("enforced"), + MONITOR("monitor"); + + private final String name; + + ResiliencyMode(String mode) { + this.name = mode; + } + + public String getName() { + return name; + } + + public static ResiliencyMode fromName(String s) { + for (ResiliencyMode mode : values()) { + if (mode.getName().equalsIgnoreCase(s)) return mode; + + } + throw new IllegalArgumentException("Invalid value for QueryGroupMode: " + s); + } + } + + public void setResiliencyMode(ResiliencyMode resiliencyMode) { + this.resiliencyMode = resiliencyMode; + } + + public void setResourceLimits(Map resourceLimits) { + validateResourceLimits(resourceLimits); + this.resourceLimits = resourceLimits; + } +} diff --git a/server/src/main/java/org/opensearch/wlm/QueryGroupLevelResourceUsageView.java b/server/src/main/java/org/opensearch/wlm/QueryGroupLevelResourceUsageView.java index 7577c8573ec10..de213eaab64a8 100644 --- a/server/src/main/java/org/opensearch/wlm/QueryGroupLevelResourceUsageView.java +++ b/server/src/main/java/org/opensearch/wlm/QueryGroupLevelResourceUsageView.java @@ -8,8 +8,6 @@ package org.opensearch.wlm; -import org.opensearch.tasks.Task; - import java.util.List; import java.util.Map; @@ -20,11 +18,11 @@ */ public class QueryGroupLevelResourceUsageView { // resourceUsage holds the resource usage data for a QueryGroup at a point in time - private final Map resourceUsage; + private final Map resourceUsage; // activeTasks holds the list of active tasks for a QueryGroup at a point in time - private final List activeTasks; + private final List activeTasks; - public QueryGroupLevelResourceUsageView(Map resourceUsage, List activeTasks) { + public QueryGroupLevelResourceUsageView(Map resourceUsage, List activeTasks) { this.resourceUsage = resourceUsage; this.activeTasks = activeTasks; } @@ -34,7 +32,7 @@ public QueryGroupLevelResourceUsageView(Map 
resourceUsage, L * * @return The map of resource usage data */ - public Map getResourceUsageData() { + public Map getResourceUsageData() { return resourceUsage; } @@ -43,7 +41,7 @@ public Map getResourceUsageData() { * * @return The list of active tasks */ - public List getActiveTasks() { + public List getActiveTasks() { return activeTasks; } } diff --git a/server/src/main/java/org/opensearch/wlm/QueryGroupService.java b/server/src/main/java/org/opensearch/wlm/QueryGroupService.java index 97c4e5169b4ed..6545598dd9951 100644 --- a/server/src/main/java/org/opensearch/wlm/QueryGroupService.java +++ b/server/src/main/java/org/opensearch/wlm/QueryGroupService.java @@ -9,11 +9,59 @@ package org.opensearch.wlm; import org.opensearch.core.concurrency.OpenSearchRejectedExecutionException; +import org.opensearch.wlm.stats.QueryGroupState; +import org.opensearch.wlm.stats.QueryGroupStats; +import org.opensearch.wlm.stats.QueryGroupStats.QueryGroupStatsHolder; + +import java.util.HashMap; +import java.util.Map; /** - * This is stub at this point in time and will be replace by an acutal one in couple of days + * As of now this is a stub and main implementation PR will be raised soon.Coming PR will collate these changes with core QueryGroupService changes */ public class QueryGroupService { + // This map does not need to be concurrent since we will process the cluster state change serially and update + // this map with new additions and deletions of entries. QueryGroupState is thread safe + private final Map queryGroupStateMap; + + public QueryGroupService() { + this(new HashMap<>()); + } + + public QueryGroupService(Map queryGroupStateMap) { + this.queryGroupStateMap = queryGroupStateMap; + } + + /** + * updates the failure stats for the query group + * @param queryGroupId query group identifier + */ + public void incrementFailuresFor(final String queryGroupId) { + QueryGroupState queryGroupState = queryGroupStateMap.get(queryGroupId); + // This can happen if the request failed for a deleted query group + // or new queryGroup is being created and has not been acknowledged yet + if (queryGroupState == null) { + return; + } + queryGroupState.failures.inc(); + } + + /** + * + * @return node level query group stats + */ + public QueryGroupStats nodeStats() { + final Map statsHolderMap = new HashMap<>(); + for (Map.Entry queryGroupsState : queryGroupStateMap.entrySet()) { + final String queryGroupId = queryGroupsState.getKey(); + final QueryGroupState currentState = queryGroupsState.getValue(); + + statsHolderMap.put(queryGroupId, QueryGroupStatsHolder.from(currentState)); + } + + return new QueryGroupStats(statsHolderMap); + } + /** * * @param queryGroupId query group identifier diff --git a/server/src/main/java/org/opensearch/wlm/QueryGroupTask.java b/server/src/main/java/org/opensearch/wlm/QueryGroupTask.java index 4eb413be61b72..a1cb766579d43 100644 --- a/server/src/main/java/org/opensearch/wlm/QueryGroupTask.java +++ b/server/src/main/java/org/opensearch/wlm/QueryGroupTask.java @@ -10,6 +10,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.opensearch.common.annotation.PublicApi; import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.concurrent.ThreadContext; import org.opensearch.core.tasks.TaskId; @@ -17,6 +18,7 @@ import java.util.Map; import java.util.Optional; +import java.util.function.LongSupplier; import java.util.function.Supplier; import static org.opensearch.search.SearchService.NO_TIMEOUT; @@ -24,15 +26,17 @@ /** * 
Base class to define QueryGroup tasks */ +@PublicApi(since = "2.18.0") public class QueryGroupTask extends CancellableTask { private static final Logger logger = LogManager.getLogger(QueryGroupTask.class); public static final String QUERY_GROUP_ID_HEADER = "queryGroupId"; public static final Supplier DEFAULT_QUERY_GROUP_ID_SUPPLIER = () -> "DEFAULT_QUERY_GROUP"; + private final LongSupplier nanoTimeSupplier; private String queryGroupId; public QueryGroupTask(long id, String type, String action, String description, TaskId parentTaskId, Map headers) { - this(id, type, action, description, parentTaskId, headers, NO_TIMEOUT); + this(id, type, action, description, parentTaskId, headers, NO_TIMEOUT, System::nanoTime); } public QueryGroupTask( @@ -43,8 +47,22 @@ public QueryGroupTask( TaskId parentTaskId, Map headers, TimeValue cancelAfterTimeInterval + ) { + this(id, type, action, description, parentTaskId, headers, cancelAfterTimeInterval, System::nanoTime); + } + + public QueryGroupTask( + long id, + String type, + String action, + String description, + TaskId parentTaskId, + Map headers, + TimeValue cancelAfterTimeInterval, + LongSupplier nanoTimeSupplier ) { super(id, type, action, description, parentTaskId, headers, cancelAfterTimeInterval); + this.nanoTimeSupplier = nanoTimeSupplier; } /** @@ -69,6 +87,10 @@ public final void setQueryGroupId(final ThreadContext threadContext) { .orElse(DEFAULT_QUERY_GROUP_ID_SUPPLIER.get()); } + public long getElapsedTime() { + return nanoTimeSupplier.getAsLong() - getStartTimeNanos(); + } + @Override public boolean shouldCancelChildrenOnCancellation() { return false; diff --git a/server/src/main/java/org/opensearch/wlm/ResourceType.java b/server/src/main/java/org/opensearch/wlm/ResourceType.java index c3f48f5f793ce..a560268a66853 100644 --- a/server/src/main/java/org/opensearch/wlm/ResourceType.java +++ b/server/src/main/java/org/opensearch/wlm/ResourceType.java @@ -10,10 +10,12 @@ import org.opensearch.common.annotation.PublicApi; import org.opensearch.core.common.io.stream.StreamOutput; -import org.opensearch.core.tasks.resourcetracker.ResourceStats; -import org.opensearch.tasks.Task; +import org.opensearch.wlm.tracker.CpuUsageCalculator; +import org.opensearch.wlm.tracker.MemoryUsageCalculator; +import org.opensearch.wlm.tracker.ResourceUsageCalculator; import java.io.IOException; +import java.util.List; import java.util.function.Function; /** @@ -23,17 +25,25 @@ */ @PublicApi(since = "2.17.0") public enum ResourceType { - CPU("cpu", task -> task.getTotalResourceUtilization(ResourceStats.CPU), true), - MEMORY("memory", task -> task.getTotalResourceUtilization(ResourceStats.MEMORY), true); + CPU("cpu", true, CpuUsageCalculator.INSTANCE, WorkloadManagementSettings::getNodeLevelCpuCancellationThreshold), + MEMORY("memory", true, MemoryUsageCalculator.INSTANCE, WorkloadManagementSettings::getNodeLevelMemoryCancellationThreshold); private final String name; - private final Function getResourceUsage; private final boolean statsEnabled; + private final ResourceUsageCalculator resourceUsageCalculator; + private final Function nodeLevelThresholdSupplier; + private static List sortedValues = List.of(CPU, MEMORY); - ResourceType(String name, Function getResourceUsage, boolean statsEnabled) { + ResourceType( + String name, + boolean statsEnabled, + ResourceUsageCalculator resourceUsageCalculator, + Function nodeLevelThresholdSupplier + ) { this.name = name; - this.getResourceUsage = getResourceUsage; this.statsEnabled = statsEnabled; + this.resourceUsageCalculator 
= resourceUsageCalculator; + this.nodeLevelThresholdSupplier = nodeLevelThresholdSupplier; } /** @@ -58,17 +68,19 @@ public String getName() { return name; } - /** - * Gets the resource usage for a given resource type and task. - * - * @param task the task for which to calculate resource usage - * @return the resource usage - */ - public long getResourceUsage(Task task) { - return getResourceUsage.apply(task); - } - public boolean hasStatsEnabled() { return statsEnabled; } + + public ResourceUsageCalculator getResourceUsageCalculator() { + return resourceUsageCalculator; + } + + public double getNodeLevelThreshold(WorkloadManagementSettings settings) { + return nodeLevelThresholdSupplier.apply(settings); + } + + public static List getSortedValues() { + return sortedValues; + } } diff --git a/server/src/main/java/org/opensearch/wlm/WorkloadManagementSettings.java b/server/src/main/java/org/opensearch/wlm/WorkloadManagementSettings.java index b104925df77b3..b3577c1b3219d 100644 --- a/server/src/main/java/org/opensearch/wlm/WorkloadManagementSettings.java +++ b/server/src/main/java/org/opensearch/wlm/WorkloadManagementSettings.java @@ -8,6 +8,7 @@ package org.opensearch.wlm; +import org.opensearch.common.annotation.PublicApi; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; @@ -15,6 +16,7 @@ /** * Main class to declare Workload Management related settings */ +@PublicApi(since = "2.18.0") public class WorkloadManagementSettings { private static final Double DEFAULT_NODE_LEVEL_MEMORY_REJECTION_THRESHOLD = 0.8; private static final Double DEFAULT_NODE_LEVEL_MEMORY_CANCELLATION_THRESHOLD = 0.9; diff --git a/server/src/main/java/org/opensearch/wlm/cancellation/MaximumResourceTaskSelectionStrategy.java b/server/src/main/java/org/opensearch/wlm/cancellation/MaximumResourceTaskSelectionStrategy.java new file mode 100644 index 0000000000000..ffb326c07e7ac --- /dev/null +++ b/server/src/main/java/org/opensearch/wlm/cancellation/MaximumResourceTaskSelectionStrategy.java @@ -0,0 +1,71 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.wlm.cancellation; + +import org.opensearch.wlm.QueryGroupTask; +import org.opensearch.wlm.ResourceType; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.stream.Collectors; + +import static org.opensearch.wlm.cancellation.QueryGroupTaskCancellationService.MIN_VALUE; + +/** + * Represents the highest resource consuming task first selection strategy. + */ +public class MaximumResourceTaskSelectionStrategy implements TaskSelectionStrategy { + + public MaximumResourceTaskSelectionStrategy() {} + + /** + * Returns a comparator that defines the sorting condition for tasks. + * This is the default implementation since the most resource consuming tasks are the likely to regress the performance. + * from resiliency point of view it makes sense to cancel them first + * + * @return The comparator + */ + private Comparator sortingCondition(ResourceType resourceType) { + return Comparator.comparingDouble(task -> resourceType.getResourceUsageCalculator().calculateTaskResourceUsage(task)); + } + + /** + * Selects tasks for cancellation based on the provided limit and resource type. 
+ * The tasks are sorted based on the sorting condition and then selected until the accumulated resource usage reaches the limit. + * + * @param tasks The list of tasks from which to select + * @param limit The limit on the accumulated resource usage + * @param resourceType The type of resource to consider + * @return The list of selected tasks + * @throws IllegalArgumentException If the limit is less than zero + */ + public List selectTasksForCancellation(List tasks, double limit, ResourceType resourceType) { + if (limit < 0) { + throw new IllegalArgumentException("limit has to be greater than zero"); + } + if (limit < MIN_VALUE) { + return Collections.emptyList(); + } + + List sortedTasks = tasks.stream().sorted(sortingCondition(resourceType).reversed()).collect(Collectors.toList()); + + List selectedTasks = new ArrayList<>(); + double accumulated = 0; + for (QueryGroupTask task : sortedTasks) { + selectedTasks.add(task); + accumulated += resourceType.getResourceUsageCalculator().calculateTaskResourceUsage(task); + if ((accumulated - limit) > MIN_VALUE) { + break; + } + } + return selectedTasks; + } +} diff --git a/server/src/main/java/org/opensearch/wlm/cancellation/QueryGroupTaskCancellationService.java b/server/src/main/java/org/opensearch/wlm/cancellation/QueryGroupTaskCancellationService.java new file mode 100644 index 0000000000000..a2c97c8d8635b --- /dev/null +++ b/server/src/main/java/org/opensearch/wlm/cancellation/QueryGroupTaskCancellationService.java @@ -0,0 +1,205 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.wlm.cancellation; + +import org.opensearch.cluster.metadata.QueryGroup; +import org.opensearch.tasks.CancellableTask; +import org.opensearch.tasks.TaskCancellation; +import org.opensearch.wlm.MutableQueryGroupFragment.ResiliencyMode; +import org.opensearch.wlm.QueryGroupLevelResourceUsageView; +import org.opensearch.wlm.QueryGroupTask; +import org.opensearch.wlm.ResourceType; +import org.opensearch.wlm.WorkloadManagementSettings; +import org.opensearch.wlm.tracker.QueryGroupResourceUsageTrackerService; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.function.BooleanSupplier; +import java.util.function.Consumer; +import java.util.stream.Collectors; + +import static org.opensearch.wlm.tracker.QueryGroupResourceUsageTrackerService.TRACKED_RESOURCES; + +/** + * Manages the cancellation of tasks enforced by QueryGroup thresholds on resource usage criteria. + * This class utilizes a strategy pattern through {@link MaximumResourceTaskSelectionStrategy} to identify tasks that exceed + * predefined resource usage limits and are therefore eligible for cancellation. + * + *
The cancellation process is initiated by evaluating the resource usage of each QueryGroup against its + * resource limits. Tasks that contribute to exceeding these limits are selected for cancellation based on the + * implemented task selection strategy. + * + * Instances of this class are configured with a map linking QueryGroup IDs to their corresponding resource usage + * views, a set of active QueryGroups, and a task selection strategy. These components collectively facilitate the + * identification and cancellation of tasks that threaten to breach QueryGroup resource limits.
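+ * + * On each run, tasks from QueryGroups in ENFORCED resiliency mode are cancelled first; if the node is in duress, + * tasks belonging to deleted QueryGroups and then tasks from SOFT mode QueryGroups are also considered for cancellation.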
+ * + * @see MaximumResourceTaskSelectionStrategy + * @see QueryGroup + * @see ResourceType + */ +public class QueryGroupTaskCancellationService { + public static final double MIN_VALUE = 1e-9; + + private final WorkloadManagementSettings workloadManagementSettings; + private final TaskSelectionStrategy taskSelectionStrategy; + private final QueryGroupResourceUsageTrackerService resourceUsageTrackerService; + // a map of QueryGroupId to its corresponding QueryGroupLevelResourceUsageView object + Map queryGroupLevelResourceUsageViews; + private final Collection activeQueryGroups; + private final Collection deletedQueryGroups; + + public QueryGroupTaskCancellationService( + WorkloadManagementSettings workloadManagementSettings, + TaskSelectionStrategy taskSelectionStrategy, + QueryGroupResourceUsageTrackerService resourceUsageTrackerService, + Collection activeQueryGroups, + Collection deletedQueryGroups + ) { + this.workloadManagementSettings = workloadManagementSettings; + this.taskSelectionStrategy = taskSelectionStrategy; + this.resourceUsageTrackerService = resourceUsageTrackerService; + this.activeQueryGroups = activeQueryGroups; + this.deletedQueryGroups = deletedQueryGroups; + } + + /** + * Cancel tasks based on the implemented strategy. + */ + public final void cancelTasks(BooleanSupplier isNodeInDuress) { + queryGroupLevelResourceUsageViews = resourceUsageTrackerService.constructQueryGroupLevelUsageViews(); + // cancel tasks from QueryGroups that are in Enforced mode that are breaching their resource limits + cancelTasks(ResiliencyMode.ENFORCED); + // if the node is in duress, cancel tasks accordingly. + handleNodeDuress(isNodeInDuress); + } + + private void handleNodeDuress(BooleanSupplier isNodeInDuress) { + if (!isNodeInDuress.getAsBoolean()) { + return; + } + // List of tasks to be executed in order if the node is in duress + List> duressActions = List.of(v -> cancelTasksFromDeletedQueryGroups(), v -> cancelTasks(ResiliencyMode.SOFT)); + + for (Consumer duressAction : duressActions) { + if (!isNodeInDuress.getAsBoolean()) { + break; + } + duressAction.accept(null); + } + } + + private void cancelTasksFromDeletedQueryGroups() { + cancelTasks(getAllCancellableTasks(this.deletedQueryGroups)); + } + + /** + * Get all cancellable tasks from the QueryGroups. + * + * @return List of tasks that can be cancelled + */ + List getAllCancellableTasks(ResiliencyMode resiliencyMode) { + return getAllCancellableTasks( + activeQueryGroups.stream().filter(queryGroup -> queryGroup.getResiliencyMode() == resiliencyMode).collect(Collectors.toList()) + ); + } + + /** + * Get all cancellable tasks from the given QueryGroups. 
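+ * For each tracked resource type, the excess usage over the QueryGroup's normalised threshold is computed, discounting + * the usage of tasks already selected for another resource type, and tasks are selected to cover that excess.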
+ * + * @return List of tasks that can be cancelled + */ + List getAllCancellableTasks(Collection queryGroups) { + List taskCancellations = new ArrayList<>(); + for (QueryGroup queryGroup : queryGroups) { + final List reasons = new ArrayList<>(); + List selectedTasks = new ArrayList<>(); + for (ResourceType resourceType : TRACKED_RESOURCES) { + // We need to consider the already selected tasks since those tasks also consumed the resources + double excessUsage = getExcessUsage(queryGroup, resourceType) - resourceType.getResourceUsageCalculator() + .calculateResourceUsage(selectedTasks); + if (excessUsage > MIN_VALUE) { + reasons.add(new TaskCancellation.Reason(generateReasonString(queryGroup, resourceType), 1)); + // TODO: We will need to add the cancellation callback for these resources for the queryGroup to reflect stats + + // Only add tasks not already added to avoid double cancellations + selectedTasks.addAll( + taskSelectionStrategy.selectTasksForCancellation(getTasksFor(queryGroup), excessUsage, resourceType) + .stream() + .filter(x -> selectedTasks.stream().noneMatch(y -> x.getId() != y.getId())) + .collect(Collectors.toList()) + ); + } + } + + if (!reasons.isEmpty()) { + taskCancellations.addAll( + selectedTasks.stream().map(task -> createTaskCancellation(task, reasons)).collect(Collectors.toList()) + ); + } + } + return taskCancellations; + } + + private String generateReasonString(QueryGroup queryGroup, ResourceType resourceType) { + final double currentUsage = getCurrentUsage(queryGroup, resourceType); + return "QueryGroup ID : " + + queryGroup.get_id() + + " breached the resource limit: (" + + currentUsage + + " > " + + queryGroup.getResourceLimits().get(resourceType) + + ") for resource type : " + + resourceType.getName(); + } + + private List getTasksFor(QueryGroup queryGroup) { + return queryGroupLevelResourceUsageViews.get(queryGroup.get_id()).getActiveTasks(); + } + + private void cancelTasks(ResiliencyMode resiliencyMode) { + cancelTasks(getAllCancellableTasks(resiliencyMode)); + } + + private void cancelTasks(List cancellableTasks) { + cancellableTasks.forEach(TaskCancellation::cancel); + } + + private TaskCancellation createTaskCancellation(CancellableTask task, List reasons) { + return new TaskCancellation(task, reasons, List.of(this::callbackOnCancel)); + } + + private double getExcessUsage(QueryGroup queryGroup, ResourceType resourceType) { + if (queryGroup.getResourceLimits().get(resourceType) == null + || !queryGroupLevelResourceUsageViews.containsKey(queryGroup.get_id())) { + return 0; + } + return getCurrentUsage(queryGroup, resourceType) - getNormalisedThreshold(queryGroup, resourceType); + } + + private double getCurrentUsage(QueryGroup queryGroup, ResourceType resourceType) { + final QueryGroupLevelResourceUsageView queryGroupResourceUsageView = queryGroupLevelResourceUsageViews.get(queryGroup.get_id()); + return queryGroupResourceUsageView.getResourceUsageData().get(resourceType); + } + + /** + * normalises configured value with respect to node level cancellation thresholds + * @param queryGroup instance + * @return normalised value with respect to node level cancellation thresholds + */ + private double getNormalisedThreshold(QueryGroup queryGroup, ResourceType resourceType) { + double nodeLevelCancellationThreshold = resourceType.getNodeLevelThreshold(workloadManagementSettings); + return queryGroup.getResourceLimits().get(resourceType) * nodeLevelCancellationThreshold; + } + + private void callbackOnCancel() { + // TODO Implement callback logic here mostly 
used for Stats + } +} diff --git a/server/src/main/java/org/opensearch/wlm/cancellation/TaskSelectionStrategy.java b/server/src/main/java/org/opensearch/wlm/cancellation/TaskSelectionStrategy.java new file mode 100644 index 0000000000000..63fbf9b791a33 --- /dev/null +++ b/server/src/main/java/org/opensearch/wlm/cancellation/TaskSelectionStrategy.java @@ -0,0 +1,28 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.wlm.cancellation; + +import org.opensearch.wlm.QueryGroupTask; +import org.opensearch.wlm.ResourceType; + +import java.util.List; + +/** + * This interface exposes a method which implementations can use + */ +public interface TaskSelectionStrategy { + /** + * Determines how the tasks are selected from the list of given tasks based on resource type + * @param tasks to select from + * @param limit min cumulative resource usage sum of selected tasks + * @param resourceType + * @return list of tasks + */ + List selectTasksForCancellation(List tasks, double limit, ResourceType resourceType); +} diff --git a/server/src/main/java/org/opensearch/wlm/cancellation/package-info.java b/server/src/main/java/org/opensearch/wlm/cancellation/package-info.java new file mode 100644 index 0000000000000..1ce7b571e9a9c --- /dev/null +++ b/server/src/main/java/org/opensearch/wlm/cancellation/package-info.java @@ -0,0 +1,12 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * Workload management resource based cancellation artifacts + */ +package org.opensearch.wlm.cancellation; diff --git a/server/src/main/java/org/opensearch/wlm/listeners/QueryGroupRequestRejectionOperationListener.java b/server/src/main/java/org/opensearch/wlm/listeners/QueryGroupRequestOperationListener.java similarity index 64% rename from server/src/main/java/org/opensearch/wlm/listeners/QueryGroupRequestRejectionOperationListener.java rename to server/src/main/java/org/opensearch/wlm/listeners/QueryGroupRequestOperationListener.java index 89f6fe709667f..a2ce2b57bfe0f 100644 --- a/server/src/main/java/org/opensearch/wlm/listeners/QueryGroupRequestRejectionOperationListener.java +++ b/server/src/main/java/org/opensearch/wlm/listeners/QueryGroupRequestOperationListener.java @@ -8,6 +8,7 @@ package org.opensearch.wlm.listeners; +import org.opensearch.action.search.SearchPhaseContext; import org.opensearch.action.search.SearchRequestContext; import org.opensearch.action.search.SearchRequestOperationsListener; import org.opensearch.threadpool.ThreadPool; @@ -15,14 +16,14 @@ import org.opensearch.wlm.QueryGroupTask; /** - * This listener is used to perform the rejections for incoming requests into a queryGroup + * This listener is used to listen for request lifecycle events for a queryGroup */ -public class QueryGroupRequestRejectionOperationListener extends SearchRequestOperationsListener { +public class QueryGroupRequestOperationListener extends SearchRequestOperationsListener { private final QueryGroupService queryGroupService; private final ThreadPool threadPool; - public QueryGroupRequestRejectionOperationListener(QueryGroupService queryGroupService, ThreadPool threadPool) { + public QueryGroupRequestOperationListener(QueryGroupService queryGroupService, ThreadPool threadPool) 
{ this.queryGroupService = queryGroupService; this.threadPool = threadPool; } @@ -36,4 +37,10 @@ protected void onRequestStart(SearchRequestContext searchRequestContext) { final String queryGroupId = threadPool.getThreadContext().getHeader(QueryGroupTask.QUERY_GROUP_ID_HEADER); queryGroupService.rejectIfNeeded(queryGroupId); } + + @Override + protected void onRequestFailure(SearchPhaseContext context, SearchRequestContext searchRequestContext) { + final String queryGroupId = threadPool.getThreadContext().getHeader(QueryGroupTask.QUERY_GROUP_ID_HEADER); + queryGroupService.incrementFailuresFor(queryGroupId); + } } diff --git a/server/src/main/java/org/opensearch/wlm/stats/QueryGroupState.java b/server/src/main/java/org/opensearch/wlm/stats/QueryGroupState.java index 93cfcea697c43..376d34dd7c8ca 100644 --- a/server/src/main/java/org/opensearch/wlm/stats/QueryGroupState.java +++ b/server/src/main/java/org/opensearch/wlm/stats/QueryGroupState.java @@ -31,7 +31,7 @@ public class QueryGroupState { /** * this will track the cumulative failures in a query group */ - final CounterMetric failures = new CounterMetric(); + public final CounterMetric failures = new CounterMetric(); /** * This will track total number of cancellations in the query group due to all resource type breaches @@ -95,9 +95,18 @@ public static class ResourceTypeState { final ResourceType resourceType; final CounterMetric cancellations = new CounterMetric(); final CounterMetric rejections = new CounterMetric(); + private double lastRecordedUsage = 0; public ResourceTypeState(ResourceType resourceType) { this.resourceType = resourceType; } + + public void setLastRecordedUsage(double recordedUsage) { + lastRecordedUsage = recordedUsage; + } + + public double getLastRecordedUsage() { + return lastRecordedUsage; + } } } diff --git a/server/src/main/java/org/opensearch/wlm/stats/QueryGroupStats.java b/server/src/main/java/org/opensearch/wlm/stats/QueryGroupStats.java index d39bf104332da..2b389c2167778 100644 --- a/server/src/main/java/org/opensearch/wlm/stats/QueryGroupStats.java +++ b/server/src/main/java/org/opensearch/wlm/stats/QueryGroupStats.java @@ -14,8 +14,12 @@ import org.opensearch.core.xcontent.ToXContentObject; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.wlm.ResourceType; +import org.opensearch.wlm.stats.QueryGroupState.ResourceTypeState; import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Objects; @@ -52,7 +56,11 @@ public void writeTo(StreamOutput out) throws IOException { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject("query_groups"); - for (Map.Entry queryGroupStats : stats.entrySet()) { + // to keep the toXContent consistent + List> entryList = new ArrayList<>(stats.entrySet()); + entryList.sort((k1, k2) -> k1.getKey().compareTo(k2.getKey())); + + for (Map.Entry queryGroupStats : entryList) { builder.startObject(queryGroupStats.getKey()); queryGroupStats.getValue().toXContent(builder, params); builder.endObject(); @@ -83,11 +91,14 @@ public static class QueryGroupStatsHolder implements ToXContentObject, Writeable public static final String REJECTIONS = "rejections"; public static final String TOTAL_CANCELLATIONS = "total_cancellations"; public static final String FAILURES = "failures"; - private final long completions; - private final long rejections; - private final long failures; - private final long 
totalCancellations; - private final Map resourceStats; + private long completions; + private long rejections; + private long failures; + private long totalCancellations; + private Map resourceStats; + + // this is needed to support the factory method + public QueryGroupStatsHolder() {} public QueryGroupStatsHolder( long completions, @@ -112,7 +123,29 @@ public QueryGroupStatsHolder(StreamInput in) throws IOException { } /** - * Writes the {@param statsHolder} to {@param out} + * static factory method to convert {@link QueryGroupState} into {@link QueryGroupStatsHolder} + * @param queryGroupState which needs to be converted + * @return QueryGroupStatsHolder object + */ + public static QueryGroupStatsHolder from(QueryGroupState queryGroupState) { + final QueryGroupStatsHolder statsHolder = new QueryGroupStatsHolder(); + + Map resourceStatsMap = new HashMap<>(); + + for (Map.Entry resourceTypeStateEntry : queryGroupState.getResourceState().entrySet()) { + resourceStatsMap.put(resourceTypeStateEntry.getKey(), ResourceStats.from(resourceTypeStateEntry.getValue())); + } + + statsHolder.completions = queryGroupState.getCompletions(); + statsHolder.rejections = queryGroupState.getTotalRejections(); + statsHolder.failures = queryGroupState.getFailures(); + statsHolder.totalCancellations = queryGroupState.getTotalCancellations(); + statsHolder.resourceStats = resourceStatsMap; + return statsHolder; + } + + /** + * Writes the @param {statsHolder} to @param {out} * @param out StreamOutput * @param statsHolder QueryGroupStatsHolder * @throws IOException exception @@ -136,9 +169,10 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(REJECTIONS, rejections); builder.field(FAILURES, failures); builder.field(TOTAL_CANCELLATIONS, totalCancellations); - for (Map.Entry resourceStat : resourceStats.entrySet()) { - ResourceType resourceType = resourceStat.getKey(); - ResourceStats resourceStats1 = resourceStat.getValue(); + + for (ResourceType resourceType : ResourceType.getSortedValues()) { + ResourceStats resourceStats1 = resourceStats.get(resourceType); + if (resourceStats1 == null) continue; builder.startObject(resourceType.getName()); resourceStats1.toXContent(builder, params); builder.endObject(); @@ -188,7 +222,20 @@ public ResourceStats(StreamInput in) throws IOException { } /** - * Writes the {@param stats} to {@param out} + * static factory method to convert {@link ResourceTypeState} into {@link ResourceStats} + * @param resourceTypeState which needs to be converted + * @return QueryGroupStatsHolder object + */ + public static ResourceStats from(ResourceTypeState resourceTypeState) { + return new ResourceStats( + resourceTypeState.getLastRecordedUsage(), + resourceTypeState.cancellations.count(), + resourceTypeState.rejections.count() + ); + } + + /** + * Writes the @param {stats} to @param {out} * @param out StreamOutput * @param stats QueryGroupStatsHolder * @throws IOException exception diff --git a/server/src/main/java/org/opensearch/wlm/tracker/CpuUsageCalculator.java b/server/src/main/java/org/opensearch/wlm/tracker/CpuUsageCalculator.java new file mode 100644 index 0000000000000..05c84cd767b1f --- /dev/null +++ b/server/src/main/java/org/opensearch/wlm/tracker/CpuUsageCalculator.java @@ -0,0 +1,38 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.wlm.tracker; + +import org.opensearch.core.tasks.resourcetracker.ResourceStats; +import org.opensearch.wlm.QueryGroupTask; + +import java.util.List; + +/** + * class to help make cpu usage calculations for the query group + */ +public class CpuUsageCalculator extends ResourceUsageCalculator { + // This value should be initialised at the start time of the process and be used throughout the codebase + public static final int PROCESSOR_COUNT = Runtime.getRuntime().availableProcessors(); + public static final CpuUsageCalculator INSTANCE = new CpuUsageCalculator(); + + private CpuUsageCalculator() {} + + @Override + public double calculateResourceUsage(List tasks) { + double usage = tasks.stream().mapToDouble(this::calculateTaskResourceUsage).sum(); + + usage /= PROCESSOR_COUNT; + return usage; + } + + @Override + public double calculateTaskResourceUsage(QueryGroupTask task) { + return (1.0f * task.getTotalResourceUtilization(ResourceStats.CPU)) / task.getElapsedTime(); + } +} diff --git a/server/src/main/java/org/opensearch/wlm/tracker/MemoryUsageCalculator.java b/server/src/main/java/org/opensearch/wlm/tracker/MemoryUsageCalculator.java new file mode 100644 index 0000000000000..fb66ff47f58d0 --- /dev/null +++ b/server/src/main/java/org/opensearch/wlm/tracker/MemoryUsageCalculator.java @@ -0,0 +1,35 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.wlm.tracker; + +import org.opensearch.core.tasks.resourcetracker.ResourceStats; +import org.opensearch.monitor.jvm.JvmStats; +import org.opensearch.wlm.QueryGroupTask; + +import java.util.List; + +/** + * class to help make memory usage calculations for the query group + */ +public class MemoryUsageCalculator extends ResourceUsageCalculator { + public static final long HEAP_SIZE_BYTES = JvmStats.jvmStats().getMem().getHeapMax().getBytes(); + public static final MemoryUsageCalculator INSTANCE = new MemoryUsageCalculator(); + + private MemoryUsageCalculator() {} + + @Override + public double calculateResourceUsage(List tasks) { + return tasks.stream().mapToDouble(this::calculateTaskResourceUsage).sum(); + } + + @Override + public double calculateTaskResourceUsage(QueryGroupTask task) { + return (1.0f * task.getTotalResourceUtilization(ResourceStats.MEMORY)) / HEAP_SIZE_BYTES; + } +} diff --git a/server/src/main/java/org/opensearch/wlm/tracker/QueryGroupResourceUsageTrackerService.java b/server/src/main/java/org/opensearch/wlm/tracker/QueryGroupResourceUsageTrackerService.java index 15852b5bbe6a8..b23d9ff342139 100644 --- a/server/src/main/java/org/opensearch/wlm/tracker/QueryGroupResourceUsageTrackerService.java +++ b/server/src/main/java/org/opensearch/wlm/tracker/QueryGroupResourceUsageTrackerService.java @@ -8,7 +8,6 @@ package org.opensearch.wlm.tracker; -import org.opensearch.tasks.Task; import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.wlm.QueryGroupLevelResourceUsageView; import org.opensearch.wlm.QueryGroupTask; @@ -25,7 +24,6 @@ * This class tracks resource usage per QueryGroup */ public class QueryGroupResourceUsageTrackerService { - public static final EnumSet TRACKED_RESOURCES = EnumSet.allOf(ResourceType.class); private final TaskResourceTrackingService taskResourceTrackingService; @@ -44,19 +42,16 @@ public QueryGroupResourceUsageTrackerService(TaskResourceTrackingService taskRes * @return 
Map of QueryGroup views */ public Map constructQueryGroupLevelUsageViews() { - final Map> tasksByQueryGroup = getTasksGroupedByQueryGroup(); + final Map> tasksByQueryGroup = getTasksGroupedByQueryGroup(); final Map queryGroupViews = new HashMap<>(); // Iterate over each QueryGroup entry - for (Map.Entry> queryGroupEntry : tasksByQueryGroup.entrySet()) { - // Compute the QueryGroup usage - final EnumMap queryGroupUsage = new EnumMap<>(ResourceType.class); + for (Map.Entry> queryGroupEntry : tasksByQueryGroup.entrySet()) { + // Compute the QueryGroup resource usage + final Map queryGroupUsage = new EnumMap<>(ResourceType.class); for (ResourceType resourceType : TRACKED_RESOURCES) { - long queryGroupResourceUsage = 0; - for (Task task : queryGroupEntry.getValue()) { - queryGroupResourceUsage += resourceType.getResourceUsage(task); - } - queryGroupUsage.put(resourceType, queryGroupResourceUsage); + double usage = resourceType.getResourceUsageCalculator().calculateResourceUsage(queryGroupEntry.getValue()); + queryGroupUsage.put(resourceType, usage); } // Add to the QueryGroup View @@ -73,12 +68,12 @@ public Map constructQueryGroupLevelUsa * * @return Map of tasks grouped by QueryGroup */ - private Map> getTasksGroupedByQueryGroup() { + private Map> getTasksGroupedByQueryGroup() { return taskResourceTrackingService.getResourceAwareTasks() .values() .stream() .filter(QueryGroupTask.class::isInstance) .map(QueryGroupTask.class::cast) - .collect(Collectors.groupingBy(QueryGroupTask::getQueryGroupId, Collectors.mapping(task -> (Task) task, Collectors.toList()))); + .collect(Collectors.groupingBy(QueryGroupTask::getQueryGroupId, Collectors.mapping(task -> task, Collectors.toList()))); } } diff --git a/server/src/main/java/org/opensearch/wlm/tracker/ResourceUsageCalculator.java b/server/src/main/java/org/opensearch/wlm/tracker/ResourceUsageCalculator.java new file mode 100644 index 0000000000000..bc8317cbfbf92 --- /dev/null +++ b/server/src/main/java/org/opensearch/wlm/tracker/ResourceUsageCalculator.java @@ -0,0 +1,34 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.wlm.tracker; + +import org.opensearch.common.annotation.PublicApi; +import org.opensearch.wlm.QueryGroupTask; + +import java.util.List; + +/** + * This class is used to track query group level resource usage + */ +@PublicApi(since = "2.18.0") +public abstract class ResourceUsageCalculator { + /** + * calculates the current resource usage for the query group + * + * @param tasks list of tasks in the query group + */ + public abstract double calculateResourceUsage(List tasks); + + /** + * calculates the task level resource usage + * @param task QueryGroupTask + * @return task level resource usage + */ + public abstract double calculateTaskResourceUsage(QueryGroupTask task); +} diff --git a/server/src/test/java/org/opensearch/ExceptionSerializationTests.java b/server/src/test/java/org/opensearch/ExceptionSerializationTests.java index d7026159d9ec0..eff312a36dbc6 100644 --- a/server/src/test/java/org/opensearch/ExceptionSerializationTests.java +++ b/server/src/test/java/org/opensearch/ExceptionSerializationTests.java @@ -99,6 +99,7 @@ import org.opensearch.index.shard.PrimaryShardClosedException; import org.opensearch.index.shard.ShardNotInPrimaryModeException; import org.opensearch.indices.IndexTemplateMissingException; +import org.opensearch.indices.InvalidIndexContextException; import org.opensearch.indices.InvalidIndexTemplateException; import org.opensearch.indices.recovery.PeerRecoveryNotFound; import org.opensearch.indices.recovery.RecoverFilesRecoveryException; @@ -118,6 +119,7 @@ import org.opensearch.snapshots.SnapshotId; import org.opensearch.snapshots.SnapshotInProgressException; import org.opensearch.snapshots.SnapshotInUseDeletionException; +import org.opensearch.snapshots.TooManyShardsInSnapshotsStatusException; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.VersionUtils; import org.opensearch.transport.ActionNotFoundTransportException; @@ -896,6 +898,8 @@ public void testIds() { ids.put(171, CryptoRegistryException.class); ids.put(172, ViewNotFoundException.class); ids.put(173, ViewAlreadyExistsException.class); + ids.put(174, InvalidIndexContextException.class); + ids.put(175, TooManyShardsInSnapshotsStatusException.class); ids.put(10001, IndexCreateBlockException.class); Map, Integer> reverse = new HashMap<>(); diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java index a0225a0bf6193..11902728eed07 100644 --- a/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java +++ b/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java @@ -823,7 +823,7 @@ public static NodeStats createNodeStats(boolean remoteStoreStats) throws IOExcep : null; ClusterStateStats stateStats = new ClusterStateStats(); RemotePersistenceStats remoteStateStats = new RemotePersistenceStats(); - stateStats.setPersistenceStats(Arrays.asList(remoteStateStats)); + stateStats.setPersistenceStats(Arrays.asList(remoteStateStats.getUploadStats())); DiscoveryStats discoveryStats = frequently() ? new DiscoveryStats( randomBoolean() ? 
new PendingClusterStateStats(randomInt(), randomInt(), randomInt()) : null, diff --git a/server/src/test/java/org/opensearch/action/admin/indices/create/CreateIndexRequestTests.java b/server/src/test/java/org/opensearch/action/admin/indices/create/CreateIndexRequestTests.java index 89e072d783747..ee150c7b2bb71 100644 --- a/server/src/test/java/org/opensearch/action/admin/indices/create/CreateIndexRequestTests.java +++ b/server/src/test/java/org/opensearch/action/admin/indices/create/CreateIndexRequestTests.java @@ -165,6 +165,27 @@ public void testToString() throws IOException { assertThat(request.toString(), containsString("mappings='{\"_doc\":{}}'")); } + public void testContext() throws IOException { + String contextName = "Test"; + String contextVersion = "1"; + Map paramsMap = Map.of("foo", "bar"); + try (XContentBuilder builder = MediaTypeRegistry.contentBuilder(randomFrom(XContentType.values()))) { + builder.startObject() + .startObject("context") + .field("name", contextName) + .field("version", contextVersion) + .field("params", paramsMap) + .endObject() + .endObject(); + + CreateIndexRequest parsedCreateIndexRequest = new CreateIndexRequest(); + parsedCreateIndexRequest.source(builder); + assertEquals(contextName, parsedCreateIndexRequest.context().name()); + assertEquals(contextVersion, parsedCreateIndexRequest.context().version()); + assertEquals(paramsMap, parsedCreateIndexRequest.context().params()); + } + } + public static void assertMappingsEqual(Map expected, Map actual) throws IOException { assertEquals(expected.keySet(), actual.keySet()); diff --git a/server/src/test/java/org/opensearch/action/admin/indices/get/GetIndexActionTests.java b/server/src/test/java/org/opensearch/action/admin/indices/get/GetIndexActionTests.java index 2d9ec2b6d3c02..67d2163affd28 100644 --- a/server/src/test/java/org/opensearch/action/admin/indices/get/GetIndexActionTests.java +++ b/server/src/test/java/org/opensearch/action/admin/indices/get/GetIndexActionTests.java @@ -36,6 +36,7 @@ import org.opensearch.action.support.ActionFilters; import org.opensearch.action.support.replication.ClusterStateCreationUtils; import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.metadata.Context; import org.opensearch.cluster.metadata.IndexNameExpressionResolver; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.settings.IndexScopedSettings; @@ -68,7 +69,7 @@ public class GetIndexActionTests extends OpenSearchSingleNodeTestCase { private ThreadPool threadPool; private SettingsFilter settingsFilter; private final String indexName = "test_index"; - + private Context context; private TestTransportGetIndexAction getIndexAction; @Before @@ -91,6 +92,7 @@ public void setUp() throws Exception { ); transportService.start(); transportService.acceptIncomingRequests(); + context = new Context(randomAlphaOfLength(5)); getIndexAction = new GetIndexActionTests.TestTransportGetIndexAction(); } @@ -135,6 +137,23 @@ public void testDoNotIncludeDefaults() { ); } + public void testContextInResponse() { + GetIndexRequest contextIndexRequest = new GetIndexRequest().indices(indexName); + getIndexAction.execute( + null, + contextIndexRequest, + ActionListener.wrap( + resp -> assertTrue( + "index context should be present as it was set", + resp.contexts().get(indexName) != null && resp.contexts().get(indexName).equals(context) + ), + exception -> { + throw new AssertionError(exception); + } + ) + ); + } + class TestTransportGetIndexAction extends TransportGetIndexAction { 
TestTransportGetIndexAction() { @@ -157,7 +176,7 @@ protected void doClusterManagerOperation( ClusterState state, ActionListener listener ) { - ClusterState stateWithIndex = ClusterStateCreationUtils.state(indexName, 1, 1); + ClusterState stateWithIndex = ClusterStateCreationUtils.stateWithContext(indexName, 1, 1, context); super.doClusterManagerOperation(request, concreteIndices, stateWithIndex, listener); } } diff --git a/server/src/test/java/org/opensearch/action/admin/indices/get/GetIndexResponseTests.java b/server/src/test/java/org/opensearch/action/admin/indices/get/GetIndexResponseTests.java index 89d47328a08ed..c9b7858ed24ca 100644 --- a/server/src/test/java/org/opensearch/action/admin/indices/get/GetIndexResponseTests.java +++ b/server/src/test/java/org/opensearch/action/admin/indices/get/GetIndexResponseTests.java @@ -36,6 +36,7 @@ import org.opensearch.action.admin.indices.alias.get.GetAliasesResponseTests; import org.opensearch.action.admin.indices.mapping.get.GetMappingsResponseTests; import org.opensearch.cluster.metadata.AliasMetadata; +import org.opensearch.cluster.metadata.Context; import org.opensearch.cluster.metadata.MappingMetadata; import org.opensearch.common.settings.IndexScopedSettings; import org.opensearch.common.settings.Settings; @@ -66,6 +67,7 @@ protected GetIndexResponse createTestInstance() { Map settings = new HashMap<>(); Map defaultSettings = new HashMap<>(); Map dataStreams = new HashMap<>(); + Map contexts = new HashMap<>(); IndexScopedSettings indexScopedSettings = IndexScopedSettings.DEFAULT_SCOPED_SETTINGS; boolean includeDefaults = randomBoolean(); for (String index : indices) { @@ -90,7 +92,10 @@ protected GetIndexResponse createTestInstance() { if (randomBoolean()) { dataStreams.put(index, randomAlphaOfLength(5).toLowerCase(Locale.ROOT)); } + if (randomBoolean()) { + contexts.put(index, new Context(randomAlphaOfLength(5).toLowerCase(Locale.ROOT))); + } } - return new GetIndexResponse(indices, mappings, aliases, settings, defaultSettings, dataStreams); + return new GetIndexResponse(indices, mappings, aliases, settings, defaultSettings, dataStreams, contexts); } } diff --git a/server/src/test/java/org/opensearch/action/admin/indices/segments/IndicesSegmentResponseTests.java b/server/src/test/java/org/opensearch/action/admin/indices/segments/IndicesSegmentResponseTests.java index b90be87e3a600..c01335eb38afc 100644 --- a/server/src/test/java/org/opensearch/action/admin/indices/segments/IndicesSegmentResponseTests.java +++ b/server/src/test/java/org/opensearch/action/admin/indices/segments/IndicesSegmentResponseTests.java @@ -42,7 +42,10 @@ import org.opensearch.index.engine.Segment; import org.opensearch.test.OpenSearchTestCase; +import java.util.ArrayList; import java.util.Collections; +import java.util.List; +import java.util.Map; import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; @@ -68,4 +71,54 @@ public void testToXContentSerialiationWithSortedFields() throws Exception { response.toXContent(builder, ToXContent.EMPTY_PARAMS); } } + + public void testGetIndices() { + final int totalIndices = 5; + final int shardsPerIndex = 3; + final int segmentsPerShard = 2; + // Preparing a ShardSegments list, which will have (totalIndices * shardsPerIndex) shardSegments. 
+ // Indices will be named -> foo1, foo2, ..., foo{totalIndices} + List shardSegmentsList = new ArrayList<>(); + for (int indexName = 0; indexName < totalIndices; indexName++) { + for (int shardId = 0; shardId < shardsPerIndex; shardId++) { + ShardRouting shardRouting = TestShardRouting.newShardRouting( + "foo" + indexName, + shardId, + "node_id", + true, + ShardRoutingState.STARTED + ); + List segmentList = new ArrayList<>(); + for (int segmentNum = 0; segmentNum < segmentsPerShard; segmentNum++) { + segmentList.add(new Segment("foo" + indexName + shardId + segmentNum)); + } + shardSegmentsList.add(new ShardSegments(shardRouting, segmentList)); + } + } + Collections.shuffle(shardSegmentsList, random()); + + // Prepare the IndicesSegmentResponse object and get the indicesSegments map + IndicesSegmentResponse response = new IndicesSegmentResponse( + shardSegmentsList.toArray(new ShardSegments[0]), + totalIndices * shardsPerIndex, + totalIndices * shardsPerIndex, + 0, + Collections.emptyList() + ); + Map indicesSegments = response.getIndices(); + + assertEquals(totalIndices, indicesSegments.size()); + for (Map.Entry indexSegmentEntry : indicesSegments.entrySet()) { + assertEquals(shardsPerIndex, indexSegmentEntry.getValue().getShards().size()); + for (IndexShardSegments indexShardSegment : indexSegmentEntry.getValue().getShards().values()) { + for (ShardSegments shardSegment : indexShardSegment.getShards()) { + assertEquals(segmentsPerShard, shardSegment.getSegments().size()); + for (int i = 0; i < segmentsPerShard; i++) { + String segmentName = indexSegmentEntry.getKey() + shardSegment.getShardRouting().getId() + i; + assertEquals(segmentName, shardSegment.getSegments().get(i).getName()); + } + } + } + } + } } diff --git a/server/src/test/java/org/opensearch/action/admin/indices/shards/CatShardsResponseTests.java b/server/src/test/java/org/opensearch/action/admin/indices/shards/CatShardsResponseTests.java new file mode 100644 index 0000000000000..d0b98f4d286ae --- /dev/null +++ b/server/src/test/java/org/opensearch/action/admin/indices/shards/CatShardsResponseTests.java @@ -0,0 +1,61 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. 
+ */ + +package org.opensearch.action.admin.indices.shards; + +import org.opensearch.action.admin.cluster.shards.CatShardsResponse; +import org.opensearch.action.admin.cluster.state.ClusterStateResponse; +import org.opensearch.action.admin.indices.stats.IndicesStatsResponse; +import org.opensearch.cluster.ClusterName; +import org.opensearch.test.OpenSearchTestCase; + +import static org.junit.Assert.assertEquals; + +public class CatShardsResponseTests extends OpenSearchTestCase { + + private final CatShardsResponse catShardsResponse = new CatShardsResponse(); + + public void testGetAndSetClusterStateResponse() { + ClusterName clusterName = new ClusterName("1"); + ClusterStateResponse clusterStateResponse = new ClusterStateResponse(clusterName, null, false); + catShardsResponse.setClusterStateResponse(clusterStateResponse); + + assertEquals(clusterStateResponse, catShardsResponse.getClusterStateResponse()); + } + + public void testGetAndSetIndicesStatsResponse() { + final IndicesStatsResponse indicesStatsResponse = new IndicesStatsResponse(null, 0, 0, 0, null); + catShardsResponse.setIndicesStatsResponse(indicesStatsResponse); + + assertEquals(indicesStatsResponse, catShardsResponse.getIndicesStatsResponse()); + } +} diff --git a/server/src/test/java/org/opensearch/action/ingest/SimulateExecutionServiceTests.java b/server/src/test/java/org/opensearch/action/ingest/SimulateExecutionServiceTests.java index a5a082286f123..a26afeee0f912 100644 --- a/server/src/test/java/org/opensearch/action/ingest/SimulateExecutionServiceTests.java +++ b/server/src/test/java/org/opensearch/action/ingest/SimulateExecutionServiceTests.java @@ -39,6 +39,7 @@ import org.opensearch.ingest.DropProcessor; import org.opensearch.ingest.IngestDocument; import org.opensearch.ingest.IngestProcessorException; +import org.opensearch.ingest.IngestService; import org.opensearch.ingest.Pipeline; import org.opensearch.ingest.Processor; import org.opensearch.ingest.RandomDocumentPicks; @@ -67,6 +68,8 @@ import static org.hamcrest.Matchers.notNullValue; import static org.hamcrest.Matchers.nullValue; import static org.hamcrest.Matchers.sameInstance; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; public class SimulateExecutionServiceTests extends OpenSearchTestCase { @@ -75,11 +78,13 @@ public class SimulateExecutionServiceTests extends OpenSearchTestCase { private TestThreadPool threadPool; private SimulateExecutionService executionService; private IngestDocument ingestDocument; + private IngestService ingestService; @Before public void setup() { + ingestService = mock(IngestService.class); threadPool = new TestThreadPool(SimulateExecutionServiceTests.class.getSimpleName()); - executionService = new SimulateExecutionService(threadPool); + executionService = new SimulateExecutionService(threadPool, ingestService); ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); } @@ -400,6 +405,22 @@ public String getType() { } } + public void testValidateProcessorCountForIngestPipelineThrowsException() { + + int numDocs = randomIntBetween(1, 64); + List documents = new ArrayList<>(numDocs); + for (int id = 0; id < numDocs; id++) { + documents.add(new IngestDocument("_index", Integer.toString(id), null, 0L, VersionType.INTERNAL, new HashMap<>())); + } + + Pipeline pipeline = new Pipeline("_id", "_description", version, new CompoundProcessor()); + SimulatePipelineRequest.Parsed request = new SimulatePipelineRequest.Parsed(pipeline, documents, false); + + doThrow(new 
IllegalStateException()).when(ingestService).validateProcessorCountForIngestPipeline(pipeline); + + expectThrows(IllegalStateException.class, () -> executionService.execute(request, ActionListener.wrap(response -> {}, e -> {}))); + } + private static void assertVerboseResult( SimulateProcessorResult result, String expectedPipelineId, diff --git a/server/src/test/java/org/opensearch/action/support/clustermanager/TransportClusterManagerNodeActionTests.java b/server/src/test/java/org/opensearch/action/support/clustermanager/TransportClusterManagerNodeActionTests.java index 37e884502b613..00198364fc8d7 100644 --- a/server/src/test/java/org/opensearch/action/support/clustermanager/TransportClusterManagerNodeActionTests.java +++ b/server/src/test/java/org/opensearch/action/support/clustermanager/TransportClusterManagerNodeActionTests.java @@ -21,6 +21,7 @@ import org.opensearch.action.support.ActionFilters; import org.opensearch.action.support.PlainActionFuture; import org.opensearch.action.support.ThreadedActionListener; +import org.opensearch.action.support.clustermanager.term.GetTermVersionResponse; import org.opensearch.action.support.replication.ClusterStateCreationUtils; import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; @@ -30,6 +31,8 @@ import org.opensearch.cluster.block.ClusterBlockException; import org.opensearch.cluster.block.ClusterBlockLevel; import org.opensearch.cluster.block.ClusterBlocks; +import org.opensearch.cluster.coordination.ClusterStateTermVersion; +import org.opensearch.cluster.coordination.CoordinationMetadata; import org.opensearch.cluster.coordination.FailedToCommitClusterStateException; import org.opensearch.cluster.metadata.IndexNameExpressionResolver; import org.opensearch.cluster.metadata.Metadata; @@ -54,6 +57,8 @@ import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.rest.RestStatus; import org.opensearch.discovery.ClusterManagerNotDiscoveredException; +import org.opensearch.gateway.remote.ClusterMetadataManifest; +import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.node.NodeClosedException; import org.opensearch.node.remotestore.RemoteStoreNodeService; import org.opensearch.snapshots.EmptySnapshotsInfoService; @@ -77,6 +82,7 @@ import java.util.HashSet; import java.util.Map; import java.util.Objects; +import java.util.Optional; import java.util.Set; import java.util.concurrent.BrokenBarrierException; import java.util.concurrent.CyclicBarrier; @@ -84,7 +90,11 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import org.mockito.Mockito; + import static org.opensearch.index.remote.RemoteMigrationIndexMetadataUpdaterTests.createIndexMetadataWithRemoteStoreSettings; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; @@ -93,6 +103,8 @@ import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; import static 
org.hamcrest.Matchers.instanceOf; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.when; public class TransportClusterManagerNodeActionTests extends OpenSearchTestCase { private static ThreadPool threadPool; @@ -208,6 +220,8 @@ public void writeTo(StreamOutput out) throws IOException { } class Action extends TransportClusterManagerNodeAction { + private boolean localExecuteSupported = false; + Action(String actionName, TransportService transportService, ClusterService clusterService, ThreadPool threadPool) { super( actionName, @@ -220,6 +234,18 @@ class Action extends TransportClusterManagerNodeAction { ); } + Action( + String actionName, + TransportService transportService, + ClusterService clusterService, + ThreadPool threadPool, + RemoteClusterStateService clusterStateService + ) { + this(actionName, transportService, clusterService, threadPool); + this.remoteClusterStateService = clusterStateService; + this.localExecuteSupported = true; + } + @Override protected void doExecute(Task task, final Request request, ActionListener listener) { // remove unneeded threading by wrapping listener with SAME to prevent super.doExecute from wrapping it with LISTENER @@ -246,6 +272,10 @@ protected void clusterManagerOperation(Request request, ClusterState state, Acti protected ClusterBlockException checkBlock(Request request, ClusterState state) { return null; // default implementation, overridden in specific tests } + + public boolean localExecuteSupportedByAction() { + return localExecuteSupported; + } } public void testLocalOperationWithoutBlocks() throws ExecutionException, InterruptedException { @@ -714,6 +744,69 @@ protected void masterOperation(Task task, Request request, ClusterState state, A assertFalse(exception.get()); } + public void testFetchFromRemoteStore() throws InterruptedException, BrokenBarrierException, ExecutionException, IOException { + Map attributes = new HashMap<>(); + attributes.put(REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, "repo1"); + attributes.put(REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, "repo2"); + + localNode = new DiscoveryNode( + "local_node", + buildNewFakeTransportAddress(), + attributes, + Collections.singleton(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE), + Version.CURRENT + ); + remoteNode = new DiscoveryNode( + "remote_node", + buildNewFakeTransportAddress(), + attributes, + Collections.singleton(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE), + Version.CURRENT + ); + allNodes = new DiscoveryNode[] { localNode, remoteNode }; + + setState(clusterService, ClusterStateCreationUtils.state(localNode, remoteNode, allNodes)); + + ClusterState state = clusterService.state(); + RemoteClusterStateService remoteClusterStateService = Mockito.mock(RemoteClusterStateService.class); + ClusterMetadataManifest manifest = ClusterMetadataManifest.builder() + .clusterTerm(state.term() + 1) + .stateVersion(state.version() + 1) + .build(); + when( + remoteClusterStateService.getClusterMetadataManifestByTermVersion( + eq(state.getClusterName().value()), + eq(state.metadata().clusterUUID()), + eq(state.term() + 1), + eq(state.version() + 1) + ) + ).thenReturn(Optional.of(manifest)); + when(remoteClusterStateService.getClusterStateForManifest(state.getClusterName().value(), manifest, localNode.getId(), true)) + .thenReturn(buildClusterState(state, state.term() + 1, state.version() + 1)); + + PlainActionFuture listener = new PlainActionFuture<>(); + Request request = new Request(); + Action action = new Action("internal:testAction", 
transportService, clusterService, threadPool, remoteClusterStateService); + action.execute(request, listener); + + CapturingTransport.CapturedRequest capturedRequest = transport.capturedRequests()[0]; + // mismatch term and version + GetTermVersionResponse termResp = new GetTermVersionResponse( + new ClusterStateTermVersion(state.getClusterName(), state.metadata().clusterUUID(), state.term() + 1, state.version() + 1), + true + ); + transport.handleResponse(capturedRequest.requestId, termResp); + // no more transport calls + assertThat(transport.capturedRequests().length, equalTo(1)); + assertTrue(listener.isDone()); + } + + private ClusterState buildClusterState(ClusterState state, long term, long version) { + CoordinationMetadata.Builder coordMetadataBuilder = CoordinationMetadata.builder().term(term); + Metadata newMetadata = Metadata.builder().coordinationMetadata(coordMetadataBuilder.build()).build(); + return ClusterState.builder(state).version(version).metadata(newMetadata).build(); + } + public void testDontAllowSwitchingToStrictCompatibilityModeForMixedCluster() { // request to change cluster compatibility mode to STRICT Settings currentCompatibilityModeSettings = Settings.builder() @@ -901,6 +994,7 @@ public void testDontAllowSwitchingCompatibilityModeForClusterWithMultipleVersion private Map getRemoteStoreNodeAttributes() { Map remoteStoreNodeAttributes = new HashMap<>(); + remoteStoreNodeAttributes.put(REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-cluster-repo-1"); remoteStoreNodeAttributes.put(REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-segment-repo-1"); remoteStoreNodeAttributes.put(REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-translog-repo-1"); return remoteStoreNodeAttributes; diff --git a/server/src/test/java/org/opensearch/action/support/clustermanager/term/ClusterTermVersionIT.java b/server/src/test/java/org/opensearch/action/support/clustermanager/term/ClusterTermVersionIT.java index 7b783e025a575..7ab9da231896c 100644 --- a/server/src/test/java/org/opensearch/action/support/clustermanager/term/ClusterTermVersionIT.java +++ b/server/src/test/java/org/opensearch/action/support/clustermanager/term/ClusterTermVersionIT.java @@ -11,9 +11,19 @@ import org.opensearch.action.admin.cluster.state.ClusterStateAction; import org.opensearch.action.admin.cluster.state.ClusterStateRequest; import org.opensearch.action.admin.cluster.state.ClusterStateResponse; +import org.opensearch.action.admin.indices.create.CreateIndexResponse; +import org.opensearch.client.Client; +import org.opensearch.cluster.ClusterChangedEvent; import org.opensearch.cluster.ClusterName; +import org.opensearch.cluster.ClusterStateApplier; import org.opensearch.cluster.coordination.ClusterStateTermVersion; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.action.ActionFuture; +import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.FeatureFlags; +import org.opensearch.index.mapper.MapperService; import org.opensearch.plugins.Plugin; import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.test.transport.MockTransportService; @@ -22,6 +32,7 @@ import java.util.Collection; import java.util.List; import java.util.Map; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import static org.hamcrest.Matchers.is; @@ -100,12 +111,107 @@ public void 
testDatanodeOutOfSync() throws Exception { assertThat(stateResponse.getState().nodes().getSize(), is(internalCluster().getNodeNames().length)); } + public void testDatanodeWithSlowClusterApplierFallbackToPublish() throws Exception { + List masters = internalCluster().startClusterManagerOnlyNodes( + 3, + Settings.builder().put(FeatureFlags.TERM_VERSION_PRECOMMIT_ENABLE, "true").build() + ); + List datas = internalCluster().startDataOnlyNodes(3); + + Map callCounters = Map.ofEntries( + Map.entry(ClusterStateAction.NAME, new AtomicInteger()), + Map.entry(GetTermVersionAction.NAME, new AtomicInteger()) + ); + ensureGreen(); + + String master = internalCluster().getClusterManagerName(); + + AtomicBoolean processState = new AtomicBoolean(); + ClusterService cmClsService = internalCluster().getInstance(ClusterService.class, datas.get(0)); + cmClsService.addStateApplier(new ClusterStateApplier() { + @Override + public void applyClusterState(ClusterChangedEvent event) { + logger.info("Slow applier started"); + while (processState.get()) { + try { + logger.info("Sleeping for 1s"); + Thread.sleep(1000); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + logger.info("Slow applier ended"); + } + }); + + ensureGreen(); + + GetTermVersionResponse respBeforeUpdate = internalCluster().getInstance(Client.class, master) + .execute(GetTermVersionAction.INSTANCE, new GetTermVersionRequest()) + .get(); + + processState.set(true); + String index = "index_1"; + ActionFuture startCreateIndex1 = prepareCreate(index).setSettings( + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put(MapperService.INDEX_MAPPING_TOTAL_FIELDS_LIMIT_SETTING.getKey(), Long.MAX_VALUE) + .build() + ).execute(); + + ActionFuture startCreateIndex2 = prepareCreate("index_2").setSettings( + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put(MapperService.INDEX_MAPPING_TOTAL_FIELDS_LIMIT_SETTING.getKey(), Long.MAX_VALUE) + .build() + ).execute(); + + // wait for cluster-manager to publish new state + waitUntil(() -> { + try { + // node is yet to ack commit to cluster-manager , only the state-update corresponding to index_1 should have been published + GetTermVersionResponse respAfterUpdate = internalCluster().getInstance(Client.class, master) + .execute(GetTermVersionAction.INSTANCE, new GetTermVersionRequest()) + .get(); + logger.info( + "data has latest , {} , {}", + respAfterUpdate.getClusterStateTermVersion().getTerm(), + respAfterUpdate.getClusterStateTermVersion().getVersion() + ); + return respBeforeUpdate.getClusterStateTermVersion().getVersion() + 1 == respAfterUpdate.getClusterStateTermVersion() + .getVersion(); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + + addCallCountInterceptor(master, callCounters); + ClusterStateResponse stateResponseD = internalCluster().getInstance(Client.class, datas.get(0)) + .admin() + .cluster() + .state(new ClusterStateRequest()) + .actionGet(); + logger.info("data has the version , {} , {}", stateResponseD.getState().term(), stateResponseD.getState().version()); + assertTrue(respBeforeUpdate.getClusterStateTermVersion().getVersion() + 1 == stateResponseD.getState().version()); + + processState.set(false); + + AtomicInteger clusterStateCallsOnMaster = callCounters.get(ClusterStateAction.NAME); + AtomicInteger termCallsOnMaster = callCounters.get(GetTermVersionAction.NAME); + startCreateIndex1.get(); + 
startCreateIndex2.get(); + assertThat(clusterStateCallsOnMaster.get(), is(0)); + assertThat(termCallsOnMaster.get(), is(1)); + } + private void addCallCountInterceptor(String nodeName, Map callCounters) { MockTransportService primaryService = (MockTransportService) internalCluster().getInstance(TransportService.class, nodeName); for (var ctrEnty : callCounters.entrySet()) { primaryService.addRequestHandlingBehavior(ctrEnty.getKey(), (handler, request, channel, task) -> { ctrEnty.getValue().incrementAndGet(); - logger.info("--> {} response redirect", ClusterStateAction.NAME); + logger.info("--> {} response redirect", ctrEnty.getKey()); handler.messageReceived(request, channel, task); }); } diff --git a/server/src/test/java/org/opensearch/action/support/nodes/TransportClusterStatsActionTests.java b/server/src/test/java/org/opensearch/action/support/nodes/TransportClusterStatsActionTests.java index f8e14b477b8ef..fc920b847cef9 100644 --- a/server/src/test/java/org/opensearch/action/support/nodes/TransportClusterStatsActionTests.java +++ b/server/src/test/java/org/opensearch/action/support/nodes/TransportClusterStatsActionTests.java @@ -33,48 +33,12 @@ public class TransportClusterStatsActionTests extends TransportNodesActionTests { - /** - * By default, we send discovery nodes list to each request that is sent across from the coordinator node. This - * behavior is asserted in this test. - */ - public void testClusterStatsActionWithRetentionOfDiscoveryNodesList() { - ClusterStatsRequest request = new ClusterStatsRequest(); - request.setIncludeDiscoveryNodes(true); - Map> combinedSentRequest = performNodesInfoAction(request); - - assertNotNull(combinedSentRequest); - combinedSentRequest.forEach((node, capturedRequestList) -> { - assertNotNull(capturedRequestList); - capturedRequestList.forEach(sentRequest -> { - assertNotNull(sentRequest.getDiscoveryNodes()); - assertEquals(sentRequest.getDiscoveryNodes().length, clusterService.state().nodes().getSize()); - }); - }); - } - - public void testClusterStatsActionWithPreFilledConcreteNodesAndWithRetentionOfDiscoveryNodesList() { - ClusterStatsRequest request = new ClusterStatsRequest(); - Collection discoveryNodes = clusterService.state().getNodes().getNodes().values(); - request.setConcreteNodes(discoveryNodes.toArray(DiscoveryNode[]::new)); - Map> combinedSentRequest = performNodesInfoAction(request); - - assertNotNull(combinedSentRequest); - combinedSentRequest.forEach((node, capturedRequestList) -> { - assertNotNull(capturedRequestList); - capturedRequestList.forEach(sentRequest -> { - assertNotNull(sentRequest.getDiscoveryNodes()); - assertEquals(sentRequest.getDiscoveryNodes().length, clusterService.state().nodes().getSize()); - }); - }); - } - /** * In the optimized ClusterStats Request, we do not send the DiscoveryNodes List to each node. This behavior is * asserted in this test. 
*/ public void testClusterStatsActionWithoutRetentionOfDiscoveryNodesList() { ClusterStatsRequest request = new ClusterStatsRequest(); - request.setIncludeDiscoveryNodes(false); Map> combinedSentRequest = performNodesInfoAction(request); assertNotNull(combinedSentRequest); @@ -88,7 +52,6 @@ public void testClusterStatsActionWithPreFilledConcreteNodesAndWithoutRetentionO ClusterStatsRequest request = new ClusterStatsRequest(); Collection discoveryNodes = clusterService.state().getNodes().getNodes().values(); request.setConcreteNodes(discoveryNodes.toArray(DiscoveryNode[]::new)); - request.setIncludeDiscoveryNodes(false); Map> combinedSentRequest = performNodesInfoAction(request); assertNotNull(combinedSentRequest); diff --git a/server/src/test/java/org/opensearch/action/support/nodes/TransportNodesActionTests.java b/server/src/test/java/org/opensearch/action/support/nodes/TransportNodesActionTests.java index 7e968aa8fb199..a338e68276bbc 100644 --- a/server/src/test/java/org/opensearch/action/support/nodes/TransportNodesActionTests.java +++ b/server/src/test/java/org/opensearch/action/support/nodes/TransportNodesActionTests.java @@ -62,6 +62,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -71,6 +72,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReferenceArray; import java.util.function.Supplier; +import java.util.stream.Collectors; import static org.opensearch.test.ClusterServiceUtils.createClusterService; import static org.opensearch.test.ClusterServiceUtils.setState; @@ -166,6 +168,36 @@ public void testCustomResolving() throws Exception { assertEquals(clusterService.state().nodes().getDataNodes().size(), capturedRequests.size()); } + public void testTransportNodesActionWithDiscoveryNodesIncluded() { + String[] nodeIds = clusterService.state().nodes().getNodes().keySet().toArray(new String[0]); + TestNodesRequest request = new TestNodesRequest(true, nodeIds); + getTestTransportNodesAction().new AsyncAction(null, request, new PlainActionFuture<>()).start(); + Map> capturedRequests = transport.getCapturedRequestsByTargetNodeAndClear(); + List capturedTransportNodeRequestList = capturedRequests.values() + .stream() + .flatMap(Collection::stream) + .map(capturedRequest -> (TestNodeRequest) capturedRequest.request) + .collect(Collectors.toList()); + assertEquals(nodeIds.length, capturedTransportNodeRequestList.size()); + capturedTransportNodeRequestList.forEach( + capturedRequest -> assertEquals(nodeIds.length, capturedRequest.testNodesRequest.concreteNodes().length) + ); + } + + public void testTransportNodesActionWithDiscoveryNodesReset() { + String[] nodeIds = clusterService.state().nodes().getNodes().keySet().toArray(new String[0]); + TestNodesRequest request = new TestNodesRequest(false, nodeIds); + getTestTransportNodesAction().new AsyncAction(null, request, new PlainActionFuture<>()).start(); + Map> capturedRequests = transport.getCapturedRequestsByTargetNodeAndClear(); + List capturedTransportNodeRequestList = capturedRequests.values() + .stream() + .flatMap(Collection::stream) + .map(capturedRequest -> (TestNodeRequest) capturedRequest.request) + .collect(Collectors.toList()); + assertEquals(nodeIds.length, capturedTransportNodeRequestList.size()); + capturedTransportNodeRequestList.forEach(capturedRequest -> assertNull(capturedRequest.testNodesRequest.concreteNodes())); + } + private List mockList(Supplier supplier, int 
size) { List failures = new ArrayList<>(size); for (int i = 0; i < size; ++i) { @@ -314,7 +346,7 @@ protected TestNodesResponse newResponse( @Override protected TestNodeRequest newNodeRequest(TestNodesRequest request) { - return new TestNodeRequest(); + return new TestNodeRequest(request); } @Override @@ -357,6 +389,10 @@ private static class TestNodesRequest extends BaseNodesRequest TestNodesRequest(String... nodesIds) { super(nodesIds); } + + TestNodesRequest(boolean includeDiscoveryNodes, String... nodesIds) { + super(includeDiscoveryNodes, nodesIds); + } } private static class TestNodesResponse extends BaseNodesResponse { @@ -385,10 +421,24 @@ protected void writeNodesTo(StreamOutput out, List nodes) thro } private static class TestNodeRequest extends TransportRequest { + + protected TestNodesRequest testNodesRequest; + TestNodeRequest() {} + TestNodeRequest(TestNodesRequest testNodesRequest) { + this.testNodesRequest = testNodesRequest; + } + TestNodeRequest(StreamInput in) throws IOException { super(in); + testNodesRequest = new TestNodesRequest(in); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + testNodesRequest.writeTo(out); } } diff --git a/server/src/test/java/org/opensearch/action/support/nodes/TransportNodesInfoActionTests.java b/server/src/test/java/org/opensearch/action/support/nodes/TransportNodesInfoActionTests.java index e9e09d0dbbbf9..8277dcd363a8d 100644 --- a/server/src/test/java/org/opensearch/action/support/nodes/TransportNodesInfoActionTests.java +++ b/server/src/test/java/org/opensearch/action/support/nodes/TransportNodesInfoActionTests.java @@ -31,32 +31,12 @@ public class TransportNodesInfoActionTests extends TransportNodesActionTests { - /** - * By default, we send discovery nodes list to each request that is sent across from the coordinator node. This - * behavior is asserted in this test. - */ - public void testNodesInfoActionWithRetentionOfDiscoveryNodesList() { - NodesInfoRequest request = new NodesInfoRequest(); - request.setIncludeDiscoveryNodes(true); - Map> combinedSentRequest = performNodesInfoAction(request); - - assertNotNull(combinedSentRequest); - combinedSentRequest.forEach((node, capturedRequestList) -> { - assertNotNull(capturedRequestList); - capturedRequestList.forEach(sentRequest -> { - assertNotNull(sentRequest.getDiscoveryNodes()); - assertEquals(sentRequest.getDiscoveryNodes().length, clusterService.state().nodes().getSize()); - }); - }); - } - /** * In the optimized ClusterStats Request, we do not send the DiscoveryNodes List to each node. This behavior is * asserted in this test. */ public void testNodesInfoActionWithoutRetentionOfDiscoveryNodesList() { NodesInfoRequest request = new NodesInfoRequest(); - request.setIncludeDiscoveryNodes(false); Map> combinedSentRequest = performNodesInfoAction(request); assertNotNull(combinedSentRequest); diff --git a/server/src/test/java/org/opensearch/action/support/nodes/TransportNodesStatsActionTests.java b/server/src/test/java/org/opensearch/action/support/nodes/TransportNodesStatsActionTests.java index c7c420e353e1a..5e74dcdbc4953 100644 --- a/server/src/test/java/org/opensearch/action/support/nodes/TransportNodesStatsActionTests.java +++ b/server/src/test/java/org/opensearch/action/support/nodes/TransportNodesStatsActionTests.java @@ -31,31 +31,11 @@ public class TransportNodesStatsActionTests extends TransportNodesActionTests { /** - * By default, we send discovery nodes list to each request that is sent across from the coordinator node. 
This - * behavior is asserted in this test. - */ - public void testNodesStatsActionWithRetentionOfDiscoveryNodesList() { - NodesStatsRequest request = new NodesStatsRequest(); - request.setIncludeDiscoveryNodes(true); - Map> combinedSentRequest = performNodesStatsAction(request); - - assertNotNull(combinedSentRequest); - combinedSentRequest.forEach((node, capturedRequestList) -> { - assertNotNull(capturedRequestList); - capturedRequestList.forEach(sentRequest -> { - assertNotNull(sentRequest.getDiscoveryNodes()); - assertEquals(sentRequest.getDiscoveryNodes().length, clusterService.state().nodes().getSize()); - }); - }); - } - - /** - * By default, we send discovery nodes list to each request that is sent across from the coordinator node. This - * behavior is asserted in this test. + * We don't want to send discovery nodes list to each request that is sent across from the coordinator node. + * This behavior is asserted in this test. */ public void testNodesStatsActionWithoutRetentionOfDiscoveryNodesList() { NodesStatsRequest request = new NodesStatsRequest(); - request.setIncludeDiscoveryNodes(false); Map> combinedSentRequest = performNodesStatsAction(request); assertNotNull(combinedSentRequest); diff --git a/server/src/test/java/org/opensearch/action/support/replication/ReplicationOperationTests.java b/server/src/test/java/org/opensearch/action/support/replication/ReplicationOperationTests.java index ec5fc1d19e40d..6eb697d493bf4 100644 --- a/server/src/test/java/org/opensearch/action/support/replication/ReplicationOperationTests.java +++ b/server/src/test/java/org/opensearch/action/support/replication/ReplicationOperationTests.java @@ -42,13 +42,19 @@ import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.action.shard.ShardStateAction; import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodeRole; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.cluster.routing.AllocationId; +import org.opensearch.cluster.routing.IndexRoutingTable; import org.opensearch.cluster.routing.IndexShardRoutingTable; +import org.opensearch.cluster.routing.RoutingTable; import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.cluster.routing.ShardRoutingState; +import org.opensearch.cluster.routing.TestShardRouting; import org.opensearch.common.collect.Tuple; +import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.concurrent.ConcurrentCollections; import org.opensearch.common.util.set.Sets; @@ -89,6 +95,7 @@ import static org.opensearch.action.support.replication.ClusterStateCreationUtils.state; import static org.opensearch.action.support.replication.ClusterStateCreationUtils.stateWithActivePrimary; import static org.opensearch.action.support.replication.ReplicationOperation.RetryOnPrimaryException; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS; import static org.opensearch.cluster.routing.TestShardRouting.newShardRouting; import static org.hamcrest.Matchers.arrayWithSize; import static org.hamcrest.Matchers.equalTo; @@ -944,6 +951,83 @@ public void updateLocalCheckpointForShard(String allocationId, long checkpoint) assertThat(shardInfo.getSuccessful(), equalTo(1 + getExpectedReplicas(shardId, state, trackedShards).size())); } + public void testReplicationOperationsAreNotSentToSearchReplicas() 
throws Exception { + final String index = "test"; + final ShardId shardId = new ShardId(index, "_na_", 0); + + ClusterState initialState = stateWithActivePrimary(index, true, randomInt(5)); + IndexMetadata indexMetadata = initialState.getMetadata().index(index); + // add a search only replica + DiscoveryNode node = new DiscoveryNode( + "nodeForSearchShard", + OpenSearchTestCase.buildNewFakeTransportAddress(), + Collections.emptyMap(), + new HashSet<>(DiscoveryNodeRole.BUILT_IN_ROLES), + Version.CURRENT + ); + IndexMetadata.Builder indexMetadataBuilder = new IndexMetadata.Builder(indexMetadata); + indexMetadataBuilder.settings(Settings.builder().put(indexMetadata.getSettings()).put(SETTING_NUMBER_OF_SEARCH_REPLICAS, 1)); + + ShardRouting searchShardRouting = TestShardRouting.newShardRouting( + shardId, + node.getId(), + null, + false, + true, + ShardRoutingState.STARTED, + null + ); + IndexShardRoutingTable indexShardRoutingTable = initialState.getRoutingTable().shardRoutingTable(shardId); + IndexShardRoutingTable.Builder indexShardRoutingBuilder = new IndexShardRoutingTable.Builder(indexShardRoutingTable); + indexShardRoutingBuilder.addShard(searchShardRouting); + indexShardRoutingTable = indexShardRoutingBuilder.build(); + + ClusterState.Builder state = ClusterState.builder(initialState); + state.nodes(DiscoveryNodes.builder(initialState.nodes()).add(node).build()); + state.metadata(Metadata.builder().put(indexMetadataBuilder.build(), false)); + state.routingTable( + RoutingTable.builder().add(IndexRoutingTable.builder(indexMetadata.getIndex()).addIndexShard(indexShardRoutingTable)).build() + ); + initialState = state.build(); + // execute a request and check hits + + final Set trackedShards = new HashSet<>(); + final Set untrackedShards = new HashSet<>(); + ShardRouting primaryShard = indexShardRoutingTable.primaryShard(); + addTrackingInfo(indexShardRoutingTable, primaryShard, trackedShards, untrackedShards); + final ReplicationGroup replicationGroup = new ReplicationGroup( + indexShardRoutingTable, + indexMetadata.inSyncAllocationIds(0), + trackedShards, + 0 + ); + + // shards are not part of the rg + assertFalse(replicationGroup.getReplicationTargets().stream().anyMatch(ShardRouting::isSearchOnly)); + + Set initial = getExpectedReplicas(shardId, initialState, trackedShards); + final Set expectedReplicas = initial.stream().filter(shr -> shr.isSearchOnly() == false).collect(Collectors.toSet()); + Request request = new Request(shardId); + PlainActionFuture listener = new PlainActionFuture<>(); + final TestReplicaProxy replicasProxy = new TestReplicaProxy(new HashMap<>()); + + final TestPrimary primary = new TestPrimary(primaryShard, () -> replicationGroup, threadPool); + final TestReplicationOperation op = new TestReplicationOperation( + request, + primary, + listener, + replicasProxy, + indexMetadata.primaryTerm(0), + new FanoutReplicationProxy<>(replicasProxy) + ); + op.execute(); + assertTrue("request was not processed on primary", request.processedOnPrimary.get()); + assertEquals(request.processedOnReplicas, expectedReplicas); + assertEquals(replicasProxy.failedReplicas, Collections.emptySet()); + assertEquals(replicasProxy.markedAsStaleCopies, Collections.emptySet()); + assertTrue(listener.isDone()); + } + private Set getExpectedReplicas(ShardId shardId, ClusterState state, Set trackedShards) { Set expectedReplicas = new HashSet<>(); String localNodeId = state.nodes().getLocalNodeId(); diff --git a/server/src/test/java/org/opensearch/cluster/ClusterModuleTests.java 
b/server/src/test/java/org/opensearch/cluster/ClusterModuleTests.java index 97706927ba857..f8240e775cfa5 100644 --- a/server/src/test/java/org/opensearch/cluster/ClusterModuleTests.java +++ b/server/src/test/java/org/opensearch/cluster/ClusterModuleTests.java @@ -337,6 +337,19 @@ public void testQueryGroupMetadataRegister() { ); } + public void testRerouteServiceSetForBalancedShardsAllocator() { + ClusterModule clusterModule = new ClusterModule( + Settings.EMPTY, + clusterService, + Collections.emptyList(), + clusterInfoService, + null, + threadContext, + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) + ); + clusterModule.setRerouteServiceForAllocator((reason, priority, listener) -> listener.onResponse(clusterService.state())); + } + private static ClusterPlugin existingShardsAllocatorPlugin(final String allocatorName) { return new ClusterPlugin() { @Override diff --git a/server/src/test/java/org/opensearch/cluster/block/ClusterBlocksTests.java b/server/src/test/java/org/opensearch/cluster/block/ClusterBlocksTests.java new file mode 100644 index 0000000000000..839e831d38b1b --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/block/ClusterBlocksTests.java @@ -0,0 +1,55 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.block; + +import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.core.common.io.stream.BufferedChecksumStreamOutput; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.test.OpenSearchTestCase; + +import static org.opensearch.cluster.block.ClusterBlockTests.randomClusterBlock; + +public class ClusterBlocksTests extends OpenSearchTestCase { + + public void testWriteVerifiableTo() throws Exception { + ClusterBlock clusterBlock1 = randomClusterBlock(); + ClusterBlock clusterBlock2 = randomClusterBlock(); + ClusterBlock clusterBlock3 = randomClusterBlock(); + + ClusterBlocks clusterBlocks = ClusterBlocks.builder() + .addGlobalBlock(clusterBlock1) + .addGlobalBlock(clusterBlock2) + .addGlobalBlock(clusterBlock3) + .addIndexBlock("index-1", clusterBlock1) + .addIndexBlock("index-2", clusterBlock2) + .build(); + BytesStreamOutput out = new BytesStreamOutput(); + BufferedChecksumStreamOutput checksumOut = new BufferedChecksumStreamOutput(out); + clusterBlocks.writeVerifiableTo(checksumOut); + StreamInput in = out.bytes().streamInput(); + ClusterBlocks result = ClusterBlocks.readFrom(in); + + assertEquals(clusterBlocks.global().size(), result.global().size()); + assertEquals(clusterBlocks.global(), result.global()); + assertEquals(clusterBlocks.indices().size(), result.indices().size()); + assertEquals(clusterBlocks.indices(), result.indices()); + + ClusterBlocks clusterBlocks2 = ClusterBlocks.builder() + .addGlobalBlock(clusterBlock3) + .addGlobalBlock(clusterBlock1) + .addGlobalBlock(clusterBlock2) + .addIndexBlock("index-2", clusterBlock2) + .addIndexBlock("index-1", clusterBlock1) + .build(); + BytesStreamOutput out2 = new BytesStreamOutput(); + BufferedChecksumStreamOutput checksumOut2 = new BufferedChecksumStreamOutput(out2); + clusterBlocks2.writeVerifiableTo(checksumOut2); + assertEquals(checksumOut.getChecksum(), checksumOut2.getChecksum()); + } +} diff --git a/server/src/test/java/org/opensearch/cluster/coordination/CoordinationMetadataTests.java 
b/server/src/test/java/org/opensearch/cluster/coordination/CoordinationMetadataTests.java index 290479941aaa9..74c25246c2aa4 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/CoordinationMetadataTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/CoordinationMetadataTests.java @@ -33,10 +33,13 @@ import org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfigExclusion; import org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfiguration; +import org.opensearch.common.io.stream.BytesStreamOutput; import org.opensearch.common.util.set.Sets; import org.opensearch.common.xcontent.json.JsonXContent; import org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.core.common.io.stream.BufferedChecksumStreamOutput; import org.opensearch.core.common.io.stream.NamedWriteableRegistry; +import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.xcontent.ToXContent; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; @@ -46,8 +49,11 @@ import java.io.IOException; import java.util.Collections; +import java.util.Comparator; import java.util.HashSet; +import java.util.LinkedHashSet; import java.util.Set; +import java.util.stream.Collectors; import static org.hamcrest.Matchers.equalTo; @@ -231,4 +237,29 @@ public void testXContent() throws IOException { assertThat(originalMeta, equalTo(fromXContentMeta)); } } + + public void testWriteVerifiableTo() throws IOException { + VotingConfiguration votingConfiguration = randomVotingConfig(); + Set votingTombstones = randomVotingTombstones(); + CoordinationMetadata meta1 = new CoordinationMetadata(1, votingConfiguration, votingConfiguration, votingTombstones); + BytesStreamOutput out = new BytesStreamOutput(); + BufferedChecksumStreamOutput checksumOut = new BufferedChecksumStreamOutput(out); + meta1.writeVerifiableTo(checksumOut); + StreamInput in = out.bytes().streamInput(); + CoordinationMetadata result = new CoordinationMetadata(in); + + assertEquals(meta1, result); + + VotingConfiguration votingConfiguration2 = new VotingConfiguration( + (Set) votingConfiguration.getNodeIds().stream().sorted().collect(Collectors.toCollection(LinkedHashSet::new)) + ); + Set votingTombstones2 = votingTombstones.stream() + .sorted(Comparator.comparing(VotingConfigExclusion::getNodeId)) + .collect(Collectors.toCollection(LinkedHashSet::new)); + CoordinationMetadata meta2 = new CoordinationMetadata(1, votingConfiguration2, votingConfiguration2, votingTombstones2); + BytesStreamOutput out2 = new BytesStreamOutput(); + BufferedChecksumStreamOutput checksumOut2 = new BufferedChecksumStreamOutput(out2); + meta2.writeVerifiableTo(checksumOut2); + assertEquals(checksumOut.getChecksum(), checksumOut2.getChecksum()); + } } diff --git a/server/src/test/java/org/opensearch/cluster/coordination/CoordinationStateTests.java b/server/src/test/java/org/opensearch/cluster/coordination/CoordinationStateTests.java index 3ee2278aec546..32cb95e0c04f6 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/CoordinationStateTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/CoordinationStateTests.java @@ -57,6 +57,7 @@ import java.io.IOException; import java.util.Collections; import java.util.Locale; +import java.util.Map; import java.util.Optional; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -66,25 +67,34 @@ import static java.util.Collections.emptyMap; import static 
java.util.Collections.emptySet; -import static org.opensearch.common.util.FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL; +import static org.opensearch.gateway.remote.ClusterMetadataManifest.MANIFEST_CURRENT_CODEC_VERSION; import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING; +import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_PUBLICATION_SETTING_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.nullValue; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoInteractions; +import static org.mockito.Mockito.when; public class CoordinationStateTests extends OpenSearchTestCase { private DiscoveryNode node1; private DiscoveryNode node2; private DiscoveryNode node3; + private DiscoveryNode nodeWithPub; private ClusterState initialStateNode1; + private ClusterState initialStateNode2; private PersistedState ps1; private PersistedStateRegistry psr1; @@ -98,16 +108,18 @@ public void setupNodes() { node1 = createNode("node1"); node2 = createNode("node2"); node3 = createNode("node3"); + nodeWithPub = createNode( + "nodeWithPub", + Map.of( + REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, + "", + REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, + "" + ) + ); initialStateNode1 = clusterState(0L, 0L, node1, VotingConfiguration.EMPTY_CONFIG, VotingConfiguration.EMPTY_CONFIG, 42L); - ClusterState initialStateNode2 = clusterState( - 0L, - 0L, - node2, - VotingConfiguration.EMPTY_CONFIG, - VotingConfiguration.EMPTY_CONFIG, - 42L - ); + initialStateNode2 = clusterState(0L, 0L, node2, VotingConfiguration.EMPTY_CONFIG, VotingConfiguration.EMPTY_CONFIG, 42L); ClusterState initialStateNode3 = clusterState( 0L, 0L, @@ -127,6 +139,10 @@ public void setupNodes() { } public static DiscoveryNode createNode(String id) { + return createNode(id, Collections.emptyMap()); + } + + public static DiscoveryNode createNode(String id, Map attributes) { final TransportAddress address = buildNewFakeTransportAddress(); return new DiscoveryNode( "", @@ -135,7 +151,7 @@ public static DiscoveryNode createNode(String id) { address.address().getHostString(), address.getAddress(), address, - Collections.emptyMap(), + attributes, DiscoveryNodeRole.BUILT_IN_ROLES, Version.CURRENT ); @@ -912,7 +928,7 @@ public void testSafety() { public void testHandlePrePublishAndCommitWhenRemoteStateDisabled() { final PersistedStateRegistry persistedStateRegistry = persistedStateRegistry(); persistedStateRegistry.addPersistedState(PersistedStateType.LOCAL, ps1); - final PersistedStateRegistry persistedStateRegistrySpy = Mockito.spy(persistedStateRegistry); + final PersistedStateRegistry persistedStateRegistrySpy = 
spy(persistedStateRegistry); final CoordinationState coordinationState = createCoordinationState(persistedStateRegistrySpy, node1, Settings.EMPTY); final VotingConfiguration initialConfig = VotingConfiguration.of(node1); final ClusterState clusterState = clusterState(0L, 0L, node1, initialConfig, initialConfig, 42L); @@ -931,7 +947,7 @@ public void testHandlePrePublishAndCommitWhenRemoteStateDisabled() { public void testHandlePrePublishAndCommitWhenRemoteStateEnabled() throws IOException { final RemoteClusterStateService remoteClusterStateService = Mockito.mock(RemoteClusterStateService.class); final VotingConfiguration initialConfig = VotingConfiguration.of(node1); - final ClusterState clusterState = clusterState(0L, 0L, node1, initialConfig, initialConfig, 42L); + final ClusterState clusterState = clusterStateWithClusterManager(0L, 0L, node1, node1, initialConfig, initialConfig, 42L); final String previousClusterUUID = "prev-cluster-uuid"; final ClusterMetadataManifest manifest = ClusterMetadataManifest.builder() .clusterTerm(0L) @@ -947,8 +963,9 @@ public void testHandlePrePublishAndCommitWhenRemoteStateEnabled() throws IOExcep .previousClusterUUID(randomAlphaOfLength(10)) .clusterUUIDCommitted(true) .build(); - Mockito.when(remoteClusterStateService.writeFullMetadata(clusterState, previousClusterUUID)) - .thenReturn(new RemoteClusterStateManifestInfo(manifest, "path/to/manifest")); + when(remoteClusterStateService.writeFullMetadata(clusterState, previousClusterUUID, MANIFEST_CURRENT_CODEC_VERSION)).thenReturn( + new RemoteClusterStateManifestInfo(manifest, "path/to/manifest") + ); final PersistedStateRegistry persistedStateRegistry = persistedStateRegistry(); persistedStateRegistry.addPersistedState(PersistedStateType.LOCAL, ps1); @@ -957,39 +974,21 @@ public void testHandlePrePublishAndCommitWhenRemoteStateEnabled() throws IOExcep new RemotePersistedState(remoteClusterStateService, previousClusterUUID) ); - String randomRepoName = "randomRepoName"; - String stateRepoTypeAttributeKey = String.format( - Locale.getDefault(), - "node.attr." + REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT, - randomRepoName - ); - String stateRepoSettingsAttributeKeyPrefix = String.format( - Locale.getDefault(), - "node.attr." + REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX, - randomRepoName - ); - - Settings settings = Settings.builder() - .put("node.attr." 
+ REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, randomRepoName) - .put(stateRepoTypeAttributeKey, FsRepository.TYPE) - .put(stateRepoSettingsAttributeKeyPrefix + "location", "randomRepoPath") - .put(REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true) - .build(); - - final CoordinationState coordinationState = createCoordinationState(persistedStateRegistry, node1, settings); + final CoordinationState coordinationState = createCoordinationState(persistedStateRegistry, node1, remoteStateSettings()); coordinationState.handlePrePublish(clusterState); - Mockito.verify(remoteClusterStateService, Mockito.times(1)).writeFullMetadata(clusterState, previousClusterUUID); + Mockito.verify(remoteClusterStateService, Mockito.times(1)) + .writeFullMetadata(clusterState, previousClusterUUID, MANIFEST_CURRENT_CODEC_VERSION); assertThat(persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).getLastAcceptedState(), equalTo(clusterState)); - Mockito.when(remoteClusterStateService.markLastStateAsCommitted(any(), any())) - .thenReturn(new RemoteClusterStateManifestInfo(manifest, "path/to/manifest")); + when(remoteClusterStateService.markLastStateAsCommitted(any(), any(), eq(false))).thenReturn( + new RemoteClusterStateManifestInfo(manifest, "path/to/manifest") + ); coordinationState.handlePreCommit(); ClusterState committedClusterState = ClusterState.builder(clusterState) .metadata(Metadata.builder(clusterState.metadata()).clusterUUIDCommitted(true).build()) .build(); - // Mockito.verify(remoteClusterStateService, Mockito.times(1)).markLastStateAsCommitted(committedClusterState, manifest); ArgumentCaptor clusterStateCaptor = ArgumentCaptor.forClass(ClusterState.class); - verify(remoteClusterStateService, times(1)).markLastStateAsCommitted(clusterStateCaptor.capture(), any()); + verify(remoteClusterStateService, times(1)).markLastStateAsCommitted(clusterStateCaptor.capture(), any(), eq(false)); assertThat(clusterStateCaptor.getValue().metadata().indices(), equalTo(committedClusterState.metadata().indices())); assertThat(clusterStateCaptor.getValue().metadata().clusterUUID(), equalTo(committedClusterState.metadata().clusterUUID())); assertThat(clusterStateCaptor.getValue().stateUUID(), equalTo(committedClusterState.stateUUID())); @@ -1004,11 +1003,276 @@ public void testHandlePrePublishAndCommitWhenRemoteStateEnabled() throws IOExcep ); } + public void testHandlePublishRequestOnFollowerWhenRemotePublicationEnabled() { + final RemoteClusterStateService remoteClusterStateService = Mockito.mock(RemoteClusterStateService.class); + // cluster manager is node1 and node2 is a follower node + VotingConfiguration initialConfig = VotingConfiguration.of(node1); + ClusterState state1 = clusterState( + 0L, + 0L, + DiscoveryNodes.builder() + .add(node1) + .add(nodeWithPub) + .clusterManagerNodeId(node1.getId()) + .localNodeId(nodeWithPub.getId()) + .build(), + initialConfig, + initialConfig, + 42L + ); + final PersistedStateRegistry persistedStateRegistry = persistedStateRegistry(); + persistedStateRegistry.addPersistedState(PersistedStateType.LOCAL, new InMemoryPersistedState(0L, initialStateNode2)); + persistedStateRegistry.addPersistedState( + PersistedStateType.REMOTE, + new RemotePersistedState(remoteClusterStateService, state1.metadata().clusterUUID()) + ); + + final CoordinationState coordinationState = createCoordinationState( + persistedStateRegistry, + nodeWithPub, + remotePublicationSettings() + ); + coordinationState.setInitialState(state1); + long newTerm = randomLongBetween(1, 10); + 
StartJoinRequest startJoinRequest = new StartJoinRequest(nodeWithPub, newTerm); + + coordinationState.handleStartJoin(startJoinRequest); + + ClusterState state2 = setValue( + ClusterState.builder(state1) + .metadata( + Metadata.builder(state1.metadata()) + .coordinationMetadata(CoordinationMetadata.builder(state1.coordinationMetadata()).term(newTerm).build()) + .build() + ) + .version(randomLongBetween(1, 10)) + .build(), + 43L + ); + final ClusterMetadataManifest manifest = ClusterMetadataManifest.builder() + .clusterTerm(1L) + .stateVersion(state2.version()) + .clusterUUID(state2.metadata().clusterUUID()) + .nodeId(node1.getId()) + .stateUUID(randomAlphaOfLength(10)) + .opensearchVersion(Version.CURRENT) + .committed(false) + .codecVersion(1) + .globalMetadataFileName(randomAlphaOfLength(10)) + .indices(Collections.emptyList()) + .previousClusterUUID(randomAlphaOfLength(10)) + .clusterUUIDCommitted(true) + .build(); + + PublishResponse publishResponse = coordinationState.handlePublishRequest(new RemoteStatePublishRequest(state2, manifest)); + assertEquals(state2.term(), publishResponse.getTerm()); + assertEquals(state2.version(), publishResponse.getVersion()); + verifyNoInteractions(remoteClusterStateService); + assertEquals(state2, persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).getLastAcceptedState()); + assertEquals(manifest, persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).getLastAcceptedManifest()); + } + + public void testHandleCommitOnFollowerNodeWhenRemotePublicationEnabled() { + RemoteClusterStateService remoteClusterStateService = mock(RemoteClusterStateService.class); + VotingConfiguration initialConfig = VotingConfiguration.of(node1, nodeWithPub); + ClusterState state1 = clusterState( + 0L, + 0L, + DiscoveryNodes.builder() + .add(node1) + .add(nodeWithPub) + .clusterManagerNodeId(node1.getId()) + .localNodeId(nodeWithPub.getId()) + .build(), + initialConfig, + initialConfig, + 42L + ); + final PersistedStateRegistry persistedStateRegistry = persistedStateRegistry(); + persistedStateRegistry.addPersistedState(PersistedStateType.LOCAL, new InMemoryPersistedState(0L, initialStateNode2)); + persistedStateRegistry.addPersistedState( + PersistedStateType.REMOTE, + new RemotePersistedState(remoteClusterStateService, state1.metadata().clusterUUID()) + ); + + final CoordinationState coordinationState = createCoordinationState( + persistedStateRegistry, + nodeWithPub, + remotePublicationSettings() + ); + coordinationState.setInitialState(state1); + long newTerm = randomLongBetween(1, 10); + StartJoinRequest startJoinRequest = new StartJoinRequest(nodeWithPub, newTerm); + + Join v1 = cs1.handleStartJoin(startJoinRequest); + Join v2 = coordinationState.handleStartJoin(startJoinRequest); + assertTrue(coordinationState.handleJoin(v1)); + assertTrue(coordinationState.handleJoin(v2)); + assertTrue(coordinationState.electionWon()); + VotingConfiguration newConfig = VotingConfiguration.of(node1, nodeWithPub, node3); + ClusterState state2 = clusterState( + startJoinRequest.getTerm(), + 2L, + DiscoveryNodes.builder() + .add(node1) + .add(nodeWithPub) + .clusterManagerNodeId(node1.getId()) + .localNodeId(nodeWithPub.getId()) + .build(), + initialConfig, + newConfig, + 7L + ); + final ClusterMetadataManifest manifest = ClusterMetadataManifest.builder() + .clusterTerm(1L) + .stateVersion(state2.version()) + .clusterUUID(state2.metadata().clusterUUID()) + .nodeId(node1.getId()) + .stateUUID(randomAlphaOfLength(10)) + .opensearchVersion(Version.CURRENT) + 
.committed(false) + .codecVersion(1) + .globalMetadataFileName(randomAlphaOfLength(10)) + .indices(Collections.emptyList()) + .previousClusterUUID(randomAlphaOfLength(10)) + .clusterUUIDCommitted(true) + .build(); + + PublishRequest publishRequest = coordinationState.handleClientValue(state2); + coordinationState.handlePublishRequest(new RemoteStatePublishRequest(publishRequest.getAcceptedState(), manifest)); + ApplyCommitRequest applyCommitRequest = new ApplyCommitRequest(node2, state2.term(), state2.version()); + coordinationState.handleCommit(applyCommitRequest); + verifyNoInteractions(remoteClusterStateService); + assertTrue( + persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).getLastAcceptedState().metadata().clusterUUIDCommitted() + ); + assertTrue(persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).getLastAcceptedManifest().isCommitted()); + assertEquals(coordinationState.getLastCommittedConfiguration(), newConfig); + } + + public void testRemotePersistedStateResetsForPublicationEnabledAfterLocalPublication() { + final RemoteClusterStateService remoteClusterStateService = Mockito.mock(RemoteClusterStateService.class); + // cluster manager is node1 and nodeWithPub is a follower node + VotingConfiguration initialConfig = VotingConfiguration.of(node1); + ClusterState state1 = clusterState( + 0L, + 0L, + DiscoveryNodes.builder() + .add(node1) + .add(nodeWithPub) + .clusterManagerNodeId(node1.getId()) + .localNodeId(nodeWithPub.getId()) + .build(), + initialConfig, + initialConfig, + 42L + ); + final PersistedStateRegistry persistedStateRegistry = persistedStateRegistry(); + persistedStateRegistry.addPersistedState(PersistedStateType.LOCAL, new InMemoryPersistedState(0L, initialStateNode2)); + persistedStateRegistry.addPersistedState( + PersistedStateType.REMOTE, + new RemotePersistedState(remoteClusterStateService, state1.metadata().clusterUUID()) + ); + + final CoordinationState coordinationState = createCoordinationState( + persistedStateRegistry, + nodeWithPub, + remotePublicationSettings() + ); + coordinationState.setInitialState(state1); + long newTerm = randomLongBetween(1, 10); + StartJoinRequest startJoinRequest = new StartJoinRequest(nodeWithPub, newTerm); + + coordinationState.handleStartJoin(startJoinRequest); + + ClusterState state2 = setValue( + ClusterState.builder(state1) + .metadata( + Metadata.builder(state1.metadata()) + .coordinationMetadata(CoordinationMetadata.builder(state1.coordinationMetadata()).term(newTerm).build()) + .build() + ) + .version(randomLongBetween(1, 10)) + .build(), + 43L + ); + + PublishResponse publishResponse = coordinationState.handlePublishRequest(new PublishRequest(state2)); + assertEquals(state2.term(), publishResponse.getTerm()); + assertEquals(state2.version(), publishResponse.getVersion()); + verifyNoInteractions(remoteClusterStateService); + assertNull(persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).getLastAcceptedState()); + assertNull(persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).getLastAcceptedManifest()); + } + + public void testHandleCommitOnFollowerNodeWhenRemotePublicationEnabledWithNullRemotePersistedState() { + RemoteClusterStateService remoteClusterStateService = mock(RemoteClusterStateService.class); + VotingConfiguration initialConfig = VotingConfiguration.of(node1, nodeWithPub); + ClusterState state1 = clusterState( + 0L, + 0L, + DiscoveryNodes.builder() + .add(node1) + .add(nodeWithPub) + .clusterManagerNodeId(node1.getId()) + 
.localNodeId(nodeWithPub.getId()) + .build(), + initialConfig, + initialConfig, + 42L + ); + final PersistedStateRegistry persistedStateRegistry = persistedStateRegistry(); + persistedStateRegistry.addPersistedState(PersistedStateType.LOCAL, new InMemoryPersistedState(0L, initialStateNode2)); + persistedStateRegistry.addPersistedState( + PersistedStateType.REMOTE, + new RemotePersistedState(remoteClusterStateService, state1.metadata().clusterUUID()) + ); + + final CoordinationState coordinationState = createCoordinationState( + persistedStateRegistry, + nodeWithPub, + remotePublicationSettings() + ); + coordinationState.setInitialState(state1); + long newTerm = randomLongBetween(1, 10); + StartJoinRequest startJoinRequest = new StartJoinRequest(nodeWithPub, newTerm); + + Join v1 = cs1.handleStartJoin(startJoinRequest); + Join v2 = coordinationState.handleStartJoin(startJoinRequest); + assertTrue(coordinationState.handleJoin(v1)); + assertTrue(coordinationState.handleJoin(v2)); + assertTrue(coordinationState.electionWon()); + VotingConfiguration newConfig = VotingConfiguration.of(node1, nodeWithPub, node3); + ClusterState state2 = clusterState( + startJoinRequest.getTerm(), + 2L, + DiscoveryNodes.builder() + .add(node1) + .add(nodeWithPub) + .clusterManagerNodeId(node1.getId()) + .localNodeId(nodeWithPub.getId()) + .build(), + initialConfig, + newConfig, + 7L + ); + + PublishRequest publishRequest = coordinationState.handleClientValue(state2); + coordinationState.handlePublishRequest(new PublishRequest(publishRequest.getAcceptedState())); + assertNull(persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).getLastAcceptedState()); + assertNull(persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).getLastAcceptedManifest()); + ApplyCommitRequest applyCommitRequest = new ApplyCommitRequest(node2, state2.term(), state2.version()); + PersistedState spyRPS = spy(persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE)); + coordinationState.handleCommit(applyCommitRequest); + verifyNoInteractions(spyRPS); + verifyNoInteractions(remoteClusterStateService); + } + public void testIsRemotePublicationEnabled_WithInconsistentSettings() { // create settings with remote state disabled but publication enabled Settings settings = Settings.builder() .put(REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), false) - .put(REMOTE_PUBLICATION_EXPERIMENTAL, true) + .put(REMOTE_PUBLICATION_SETTING_KEY, true) .build(); CoordinationState coordinationState = createCoordinationState(psr1, node1, settings); assertFalse(coordinationState.isRemotePublicationEnabled()); @@ -1040,6 +1304,30 @@ public static ClusterState clusterState( ); } + public static ClusterState clusterStateWithClusterManager( + long term, + long version, + DiscoveryNode localNode, + DiscoveryNode clusterManagerNode, + VotingConfiguration lastCommittedConfig, + VotingConfiguration lastAcceptedConfig, + long value + ) { + return clusterState( + term, + version, + DiscoveryNodes.builder() + .add(localNode) + .add(clusterManagerNode) + .localNodeId(localNode.getId()) + .clusterManagerNodeId(clusterManagerNode.getId()) + .build(), + lastCommittedConfig, + lastAcceptedConfig, + value + ); + } + public static ClusterState clusterState( long term, long version, @@ -1088,4 +1376,30 @@ private static PersistedStateRegistry createPersistedStateRegistry(ClusterState persistedStateRegistry.addPersistedState(PersistedStateType.LOCAL, new InMemoryPersistedState(0L, clusterState)); return persistedStateRegistry; } + + private static 
Settings remoteStateSettings() { + String randomRepoName = "randomRepoName"; + String stateRepoTypeAttributeKey = String.format( + Locale.getDefault(), + "node.attr." + REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT, + randomRepoName + ); + String stateRepoSettingsAttributeKeyPrefix = String.format( + Locale.getDefault(), + "node.attr." + REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX, + randomRepoName + ); + + Settings settings = Settings.builder() + .put("node.attr." + REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, randomRepoName) + .put(stateRepoTypeAttributeKey, FsRepository.TYPE) + .put(stateRepoSettingsAttributeKeyPrefix + "location", "randomRepoPath") + .put(RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true) + .build(); + return settings; + } + + private static Settings remotePublicationSettings() { + return Settings.builder().put(remoteStateSettings()).put(REMOTE_PUBLICATION_SETTING_KEY, true).build(); + } } diff --git a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java index 9f463673aa6a6..d192a2556c36b 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java @@ -611,6 +611,100 @@ public void testJoinClusterWithRemoteStateNodeJoiningRemoteStateCluster() { JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()); } + public void testJoinRemotePublicationClusterWithNonRemoteNodes() { + final DiscoveryNode existingNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + remotePublicationNodeAttributes(), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT) + .nodes(DiscoveryNodes.builder().add(existingNode).localNodeId(existingNode.getId()).build()) + .build(); + + DiscoveryNode joiningNode = newDiscoveryNode(new HashMap<>()); + JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()); + } + + public void testJoinRemotePublicationCluster() { + final DiscoveryNode existingNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + remotePublicationNodeAttributes(), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT) + .nodes(DiscoveryNodes.builder().add(existingNode).localNodeId(existingNode.getId()).build()) + .build(); + + DiscoveryNode joiningNode = newDiscoveryNode(remotePublicationNodeAttributes()); + JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()); + } + + public void testJoinRemotePubClusterWithRemoteStoreNodes() { + final DiscoveryNode existingNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + remotePublicationNodeAttributes(), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT) + .nodes(DiscoveryNodes.builder().add(existingNode).localNodeId(existingNode.getId()).build()) + .build(); + + Map newNodeAttributes = new HashMap<>(); + newNodeAttributes.putAll(remoteStateNodeAttributes(CLUSTER_STATE_REPO)); + newNodeAttributes.putAll(remoteRoutingTableAttributes(ROUTING_TABLE_REPO)); + 
newNodeAttributes.putAll(remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO)); + + DiscoveryNode joiningNode = newDiscoveryNode(newNodeAttributes); + Exception e = assertThrows( + IllegalStateException.class, + () -> JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()) + ); + assertTrue(e.getMessage().equals("a remote store node [" + joiningNode + "] is trying to join a non remote store cluster")); + } + + public void testPreventJoinRemotePublicationClusterWithIncompatibleAttributes() { + Map existingNodeAttributes = remotePublicationNodeAttributes(); + Map remoteStoreNodeAttributes = remotePublicationNodeAttributes(); + final DiscoveryNode existingNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + existingNodeAttributes, + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT) + .nodes(DiscoveryNodes.builder().add(existingNode).localNodeId(existingNode.getId()).build()) + .build(); + + for (Map.Entry nodeAttribute : existingNodeAttributes.entrySet()) { + remoteStoreNodeAttributes.put(nodeAttribute.getKey(), null); + DiscoveryNode joiningNode = newDiscoveryNode(remoteStoreNodeAttributes); + Exception e = assertThrows( + IllegalStateException.class, + () -> JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()) + ); + assertTrue( + e.getMessage().equals("joining node [" + joiningNode + "] doesn't have the node attribute [" + nodeAttribute.getKey() + "]") + || e.getMessage() + .equals( + "a remote store node [" + + joiningNode + + "] is trying to join a remote store cluster with incompatible node attributes in comparison with existing node [" + + currentState.getNodes().getNodes().values().stream().findFirst().get() + + "]" + ) + ); + + remoteStoreNodeAttributes.put(nodeAttribute.getKey(), nodeAttribute.getValue()); + } + } + public void testPreventJoinClusterWithRemoteStateNodeJoiningRemoteStoreCluster() { Map existingNodeAttributes = remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO); final DiscoveryNode existingNode = new DiscoveryNode( @@ -628,16 +722,7 @@ public void testPreventJoinClusterWithRemoteStateNodeJoiningRemoteStoreCluster() IllegalStateException.class, () -> JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()) ); - assertTrue( - e.getMessage() - .equals( - "a remote store node [" - + joiningNode - + "] is trying to join a remote store cluster with incompatible node attributes in comparison with existing node [" - + currentState.getNodes().getNodes().values().stream().findFirst().get() - + "]" - ) - ); + assertTrue(e.getMessage().equals("a non remote store node [" + joiningNode + "] is trying to join a remote store cluster")); } public void testPreventJoinClusterWithRemoteStoreNodeJoiningRemoteStateCluster() { @@ -657,16 +742,7 @@ public void testPreventJoinClusterWithRemoteStoreNodeJoiningRemoteStateCluster() IllegalStateException.class, () -> JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()) ); - assertTrue( - e.getMessage() - .equals( - "a remote store node [" - + joiningNode - + "] is trying to join a remote store cluster with incompatible node attributes in comparison with existing node [" - + currentState.getNodes().getNodes().values().stream().findFirst().get() - + "]" - ) - ); + assertTrue(e.getMessage().equals("a remote store node [" + joiningNode + 
"] is trying to join a non remote store cluster")); } public void testUpdatesClusterStateWithSingleNodeCluster() throws Exception { @@ -1077,6 +1153,39 @@ public void testRemoteRoutingTableNodeJoinNodeWithRemoteAndRoutingRepoDifference JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()); } + public void testJoinRemoteStoreClusterWithRemotePublicationNodeInMixedMode() { + final DiscoveryNode remoteStoreNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + remoteStoreNodeAttributes(SEGMENT_REPO, TRANSLOG_REPO), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + + final DiscoveryNode nonRemoteStoreNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + new HashMap<>(), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + + final Settings settings = Settings.builder() + .put(MIGRATION_DIRECTION_SETTING.getKey(), RemoteStoreNodeService.Direction.REMOTE_STORE) + .put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), "mixed") + .build(); + final Settings nodeSettings = Settings.builder().put(REMOTE_STORE_MIGRATION_EXPERIMENTAL, "true").build(); + FeatureFlags.initializeFeatureFlags(nodeSettings); + Metadata metadata = Metadata.builder().persistentSettings(settings).build(); + ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT) + .nodes(DiscoveryNodes.builder().add(remoteStoreNode).add(nonRemoteStoreNode).localNodeId(remoteStoreNode.getId()).build()) + .metadata(metadata) + .build(); + + DiscoveryNode joiningNode = newDiscoveryNode(remotePublicationNodeAttributes()); + JoinTaskExecutor.ensureNodesCompatibility(joiningNode, currentState.getNodes(), currentState.metadata()); + } + private void validateRepositoryMetadata(ClusterState updatedState, DiscoveryNode existingNode, int expectedRepositories) throws Exception { @@ -1115,6 +1224,7 @@ private DiscoveryNode newDiscoveryNode(Map attributes) { } private static final String SEGMENT_REPO = "segment-repo"; + private static final String TRANSLOG_REPO = "translog-repo"; private static final String CLUSTER_STATE_REPO = "cluster-state-repo"; private static final String COMMON_REPO = "remote-repo"; @@ -1161,6 +1271,13 @@ private Map remoteStoreNodeAttributes(String segmentRepoName, St }; } + private Map remotePublicationNodeAttributes() { + Map existingNodeAttributes = new HashMap<>(); + existingNodeAttributes.putAll(remoteStateNodeAttributes(CLUSTER_STATE_REPO)); + existingNodeAttributes.putAll(remoteRoutingTableAttributes(ROUTING_TABLE_REPO)); + return existingNodeAttributes; + } + private Map remoteStateNodeAttributes(String clusterStateRepo) { String clusterStateRepositoryTypeAttributeKey = String.format( Locale.getDefault(), diff --git a/server/src/test/java/org/opensearch/cluster/coordination/NoOpClusterApplier.java b/server/src/test/java/org/opensearch/cluster/coordination/NoOpClusterApplier.java index 9b865ace3b082..6e0d86540cf99 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/NoOpClusterApplier.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/NoOpClusterApplier.java @@ -42,6 +42,11 @@ public void setInitialState(ClusterState initialState) { } + @Override + public void setPreCommitState(ClusterState clusterState) { + + } + @Override public void onNewClusterState(String source, Supplier clusterStateSupplier, ClusterApplyListener listener) { listener.onSuccess(source); diff --git 
a/server/src/test/java/org/opensearch/cluster/coordination/PublicationTransportHandlerTests.java b/server/src/test/java/org/opensearch/cluster/coordination/PublicationTransportHandlerTests.java index 08e3f47100d8c..266928c919fe2 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/PublicationTransportHandlerTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/PublicationTransportHandlerTests.java @@ -37,33 +37,50 @@ import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.Diff; import org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfiguration; +import org.opensearch.cluster.coordination.PersistedStateRegistry.PersistedStateType; +import org.opensearch.cluster.coordination.PublicationTransportHandler.PublicationContext; +import org.opensearch.cluster.coordination.PublicationTransportHandler.RemotePublicationContext; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodeRole; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; +import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.gateway.GatewayMetaState.RemotePersistedState; import org.opensearch.gateway.remote.ClusterMetadataManifest; +import org.opensearch.gateway.remote.ClusterStateDiffManifest; import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.node.Node; import org.opensearch.telemetry.tracing.noop.NoopTracer; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.transport.CapturingTransport; +import org.opensearch.test.transport.CapturingTransport.CapturedRequest; import org.opensearch.transport.TransportService; import org.junit.Before; import java.io.IOException; import java.util.Collections; +import java.util.Map; import java.util.Optional; import java.util.function.Function; import org.mockito.Mockito; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.not; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; import static org.mockito.Mockito.when; public class PublicationTransportHandlerTests extends OpenSearchTestCase { @@ -80,6 +97,8 @@ public class PublicationTransportHandlerTests extends OpenSearchTestCase { private DiscoveryNode localNode; private DiscoveryNode secondNode; + private CapturingTransport capturingTransport; + @Before public void setup() { deterministicTaskQueue = new DeterministicTaskQueue( @@ -89,7 +108,8 @@ public void setup() { final ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); localNode = new DiscoveryNode(LOCAL_NODE_ID, buildNewFakeTransportAddress(), Version.CURRENT); secondNode = new 
DiscoveryNode("secondNode", buildNewFakeTransportAddress(), Version.CURRENT); - transportService = new CapturingTransport().createTransportService( + capturingTransport = new CapturingTransport(); + transportService = capturingTransport.createTransportService( Settings.EMPTY, deterministicTaskQueue.getThreadPool(), TransportService.NOOP_TRANSPORT_INTERCEPTOR, @@ -160,7 +180,9 @@ public void testHandleIncomingRemotePublishRequestWhenNoCurrentPublishRequest() () -> handler.handleIncomingRemotePublishRequest(remotePublishRequest) ); assertThat(e.getMessage(), containsString("publication to self failed")); - Mockito.verifyNoInteractions(remoteClusterStateService); + verify(remoteClusterStateService, times(0)).fullDownloadFailed(); + verify(remoteClusterStateService, times(1)).diffDownloadFailed(); + verifyNoMoreInteractions(remoteClusterStateService); } public void testHandleIncomingRemotePublishRequestWhenTermMismatch() { @@ -185,7 +207,9 @@ public void testHandleIncomingRemotePublishRequestWhenTermMismatch() { () -> handler.handleIncomingRemotePublishRequest(remotePublishRequest) ); assertThat(e.getMessage(), containsString("publication to self failed")); - Mockito.verifyNoInteractions(remoteClusterStateService); + verify(remoteClusterStateService, times(0)).fullDownloadFailed(); + verify(remoteClusterStateService, times(1)).diffDownloadFailed(); + verifyNoMoreInteractions(remoteClusterStateService); } public void testHandleIncomingRemotePublishRequestWhenVersionMismatch() { @@ -210,7 +234,9 @@ public void testHandleIncomingRemotePublishRequestWhenVersionMismatch() { () -> handler.handleIncomingRemotePublishRequest(remotePublishRequest) ); assertThat(e.getMessage(), containsString("publication to self failed")); - Mockito.verifyNoInteractions(remoteClusterStateService); + verify(remoteClusterStateService, times(1)).diffDownloadFailed(); + verify(remoteClusterStateService, times(0)).fullDownloadFailed(); + verifyNoMoreInteractions(remoteClusterStateService); } public void testHandleIncomingRemotePublishRequestForLocalNode() throws IOException { @@ -235,6 +261,82 @@ public void testHandleIncomingRemotePublishRequestForLocalNode() throws IOExcept Mockito.verifyNoInteractions(remoteClusterStateService); } + public void testDownloadRemotePersistedFullStateFailedStats() throws IOException { + RemoteClusterStateService remoteClusterStateService = mock(RemoteClusterStateService.class); + PersistedStateStats remoteFullDownloadStats = new PersistedStateStats("dummy_full_stats"); + PersistedStateStats remoteDiffDownloadStats = new PersistedStateStats("dummy_diff_stats"); + when(remoteClusterStateService.getFullDownloadStats()).thenReturn(remoteFullDownloadStats); + when(remoteClusterStateService.getDiffDownloadStats()).thenReturn(remoteDiffDownloadStats); + + doAnswer((i) -> { + remoteFullDownloadStats.stateFailed(); + return null; + }).when(remoteClusterStateService).fullDownloadFailed(); + + doAnswer((i) -> { + remoteDiffDownloadStats.stateFailed(); + return null; + }).when(remoteClusterStateService).diffDownloadFailed(); + + PublishWithJoinResponse expectedPublishResponse = new PublishWithJoinResponse(new PublishResponse(TERM, VERSION), Optional.empty()); + Function handlePublishRequest = p -> expectedPublishResponse; + final PublicationTransportHandler handler = getPublicationTransportHandler(handlePublishRequest, remoteClusterStateService); + RemotePublishRequest remotePublishRequest = new RemotePublishRequest( + secondNode, + TERM, + VERSION, + CLUSTER_NAME, + CLUSTER_UUID, + MANIFEST_FILE + ); + 
ClusterState clusterState = buildClusterState(TERM, VERSION); + PublishRequest publishRequest = new PublishRequest(clusterState); + handler.setCurrentPublishRequestToSelf(publishRequest); + + assertThrows(IllegalStateException.class, () -> handler.handleIncomingRemotePublishRequest(remotePublishRequest)); + assertEquals(1, remoteClusterStateService.getDiffDownloadStats().getFailedCount()); + assertEquals(0, remoteClusterStateService.getFullDownloadStats().getFailedCount()); + } + + public void testDownloadRemotePersistedDiffStateFailedStats() throws IOException { + RemoteClusterStateService remoteClusterStateService = mock(RemoteClusterStateService.class); + PersistedStateStats remoteDiffDownloadStats = new PersistedStateStats("dummy_stats"); + when(remoteClusterStateService.getDiffDownloadStats()).thenReturn(remoteDiffDownloadStats); + + ClusterMetadataManifest metadataManifest = new ClusterMetadataManifest.Builder().diffManifest( + new ClusterStateDiffManifest.Builder().fromStateUUID("state-uuid").build() + ).build(); + when(remoteClusterStateService.getClusterMetadataManifestByFileName(any(), any())).thenReturn(metadataManifest); + + doAnswer((i) -> { + remoteDiffDownloadStats.stateFailed(); + return null; + }).when(remoteClusterStateService).diffDownloadFailed(); + + PublishWithJoinResponse expectedPublishResponse = new PublishWithJoinResponse(new PublishResponse(TERM, VERSION), Optional.empty()); + Function handlePublishRequest = p -> expectedPublishResponse; + final PublicationTransportHandler handler = getPublicationTransportHandler(handlePublishRequest, remoteClusterStateService); + ClusterState clusterState = mock(ClusterState.class); + when(clusterState.nodes()).thenReturn(mock(DiscoveryNodes.class)); + handler.setLastSeenClusterState(clusterState); + when(clusterState.stateUUID()).thenReturn("state-uuid"); + RemotePublishRequest remotePublishRequest = new RemotePublishRequest( + secondNode, + TERM, + VERSION, + CLUSTER_NAME, + CLUSTER_UUID, + MANIFEST_FILE + ); + clusterState = buildClusterState(TERM, VERSION); + PublishRequest publishRequest = new PublishRequest(clusterState); + handler.setCurrentPublishRequestToSelf(publishRequest); + + assertThrows(NullPointerException.class, () -> handler.handleIncomingRemotePublishRequest(remotePublishRequest)); + assertEquals(1, remoteClusterStateService.getDiffDownloadStats().getFailedCount()); + + } + public void testHandleIncomingRemotePublishRequestWhenManifestNotFound() throws IOException { RemoteClusterStateService remoteClusterStateService = mock(RemoteClusterStateService.class); @@ -288,6 +390,74 @@ public void testHandleIncomingRemotePublishRequestWhenNoLastSeenState() throws I Mockito.verify(remoteClusterStateService, times(1)).getClusterMetadataManifestByFileName(Mockito.any(), Mockito.any()); } + public void testNewPublicationContext() { + RemoteClusterStateService remoteClusterStateService = mock(RemoteClusterStateService.class); + PublishWithJoinResponse expectedPublishResponse = new PublishWithJoinResponse(new PublishResponse(TERM, VERSION), Optional.empty()); + Function handlePublishRequest = p -> expectedPublishResponse; + final PublicationTransportHandler handler = getPublicationTransportHandler(handlePublishRequest, remoteClusterStateService); + + // Remote publication disabled + ClusterChangedEvent event1 = new ClusterChangedEvent( + "source1", + buildClusterState(TERM, VERSION + 1), + buildClusterState(TERM, VERSION) + ); + PublicationContext publicationContext = handler.newPublicationContext(event1, false, new 
PersistedStateRegistry()); + assertNotNull(publicationContext); + assertThat(publicationContext, not(instanceOf(RemotePublicationContext.class))); + + // Remote publication enabled but some nodes are remote enabled and some remote disabled + ClusterChangedEvent event2 = new ClusterChangedEvent( + "source2", + buildClusterStateWithMixedNodes(TERM, VERSION + 1), + buildClusterState(TERM, VERSION) + ); + PublicationContext publicationContext2 = handler.newPublicationContext(event2, true, new PersistedStateRegistry()); + assertNotNull(publicationContext2); + assertThat(publicationContext2, not(instanceOf(RemotePublicationContext.class))); + + // Remote publication enabled and all nodes are remote enabled + ClusterChangedEvent event3 = new ClusterChangedEvent( + "source3", + buildClusterStateWithRemoteNodes(TERM, VERSION + 1), + buildClusterState(TERM, VERSION) + ); + PublicationContext publicationContext3 = handler.newPublicationContext(event3, true, new PersistedStateRegistry()); + assertNotNull(publicationContext3); + assertThat(publicationContext3, instanceOf(RemotePublicationContext.class)); + } + + public void testRemotePublicationContext() throws Exception { + ClusterChangedEvent event = new ClusterChangedEvent( + "source3", + buildClusterStateWithRemoteNodes(TERM, VERSION + 1), + buildClusterState(TERM, VERSION) + ); + RemoteClusterStateService remoteClusterStateService = mock(RemoteClusterStateService.class); + PublishWithJoinResponse expectedPublishResponse = new PublishWithJoinResponse(new PublishResponse(TERM, VERSION), Optional.empty()); + Function handlePublishRequest = p -> expectedPublishResponse; + final PublicationTransportHandler handler = getPublicationTransportHandler(handlePublishRequest, remoteClusterStateService); + PersistedStateRegistry persistedStateRegistry = new PersistedStateRegistry(); + RemotePersistedState remotePersistedState = mock(RemotePersistedState.class); + when(remotePersistedState.getLastUploadedManifestFile()).thenReturn("/path/to/manifest"); + persistedStateRegistry.addPersistedState(PersistedStateType.REMOTE, remotePersistedState); + + PublicationContext publicationContext3 = handler.newPublicationContext(event, true, persistedStateRegistry); + ActionListener listener = new ActionListener<>() { + @Override + public void onResponse(PublishWithJoinResponse publishWithJoinResponse) {} + + @Override + public void onFailure(Exception e) {} + }; + DiscoveryNode discoveryNode = new DiscoveryNode("node1", buildNewFakeTransportAddress(), Version.CURRENT); + publicationContext3.sendClusterState(discoveryNode, listener); + CapturedRequest[] capturedRequests1 = capturingTransport.getCapturedRequestsAndClear(); + assertThat(capturedRequests1.length, equalTo(1)); + CapturedRequest capturedRequest1 = capturedRequests1[0]; + assertThat(capturedRequest1.request, instanceOf(RemotePublishRequest.class)); + } + private PublicationTransportHandler getPublicationTransportHandler( Function handlePublishRequest, RemoteClusterStateService remoteClusterStateService @@ -310,4 +480,54 @@ private ClusterState buildClusterState(long term, long version) { DiscoveryNodes nodes = DiscoveryNodes.builder().add(localNode).add(secondNode).localNodeId(LOCAL_NODE_ID).build(); return ClusterState.builder(ClusterState.EMPTY_STATE).version(version).metadata(newMetadata).nodes(nodes).build(); } + + private ClusterState buildClusterStateWithMixedNodes(long term, long version) { + CoordinationMetadata.Builder coordMetadataBuilder = CoordinationMetadata.builder().term(term); + Metadata 
newMetadata = Metadata.builder().coordinationMetadata(coordMetadataBuilder.build()).build(); + DiscoveryNode remoteNode = new DiscoveryNode( + "remoteNode", + buildNewFakeTransportAddress(), + Map.of( + REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, + "remote_state_repo", + REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, + "remote_routing_repo" + ), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + DiscoveryNodes nodes = DiscoveryNodes.builder().add(localNode).add(remoteNode).localNodeId(LOCAL_NODE_ID).build(); + return ClusterState.builder(ClusterState.EMPTY_STATE).version(version).metadata(newMetadata).nodes(nodes).build(); + } + + private ClusterState buildClusterStateWithRemoteNodes(long term, long version) { + CoordinationMetadata.Builder coordMetadataBuilder = CoordinationMetadata.builder().term(term); + Metadata newMetadata = Metadata.builder().coordinationMetadata(coordMetadataBuilder.build()).build(); + DiscoveryNode remoteNode1 = new DiscoveryNode( + "remoteNode1", + buildNewFakeTransportAddress(), + Map.of( + REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, + "remote_state_repo", + REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, + "remote_routing_repo" + ), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + DiscoveryNode remoteNode2 = new DiscoveryNode( + "remoteNode2", + buildNewFakeTransportAddress(), + Map.of( + REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, + "remote_state_repo", + REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, + "remote_routing_repo" + ), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + DiscoveryNodes nodes = DiscoveryNodes.builder().add(remoteNode1).add(remoteNode2).localNodeId(remoteNode1.getId()).build(); + return ClusterState.builder(ClusterState.EMPTY_STATE).version(version).metadata(newMetadata).nodes(nodes).build(); + } } diff --git a/server/src/test/java/org/opensearch/cluster/health/ClusterIndexHealthTests.java b/server/src/test/java/org/opensearch/cluster/health/ClusterIndexHealthTests.java index 557f7f74db7a7..0d542c211ac76 100644 --- a/server/src/test/java/org/opensearch/cluster/health/ClusterIndexHealthTests.java +++ b/server/src/test/java/org/opensearch/cluster/health/ClusterIndexHealthTests.java @@ -69,13 +69,20 @@ public void testClusterIndexHealth() { IndexRoutingTable indexRoutingTable = routingTableGenerator.genIndexRoutingTable(indexMetadata, counter); ClusterIndexHealth indexHealth = new ClusterIndexHealth(indexMetadata, indexRoutingTable); - assertIndexHealth(indexHealth, counter, indexMetadata); + assertIndexHealth(indexHealth, counter, indexMetadata, ClusterHealthRequest.Level.SHARDS); + + indexHealth = new ClusterIndexHealth(indexMetadata, indexRoutingTable, ClusterHealthRequest.Level.INDICES); + assertIndexHealth(indexHealth, counter, indexMetadata, ClusterHealthRequest.Level.INDICES); + + indexHealth = new ClusterIndexHealth(indexMetadata, indexRoutingTable, ClusterHealthRequest.Level.SHARDS); + assertIndexHealth(indexHealth, counter, indexMetadata, ClusterHealthRequest.Level.SHARDS); } private void assertIndexHealth( ClusterIndexHealth indexHealth, RoutingTableGenerator.ShardCounter counter, - IndexMetadata indexMetadata + IndexMetadata indexMetadata, + ClusterHealthRequest.Level clusterHealthRequestLevel ) { assertThat(indexHealth.getStatus(), equalTo(counter.status())); assertThat(indexHealth.getNumberOfShards(), equalTo(indexMetadata.getNumberOfShards())); @@ -84,13 +91,17 @@ private void assertIndexHealth( 
assertThat(indexHealth.getRelocatingShards(), equalTo(counter.relocating)); assertThat(indexHealth.getInitializingShards(), equalTo(counter.initializing)); assertThat(indexHealth.getUnassignedShards(), equalTo(counter.unassigned)); - assertThat(indexHealth.getShards().size(), equalTo(indexMetadata.getNumberOfShards())); - int totalShards = 0; - for (ClusterShardHealth shardHealth : indexHealth.getShards().values()) { - totalShards += shardHealth.getActiveShards() + shardHealth.getInitializingShards() + shardHealth.getUnassignedShards(); - } + if (ClusterHealthRequest.Level.SHARDS.equals(clusterHealthRequestLevel)) { + assertThat(indexHealth.getShards().size(), equalTo(indexMetadata.getNumberOfShards())); + int totalShards = 0; + for (ClusterShardHealth shardHealth : indexHealth.getShards().values()) { + totalShards += shardHealth.getActiveShards() + shardHealth.getInitializingShards() + shardHealth.getUnassignedShards(); + } - assertThat(totalShards, equalTo(indexMetadata.getNumberOfShards() * (1 + indexMetadata.getNumberOfReplicas()))); + assertThat(totalShards, equalTo(indexMetadata.getNumberOfShards() * (1 + indexMetadata.getNumberOfReplicas()))); + } else { + assertTrue(indexHealth.getShards().isEmpty()); + } } @Override diff --git a/server/src/test/java/org/opensearch/cluster/health/ClusterShardHealthTests.java b/server/src/test/java/org/opensearch/cluster/health/ClusterShardHealthTests.java index 734ad4058630d..01319867bcd86 100644 --- a/server/src/test/java/org/opensearch/cluster/health/ClusterShardHealthTests.java +++ b/server/src/test/java/org/opensearch/cluster/health/ClusterShardHealthTests.java @@ -31,17 +31,139 @@ package org.opensearch.cluster.health; +import org.opensearch.cluster.routing.IndexShardRoutingTable; +import org.opensearch.cluster.routing.RecoverySource; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.ShardRoutingState; +import org.opensearch.cluster.routing.TestShardRouting; +import org.opensearch.cluster.routing.UnassignedInfo; import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.index.Index; +import org.opensearch.core.index.shard.ShardId; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.test.AbstractSerializingTestCase; import java.io.IOException; import java.util.Arrays; +import java.util.Collections; +import java.util.UUID; import java.util.function.Predicate; import java.util.stream.Collectors; public class ClusterShardHealthTests extends AbstractSerializingTestCase<ClusterShardHealth> { + public void testClusterShardGreenHealth() { + String indexName = "test"; + int shardID = 0; + Index index = new Index(indexName, UUID.randomUUID().toString()); + ShardId shardId = new ShardId(index, shardID); + IndexShardRoutingTable.Builder indexShardRoutingBuilder = new IndexShardRoutingTable.Builder(shardId); + indexShardRoutingBuilder.addShard( + TestShardRouting.newShardRouting(indexName, shardID, "node_0", null, true, ShardRoutingState.STARTED) + ); + indexShardRoutingBuilder.addShard( + TestShardRouting.newShardRouting(indexName, shardID, "node_1", null, false, ShardRoutingState.STARTED) + ); + IndexShardRoutingTable indexShardRoutingTable = indexShardRoutingBuilder.build(); + ClusterShardHealth clusterShardHealth = new ClusterShardHealth(shardID, indexShardRoutingTable); + assertEquals(2, clusterShardHealth.getActiveShards()); + assertEquals(0, clusterShardHealth.getInitializingShards()); + assertEquals(0, clusterShardHealth.getRelocatingShards()); + assertEquals(0, 
clusterShardHealth.getUnassignedShards()); + assertEquals(0, clusterShardHealth.getDelayedUnassignedShards()); + assertEquals(ClusterHealthStatus.GREEN, clusterShardHealth.getStatus()); + assertTrue(clusterShardHealth.isPrimaryActive()); + } + + public void testClusterShardYellowHealth() { + String indexName = "test"; + int shardID = 0; + Index index = new Index(indexName, UUID.randomUUID().toString()); + ShardId shardId = new ShardId(index, shardID); + IndexShardRoutingTable.Builder indexShardRoutingBuilder = new IndexShardRoutingTable.Builder(shardId); + indexShardRoutingBuilder.addShard( + TestShardRouting.newShardRouting(indexName, shardID, "node_0", null, true, ShardRoutingState.STARTED) + ); + indexShardRoutingBuilder.addShard( + TestShardRouting.newShardRouting(indexName, shardID, "node_1", "node_5", false, ShardRoutingState.RELOCATING) + ); + indexShardRoutingBuilder.addShard( + TestShardRouting.newShardRouting(indexName, shardID, "node_2", null, false, ShardRoutingState.INITIALIZING) + ); + indexShardRoutingBuilder.addShard( + TestShardRouting.newShardRouting(indexName, shardID, null, null, false, ShardRoutingState.UNASSIGNED) + ); + indexShardRoutingBuilder.addShard( + ShardRouting.newUnassigned( + shardId, + false, + RecoverySource.PeerRecoverySource.INSTANCE, + new UnassignedInfo( + UnassignedInfo.Reason.NODE_LEFT, + "node_4 left", + null, + 0, + 0, + 0, + true, + UnassignedInfo.AllocationStatus.NO_ATTEMPT, + Collections.emptySet() + ) + ) + ); + IndexShardRoutingTable indexShardRoutingTable = indexShardRoutingBuilder.build(); + ClusterShardHealth clusterShardHealth = new ClusterShardHealth(shardID, indexShardRoutingTable); + assertEquals(2, clusterShardHealth.getActiveShards()); + assertEquals(1, clusterShardHealth.getInitializingShards()); + assertEquals(1, clusterShardHealth.getRelocatingShards()); + assertEquals(2, clusterShardHealth.getUnassignedShards()); + assertEquals(1, clusterShardHealth.getDelayedUnassignedShards()); + assertEquals(ClusterHealthStatus.YELLOW, clusterShardHealth.getStatus()); + assertTrue(clusterShardHealth.isPrimaryActive()); + } + + public void testClusterShardRedHealth() { + String indexName = "test"; + int shardID = 0; + Index index = new Index(indexName, UUID.randomUUID().toString()); + ShardId shardId = new ShardId(index, shardID); + IndexShardRoutingTable.Builder indexShardRoutingBuilder = new IndexShardRoutingTable.Builder(shardId); + indexShardRoutingBuilder.addShard( + ShardRouting.newUnassigned( + shardId, + true, + RecoverySource.PeerRecoverySource.INSTANCE, + new UnassignedInfo( + UnassignedInfo.Reason.NODE_LEFT, + "node_4 left", + null, + 0, + 0, + 0, + false, + UnassignedInfo.AllocationStatus.DECIDERS_NO, + Collections.emptySet() + ) + ) + ); + indexShardRoutingBuilder.addShard( + TestShardRouting.newShardRouting(indexName, shardID, null, null, false, ShardRoutingState.UNASSIGNED) + ); + IndexShardRoutingTable indexShardRoutingTable = indexShardRoutingBuilder.build(); + ClusterShardHealth clusterShardHealth = new ClusterShardHealth(shardID, indexShardRoutingTable); + assertEquals(0, clusterShardHealth.getActiveShards()); + assertEquals(0, clusterShardHealth.getInitializingShards()); + assertEquals(0, clusterShardHealth.getRelocatingShards()); + assertEquals(2, clusterShardHealth.getUnassignedShards()); + assertEquals(0, clusterShardHealth.getDelayedUnassignedShards()); + assertEquals(ClusterHealthStatus.RED, clusterShardHealth.getStatus()); + assertFalse(clusterShardHealth.isPrimaryActive()); + } + + public void 
testShardRoutingNullCheck() { + assertThrows(AssertionError.class, () -> ClusterShardHealth.getShardHealth(null, 0, 0)); + } + @Override protected ClusterShardHealth doParseInstance(XContentParser parser) throws IOException { return ClusterShardHealth.fromXContent(parser); diff --git a/server/src/test/java/org/opensearch/cluster/health/ClusterStateHealthTests.java b/server/src/test/java/org/opensearch/cluster/health/ClusterStateHealthTests.java index 795dc8a624e38..67e2fb59b0d74 100644 --- a/server/src/test/java/org/opensearch/cluster/health/ClusterStateHealthTests.java +++ b/server/src/test/java/org/opensearch/cluster/health/ClusterStateHealthTests.java @@ -73,6 +73,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -83,6 +84,9 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; +import static org.opensearch.action.admin.cluster.health.ClusterHealthRequest.Level.CLUSTER; +import static org.opensearch.action.admin.cluster.health.ClusterHealthRequest.Level.INDICES; +import static org.opensearch.action.admin.cluster.health.ClusterHealthRequest.Level.SHARDS; import static org.opensearch.test.ClusterServiceUtils.createClusterService; import static org.opensearch.test.ClusterServiceUtils.setState; import static org.hamcrest.CoreMatchers.allOf; @@ -225,7 +229,13 @@ public void testClusterHealth() throws IOException { clusterStateHealth.hasDiscoveredClusterManager(), equalTo(clusterService.state().nodes().getClusterManagerNodeId() != null) ); - assertClusterHealth(clusterStateHealth, counter); + assertClusterHealth(clusterStateHealth, counter, SHARDS); + clusterStateHealth = new ClusterStateHealth(clusterState, concreteIndices, ClusterHealthRequest.Level.CLUSTER); + assertClusterHealth(clusterStateHealth, counter, CLUSTER); + clusterStateHealth = new ClusterStateHealth(clusterState, concreteIndices, INDICES); + assertClusterHealth(clusterStateHealth, counter, INDICES); + clusterStateHealth = new ClusterStateHealth(clusterState, concreteIndices, SHARDS); + assertClusterHealth(clusterStateHealth, counter, SHARDS); } public void testClusterHealthOnIndexCreation() { @@ -586,7 +596,11 @@ private boolean primaryInactiveDueToRecovery(final String indexName, final Clust return true; } - private void assertClusterHealth(ClusterStateHealth clusterStateHealth, RoutingTableGenerator.ShardCounter counter) { + private void assertClusterHealth( + ClusterStateHealth clusterStateHealth, + RoutingTableGenerator.ShardCounter counter, + ClusterHealthRequest.Level level + ) { assertThat(clusterStateHealth.getStatus(), equalTo(counter.status())); assertThat(clusterStateHealth.getActiveShards(), equalTo(counter.active)); assertThat(clusterStateHealth.getActivePrimaryShards(), equalTo(counter.primaryActive)); @@ -594,5 +608,16 @@ private void assertClusterHealth(ClusterStateHealth clusterStateHealth, RoutingT assertThat(clusterStateHealth.getRelocatingShards(), equalTo(counter.relocating)); assertThat(clusterStateHealth.getUnassignedShards(), equalTo(counter.unassigned)); assertThat(clusterStateHealth.getActiveShardsPercent(), is(allOf(greaterThanOrEqualTo(0.0), lessThanOrEqualTo(100.0)))); + if (level.equals(INDICES) || level.equals(SHARDS)) { + assertTrue(clusterStateHealth.getIndices().size() > 0); + if (level.equals(SHARDS)) { + Collection clusterIndexHealths = clusterStateHealth.getIndices().values(); + for (ClusterIndexHealth clusterIndexHealth : 
clusterIndexHealths) { + assertFalse(clusterIndexHealth.getShards().isEmpty()); + } + } + } else { + assertTrue(clusterStateHealth.getIndices().isEmpty()); + } } } diff --git a/server/src/test/java/org/opensearch/cluster/metadata/IndexMetadataTests.java b/server/src/test/java/org/opensearch/cluster/metadata/IndexMetadataTests.java index 393a652952771..92988ab7e9cba 100644 --- a/server/src/test/java/org/opensearch/cluster/metadata/IndexMetadataTests.java +++ b/server/src/test/java/org/opensearch/cluster/metadata/IndexMetadataTests.java @@ -43,6 +43,7 @@ import org.opensearch.common.xcontent.json.JsonXContent; import org.opensearch.core.common.Strings; import org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.core.common.io.stream.BufferedChecksumStreamOutput; import org.opensearch.core.common.io.stream.NamedWriteableAwareStreamInput; import org.opensearch.core.common.io.stream.NamedWriteableRegistry; import org.opensearch.core.common.io.stream.StreamInput; @@ -118,6 +119,7 @@ public void testIndexMetadataSerialization() throws IOException { randomNonNegativeLong() ) ) + .context(new Context(randomAlphaOfLength(5))) .build(); assertEquals(system, metadata.isSystem()); @@ -145,6 +147,7 @@ public void testIndexMetadataSerialization() throws IOException { assertEquals(metadata.getRoutingFactor(), fromXContentMeta.getRoutingFactor()); assertEquals(metadata.primaryTerm(0), fromXContentMeta.primaryTerm(0)); assertEquals(metadata.isSystem(), fromXContentMeta.isSystem()); + assertEquals(metadata.context(), fromXContentMeta.context()); final Map expectedCustom = Map.of("my_custom", new DiffableStringMap(customMap)); assertEquals(metadata.getCustomData(), expectedCustom); assertEquals(metadata.getCustomData(), fromXContentMeta.getCustomData()); @@ -167,9 +170,116 @@ public void testIndexMetadataSerialization() throws IOException { assertEquals(deserialized.getCustomData(), expectedCustom); assertEquals(metadata.getCustomData(), deserialized.getCustomData()); assertEquals(metadata.isSystem(), deserialized.isSystem()); + assertEquals(metadata.context(), deserialized.context()); } } + public void testWriteVerifiableTo() throws IOException { + int numberOfReplicas = randomIntBetween(0, 10); + final boolean system = randomBoolean(); + Map customMap = new HashMap<>(); + customMap.put(randomAlphaOfLength(5), randomAlphaOfLength(10)); + customMap.put(randomAlphaOfLength(10), randomAlphaOfLength(15)); + + RolloverInfo info1 = new RolloverInfo( + randomAlphaOfLength(5), + Arrays.asList( + new MaxAgeCondition(TimeValue.timeValueMillis(randomNonNegativeLong())), + new MaxSizeCondition(new ByteSizeValue(randomNonNegativeLong())), + new MaxDocsCondition(randomNonNegativeLong()) + ), + randomNonNegativeLong() + ); + RolloverInfo info2 = new RolloverInfo( + randomAlphaOfLength(5), + Arrays.asList( + new MaxAgeCondition(TimeValue.timeValueMillis(randomNonNegativeLong())), + new MaxSizeCondition(new ByteSizeValue(randomNonNegativeLong())), + new MaxDocsCondition(randomNonNegativeLong()) + ), + randomNonNegativeLong() + ); + String mappings = " {\n" + + " \"_doc\": {\n" + + " \"properties\": {\n" + + " \"actiongroups\": {\n" + + " \"type\": \"text\",\n" + + " \"fields\": {\n" + + " \"keyword\": {\n" + + " \"type\": \"keyword\",\n" + + " \"ignore_above\": 256\n" + + " }\n" + + " }\n" + + " },\n" + + " \"allowlist\": {\n" + + " \"type\": \"text\",\n" + + " \"fields\": {\n" + + " \"keyword\": {\n" + + " \"type\": \"keyword\",\n" + + " \"ignore_above\": 256\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" 
+ + " }\n" + + " }"; + IndexMetadata metadata1 = IndexMetadata.builder("foo") + .settings( + Settings.builder() + .put("index.version.created", 1) + .put("index.number_of_shards", 4) + .put("index.number_of_replicas", numberOfReplicas) + .build() + ) + .creationDate(randomLong()) + .primaryTerm(0, 2) + .primaryTerm(1, 3) + .setRoutingNumShards(32) + .system(system) + .putCustom("my_custom", customMap) + .putCustom("my_custom2", customMap) + .putAlias(AliasMetadata.builder("alias-1").routing("routing-1").build()) + .putAlias(AliasMetadata.builder("alias-2").routing("routing-2").build()) + .putRolloverInfo(info1) + .putRolloverInfo(info2) + .putInSyncAllocationIds(0, Set.of("1", "2", "3")) + .putMapping(mappings) + .build(); + + BytesStreamOutput out = new BytesStreamOutput(); + BufferedChecksumStreamOutput checksumOut = new BufferedChecksumStreamOutput(out); + metadata1.writeVerifiableTo(checksumOut); + assertNotNull(metadata1.toString()); + + IndexMetadata metadata2 = IndexMetadata.builder(metadata1.getIndex().getName()) + .settings( + Settings.builder() + .put("index.number_of_replicas", numberOfReplicas) + .put("index.number_of_shards", 4) + .put("index.version.created", 1) + .build() + ) + .creationDate(metadata1.getCreationDate()) + .primaryTerm(1, 3) + .primaryTerm(0, 2) + .setRoutingNumShards(32) + .system(system) + .putCustom("my_custom2", customMap) + .putCustom("my_custom", customMap) + .putAlias(AliasMetadata.builder("alias-2").routing("routing-2").build()) + .putAlias(AliasMetadata.builder("alias-1").routing("routing-1").build()) + .putRolloverInfo(info2) + .putRolloverInfo(info1) + .putInSyncAllocationIds(0, Set.of("3", "1", "2")) + .putMapping(mappings) + .build(); + + BytesStreamOutput out2 = new BytesStreamOutput(); + BufferedChecksumStreamOutput checksumOut2 = new BufferedChecksumStreamOutput(out2); + metadata2.writeVerifiableTo(checksumOut2); + assertEquals(checksumOut.getChecksum(), checksumOut2.getChecksum()); + } + public void testGetRoutingFactor() { Integer numShard = randomFrom(1, 2, 4, 8, 16); int routingFactor = IndexMetadata.getRoutingFactor(32, numShard); diff --git a/server/src/test/java/org/opensearch/cluster/metadata/IndexTemplateMetadataTests.java b/server/src/test/java/org/opensearch/cluster/metadata/IndexTemplateMetadataTests.java index 0ea2834cc3024..6b0d5bcd980d8 100644 --- a/server/src/test/java/org/opensearch/cluster/metadata/IndexTemplateMetadataTests.java +++ b/server/src/test/java/org/opensearch/cluster/metadata/IndexTemplateMetadataTests.java @@ -31,11 +31,14 @@ package org.opensearch.cluster.metadata; +import org.opensearch.common.io.stream.BytesStreamOutput; import org.opensearch.common.settings.Settings; import org.opensearch.common.xcontent.XContentHelper; import org.opensearch.common.xcontent.json.JsonXContent; import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.core.common.io.stream.BufferedChecksumStreamOutput; +import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.xcontent.DeprecationHandler; import org.opensearch.core.xcontent.MediaTypeRegistry; import org.opensearch.core.xcontent.NamedXContentRegistry; @@ -232,6 +235,50 @@ public void testFromToXContent() throws Exception { } } + public void testWriteVerifiableTo() throws Exception { + String templateName = randomUnicodeOfCodepointLengthBetween(1, 10); + IndexTemplateMetadata.Builder templateBuilder = IndexTemplateMetadata.builder(templateName); + 
templateBuilder.patterns(Arrays.asList("pattern-1", "pattern-2")); + int numAlias = between(2, 5); + for (int i = 0; i < numAlias; i++) { + AliasMetadata.Builder alias = AliasMetadata.builder(randomRealisticUnicodeOfLengthBetween(1, 100)); + alias.indexRouting(randomRealisticUnicodeOfLengthBetween(1, 100)); + alias.searchRouting(randomRealisticUnicodeOfLengthBetween(1, 100)); + templateBuilder.putAlias(alias); + } + templateBuilder.settings(Settings.builder().put("index.setting-1", randomLong())); + templateBuilder.settings(Settings.builder().put("index.setting-2", randomTimeValue())); + templateBuilder.order(randomInt()); + templateBuilder.version(between(0, 100)); + templateBuilder.putMapping("doc", "{\"doc\":{\"properties\":{\"type\":\"text\"}}}"); + + IndexTemplateMetadata template = templateBuilder.build(); + BytesStreamOutput out = new BytesStreamOutput(); + BufferedChecksumStreamOutput checksumOut = new BufferedChecksumStreamOutput(out); + template.writeVerifiableTo(checksumOut); + StreamInput in = out.bytes().streamInput(); + IndexTemplateMetadata result = IndexTemplateMetadata.readFrom(in); + assertEquals(result, template); + + IndexTemplateMetadata.Builder templateBuilder2 = IndexTemplateMetadata.builder(templateName); + templateBuilder2.patterns(Arrays.asList("pattern-2", "pattern-1")); + template.getAliases() + .entrySet() + .stream() + .sorted(Map.Entry.comparingByKey()) + .forEachOrdered(entry -> templateBuilder2.putAlias(entry.getValue())); + templateBuilder2.settings(template.settings()); + templateBuilder2.order(template.order()); + templateBuilder2.version(template.version()); + templateBuilder2.putMapping("doc", template.mappings()); + + IndexTemplateMetadata template2 = templateBuilder2.build(); + BytesStreamOutput out2 = new BytesStreamOutput(); + BufferedChecksumStreamOutput checksumOut2 = new BufferedChecksumStreamOutput(out2); + template2.writeVerifiableTo(checksumOut2); + assertEquals(checksumOut.getChecksum(), checksumOut2.getChecksum()); + } + public void testDeprecationWarningsOnMultipleMappings() throws IOException { IndexTemplateMetadata.Builder builder = IndexTemplateMetadata.builder("my-template"); builder.patterns(Arrays.asList("a", "b")); diff --git a/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java b/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java index 86ca8b3ad6319..3f223706819b7 100644 --- a/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java @@ -56,6 +56,7 @@ import org.opensearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.UUIDs; +import org.opensearch.common.ValidationException; import org.opensearch.common.blobstore.BlobStore; import org.opensearch.common.compress.CompressedXContent; import org.opensearch.common.settings.ClusterSettings; @@ -66,12 +67,16 @@ import org.opensearch.common.util.BigArrays; import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.xcontent.XContentFactory; +import org.opensearch.common.xcontent.json.JsonXContent; import org.opensearch.core.index.Index; +import org.opensearch.core.xcontent.DeprecationHandler; import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.core.xcontent.XContentParser; import org.opensearch.env.Environment; import 
org.opensearch.index.IndexModule; import org.opensearch.index.IndexNotFoundException; import org.opensearch.index.IndexSettings; +import org.opensearch.index.engine.EngineConfig; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.query.QueryShardContext; import org.opensearch.index.remote.RemoteStoreEnums.PathHashAlgorithm; @@ -81,6 +86,7 @@ import org.opensearch.indices.IndexCreationException; import org.opensearch.indices.IndicesService; import org.opensearch.indices.InvalidAliasNameException; +import org.opensearch.indices.InvalidIndexContextException; import org.opensearch.indices.InvalidIndexNameException; import org.opensearch.indices.RemoteStoreSettings; import org.opensearch.indices.ShardLimitValidator; @@ -114,8 +120,11 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.TreeMap; import java.util.UUID; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; import java.util.function.BiConsumer; import java.util.function.BiFunction; import java.util.function.Consumer; @@ -147,6 +156,7 @@ import static org.opensearch.cluster.metadata.MetadataCreateIndexService.resolveAndValidateAliases; import static org.opensearch.common.util.FeatureFlags.REMOTE_STORE_MIGRATION_EXPERIMENTAL; import static org.opensearch.index.IndexModule.INDEX_STORE_TYPE_SETTING; +import static org.opensearch.index.IndexSettings.INDEX_MERGE_POLICY; import static org.opensearch.index.IndexSettings.INDEX_REFRESH_INTERVAL_SETTING; import static org.opensearch.index.IndexSettings.INDEX_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING; import static org.opensearch.index.IndexSettings.INDEX_SOFT_DELETES_SETTING; @@ -158,6 +168,7 @@ import static org.opensearch.indices.IndicesService.CLUSTER_REPLICATION_TYPE_SETTING; import static org.opensearch.indices.ShardLimitValidatorTests.createTestShardLimitService; import static org.opensearch.node.Node.NODE_ATTRIBUTES; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.getRemoteStoreTranslogRepo; @@ -1196,7 +1207,7 @@ public void testParseMappingsWithTypelessTemplate() throws Exception { assertThat(mappings, Matchers.hasKey(MapperService.SINGLE_MAPPING_NAME)); } - public void testvalidateIndexSettings() { + public void testValidateIndexSettings() { ClusterService clusterService = mock(ClusterService.class); Metadata metadata = Metadata.builder() .transientSettings(Settings.builder().put(Metadata.DEFAULT_REPLICA_COUNT_SETTING.getKey(), 1).build()) @@ -1644,7 +1655,12 @@ public void testNewIndexIsRemoteStoreBackedForRemoteStoreDirectionAndMixedMode() null ); - Map missingTranslogAttribute = Map.of(REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-segment-repo-1"); + Map missingTranslogAttribute = Map.of( + REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, + "cluster-state-repo-1", + REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, + "my-segment-repo-1" + ); DiscoveryNodes finalDiscoveryNodes = DiscoveryNodes.builder() .add(nonRemoteClusterManagerNode) @@ -1704,7 +1720,8 @@ public void testBuildIndexMetadata() { 4, 
sourceIndexMetadata, false, - new HashMap<>() + new HashMap<>(), + null ); assertThat(indexMetadata.getAliases().size(), is(1)); @@ -2231,6 +2248,262 @@ public void testIndexCreationWithIndexStoreTypeRemoteStoreThrowsException() { ); } + public void testCreateIndexWithContextDisabled() throws Exception { + request = new CreateIndexClusterStateUpdateRequest("create index", "test", "test").context(new Context(randomAlphaOfLength(5))); + withTemporaryClusterService((clusterService, threadPool) -> { + MetadataCreateIndexService checkerService = new MetadataCreateIndexService( + Settings.EMPTY, + clusterService, + indicesServices, + null, + null, + createTestShardLimitService(randomIntBetween(1, 1000), false, clusterService), + mock(Environment.class), + IndexScopedSettings.DEFAULT_SCOPED_SETTINGS, + threadPool, + null, + new SystemIndices(Collections.emptyMap()), + false, + new AwarenessReplicaBalance(Settings.EMPTY, clusterService.getClusterSettings()), + DefaultRemoteStoreSettings.INSTANCE, + repositoriesServiceSupplier + ); + CountDownLatch counter = new CountDownLatch(1); + InvalidIndexContextException exception = expectThrows( + InvalidIndexContextException.class, + () -> checkerService.validateContext(request) + ); + assertTrue( + "Invalid exception message." + exception.getMessage(), + exception.getMessage().contains("index specifies a context which cannot be used without enabling") + ); + }); + } + + public void testCreateIndexWithContextAbsent() throws Exception { + FeatureFlags.initializeFeatureFlags(Settings.builder().put(FeatureFlags.APPLICATION_BASED_CONFIGURATION_TEMPLATES, true).build()); + try { + request = new CreateIndexClusterStateUpdateRequest("create index", "test", "test").context(new Context(randomAlphaOfLength(5))); + withTemporaryClusterService((clusterService, threadPool) -> { + MetadataCreateIndexService checkerService = new MetadataCreateIndexService( + Settings.EMPTY, + clusterService, + indicesServices, + null, + null, + createTestShardLimitService(randomIntBetween(1, 1000), false, clusterService), + mock(Environment.class), + IndexScopedSettings.DEFAULT_SCOPED_SETTINGS, + threadPool, + null, + new SystemIndices(Collections.emptyMap()), + false, + new AwarenessReplicaBalance(Settings.EMPTY, clusterService.getClusterSettings()), + DefaultRemoteStoreSettings.INSTANCE, + repositoriesServiceSupplier + ); + CountDownLatch counter = new CountDownLatch(1); + InvalidIndexContextException exception = expectThrows( + InvalidIndexContextException.class, + () -> checkerService.validateContext(request) + ); + assertTrue( + "Invalid exception message." + exception.getMessage(), + exception.getMessage().contains("index specifies a context which is not loaded on the cluster.") + ); + }); + } finally { + // Disable so that other tests which are not dependent on this are not impacted. 
+ FeatureFlags.initializeFeatureFlags( + Settings.builder().put(FeatureFlags.APPLICATION_BASED_CONFIGURATION_TEMPLATES, false).build() + ); + } + } + + public void testApplyContext() throws IOException { + FeatureFlags.initializeFeatureFlags(Settings.builder().put(FeatureFlags.APPLICATION_BASED_CONFIGURATION_TEMPLATES, true).build()); + request = new CreateIndexClusterStateUpdateRequest("create index", "test", "test").context(new Context(randomAlphaOfLength(5))); + + final Map mappings = new HashMap<>(); + mappings.put("_doc", "\"properties\": { \"field1\": {\"type\": \"text\"}}"); + List> allMappings = new ArrayList<>(); + allMappings.add(mappings); + + Settings.Builder settingsBuilder = Settings.builder().put(INDEX_SOFT_DELETES_SETTING.getKey(), "false"); + + String templateContent = "{\n" + + " \"template\": {\n" + + " \"settings\": {\n" + + " \"index.codec\": \"best_compression\",\n" + + " \"index.merge.policy\": \"log_byte_size\",\n" + + " \"index.refresh_interval\": \"60s\"\n" + + " },\n" + + " \"mappings\": {\n" + + " \"properties\": {\n" + + " \"field1\": {\n" + + " \"type\": \"integer\"\n" + + " }\n" + + " }\n" + + " }\n" + + " },\n" + + " \"_meta\": {\n" + + " \"_type\": \"@abc_template\",\n" + + " \"_version\": 1\n" + + " },\n" + + " \"version\": 1\n" + + "}\n"; + + AtomicReference componentTemplate = new AtomicReference<>(); + try ( + XContentParser contentParser = JsonXContent.jsonXContent.createParser( + NamedXContentRegistry.EMPTY, + DeprecationHandler.IGNORE_DEPRECATIONS, + templateContent + ) + ) { + componentTemplate.set(ComponentTemplate.parse(contentParser)); + } + + String contextName = randomAlphaOfLength(5); + try { + request = new CreateIndexClusterStateUpdateRequest("create index", "test", "test").context(new Context(contextName)); + withTemporaryClusterService((clusterService, threadPool) -> { + MetadataCreateIndexService checkerService = new MetadataCreateIndexService( + Settings.EMPTY, + clusterService, + indicesServices, + null, + null, + createTestShardLimitService(randomIntBetween(1, 1000), false, clusterService), + mock(Environment.class), + IndexScopedSettings.DEFAULT_SCOPED_SETTINGS, + threadPool, + null, + new SystemIndices(Collections.emptyMap()), + false, + new AwarenessReplicaBalance(Settings.EMPTY, clusterService.getClusterSettings()), + DefaultRemoteStoreSettings.INSTANCE, + repositoriesServiceSupplier + ); + + ClusterState mockState = mock(ClusterState.class); + Metadata metadata = mock(Metadata.class); + + when(mockState.metadata()).thenReturn(metadata); + when(metadata.systemTemplatesLookup()).thenReturn(Map.of(contextName, new TreeMap<>() { + { + put(1L, contextName); + } + })); + when(metadata.componentTemplates()).thenReturn(Map.of(contextName, componentTemplate.get())); + + try { + Template template = checkerService.applyContext(request, mockState, allMappings, settingsBuilder); + assertEquals(componentTemplate.get().template(), template); + + assertEquals(2, allMappings.size()); + assertEquals(mappings, allMappings.get(0)); + assertEquals( + MapperService.parseMapping(NamedXContentRegistry.EMPTY, componentTemplate.get().template().mappings().toString()), + allMappings.get(1) + ); + + assertEquals("60s", settingsBuilder.get(INDEX_REFRESH_INTERVAL_SETTING.getKey())); + assertEquals("log_byte_size", settingsBuilder.get(INDEX_MERGE_POLICY.getKey())); + assertEquals("best_compression", settingsBuilder.get(EngineConfig.INDEX_CODEC_SETTING.getKey())); + assertEquals("false", settingsBuilder.get(INDEX_SOFT_DELETES_SETTING.getKey())); + } catch 
(IOException ex) { + throw new AssertionError(ex); + } + }); + } finally { + // Disable so that other tests which are not dependent on this are not impacted. + FeatureFlags.initializeFeatureFlags( + Settings.builder().put(FeatureFlags.APPLICATION_BASED_CONFIGURATION_TEMPLATES, false).build() + ); + } + } + + public void testApplyContextWithSettingsOverlap() throws IOException { + FeatureFlags.initializeFeatureFlags(Settings.builder().put(FeatureFlags.APPLICATION_BASED_CONFIGURATION_TEMPLATES, true).build()); + request = new CreateIndexClusterStateUpdateRequest("create index", "test", "test").context(new Context(randomAlphaOfLength(5))); + Settings.Builder settingsBuilder = Settings.builder().put(INDEX_REFRESH_INTERVAL_SETTING.getKey(), "30s"); + String templateContent = "{\n" + + " \"template\": {\n" + + " \"settings\": {\n" + + " \"index.refresh_interval\": \"60s\"\n" + + " }\n" + + " },\n" + + " \"_meta\": {\n" + + " \"_type\": \"@abc_template\",\n" + + " \"_version\": 1\n" + + " },\n" + + " \"version\": 1\n" + + "}\n"; + + AtomicReference componentTemplate = new AtomicReference<>(); + try ( + XContentParser contentParser = JsonXContent.jsonXContent.createParser( + NamedXContentRegistry.EMPTY, + DeprecationHandler.IGNORE_DEPRECATIONS, + templateContent + ) + ) { + componentTemplate.set(ComponentTemplate.parse(contentParser)); + } + + String contextName = randomAlphaOfLength(5); + try { + request = new CreateIndexClusterStateUpdateRequest("create index", "test", "test").context(new Context(contextName)); + withTemporaryClusterService((clusterService, threadPool) -> { + MetadataCreateIndexService checkerService = new MetadataCreateIndexService( + Settings.EMPTY, + clusterService, + indicesServices, + null, + null, + createTestShardLimitService(randomIntBetween(1, 1000), false, clusterService), + mock(Environment.class), + IndexScopedSettings.DEFAULT_SCOPED_SETTINGS, + threadPool, + null, + new SystemIndices(Collections.emptyMap()), + false, + new AwarenessReplicaBalance(Settings.EMPTY, clusterService.getClusterSettings()), + DefaultRemoteStoreSettings.INSTANCE, + repositoriesServiceSupplier + ); + + ClusterState mockState = mock(ClusterState.class); + Metadata metadata = mock(Metadata.class); + + when(mockState.metadata()).thenReturn(metadata); + when(metadata.systemTemplatesLookup()).thenReturn(Map.of(contextName, new TreeMap<>() { + { + put(1L, contextName); + } + })); + when(metadata.componentTemplates()).thenReturn(Map.of(contextName, componentTemplate.get())); + + ValidationException validationException = expectThrows( + ValidationException.class, + () -> checkerService.applyContext(request, mockState, List.of(), settingsBuilder) + ); + assertEquals(1, validationException.validationErrors().size()); + assertTrue( + "Invalid exception message: " + validationException.getMessage(), + validationException.getMessage() + .contains("Cannot apply context template as user provide settings have overlap with the included context template") + ); + }); + } finally { + // Disable so that other tests which are not dependent on this are not impacted. 
+ FeatureFlags.initializeFeatureFlags( + Settings.builder().put(FeatureFlags.APPLICATION_BASED_CONFIGURATION_TEMPLATES, false).build() + ); + } + } + private IndexTemplateMetadata addMatchingTemplate(Consumer configurator) { IndexTemplateMetadata.Builder builder = templateMetadataBuilder("template1", "te*"); configurator.accept(builder); @@ -2292,6 +2565,7 @@ private void verifyRemoteStoreIndexSettings( private DiscoveryNode getRemoteNode() { Map attributes = new HashMap<>(); + attributes.put(REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-cluster-rep-1"); attributes.put(REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-segment-repo-1"); attributes.put(REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-translog-repo-1"); return new DiscoveryNode( diff --git a/server/src/test/java/org/opensearch/cluster/metadata/MetadataIndexTemplateServiceTests.java b/server/src/test/java/org/opensearch/cluster/metadata/MetadataIndexTemplateServiceTests.java index cb98c34988cbe..99e259c8170f3 100644 --- a/server/src/test/java/org/opensearch/cluster/metadata/MetadataIndexTemplateServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/metadata/MetadataIndexTemplateServiceTests.java @@ -57,7 +57,10 @@ import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.env.Environment; +import org.opensearch.index.IndexSettings; import org.opensearch.index.codec.CodecService; +import org.opensearch.index.compositeindex.CompositeIndexSettings; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeIndexSettings; import org.opensearch.index.engine.EngineConfig; import org.opensearch.index.mapper.MapperParsingException; import org.opensearch.index.mapper.MapperService; @@ -2410,9 +2413,44 @@ public void testLegacyNoopUpdate() { assertThat(MetadataIndexTemplateService.innerPutTemplate(state, pr, new IndexTemplateMetadata.Builder("id")), equalTo(state)); } + public void testAsyncTranslogDurabilityBlocked() { + Settings clusterSettings = Settings.builder() + .put(IndicesService.CLUSTER_REMOTE_INDEX_RESTRICT_ASYNC_DURABILITY_SETTING.getKey(), true) + .build(); + PutRequest request = new PutRequest("test", "test_replicas"); + request.patterns(singletonList("test_shards_wait*")); + Settings.Builder settingsBuilder = builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, "1") + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, "1") + .put(IndexSettings.INDEX_TRANSLOG_DURABILITY_SETTING.getKey(), "async"); + request.settings(settingsBuilder.build()); + List throwables = putTemplate(xContentRegistry(), request, clusterSettings); + assertThat(throwables.get(0), instanceOf(IllegalArgumentException.class)); + } + + public void testMaxTranslogFlushSizeWithCompositeIndex() { + Settings clusterSettings = Settings.builder() + .put(CompositeIndexSettings.COMPOSITE_INDEX_MAX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), "130m") + .build(); + PutRequest request = new PutRequest("test", "test_replicas"); + request.patterns(singletonList("test_shards_wait*")); + Settings.Builder settingsBuilder = builder().put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), "true") + .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), "131m"); + request.settings(settingsBuilder.build()); + List throwables = putTemplate(xContentRegistry(), request, clusterSettings); + assertThat(throwables.get(0), instanceOf(IllegalArgumentException.class)); + } + private static List putTemplate(NamedXContentRegistry 
xContentRegistry, PutRequest request) { + return putTemplate(xContentRegistry, request, Settings.EMPTY); + } + + private static List putTemplate( + NamedXContentRegistry xContentRegistry, + PutRequest request, + Settings incomingNodeScopedSettings + ) { ClusterService clusterService = mock(ClusterService.class); - Settings settings = Settings.builder().put(PATH_HOME_SETTING.getKey(), "dummy").build(); + Settings settings = Settings.builder().put(incomingNodeScopedSettings).put(PATH_HOME_SETTING.getKey(), "dummy").build(); ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); Metadata metadata = Metadata.builder().build(); ClusterState clusterState = ClusterState.builder(org.opensearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) diff --git a/server/src/test/java/org/opensearch/cluster/metadata/MetadataTests.java b/server/src/test/java/org/opensearch/cluster/metadata/MetadataTests.java index a434a713f330b..c6cde72ab60b1 100644 --- a/server/src/test/java/org/opensearch/cluster/metadata/MetadataTests.java +++ b/server/src/test/java/org/opensearch/cluster/metadata/MetadataTests.java @@ -1536,6 +1536,41 @@ public static Metadata randomMetadata() { return md.build(); } + public void testXContentWithTemplateMetadata() throws IOException { + final TemplatesMetadata templatesMetadata = getTemplatesMetadata(0); + verifyTemplatesMetadata(templatesMetadata); + final TemplatesMetadata templatesMetadata2 = getTemplatesMetadata(2); + verifyTemplatesMetadata(templatesMetadata2); + } + + private void verifyTemplatesMetadata(TemplatesMetadata templatesMetadata) throws IOException { + final Metadata originalMeta = Metadata.builder().templates(templatesMetadata).build(); + final XContentBuilder builder = JsonXContent.contentBuilder(); + builder.startObject(); + Metadata.FORMAT.toXContent(builder, originalMeta); + builder.endObject(); + try (XContentParser parser = createParser(JsonXContent.jsonXContent, BytesReference.bytes(builder))) { + final Metadata fromXContentMeta = Metadata.fromXContent(parser); + assertThat(fromXContentMeta.templates(), equalTo(originalMeta.templates())); + } + } + + private static TemplatesMetadata getTemplatesMetadata(int numberOfTemplates) { + TemplatesMetadata.Builder builder = TemplatesMetadata.builder(); + for (int i = 0; i < numberOfTemplates; i++) { + builder.put( + IndexTemplateMetadata.builder("template" + i) + .order(1234) + .patterns(Arrays.asList(randomAlphaOfLength(3) + "-*")) + .settings( + Settings.builder().put("index.random_index_setting_" + randomAlphaOfLength(3), randomAlphaOfLength(5)).build() + ) + .build() + ); + } + return builder.build(); + } + private static CreateIndexResult createIndices(int numIndices, int numBackingIndices, String dataStreamName) { // create some indices that do not back a data stream final List indices = new ArrayList<>(); diff --git a/server/src/test/java/org/opensearch/cluster/metadata/QueryGroupMetadataTests.java b/server/src/test/java/org/opensearch/cluster/metadata/QueryGroupMetadataTests.java index f5e667de73d93..3f8d231ffb91e 100644 --- a/server/src/test/java/org/opensearch/cluster/metadata/QueryGroupMetadataTests.java +++ b/server/src/test/java/org/opensearch/cluster/metadata/QueryGroupMetadataTests.java @@ -15,6 +15,7 @@ import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.test.AbstractDiffableSerializationTestCase; +import org.opensearch.wlm.MutableQueryGroupFragment; import 
org.opensearch.wlm.ResourceType; import java.io.IOException; @@ -33,8 +34,7 @@ public void testToXContent() throws IOException { new QueryGroup( "test", "ajakgakg983r92_4242", - QueryGroup.ResiliencyMode.ENFORCED, - Map.of(ResourceType.MEMORY, 0.5), + new MutableQueryGroupFragment(MutableQueryGroupFragment.ResiliencyMode.ENFORCED, Map.of(ResourceType.MEMORY, 0.5)), updatedAt ) ) @@ -44,7 +44,7 @@ public void testToXContent() throws IOException { queryGroupMetadata.toXContent(builder, null); builder.endObject(); assertEquals( - "{\"ajakgakg983r92_4242\":{\"_id\":\"ajakgakg983r92_4242\",\"name\":\"test\",\"resiliency_mode\":\"enforced\",\"updated_at\":1720047207,\"resource_limits\":{\"memory\":0.5}}}", + "{\"ajakgakg983r92_4242\":{\"_id\":\"ajakgakg983r92_4242\",\"name\":\"test\",\"resiliency_mode\":\"enforced\",\"resource_limits\":{\"memory\":0.5},\"updated_at\":1720047207}}", builder.toString() ); } diff --git a/server/src/test/java/org/opensearch/cluster/metadata/QueryGroupTests.java b/server/src/test/java/org/opensearch/cluster/metadata/QueryGroupTests.java index f4d3e5ceb1784..ce1b1270fc94e 100644 --- a/server/src/test/java/org/opensearch/cluster/metadata/QueryGroupTests.java +++ b/server/src/test/java/org/opensearch/cluster/metadata/QueryGroupTests.java @@ -15,6 +15,8 @@ import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.test.AbstractSerializingTestCase; +import org.opensearch.wlm.MutableQueryGroupFragment; +import org.opensearch.wlm.MutableQueryGroupFragment.ResiliencyMode; import org.opensearch.wlm.ResourceType; import org.joda.time.Instant; @@ -26,20 +28,16 @@ public class QueryGroupTests extends AbstractSerializingTestCase { - private static final List allowedModes = List.of( - QueryGroup.ResiliencyMode.SOFT, - QueryGroup.ResiliencyMode.ENFORCED, - QueryGroup.ResiliencyMode.MONITOR - ); + private static final List allowedModes = List.of(ResiliencyMode.SOFT, ResiliencyMode.ENFORCED, ResiliencyMode.MONITOR); static QueryGroup createRandomQueryGroup(String _id) { String name = randomAlphaOfLength(10); Map resourceLimit = new HashMap<>(); resourceLimit.put(ResourceType.MEMORY, randomDoubleBetween(0.0, 0.80, false)); - return new QueryGroup(name, _id, randomMode(), resourceLimit, Instant.now().getMillis()); + return new QueryGroup(name, _id, new MutableQueryGroupFragment(randomMode(), resourceLimit), Instant.now().getMillis()); } - private static QueryGroup.ResiliencyMode randomMode() { + private static ResiliencyMode randomMode() { return allowedModes.get(randomIntBetween(0, allowedModes.size() - 1)); } @@ -74,37 +72,60 @@ protected QueryGroup createTestInstance() { public void testNullName() { assertThrows( NullPointerException.class, - () -> new QueryGroup(null, "_id", randomMode(), Collections.emptyMap(), Instant.now().getMillis()) + () -> new QueryGroup( + null, + "_id", + new MutableQueryGroupFragment(randomMode(), Collections.emptyMap()), + Instant.now().getMillis() + ) ); } public void testNullId() { assertThrows( NullPointerException.class, - () -> new QueryGroup("Dummy", null, randomMode(), Collections.emptyMap(), Instant.now().getMillis()) + () -> new QueryGroup( + "Dummy", + null, + new MutableQueryGroupFragment(randomMode(), Collections.emptyMap()), + Instant.now().getMillis() + ) ); } public void testNullResourceLimits() { - assertThrows(NullPointerException.class, () -> new QueryGroup("analytics", "_id", randomMode(), null, Instant.now().getMillis())); + assertThrows( + NullPointerException.class, 
+ () -> new QueryGroup("analytics", "_id", new MutableQueryGroupFragment(randomMode(), null), Instant.now().getMillis()) + ); } public void testEmptyResourceLimits() { assertThrows( IllegalArgumentException.class, - () -> new QueryGroup("analytics", "_id", randomMode(), Collections.emptyMap(), Instant.now().getMillis()) + () -> new QueryGroup( + "analytics", + "_id", + new MutableQueryGroupFragment(randomMode(), Collections.emptyMap()), + Instant.now().getMillis() + ) ); } public void testIllegalQueryGroupMode() { assertThrows( NullPointerException.class, - () -> new QueryGroup("analytics", "_id", null, Map.of(ResourceType.MEMORY, 0.4), Instant.now().getMillis()) + () -> new QueryGroup( + "analytics", + "_id", + new MutableQueryGroupFragment(null, Map.of(ResourceType.MEMORY, 0.4)), + Instant.now().getMillis() + ) ); } public void testQueryGroupInitiation() { - QueryGroup queryGroup = new QueryGroup("analytics", randomMode(), Map.of(ResourceType.MEMORY, 0.4)); + QueryGroup queryGroup = new QueryGroup("analytics", new MutableQueryGroupFragment(randomMode(), Map.of(ResourceType.MEMORY, 0.4))); assertNotNull(queryGroup.getName()); assertNotNull(queryGroup.get_id()); assertNotNull(queryGroup.getResourceLimits()); @@ -117,11 +138,11 @@ public void testQueryGroupInitiation() { public void testIllegalQueryGroupName() { assertThrows( NullPointerException.class, - () -> new QueryGroup("a".repeat(51), "_id", null, Map.of(ResourceType.MEMORY, 0.4), Instant.now().getMillis()) + () -> new QueryGroup("a".repeat(51), "_id", new MutableQueryGroupFragment(), Instant.now().getMillis()) ); assertThrows( NullPointerException.class, - () -> new QueryGroup("", "_id", null, Map.of(ResourceType.MEMORY, 0.4), Instant.now().getMillis()) + () -> new QueryGroup("", "_id", new MutableQueryGroupFragment(), Instant.now().getMillis()) ); } @@ -132,8 +153,7 @@ public void testInvalidResourceLimitWhenInvalidSystemResourceValueIsGiven() { () -> new QueryGroup( "analytics", "_id", - randomMode(), - Map.of(ResourceType.MEMORY, randomDoubleBetween(1.1, 1.8, false)), + new MutableQueryGroupFragment(randomMode(), Map.of(ResourceType.MEMORY, randomDoubleBetween(1.1, 1.8, false))), Instant.now().getMillis() ) ); @@ -143,8 +163,7 @@ public void testValidQueryGroup() { QueryGroup queryGroup = new QueryGroup( "analytics", "_id", - randomMode(), - Map.of(ResourceType.MEMORY, randomDoubleBetween(0.01, 0.8, false)), + new MutableQueryGroupFragment(randomMode(), Map.of(ResourceType.MEMORY, randomDoubleBetween(0.01, 0.8, false))), Instant.ofEpochMilli(1717187289).getMillis() ); @@ -163,8 +182,7 @@ public void testToXContent() throws IOException { QueryGroup queryGroup = new QueryGroup( "TestQueryGroup", queryGroupId, - QueryGroup.ResiliencyMode.ENFORCED, - Map.of(ResourceType.CPU, 0.30, ResourceType.MEMORY, 0.40), + new MutableQueryGroupFragment(ResiliencyMode.ENFORCED, Map.of(ResourceType.CPU, 0.30, ResourceType.MEMORY, 0.40)), currentTimeInMillis ); XContentBuilder builder = JsonXContent.contentBuilder(); @@ -172,9 +190,9 @@ public void testToXContent() throws IOException { assertEquals( "{\"_id\":\"" + queryGroupId - + "\",\"name\":\"TestQueryGroup\",\"resiliency_mode\":\"enforced\",\"updated_at\":" + + "\",\"name\":\"TestQueryGroup\",\"resiliency_mode\":\"enforced\",\"resource_limits\":{\"cpu\":0.3,\"memory\":0.4},\"updated_at\":" + currentTimeInMillis - + ",\"resource_limits\":{\"cpu\":0.3,\"memory\":0.4}}", + + "}", builder.toString() ); } diff --git a/server/src/test/java/org/opensearch/cluster/metadata/SearchOnlyReplicaTests.java 
b/server/src/test/java/org/opensearch/cluster/metadata/SearchOnlyReplicaTests.java new file mode 100644 index 0000000000000..3d11193a07884 --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/metadata/SearchOnlyReplicaTests.java @@ -0,0 +1,253 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.metadata; + +import org.opensearch.Version; +import org.opensearch.action.admin.cluster.reroute.ClusterRerouteRequest; +import org.opensearch.action.admin.indices.create.CreateIndexRequest; +import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest; +import org.opensearch.action.support.ActiveShardCount; +import org.opensearch.action.support.replication.ClusterStateCreationUtils; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodeRole; +import org.opensearch.cluster.routing.IndexShardRoutingTable; +import org.opensearch.cluster.routing.ShardRoutingState; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; +import org.opensearch.indices.ShardLimitValidator; +import org.opensearch.indices.cluster.ClusterStateChanges; +import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.test.OpenSearchSingleNodeTestCase; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; +import org.junit.After; +import org.junit.Before; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.opensearch.cluster.metadata.IndexMetadata.INDEX_REPLICATION_TYPE_SETTING; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS; + +public class SearchOnlyReplicaTests extends OpenSearchSingleNodeTestCase { + + private ThreadPool threadPool; + + @Before + public void setUp() throws Exception { + super.setUp(); + this.threadPool = new TestThreadPool(getClass().getName()); + } + + @After + public void tearDown() throws Exception { + super.tearDown(); + terminate(threadPool); + } + + @Override + protected Settings featureFlagSettings() { + return Settings.builder() + .put(super.featureFlagSettings()) + .put(FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL_SETTING.getKey(), true) + .build(); + } + + public void testCreateWithDefaultSearchReplicasSetting() { + final ClusterStateChanges cluster = new ClusterStateChanges(xContentRegistry(), threadPool); + ClusterState state = createIndexWithSettings(cluster, Settings.builder().build()); + IndexShardRoutingTable indexShardRoutingTable = state.getRoutingTable().index("index").getShards().get(0); + assertEquals(1, indexShardRoutingTable.replicaShards().size()); + assertEquals(0, indexShardRoutingTable.searchOnlyReplicas().size()); + assertEquals(1, indexShardRoutingTable.writerReplicas().size()); + } + + public void testSearchReplicasValidationWithDocumentReplication() { + final ClusterStateChanges cluster = new ClusterStateChanges(xContentRegistry(), threadPool); + RuntimeException 
exception = expectThrows( + RuntimeException.class, + () -> createIndexWithSettings( + cluster, + Settings.builder() + .put(SETTING_NUMBER_OF_SHARDS, 1) + .put(SETTING_NUMBER_OF_REPLICAS, 0) + .put(INDEX_REPLICATION_TYPE_SETTING.getKey(), ReplicationType.DOCUMENT) + .put(SETTING_NUMBER_OF_SEARCH_REPLICAS, 1) + .build() + ) + ); + assertEquals( + "To set index.number_of_search_only_replicas, index.replication.type must be set to SEGMENT", + exception.getCause().getMessage() + ); + } + + public void testUpdateSearchReplicaCount() { + final ClusterStateChanges cluster = new ClusterStateChanges(xContentRegistry(), threadPool); + + ClusterState state = createIndexWithSettings( + cluster, + Settings.builder() + .put(SETTING_NUMBER_OF_SHARDS, 1) + .put(SETTING_NUMBER_OF_REPLICAS, 0) + .put(INDEX_REPLICATION_TYPE_SETTING.getKey(), ReplicationType.SEGMENT) + .put(SETTING_NUMBER_OF_SEARCH_REPLICAS, 1) + .build() + ); + assertTrue(state.metadata().hasIndex("index")); + rerouteUntilActive(state, cluster); + IndexShardRoutingTable indexShardRoutingTable = state.getRoutingTable().index("index").getShards().get(0); + assertEquals(1, indexShardRoutingTable.replicaShards().size()); + assertEquals(1, indexShardRoutingTable.searchOnlyReplicas().size()); + assertEquals(0, indexShardRoutingTable.writerReplicas().size()); + + // add another replica + state = cluster.updateSettings( + state, + new UpdateSettingsRequest("index").settings(Settings.builder().put(SETTING_NUMBER_OF_SEARCH_REPLICAS, 2).build()) + ); + rerouteUntilActive(state, cluster); + indexShardRoutingTable = state.getRoutingTable().index("index").getShards().get(0); + assertEquals(2, indexShardRoutingTable.replicaShards().size()); + assertEquals(2, indexShardRoutingTable.searchOnlyReplicas().size()); + assertEquals(0, indexShardRoutingTable.writerReplicas().size()); + + // remove all replicas + state = cluster.updateSettings( + state, + new UpdateSettingsRequest("index").settings(Settings.builder().put(SETTING_NUMBER_OF_SEARCH_REPLICAS, 0).build()) + ); + rerouteUntilActive(state, cluster); + indexShardRoutingTable = state.getRoutingTable().index("index").getShards().get(0); + assertEquals(0, indexShardRoutingTable.replicaShards().size()); + assertEquals(0, indexShardRoutingTable.searchOnlyReplicas().size()); + assertEquals(0, indexShardRoutingTable.writerReplicas().size()); + } + + private ClusterState createIndexWithSettings(ClusterStateChanges cluster, Settings settings) { + List allNodes = new ArrayList<>(); + // node for primary/local + DiscoveryNode localNode = createNode(Version.CURRENT, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, DiscoveryNodeRole.DATA_ROLE); + allNodes.add(localNode); + // node for search replicas - we'll start with 1 and add another + for (int i = 0; i < 2; i++) { + allNodes.add(createNode(Version.CURRENT, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, DiscoveryNodeRole.DATA_ROLE)); + } + ClusterState state = ClusterStateCreationUtils.state(localNode, localNode, allNodes.toArray(new DiscoveryNode[0])); + + CreateIndexRequest request = new CreateIndexRequest("index", settings).waitForActiveShards(ActiveShardCount.NONE); + state = cluster.createIndex(state, request); + return state; + } + + public void testUpdateSearchReplicasOverShardLimit() { + final ClusterStateChanges cluster = new ClusterStateChanges(xContentRegistry(), threadPool); + + List allNodes = new ArrayList<>(); + // node for primary/local + DiscoveryNode localNode = createNode(Version.CURRENT, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, DiscoveryNodeRole.DATA_ROLE); + 
allNodes.add(localNode); + + allNodes.add(createNode(Version.CURRENT, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, DiscoveryNodeRole.DATA_ROLE)); + + ClusterState state = ClusterStateCreationUtils.state(localNode, localNode, allNodes.toArray(new DiscoveryNode[0])); + + CreateIndexRequest request = new CreateIndexRequest( + "index", + Settings.builder() + .put(SETTING_NUMBER_OF_SHARDS, 1) + .put(SETTING_NUMBER_OF_REPLICAS, 0) + .put(INDEX_REPLICATION_TYPE_SETTING.getKey(), ReplicationType.SEGMENT) + .put(SETTING_NUMBER_OF_SEARCH_REPLICAS, 1) + .build() + ).waitForActiveShards(ActiveShardCount.NONE); + state = cluster.createIndex(state, request); + assertTrue(state.metadata().hasIndex("index")); + rerouteUntilActive(state, cluster); + + // add another replica + ClusterState finalState = state; + Integer maxShardPerNode = ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getDefault(Settings.EMPTY); + expectThrows( + RuntimeException.class, + () -> cluster.updateSettings( + finalState, + new UpdateSettingsRequest("index").settings( + Settings.builder().put(SETTING_NUMBER_OF_SEARCH_REPLICAS, maxShardPerNode * 2).build() + ) + ) + ); + } + + public void testUpdateSearchReplicasOnDocrepCluster() { + final ClusterStateChanges cluster = new ClusterStateChanges(xContentRegistry(), threadPool); + + List allNodes = new ArrayList<>(); + // node for primary/local + DiscoveryNode localNode = createNode(Version.CURRENT, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, DiscoveryNodeRole.DATA_ROLE); + allNodes.add(localNode); + + allNodes.add(createNode(Version.CURRENT, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, DiscoveryNodeRole.DATA_ROLE)); + + ClusterState state = ClusterStateCreationUtils.state(localNode, localNode, allNodes.toArray(new DiscoveryNode[0])); + + CreateIndexRequest request = new CreateIndexRequest( + "index", + Settings.builder() + .put(SETTING_NUMBER_OF_SHARDS, 1) + .put(SETTING_NUMBER_OF_REPLICAS, 0) + .put(INDEX_REPLICATION_TYPE_SETTING.getKey(), ReplicationType.DOCUMENT) + .build() + ).waitForActiveShards(ActiveShardCount.NONE); + state = cluster.createIndex(state, request); + assertTrue(state.metadata().hasIndex("index")); + rerouteUntilActive(state, cluster); + + // add another replica + ClusterState finalState = state; + Integer maxShardPerNode = ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getDefault(Settings.EMPTY); + expectThrows( + RuntimeException.class, + () -> cluster.updateSettings( + finalState, + new UpdateSettingsRequest("index").settings( + Settings.builder().put(SETTING_NUMBER_OF_SEARCH_REPLICAS, maxShardPerNode * 2).build() + ) + ) + ); + + } + + private static void rerouteUntilActive(ClusterState state, ClusterStateChanges cluster) { + while (state.routingTable().index("index").shard(0).allShardsStarted() == false) { + state = cluster.applyStartedShards( + state, + state.routingTable().index("index").shard(0).shardsWithState(ShardRoutingState.INITIALIZING) + ); + state = cluster.reroute(state, new ClusterRerouteRequest()); + } + } + + private static final AtomicInteger nodeIdGenerator = new AtomicInteger(); + + protected DiscoveryNode createNode(Version version, DiscoveryNodeRole... 
mustHaveRoles) { + Set<DiscoveryNodeRole> roles = new HashSet<>(randomSubsetOf(DiscoveryNodeRole.BUILT_IN_ROLES)); + Collections.addAll(roles, mustHaveRoles); + final String id = String.format(Locale.ROOT, "node_%03d", nodeIdGenerator.incrementAndGet()); + return new DiscoveryNode(id, id, buildNewFakeTransportAddress(), Collections.emptyMap(), roles, version); + } +} diff --git a/server/src/test/java/org/opensearch/cluster/metadata/TranslogFlushIntervalSettingsTests.java b/server/src/test/java/org/opensearch/cluster/metadata/TranslogFlushIntervalSettingsTests.java new file mode 100644 index 0000000000000..f54b6cdf1b152 --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/metadata/TranslogFlushIntervalSettingsTests.java @@ -0,0 +1,146 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.metadata; + +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.index.IndexSettings; +import org.opensearch.index.compositeindex.CompositeIndexSettings; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeIndexSettings; +import org.opensearch.test.OpenSearchTestCase; + +import java.util.Optional; + +/** + * Tests for translog flush interval settings update with and without composite index + */ +public class TranslogFlushIntervalSettingsTests extends OpenSearchTestCase { + + Settings settings = Settings.builder() + .put(CompositeIndexSettings.COMPOSITE_INDEX_MAX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), "130mb") + .build(); + ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + + public void testValidSettings() { + Settings requestSettings = Settings.builder() + .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), "50mb") + .put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), true) + .build(); + + // This should not throw an exception + MetadataCreateIndexService.validateTranslogFlushIntervalSettingsForCompositeIndex(requestSettings, clusterSettings); + } + + public void testDefaultTranslogFlushSetting() { + Settings requestSettings = Settings.builder().put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), true).build(); + + // This should throw an exception since the default flush threshold size exceeds the composite index limit + IllegalArgumentException ex = expectThrows( + IllegalArgumentException.class, + () -> MetadataCreateIndexService.validateTranslogFlushIntervalSettingsForCompositeIndex(requestSettings, clusterSettings) + ); + assertEquals("You can configure 'index.translog.flush_threshold_size' with upto '130mb' for composite index", ex.getMessage()); + } + + public void testMissingCompositeIndexSetting() { + Settings requestSettings = Settings.builder() + .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), "50mb") + .build(); + + // This should not throw an exception + MetadataCreateIndexService.validateTranslogFlushIntervalSettingsForCompositeIndex(requestSettings, clusterSettings); + } + + public void testNullTranslogFlushSetting() { + Settings requestSettings = Settings.builder() + .putNull(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey()) + .put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), true) + .build(); + + // This should throw an exception when the flush threshold size setting is null + IllegalArgumentException ex = expectThrows( + IllegalArgumentException.class, + ()
-> MetadataCreateIndexService.validateTranslogFlushIntervalSettingsForCompositeIndex(requestSettings, clusterSettings) + ); + assertEquals("You can configure 'index.translog.flush_threshold_size' with upto '130mb' for composite index", ex.getMessage()); + } + + public void testExceedingMaxFlushSize() { + Settings requestSettings = Settings.builder() + .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), "150mb") + .put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), true) + .build(); + + IllegalArgumentException ex = expectThrows( + IllegalArgumentException.class, + () -> MetadataCreateIndexService.validateTranslogFlushIntervalSettingsForCompositeIndex(requestSettings, clusterSettings) + ); + assertEquals("You can configure 'index.translog.flush_threshold_size' with upto '130mb' for composite index", ex.getMessage()); + } + + public void testEqualToMaxFlushSize() { + Settings requestSettings = Settings.builder() + .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), "100mb") + .put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), true) + .build(); + + // This should not throw an exception + MetadataCreateIndexService.validateTranslogFlushIntervalSettingsForCompositeIndex(requestSettings, clusterSettings); + } + + public void testUpdateIndexThresholdFlushSize() { + Settings requestSettings = Settings.builder() + .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), "100mb") + .build(); + + Settings indexSettings = Settings.builder().put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), true).build(); + + // This should not throw an exception + assertTrue( + MetadataCreateIndexService.validateTranslogFlushIntervalSettingsForCompositeIndex( + requestSettings, + clusterSettings, + indexSettings + ).isEmpty() + ); + } + + public void testUpdateFlushSizeAboveThresholdWithCompositeIndex() { + Settings requestSettings = Settings.builder() + .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), "131mb") + .build(); + + Settings indexSettings = Settings.builder().put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), true).build(); + + Optional err = MetadataCreateIndexService.validateTranslogFlushIntervalSettingsForCompositeIndex( + requestSettings, + clusterSettings, + indexSettings + ); + assertTrue(err.isPresent()); + assertEquals("You can configure 'index.translog.flush_threshold_size' with upto '130mb' for composite index", err.get()); + } + + public void testUpdateFlushSizeAboveThresholdWithoutCompositeIndex() { + Settings requestSettings = Settings.builder() + .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), "131mb") + .build(); + + Settings indexSettings = Settings.builder().build(); + + // This should not throw an exception + assertTrue( + MetadataCreateIndexService.validateTranslogFlushIntervalSettingsForCompositeIndex( + requestSettings, + clusterSettings, + indexSettings + ).isEmpty() + ); + } +} diff --git a/server/src/test/java/org/opensearch/cluster/node/DiscoveryNodeTests.java b/server/src/test/java/org/opensearch/cluster/node/DiscoveryNodeTests.java index c8a6fc76ce820..525a53f3e6158 100644 --- a/server/src/test/java/org/opensearch/cluster/node/DiscoveryNodeTests.java +++ b/server/src/test/java/org/opensearch/cluster/node/DiscoveryNodeTests.java @@ -36,6 +36,7 @@ import org.opensearch.common.io.stream.BytesStreamOutput; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; +import 
org.opensearch.core.common.io.stream.BufferedChecksumStreamOutput; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.transport.TransportAddress; import org.opensearch.node.remotestore.RemoteStoreNodeAttribute; @@ -46,6 +47,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.HashSet; +import java.util.LinkedHashSet; import java.util.Locale; import java.util.Map; import java.util.Set; @@ -128,6 +130,43 @@ public void testDiscoveryNodeSerializationKeepsHost() throws Exception { assertEquals(transportAddress.getPort(), serialized.getAddress().getPort()); } + public void testWriteVerifiableTo() throws Exception { + InetAddress inetAddress = InetAddress.getByAddress("name1", new byte[] { (byte) 192, (byte) 168, (byte) 0, (byte) 1 }); + TransportAddress transportAddress = new TransportAddress(inetAddress, randomIntBetween(0, 65535)); + final Set roles = new HashSet<>(randomSubsetOf(DiscoveryNodeRole.BUILT_IN_ROLES)); + Map attributes = new HashMap<>(); + attributes.put("att-1", "test-repo"); + attributes.put("att-2", "test-repo"); + DiscoveryNode node = new DiscoveryNode("name1", "id1", transportAddress, attributes, roles, Version.CURRENT); + + BytesStreamOutput out = new BytesStreamOutput(); + BufferedChecksumStreamOutput checksumOut = new BufferedChecksumStreamOutput(out); + node.writeVerifiableTo(checksumOut); + StreamInput in = out.bytes().streamInput(); + DiscoveryNode result = new DiscoveryNode(in); + assertEquals(result, node); + + Map attributes2 = new HashMap<>(); + attributes2.put("att-2", "test-repo"); + attributes2.put("att-1", "test-repo"); + + DiscoveryNode node2 = new DiscoveryNode( + node.getName(), + node.getId(), + node.getEphemeralId(), + node.getHostName(), + node.getHostAddress(), + transportAddress, + attributes2, + roles.stream().sorted().collect(Collectors.toCollection(LinkedHashSet::new)), + Version.CURRENT + ); + BytesStreamOutput out2 = new BytesStreamOutput(); + BufferedChecksumStreamOutput checksumOut2 = new BufferedChecksumStreamOutput(out2); + node2.writeVerifiableTo(checksumOut2); + assertEquals(checksumOut.getChecksum(), checksumOut2.getChecksum()); + } + public void testDiscoveryNodeRoleWithOldVersion() throws Exception { InetAddress inetAddress = InetAddress.getByAddress("name1", new byte[] { (byte) 192, (byte) 168, (byte) 0, (byte) 1 }); TransportAddress transportAddress = new TransportAddress(inetAddress, randomIntBetween(0, 65535)); diff --git a/server/src/test/java/org/opensearch/cluster/node/DiscoveryNodesTests.java b/server/src/test/java/org/opensearch/cluster/node/DiscoveryNodesTests.java index d2450859dfcd4..61b86856c9ebc 100644 --- a/server/src/test/java/org/opensearch/cluster/node/DiscoveryNodesTests.java +++ b/server/src/test/java/org/opensearch/cluster/node/DiscoveryNodesTests.java @@ -36,10 +36,14 @@ import org.opensearch.LegacyESVersion; import org.opensearch.Version; +import org.opensearch.common.io.stream.BytesStreamOutput; import org.opensearch.common.settings.Setting; +import org.opensearch.core.common.io.stream.BufferedChecksumStreamOutput; +import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.transport.TransportAddress; import org.opensearch.test.OpenSearchTestCase; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -328,6 +332,28 @@ public void testDeprecatedMasterNodeFilter() { assertThat(discoveryNodes.resolveNodes("master:false", "_all"), 
arrayContainingInAnyOrder(allNodes)); } + public void testWriteVerifiableTo() throws IOException { + final DiscoveryNodes discoveryNodes = buildDiscoveryNodes(); + BytesStreamOutput out = new BytesStreamOutput(); + BufferedChecksumStreamOutput checksumOut = new BufferedChecksumStreamOutput(out); + discoveryNodes.writeVerifiableTo(checksumOut); + StreamInput in = out.bytes().streamInput(); + DiscoveryNodes result = DiscoveryNodes.readFrom(in, discoveryNodes.getLocalNode()); + assertEquals(result, discoveryNodes); + + final DiscoveryNodes.Builder discoveryNodesBuilder = DiscoveryNodes.builder() + .clusterManagerNodeId(discoveryNodes.getClusterManagerNodeId()); + discoveryNodes.getNodes() + .entrySet() + .stream() + .sorted(Map.Entry.comparingByKey()) + .forEachOrdered(entry -> discoveryNodesBuilder.add(entry.getValue())); + BytesStreamOutput out2 = new BytesStreamOutput(); + BufferedChecksumStreamOutput checksumOut2 = new BufferedChecksumStreamOutput(out2); + discoveryNodesBuilder.build().writeVerifiableTo(checksumOut2); + assertEquals(checksumOut.getChecksum(), checksumOut2.getChecksum()); + } + private static AtomicInteger idGenerator = new AtomicInteger(); private static List randomNodes(final int numNodes) { diff --git a/server/src/test/java/org/opensearch/cluster/routing/IndexShardRoutingTableTests.java b/server/src/test/java/org/opensearch/cluster/routing/IndexShardRoutingTableTests.java index 6bfe60980adf3..5f8dabdcd4e45 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/IndexShardRoutingTableTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/IndexShardRoutingTableTests.java @@ -32,10 +32,13 @@ package org.opensearch.cluster.routing; +import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.core.common.io.stream.BufferedChecksumStreamOutput; import org.opensearch.core.index.Index; import org.opensearch.core.index.shard.ShardId; import org.opensearch.test.OpenSearchTestCase; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -77,6 +80,25 @@ public void testEquals() { assertEquals(table4, table5); } + public void testWriteVerifiableTo() throws IOException { + Index index = new Index("a", "b"); + ShardId shardId = new ShardId(index, 1); + ShardRouting shard1 = TestShardRouting.newShardRouting(shardId, "node-1", true, ShardRoutingState.STARTED); + ShardRouting shard2 = TestShardRouting.newShardRouting(shardId, "node-2", false, ShardRoutingState.STARTED); + ShardRouting shard3 = TestShardRouting.newShardRouting(shardId, null, false, ShardRoutingState.UNASSIGNED); + + IndexShardRoutingTable table1 = new IndexShardRoutingTable(shardId, Arrays.asList(shard1, shard2, shard3)); + BytesStreamOutput out = new BytesStreamOutput(); + BufferedChecksumStreamOutput checksumOut = new BufferedChecksumStreamOutput(out); + IndexShardRoutingTable.Builder.writeVerifiableTo(table1, checksumOut); + + IndexShardRoutingTable table2 = new IndexShardRoutingTable(shardId, Arrays.asList(shard3, shard1, shard2)); + BytesStreamOutput out2 = new BytesStreamOutput(); + BufferedChecksumStreamOutput checksumOut2 = new BufferedChecksumStreamOutput(out2); + IndexShardRoutingTable.Builder.writeVerifiableTo(table2, checksumOut2); + assertEquals(checksumOut.getChecksum(), checksumOut2.getChecksum()); + } + public void testShardsMatchingPredicate() { ShardId shardId = new ShardId(new Index("a", UUID.randomUUID().toString()), 0); ShardRouting primary = TestShardRouting.newShardRouting(shardId, "node-1", true, 
ShardRoutingState.STARTED); diff --git a/server/src/test/java/org/opensearch/cluster/routing/OperationRoutingTests.java b/server/src/test/java/org/opensearch/cluster/routing/OperationRoutingTests.java index ad8b48d56c417..aaeeb52ab5709 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/OperationRoutingTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/OperationRoutingTests.java @@ -1118,6 +1118,82 @@ public void testPartialIndexPrimaryDefault() throws Exception { } } + public void testSearchReplicaDefaultRouting() throws Exception { + final int numShards = 1; + final int numReplicas = 2; + final int numSearchReplicas = 2; + final String indexName = "test"; + final String[] indexNames = new String[] { indexName }; + + ClusterService clusterService = null; + ThreadPool threadPool = null; + + try { + OperationRouting opRouting = new OperationRouting( + Settings.builder().put(FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL, "true").build(), + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) + ); + + ClusterState state = ClusterStateCreationUtils.stateWithAssignedPrimariesAndReplicas( + indexNames, + numShards, + numReplicas, + numSearchReplicas + ); + IndexShardRoutingTable indexShardRoutingTable = state.getRoutingTable().index(indexName).getShards().get(0); + ShardId shardId = indexShardRoutingTable.searchOnlyReplicas().get(0).shardId(); + + threadPool = new TestThreadPool("testSearchReplicaDefaultRouting"); + clusterService = ClusterServiceUtils.createClusterService(threadPool); + + // add a search replica in initializing state: + DiscoveryNode node = new DiscoveryNode( + "node_initializing", + OpenSearchTestCase.buildNewFakeTransportAddress(), + Collections.emptyMap(), + new HashSet<>(DiscoveryNodeRole.BUILT_IN_ROLES), + Version.CURRENT + ); + + IndexMetadata indexMetadata = IndexMetadata.builder(indexName) + .settings(Settings.builder().put(state.metadata().index(indexName).getSettings()).build()) + .numberOfSearchReplicas(3) + .numberOfReplicas(2) + .build(); + Metadata.Builder metadataBuilder = Metadata.builder(state.metadata()).put(indexMetadata, false).generateClusterUuidIfNeeded(); + IndexRoutingTable.Builder indexShardRoutingBuilder = IndexRoutingTable.builder(indexMetadata.getIndex()); + indexShardRoutingBuilder.addIndexShard(indexShardRoutingTable); + indexShardRoutingBuilder.addShard( + TestShardRouting.newShardRouting(shardId, node.getId(), null, false, true, ShardRoutingState.INITIALIZING, null) + ); + state = ClusterState.builder(state) + .routingTable(RoutingTable.builder().add(indexShardRoutingBuilder).build()) + .metadata(metadataBuilder.build()) + .build(); + + // Verify default preference is primary only + GroupShardsIterator groupIterator = opRouting.searchShards(state, indexNames, null, null); + assertThat("one group per shard", groupIterator.size(), equalTo(numShards)); + for (ShardIterator shardIterator : groupIterator) { + assertEquals("We should have 3 shards returned", shardIterator.size(), 3); + int i = 0; + for (ShardRouting shardRouting : shardIterator) { + assertTrue( + "Only search replicas should exist with preference SEARCH_REPLICA", + shardIterator.nextOrNull().isSearchOnly() + ); + if (i == shardIterator.size()) { + assertTrue("Initializing shard should appear last", shardRouting.initializing()); + assertFalse("Initializing shard should appear last", shardRouting.active()); + } + } + } + } finally { + IOUtils.close(clusterService); + terminate(threadPool); + } + } + private DiscoveryNode[] 
setupNodes() { // Sets up two data nodes in zone-a and one data node in zone-b List zones = Arrays.asList("a", "a", "b"); diff --git a/server/src/test/java/org/opensearch/cluster/routing/RoutingTableDiffTests.java b/server/src/test/java/org/opensearch/cluster/routing/RoutingTableDiffTests.java new file mode 100644 index 0000000000000..d8c93523c34b9 --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/routing/RoutingTableDiffTests.java @@ -0,0 +1,325 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.routing; + +import org.opensearch.Version; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.Diff; +import org.opensearch.cluster.OpenSearchAllocationTestCase; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.allocation.AllocationService; +import org.opensearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider; +import org.opensearch.common.settings.Settings; +import org.junit.Before; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.not; + +public class RoutingTableDiffTests extends OpenSearchAllocationTestCase { + + private static final String TEST_INDEX_1 = "test1"; + private static final String TEST_INDEX_2 = "test2"; + private static final String TEST_INDEX_3 = "test3"; + private int numberOfShards; + private int numberOfReplicas; + private int shardsPerIndex; + private int totalNumberOfShards; + private static final Settings DEFAULT_SETTINGS = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT).build(); + private final AllocationService ALLOCATION_SERVICE = createAllocationService( + Settings.builder() + .put("cluster.routing.allocation.node_concurrent_recoveries", Integer.MAX_VALUE) // don't limit recoveries + .put("cluster.routing.allocation.node_initial_primaries_recoveries", Integer.MAX_VALUE) + .put( + ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_NODE_INITIAL_REPLICAS_RECOVERIES_SETTING.getKey(), + Integer.MAX_VALUE + ) + .build() + ); + private ClusterState clusterState; + + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + this.numberOfShards = randomIntBetween(1, 5); + this.numberOfReplicas = randomIntBetween(1, 5); + this.shardsPerIndex = this.numberOfShards * (this.numberOfReplicas + 1); + this.totalNumberOfShards = this.shardsPerIndex * 2; + logger.info("Setup test with {} shards and {} replicas.", this.numberOfShards, this.numberOfReplicas); + RoutingTable emptyRoutingTable = new RoutingTable.Builder().build(); + Metadata metadata = Metadata.builder().put(createIndexMetadata(TEST_INDEX_1)).put(createIndexMetadata(TEST_INDEX_2)).build(); + + RoutingTable testRoutingTable = new RoutingTable.Builder().add( + new IndexRoutingTable.Builder(metadata.index(TEST_INDEX_1).getIndex()).initializeAsNew(metadata.index(TEST_INDEX_1)).build() + ) + .add( + new IndexRoutingTable.Builder(metadata.index(TEST_INDEX_2).getIndex()).initializeAsNew(metadata.index(TEST_INDEX_2)).build() + ) + .build(); + this.clusterState = ClusterState.builder(org.opensearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) + .metadata(metadata) + 
.routingTable(testRoutingTable) + .build(); + } + + /** + * puts primary shard indexRoutings into initializing state + */ + private void initPrimaries() { + logger.info("adding {} nodes and performing rerouting", this.numberOfReplicas + 1); + DiscoveryNodes.Builder discoBuilder = DiscoveryNodes.builder(); + for (int i = 0; i < this.numberOfReplicas + 1; i++) { + discoBuilder = discoBuilder.add(newNode("node" + i)); + } + this.clusterState = ClusterState.builder(clusterState).nodes(discoBuilder).build(); + ClusterState rerouteResult = ALLOCATION_SERVICE.reroute(clusterState, "reroute"); + assertThat(rerouteResult, not(equalTo(this.clusterState))); + this.clusterState = rerouteResult; + } + + private void startInitializingShards(String index) { + logger.info("start primary shards for index {}", index); + clusterState = startInitializingShardsAndReroute(ALLOCATION_SERVICE, clusterState, index); + } + + private IndexMetadata.Builder createIndexMetadata(String indexName) { + return new IndexMetadata.Builder(indexName).settings(DEFAULT_SETTINGS) + .numberOfReplicas(this.numberOfReplicas) + .numberOfShards(this.numberOfShards); + } + + public void testRoutingTableUpserts() { + assertThat(clusterState.routingTable().shardsWithState(ShardRoutingState.UNASSIGNED).size(), is(this.totalNumberOfShards)); + initPrimaries(); + int expectedUnassignedShardCount = this.totalNumberOfShards - 2 * this.numberOfShards; + assertThat(clusterState.routingTable().shardsWithState(ShardRoutingState.UNASSIGNED).size(), is(expectedUnassignedShardCount)); + assertThat(clusterState.routingTable().shardsWithState(ShardRoutingState.INITIALIZING).size(), is(2 * this.numberOfShards)); + Metadata metadata = Metadata.builder().put(createIndexMetadata(TEST_INDEX_1)).put(createIndexMetadata(TEST_INDEX_2)).build(); + ClusterState oldClusterState = clusterState; + + // create index routing table for TEST_INDEX_3 + metadata = Metadata.builder(metadata).put(createIndexMetadata(TEST_INDEX_3)).build(); + RoutingTable testRoutingTable = new RoutingTable.Builder(clusterState.routingTable()).add( + new IndexRoutingTable.Builder(metadata.index(TEST_INDEX_3).getIndex()).initializeAsNew(metadata.index(TEST_INDEX_3)).build() + ).build(); + this.clusterState = ClusterState.builder(org.opensearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) + .metadata(metadata) + .routingTable(testRoutingTable) + .build(); + this.totalNumberOfShards = this.shardsPerIndex * 3; + assertThat( + clusterState.routingTable().shardsWithState(ShardRoutingState.UNASSIGNED).size(), + is(expectedUnassignedShardCount + this.shardsPerIndex) + ); + Diff fullDiff = clusterState.routingTable().diff(oldClusterState.getRoutingTable()); + Diff incrementalDiff = clusterState.routingTable().incrementalDiff(oldClusterState.getRoutingTable()); + RoutingTable newRoutingTable = incrementalDiff.apply(oldClusterState.getRoutingTable()); + for (IndexRoutingTable indexRoutingTable : clusterState.routingTable()) { + assertEquals(clusterState.routingTable().version(), newRoutingTable.version()); + assertEquals(indexRoutingTable, newRoutingTable.index(indexRoutingTable.getIndex())); + } + RoutingTable newRoutingTableWithFullDiff = fullDiff.apply(oldClusterState.getRoutingTable()); + for (IndexRoutingTable indexRoutingTable : clusterState.routingTable()) { + assertEquals(clusterState.routingTable().version(), newRoutingTableWithFullDiff.version()); + assertEquals(indexRoutingTable, newRoutingTableWithFullDiff.index(indexRoutingTable.getIndex())); + } + } + + public 
void testRoutingTableDeletes() { + assertThat(clusterState.routingTable().shardsWithState(ShardRoutingState.UNASSIGNED).size(), is(this.totalNumberOfShards)); + initPrimaries(); + int expectedUnassignedShardCount = this.totalNumberOfShards - 2 * this.numberOfShards; + assertThat(clusterState.routingTable().shardsWithState(ShardRoutingState.UNASSIGNED).size(), is(expectedUnassignedShardCount)); + assertThat(clusterState.routingTable().shardsWithState(ShardRoutingState.INITIALIZING).size(), is(2 * this.numberOfShards)); + Metadata metadata = Metadata.builder().put(createIndexMetadata(TEST_INDEX_1)).put(createIndexMetadata(TEST_INDEX_2)).build(); + ClusterState oldClusterState = clusterState; + + // delete index routing table for TEST_INDEX_1 + metadata = Metadata.builder(metadata).put(createIndexMetadata(TEST_INDEX_3)).build(); + RoutingTable testRoutingTable = new RoutingTable.Builder(clusterState.routingTable()).remove(TEST_INDEX_1).build(); + this.clusterState = ClusterState.builder(org.opensearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) + .metadata(metadata) + .routingTable(testRoutingTable) + .build(); + this.totalNumberOfShards = this.shardsPerIndex; + assertThat( + clusterState.routingTable().shardsWithState(ShardRoutingState.UNASSIGNED).size(), + is(expectedUnassignedShardCount - this.numberOfShards * this.numberOfReplicas) + ); + Diff fullDiff = clusterState.routingTable().diff(oldClusterState.getRoutingTable()); + Diff incrementalDiff = clusterState.routingTable().incrementalDiff(oldClusterState.getRoutingTable()); + RoutingTable newRoutingTable = incrementalDiff.apply(oldClusterState.getRoutingTable()); + for (IndexRoutingTable indexRoutingTable : clusterState.routingTable()) { + assertEquals(clusterState.routingTable().version(), newRoutingTable.version()); + assertEquals(indexRoutingTable, newRoutingTable.index(indexRoutingTable.getIndex())); + } + RoutingTable newRoutingTableWithFullDiff = fullDiff.apply(oldClusterState.getRoutingTable()); + for (IndexRoutingTable indexRoutingTable : clusterState.routingTable()) { + assertEquals(clusterState.routingTable().version(), newRoutingTableWithFullDiff.version()); + assertEquals(indexRoutingTable, newRoutingTableWithFullDiff.index(indexRoutingTable.getIndex())); + } + } + + public void testRoutingTableUpsertsWithDiff() { + assertThat(clusterState.routingTable().shardsWithState(ShardRoutingState.UNASSIGNED).size(), is(this.totalNumberOfShards)); + initPrimaries(); + int expectedUnassignedShardCount = this.totalNumberOfShards - 2 * this.numberOfShards; + assertThat(clusterState.routingTable().shardsWithState(ShardRoutingState.UNASSIGNED).size(), is(expectedUnassignedShardCount)); + assertThat(clusterState.routingTable().shardsWithState(ShardRoutingState.INITIALIZING).size(), is(2 * this.numberOfShards)); + Metadata metadata = Metadata.builder().put(createIndexMetadata(TEST_INDEX_1)).put(createIndexMetadata(TEST_INDEX_2)).build(); + ClusterState oldClusterState = clusterState; + + // create index routing table for TEST_INDEX_3 + metadata = Metadata.builder(metadata).put(createIndexMetadata(TEST_INDEX_3)).build(); + RoutingTable testRoutingTable = new RoutingTable.Builder(clusterState.routingTable()).add( + new IndexRoutingTable.Builder(metadata.index(TEST_INDEX_3).getIndex()).initializeAsNew(metadata.index(TEST_INDEX_3)).build() + ).build(); + this.clusterState = ClusterState.builder(org.opensearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) + .metadata(metadata) + 
.routingTable(testRoutingTable) + .build(); + this.totalNumberOfShards = this.shardsPerIndex * 3; + assertThat( + clusterState.routingTable().shardsWithState(ShardRoutingState.UNASSIGNED).size(), + is(expectedUnassignedShardCount + this.shardsPerIndex) + ); + initPrimaries(); + clusterState = startRandomInitializingShard(clusterState, ALLOCATION_SERVICE, TEST_INDEX_2); + // assertThat(clusterState.routingTable().shardsWithState(ShardRoutingState.INITIALIZING).size(), is(2 * this.numberOfShards + 1)); + Diff fullDiff = clusterState.routingTable().diff(oldClusterState.getRoutingTable()); + Diff incrementalDiff = clusterState.routingTable().incrementalDiff(oldClusterState.getRoutingTable()); + RoutingTable newRoutingTable = incrementalDiff.apply(oldClusterState.getRoutingTable()); + for (IndexRoutingTable indexRoutingTable : clusterState.routingTable()) { + assertEquals(clusterState.routingTable().version(), newRoutingTable.version()); + assertEquals(indexRoutingTable, newRoutingTable.index(indexRoutingTable.getIndex())); + } + RoutingTable newRoutingTableWithFullDiff = fullDiff.apply(oldClusterState.getRoutingTable()); + for (IndexRoutingTable indexRoutingTable : clusterState.routingTable()) { + assertEquals(clusterState.routingTable().version(), newRoutingTableWithFullDiff.version()); + assertEquals(indexRoutingTable, newRoutingTableWithFullDiff.index(indexRoutingTable.getIndex())); + } + } + + public void testRoutingTableDiffWithReplicaAdded() { + assertThat(clusterState.routingTable().shardsWithState(ShardRoutingState.UNASSIGNED).size(), is(this.totalNumberOfShards)); + initPrimaries(); + int expectedUnassignedShardCount = this.totalNumberOfShards - 2 * this.numberOfShards; + assertThat(clusterState.routingTable().shardsWithState(ShardRoutingState.UNASSIGNED).size(), is(expectedUnassignedShardCount)); + assertThat(clusterState.routingTable().shardsWithState(ShardRoutingState.INITIALIZING).size(), is(2 * this.numberOfShards)); + ClusterState oldClusterState = clusterState; + + // update replica count for TEST_INDEX_1 + RoutingTable updatedRoutingTable = RoutingTable.builder(clusterState.routingTable()) + .updateNumberOfReplicas(this.numberOfReplicas + 1, new String[] { TEST_INDEX_1 }) + .build(); + Metadata metadata = Metadata.builder(clusterState.metadata()) + .updateNumberOfReplicas(this.numberOfReplicas + 1, new String[] { TEST_INDEX_1 }) + .build(); + clusterState = ClusterState.builder(clusterState).routingTable(updatedRoutingTable).metadata(metadata).build(); + assertThat( + clusterState.routingTable().shardsWithState(ShardRoutingState.UNASSIGNED).size(), + is(expectedUnassignedShardCount + this.numberOfShards) + ); + Diff fullDiff = clusterState.routingTable().diff(oldClusterState.getRoutingTable()); + Diff incrementalDiff = clusterState.routingTable().incrementalDiff(oldClusterState.getRoutingTable()); + RoutingTable newRoutingTable = incrementalDiff.apply(oldClusterState.getRoutingTable()); + for (IndexRoutingTable indexRoutingTable : clusterState.routingTable()) { + assertEquals(clusterState.routingTable().version(), newRoutingTable.version()); + assertEquals(indexRoutingTable, newRoutingTable.index(indexRoutingTable.getIndex())); + } + RoutingTable newRoutingTableWithFullDiff = fullDiff.apply(oldClusterState.getRoutingTable()); + for (IndexRoutingTable indexRoutingTable : clusterState.routingTable()) { + assertEquals(clusterState.routingTable().version(), newRoutingTableWithFullDiff.version()); + assertEquals(indexRoutingTable, 
newRoutingTableWithFullDiff.index(indexRoutingTable.getIndex())); + } + } + + public void testRoutingTableDiffWithReplicaRemoved() { + assertThat(clusterState.routingTable().shardsWithState(ShardRoutingState.UNASSIGNED).size(), is(this.totalNumberOfShards)); + initPrimaries(); + int expectedUnassignedShardCount = this.totalNumberOfShards - 2 * this.numberOfShards; + assertThat(clusterState.routingTable().shardsWithState(ShardRoutingState.UNASSIGNED).size(), is(expectedUnassignedShardCount)); + assertThat(clusterState.routingTable().shardsWithState(ShardRoutingState.INITIALIZING).size(), is(2 * this.numberOfShards)); + ClusterState oldClusterState = clusterState; + + // update replica count for TEST_INDEX_1 + RoutingTable updatedRoutingTable = RoutingTable.builder(clusterState.routingTable()) + .updateNumberOfReplicas(this.numberOfReplicas - 1, new String[] { TEST_INDEX_1 }) + .build(); + Metadata metadata = Metadata.builder(clusterState.metadata()) + .updateNumberOfReplicas(this.numberOfReplicas - 1, new String[] { TEST_INDEX_1 }) + .build(); + clusterState = ClusterState.builder(clusterState).routingTable(updatedRoutingTable).metadata(metadata).build(); + assertThat( + clusterState.routingTable().shardsWithState(ShardRoutingState.UNASSIGNED).size(), + is(expectedUnassignedShardCount - this.numberOfShards) + ); + Diff fullDiff = clusterState.routingTable().diff(oldClusterState.getRoutingTable()); + Diff incrementalDiff = clusterState.routingTable().incrementalDiff(oldClusterState.getRoutingTable()); + RoutingTable newRoutingTable = incrementalDiff.apply(oldClusterState.getRoutingTable()); + for (IndexRoutingTable indexRoutingTable : clusterState.routingTable()) { + assertEquals(clusterState.routingTable().version(), newRoutingTable.version()); + assertEquals(indexRoutingTable, newRoutingTable.index(indexRoutingTable.getIndex())); + } + RoutingTable newRoutingTableWithFullDiff = fullDiff.apply(oldClusterState.getRoutingTable()); + for (IndexRoutingTable indexRoutingTable : clusterState.routingTable()) { + assertEquals(clusterState.routingTable().version(), newRoutingTableWithFullDiff.version()); + assertEquals(indexRoutingTable, newRoutingTableWithFullDiff.index(indexRoutingTable.getIndex())); + } + } + + public void testRoutingTableDiffsWithStartedState() { + assertThat(clusterState.routingTable().shardsWithState(ShardRoutingState.UNASSIGNED).size(), is(this.totalNumberOfShards)); + initPrimaries(); + assertThat( + clusterState.routingTable().shardsWithState(ShardRoutingState.UNASSIGNED).size(), + is(this.totalNumberOfShards - 2 * this.numberOfShards) + ); + assertThat(clusterState.routingTable().shardsWithState(ShardRoutingState.INITIALIZING).size(), is(2 * this.numberOfShards)); + + startInitializingShards(TEST_INDEX_1); + assertThat(clusterState.routingTable().shardsWithState(ShardRoutingState.STARTED).size(), is(this.numberOfShards)); + int initializingExpected = this.numberOfShards + this.numberOfShards * this.numberOfReplicas; + assertThat(clusterState.routingTable().shardsWithState(ShardRoutingState.INITIALIZING).size(), is(initializingExpected)); + assertThat( + clusterState.routingTable().shardsWithState(ShardRoutingState.UNASSIGNED).size(), + is(this.totalNumberOfShards - initializingExpected - this.numberOfShards) + ); + + startInitializingShards(TEST_INDEX_2); + assertThat(clusterState.routingTable().shardsWithState(ShardRoutingState.STARTED).size(), is(2 * this.numberOfShards)); + initializingExpected = 2 * this.numberOfShards * this.numberOfReplicas; + 
assertThat(clusterState.routingTable().shardsWithState(ShardRoutingState.INITIALIZING).size(), is(initializingExpected)); + assertThat( + clusterState.routingTable().shardsWithState(ShardRoutingState.UNASSIGNED).size(), + is(this.totalNumberOfShards - initializingExpected - 2 * this.numberOfShards) + ); + ClusterState oldClusterState = clusterState; + // start a random replica to change a single shard routing + clusterState = startRandomInitializingShard(clusterState, ALLOCATION_SERVICE); + assertThat(clusterState.routingTable().shardsWithState(ShardRoutingState.INITIALIZING).size(), is(initializingExpected - 1)); + assertThat(clusterState.routingTable().shardsWithState(ShardRoutingState.STARTED).size(), is(2 * this.numberOfShards + 1)); + Diff fullDiff = clusterState.routingTable().diff(oldClusterState.getRoutingTable()); + Diff incrementalDiff = clusterState.routingTable().incrementalDiff(oldClusterState.getRoutingTable()); + RoutingTable newRoutingTable = incrementalDiff.apply(oldClusterState.getRoutingTable()); + for (IndexRoutingTable indexRoutingTable : clusterState.routingTable()) { + assertEquals(clusterState.routingTable().version(), newRoutingTable.version()); + assertEquals(indexRoutingTable, newRoutingTable.index(indexRoutingTable.getIndex())); + } + RoutingTable newRoutingTableWithFullDiff = fullDiff.apply(oldClusterState.getRoutingTable()); + for (IndexRoutingTable indexRoutingTable : clusterState.routingTable()) { + assertEquals(clusterState.routingTable().version(), newRoutingTableWithFullDiff.version()); + assertEquals(indexRoutingTable, newRoutingTableWithFullDiff.index(indexRoutingTable.getIndex())); + } + } + +} diff --git a/server/src/test/java/org/opensearch/cluster/routing/RoutingTableTests.java b/server/src/test/java/org/opensearch/cluster/routing/RoutingTableTests.java index 97283f561d6d4..d8a67c09e442c 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/RoutingTableTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/RoutingTableTests.java @@ -714,4 +714,5 @@ public static IndexMetadata updateActiveAllocations(IndexRoutingTable indexRouti } return imdBuilder.build(); } + } diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/FailedShardsRoutingTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/FailedShardsRoutingTests.java index 5e3b74ee138ab..f8e1c609e6ee8 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/allocation/FailedShardsRoutingTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/FailedShardsRoutingTests.java @@ -68,6 +68,7 @@ import static org.opensearch.cluster.routing.ShardRoutingState.STARTED; import static org.opensearch.cluster.routing.ShardRoutingState.UNASSIGNED; import static org.opensearch.common.util.FeatureFlags.REMOTE_STORE_MIGRATION_EXPERIMENTAL; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING; @@ -853,6 +854,8 @@ public void testPreferReplicaOnRemoteNodeForPrimaryPromotion() { // add a remote node and start primary shard Map remoteStoreNodeAttributes = Map.of( + 
REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, + "REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY", REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, "REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_VALUE", REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY, diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteStoreMigrationAllocationDeciderTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteStoreMigrationAllocationDeciderTests.java index 5b29922f2400c..e6e81c94e7f32 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteStoreMigrationAllocationDeciderTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteStoreMigrationAllocationDeciderTests.java @@ -68,6 +68,7 @@ import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REPLICATION_TYPE; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeService.Direction.NONE; import static org.opensearch.node.remotestore.RemoteStoreNodeService.Direction.REMOTE_STORE; import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING; @@ -617,6 +618,7 @@ private DiscoveryNode getRemoteNode() { REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, "REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_VALUE" ); + attributes.put(REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, "REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_VALUE"); return new DiscoveryNode( UUIDs.base64UUID(), buildNewFakeTransportAddress(), diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/ShardsTieringAllocationTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/ShardsTieringAllocationTests.java new file mode 100644 index 0000000000000..8d45ebd2781b1 --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/ShardsTieringAllocationTests.java @@ -0,0 +1,128 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.cluster.routing.allocation; + +import org.opensearch.Version; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.routing.RoutingNode; +import org.opensearch.cluster.routing.RoutingNodes; +import org.opensearch.cluster.routing.RoutingPool; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.common.util.FeatureFlags; +import org.opensearch.index.IndexModule; +import org.opensearch.test.FeatureFlagSetter; +import org.junit.Before; + +import static org.opensearch.cluster.routing.RoutingPool.LOCAL_ONLY; +import static org.opensearch.cluster.routing.RoutingPool.REMOTE_CAPABLE; +import static org.opensearch.cluster.routing.RoutingPool.getIndexPool; +import static org.opensearch.index.IndexModule.INDEX_STORE_LOCALITY_SETTING; + +public class ShardsTieringAllocationTests extends TieringAllocationBaseTestCase { + + @Before + public void setup() { + FeatureFlagSetter.set(FeatureFlags.TIERED_REMOTE_INDEX); + } + + public void testShardsInLocalPool() { + int localOnlyNodes = 5; + int remoteCapableNodes = 3; + int localIndices = 5; + int remoteIndices = 0; + ClusterState clusterState = createInitialCluster(localOnlyNodes, remoteCapableNodes, localIndices, remoteIndices); + AllocationService service = this.createRemoteCapableAllocationService(); + // assign shards to respective nodes + clusterState = allocateShardsAndBalance(clusterState, service); + RoutingNodes routingNodes = clusterState.getRoutingNodes(); + RoutingAllocation allocation = getRoutingAllocation(clusterState, routingNodes); + assertEquals(0, routingNodes.unassigned().size()); + + for (ShardRouting shard : clusterState.getRoutingTable().allShards()) { + assertFalse(shard.unassigned()); + RoutingPool shardPool = RoutingPool.getShardPool(shard, allocation); + assertEquals(LOCAL_ONLY, shardPool); + } + } + + public void testShardsInRemotePool() { + int localOnlyNodes = 7; + int remoteCapableNodes = 3; + int localIndices = 0; + int remoteIndices = 13; + ClusterState clusterState = createInitialCluster(localOnlyNodes, remoteCapableNodes, localIndices, remoteIndices); + AllocationService service = this.createRemoteCapableAllocationService(); + // assign shards to respective nodes + clusterState = allocateShardsAndBalance(clusterState, service); + RoutingNodes routingNodes = clusterState.getRoutingNodes(); + RoutingAllocation allocation = getRoutingAllocation(clusterState, routingNodes); + assertEquals(0, routingNodes.unassigned().size()); + + for (ShardRouting shard : clusterState.getRoutingTable().allShards()) { + assertFalse(shard.unassigned()); + RoutingPool shardPool = RoutingPool.getShardPool(shard, allocation); + assertEquals(REMOTE_CAPABLE, shardPool); + } + } + + public void testShardsWithTiering() { + int localOnlyNodes = 15; + int remoteCapableNodes = 13; + int localIndices = 10; + int remoteIndices = 0; + ClusterState clusterState = createInitialCluster(localOnlyNodes, remoteCapableNodes, localIndices, remoteIndices); + AllocationService service = this.createRemoteCapableAllocationService(); + // assign shards to respective nodes + clusterState = allocateShardsAndBalance(clusterState, service); + // put indices in the hot to warm tiering state + clusterState = updateIndexMetadataForTiering( + clusterState, + localIndices, + IndexModule.TieringState.HOT_TO_WARM.name(), + IndexModule.DataLocalityType.PARTIAL.name() + ); + // trigger shard relocation + clusterState = 
allocateShardsAndBalance(clusterState, service); + RoutingNodes routingNodes = clusterState.getRoutingNodes(); + RoutingAllocation allocation = getRoutingAllocation(clusterState, routingNodes); + assertEquals(0, routingNodes.unassigned().size()); + + for (ShardRouting shard : clusterState.getRoutingTable().allShards()) { + assertFalse(shard.unassigned()); + RoutingNode node = routingNodes.node(shard.currentNodeId()); + RoutingPool nodePool = RoutingPool.getNodePool(node); + RoutingPool shardPool = RoutingPool.getShardPool(shard, allocation); + assertEquals(RoutingPool.REMOTE_CAPABLE, shardPool); + assertEquals(nodePool, shardPool); + } + } + + public void testShardPoolForPartialIndices() { + String index = "test-index"; + IndexMetadata indexMetadata = IndexMetadata.builder(index) + .settings(settings(Version.CURRENT).put(INDEX_STORE_LOCALITY_SETTING.getKey(), IndexModule.DataLocalityType.PARTIAL.name())) + .numberOfShards(PRIMARIES) + .numberOfReplicas(REPLICAS) + .build(); + RoutingPool indexPool = getIndexPool(indexMetadata); + assertEquals(REMOTE_CAPABLE, indexPool); + } + + public void testShardPoolForFullIndices() { + String index = "test-index"; + IndexMetadata indexMetadata = IndexMetadata.builder(index) + .settings(settings(Version.CURRENT).put(INDEX_STORE_LOCALITY_SETTING.getKey(), IndexModule.DataLocalityType.FULL.name())) + .numberOfShards(PRIMARIES) + .numberOfReplicas(REPLICAS) + .build(); + RoutingPool indexPool = getIndexPool(indexMetadata); + assertEquals(LOCAL_ONLY, indexPool); + } +} diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/TieringAllocationBaseTestCase.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/TieringAllocationBaseTestCase.java new file mode 100644 index 0000000000000..aba6fe74e0634 --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/TieringAllocationBaseTestCase.java @@ -0,0 +1,47 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.cluster.routing.allocation; + +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.common.SuppressForbidden; +import org.opensearch.common.settings.Settings; + +import static org.opensearch.index.IndexModule.INDEX_STORE_LOCALITY_SETTING; +import static org.opensearch.index.IndexModule.INDEX_TIERING_STATE; + +@SuppressForbidden(reason = "feature flag overrides") +public abstract class TieringAllocationBaseTestCase extends RemoteShardsBalancerBaseTestCase { + + public ClusterState updateIndexMetadataForTiering( + ClusterState clusterState, + int localIndices, + String tieringState, + String dataLocality + ) { + Metadata.Builder mb = Metadata.builder(clusterState.metadata()); + for (int i = 0; i < localIndices; i++) { + IndexMetadata indexMetadata = clusterState.metadata().index(getIndexName(i, false)); + Settings settings = indexMetadata.getSettings(); + mb.put( + IndexMetadata.builder(indexMetadata) + .settings( + Settings.builder() + .put(settings) + .put(INDEX_TIERING_STATE.getKey(), tieringState) + .put(INDEX_STORE_LOCALITY_SETTING.getKey(), dataLocality) + ) + ); + } + Metadata metadata = mb.build(); + return ClusterState.builder(clusterState).metadata(metadata).build(); + } +} diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/UpdateNumberOfReplicasTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/UpdateNumberOfReplicasTests.java index b53e520581321..5082d6ab0a37c 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/allocation/UpdateNumberOfReplicasTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/UpdateNumberOfReplicasTests.java @@ -41,7 +41,9 @@ import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.IndexShardRoutingTable; import org.opensearch.cluster.routing.RoutingTable; +import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.common.settings.Settings; import static org.opensearch.cluster.routing.ShardRoutingState.INITIALIZING; @@ -188,4 +190,248 @@ public void testUpdateNumberOfReplicas() { newState = strategy.reroute(clusterState, "reroute"); assertThat(newState, equalTo(clusterState)); } + + public void testUpdateNumberOfReplicasDoesNotImpactSearchReplicas() { + AllocationService strategy = createAllocationService( + Settings.builder().put("cluster.routing.allocation.node_concurrent_recoveries", 10).build() + ); + + logger.info("Building initial routing table"); + + Metadata metadata = Metadata.builder() + .put( + IndexMetadata.builder("test") + .settings(settings(Version.CURRENT)) + .numberOfShards(1) + .numberOfReplicas(1) + .numberOfSearchReplicas(1) + ) + .build(); + + RoutingTable routingTable = RoutingTable.builder().addAsNew(metadata.index("test")).build(); + + ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) + .metadata(metadata) + .routingTable(routingTable) + .build(); + + assertEquals(1, routingTable.index("test").shards().size()); + IndexShardRoutingTable shardRoutingTable = routingTable.index("test").shard(0); + // 1 primary, 1 replica, 1 search replica + assertEquals(3, shardRoutingTable.size()); + assertEquals(2, shardRoutingTable.replicaShards().size()); + assertEquals(1, 
shardRoutingTable.searchOnlyReplicas().size()); + assertEquals(UNASSIGNED, shardRoutingTable.shards().get(0).state()); + assertEquals(UNASSIGNED, shardRoutingTable.shards().get(1).state()); + assertEquals(UNASSIGNED, shardRoutingTable.shards().get(2).state()); + assertNull(shardRoutingTable.shards().get(0).currentNodeId()); + assertNull(shardRoutingTable.shards().get(1).currentNodeId()); + assertNull(shardRoutingTable.shards().get(2).currentNodeId()); + + logger.info("Adding three nodes and performing rerouting"); + clusterState = ClusterState.builder(clusterState) + .nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2")).add(newNode("node3"))) + .build(); + + clusterState = strategy.reroute(clusterState, "reroute"); + + logger.info("Start all the primary shards"); + clusterState = startInitializingShardsAndReroute(strategy, clusterState); + + logger.info("Start all the replica and search shards"); + ClusterState newState = startInitializingShardsAndReroute(strategy, clusterState); + assertNotEquals(newState, clusterState); + clusterState = newState; + + shardRoutingTable = clusterState.routingTable().index("test").shard(0); + final String nodeHoldingPrimary = shardRoutingTable.primaryShard().currentNodeId(); + final String nodeHoldingSearchReplica = shardRoutingTable.searchOnlyReplicas().get(0).currentNodeId(); + final String nodeHoldingReplica = shardRoutingTable.writerReplicas().get(0).currentNodeId(); + + assertNotEquals(nodeHoldingPrimary, nodeHoldingReplica); + assertNotEquals(nodeHoldingPrimary, nodeHoldingSearchReplica); + assertNotEquals(nodeHoldingReplica, nodeHoldingSearchReplica); + + assertEquals( + "There is a single shard routing table in the cluster", + clusterState.routingTable().index("test").shards().size(), + 1 + ); + assertEquals("There are three shards as part of the shard routing table", 3, shardRoutingTable.size()); + assertEquals("There are two replicas, one search and one write", 2, shardRoutingTable.replicaShards().size()); + assertEquals(1, shardRoutingTable.searchOnlyReplicas().size()); + assertEquals(STARTED, shardRoutingTable.shards().get(0).state()); + assertEquals(STARTED, shardRoutingTable.shards().get(1).state()); + assertEquals(STARTED, shardRoutingTable.shards().get(2).state()); + + logger.info("add another replica"); + final String[] indices = { "test" }; + routingTable = RoutingTable.builder(clusterState.routingTable()).updateNumberOfReplicas(2, indices).build(); + metadata = Metadata.builder(clusterState.metadata()).updateNumberOfReplicas(2, indices).build(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).metadata(metadata).build(); + IndexMetadata indexMetadata = clusterState.metadata().index("test"); + assertEquals(2, indexMetadata.getNumberOfReplicas()); + assertEquals(1, indexMetadata.getNumberOfSearchOnlyReplicas()); + shardRoutingTable = clusterState.routingTable().index("test").shard(0); + assertEquals(4, shardRoutingTable.size()); + assertEquals(3, shardRoutingTable.replicaShards().size()); + assertEquals(2, shardRoutingTable.writerReplicas().size()); + assertEquals(1, shardRoutingTable.searchOnlyReplicas().size()); + assertEquals(shardRoutingTable.primaryShard().state(), STARTED); + assertEquals(shardRoutingTable.searchOnlyReplicas().get(0).state(), STARTED); + + ShardRouting existingReplica = shardRoutingTable.writerReplicas().get(0); + assertEquals(existingReplica.state(), STARTED); + assertEquals(existingReplica.currentNodeId(), nodeHoldingReplica); + ShardRouting newReplica = 
shardRoutingTable.writerReplicas().get(0); + assertEquals(newReplica.state(), STARTED); + + logger.info("Add another node and start the added replica"); + clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).add(newNode("node4"))).build(); + newState = strategy.reroute(clusterState, "reroute"); + newState = startInitializingShardsAndReroute(strategy, newState); + assertNotEquals(newState, clusterState); + clusterState = newState; + shardRoutingTable = clusterState.routingTable().index("test").shard(0); + for (ShardRouting replicaShard : shardRoutingTable.replicaShards()) { + assertEquals(replicaShard.state(), STARTED); + } + assertTrue(shardRoutingTable.replicaShards().stream().allMatch(r -> r.state().equals(STARTED))); + + // remove both replicas and assert search replica is unchanged + routingTable = RoutingTable.builder(clusterState.routingTable()).updateNumberOfReplicas(0, indices).build(); + metadata = Metadata.builder(clusterState.metadata()).updateNumberOfReplicas(0, indices).build(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).metadata(metadata).build(); + indexMetadata = clusterState.metadata().index("test"); + assertEquals(0, indexMetadata.getNumberOfReplicas()); + assertEquals(1, indexMetadata.getNumberOfSearchOnlyReplicas()); + shardRoutingTable = clusterState.routingTable().index("test").shard(0); + assertEquals(2, shardRoutingTable.size()); + assertEquals(1, shardRoutingTable.replicaShards().size()); + assertEquals(0, shardRoutingTable.writerReplicas().size()); + assertEquals(1, shardRoutingTable.searchOnlyReplicas().size()); + assertEquals(shardRoutingTable.primaryShard().state(), STARTED); + assertEquals(shardRoutingTable.searchOnlyReplicas().get(0).state(), STARTED); + assertEquals(shardRoutingTable.searchOnlyReplicas().get(0).currentNodeId(), nodeHoldingSearchReplica); + } + + public void testUpdateSearchReplicasDoesNotImpactRegularReplicas() { + AllocationService strategy = createAllocationService( + Settings.builder().put("cluster.routing.allocation.node_concurrent_recoveries", 10).build() + ); + + logger.info("Building initial routing table"); + + Metadata metadata = Metadata.builder() + .put( + IndexMetadata.builder("test") + .settings(settings(Version.CURRENT)) + .numberOfShards(1) + .numberOfReplicas(1) + .numberOfSearchReplicas(1) + ) + .build(); + + RoutingTable routingTable = RoutingTable.builder().addAsNew(metadata.index("test")).build(); + + ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) + .metadata(metadata) + .routingTable(routingTable) + .build(); + + assertEquals(1, routingTable.index("test").shards().size()); + IndexShardRoutingTable shardRoutingTable = routingTable.index("test").shard(0); + // 1 primary, 1 replica, 1 search replica + assertEquals(3, shardRoutingTable.size()); + assertEquals(2, shardRoutingTable.replicaShards().size()); + assertEquals(1, shardRoutingTable.searchOnlyReplicas().size()); + assertEquals(UNASSIGNED, shardRoutingTable.shards().get(0).state()); + assertEquals(UNASSIGNED, shardRoutingTable.shards().get(1).state()); + assertEquals(UNASSIGNED, shardRoutingTable.shards().get(2).state()); + assertNull(shardRoutingTable.shards().get(0).currentNodeId()); + assertNull(shardRoutingTable.shards().get(1).currentNodeId()); + assertNull(shardRoutingTable.shards().get(2).currentNodeId()); + + logger.info("Adding three nodes and performing rerouting"); + clusterState = 
ClusterState.builder(clusterState) + .nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2")).add(newNode("node3"))) + .build(); + + clusterState = strategy.reroute(clusterState, "reroute"); + + logger.info("Start all the primary shards"); + clusterState = startInitializingShardsAndReroute(strategy, clusterState); + + logger.info("Start all the replica and search shards"); + ClusterState newState = startInitializingShardsAndReroute(strategy, clusterState); + assertNotEquals(newState, clusterState); + clusterState = newState; + + shardRoutingTable = clusterState.routingTable().index("test").shard(0); + final String nodeHoldingPrimary = shardRoutingTable.primaryShard().currentNodeId(); + final String nodeHoldingSearchReplica = shardRoutingTable.searchOnlyReplicas().get(0).currentNodeId(); + final String nodeHoldingReplica = shardRoutingTable.writerReplicas().get(0).currentNodeId(); + + assertNotEquals(nodeHoldingPrimary, nodeHoldingReplica); + assertNotEquals(nodeHoldingPrimary, nodeHoldingSearchReplica); + assertNotEquals(nodeHoldingReplica, nodeHoldingSearchReplica); + + assertEquals( + "There is a single shard routing table in the cluster", + clusterState.routingTable().index("test").shards().size(), + 1 + ); + assertEquals("There are three shards as part of the shard routing table", 3, shardRoutingTable.size()); + assertEquals("There are two replicas, one search and one write", 2, shardRoutingTable.replicaShards().size()); + assertEquals(1, shardRoutingTable.searchOnlyReplicas().size()); + assertEquals(STARTED, shardRoutingTable.shards().get(0).state()); + assertEquals(STARTED, shardRoutingTable.shards().get(1).state()); + assertEquals(STARTED, shardRoutingTable.shards().get(2).state()); + + logger.info("add another search replica"); + final String[] indices = { "test" }; + routingTable = RoutingTable.builder(clusterState.routingTable()).updateNumberOfSearchReplicas(2, indices).build(); + metadata = Metadata.builder(clusterState.metadata()).updateNumberOfSearchReplicas(2, indices).build(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).metadata(metadata).build(); + IndexMetadata indexMetadata = clusterState.metadata().index("test"); + assertEquals(1, indexMetadata.getNumberOfReplicas()); + assertEquals(2, indexMetadata.getNumberOfSearchOnlyReplicas()); + shardRoutingTable = clusterState.routingTable().index("test").shard(0); + assertEquals(4, shardRoutingTable.size()); + assertEquals(3, shardRoutingTable.replicaShards().size()); + assertEquals(1, shardRoutingTable.writerReplicas().size()); + assertEquals(2, shardRoutingTable.searchOnlyReplicas().size()); + assertEquals(shardRoutingTable.primaryShard().state(), STARTED); + assertEquals(shardRoutingTable.writerReplicas().get(0).state(), STARTED); + assertEquals(shardRoutingTable.searchOnlyReplicas().get(0).state(), STARTED); + assertEquals(shardRoutingTable.searchOnlyReplicas().get(1).state(), UNASSIGNED); + + logger.info("Add another node and start the added search replica"); + clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).add(newNode("node4"))).build(); + newState = strategy.reroute(clusterState, "reroute"); + newState = startInitializingShardsAndReroute(strategy, newState); + assertNotEquals(newState, clusterState); + clusterState = newState; + shardRoutingTable = clusterState.routingTable().index("test").shard(0); + for (ShardRouting replicaShard : shardRoutingTable.replicaShards()) { + assertEquals(replicaShard.state(), STARTED); + }
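+ // with node4 in the cluster, the newly added search replica is allocated, so every replica copy (the writer replica and both search replicas) is now STARTED + 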
assertTrue(shardRoutingTable.replicaShards().stream().allMatch(r -> r.state().equals(STARTED))); + + // remove both search replicas and assert the regular replica is unchanged + routingTable = RoutingTable.builder(clusterState.routingTable()).updateNumberOfSearchReplicas(0, indices).build(); + metadata = Metadata.builder(clusterState.metadata()).updateNumberOfSearchReplicas(0, indices).build(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).metadata(metadata).build(); + indexMetadata = clusterState.metadata().index("test"); + assertEquals(1, indexMetadata.getNumberOfReplicas()); + assertEquals(0, indexMetadata.getNumberOfSearchOnlyReplicas()); + shardRoutingTable = clusterState.routingTable().index("test").shard(0); + assertEquals(2, shardRoutingTable.size()); + assertEquals(1, shardRoutingTable.replicaShards().size()); + assertEquals(1, shardRoutingTable.writerReplicas().size()); + assertEquals(0, shardRoutingTable.searchOnlyReplicas().size()); + assertEquals(shardRoutingTable.primaryShard().state(), STARTED); + assertEquals(shardRoutingTable.replicaShards().get(0).state(), STARTED); + assertEquals(shardRoutingTable.replicaShards().get(0).currentNodeId(), nodeHoldingReplica); + } } diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/allocator/TimeBoundBalancedShardsAllocatorTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/allocator/TimeBoundBalancedShardsAllocatorTests.java index a10c305686638..45a0bd7b18afd 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/allocation/allocator/TimeBoundBalancedShardsAllocatorTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/allocator/TimeBoundBalancedShardsAllocatorTests.java @@ -8,6 +8,7 @@ package org.opensearch.cluster.routing.allocation.allocator; +import org.opensearch.OpenSearchException; import org.opensearch.Version; import org.opensearch.cluster.ClusterInfo; import org.opensearch.cluster.ClusterName; @@ -17,6 +18,7 @@ import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.RerouteService; import org.opensearch.cluster.routing.RoutingNodes; import org.opensearch.cluster.routing.RoutingTable; import org.opensearch.cluster.routing.ShardRouting; @@ -26,14 +28,20 @@ import org.opensearch.cluster.routing.allocation.decider.AllocationDeciders; import org.opensearch.cluster.routing.allocation.decider.Decision; import org.opensearch.cluster.routing.allocation.decider.SameShardAllocationDecider; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.Priority; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; +import org.opensearch.test.ClusterServiceUtils; +import org.opensearch.threadpool.TestThreadPool; +import org.junit.After; import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.atomic.AtomicBoolean; import static org.opensearch.cluster.routing.ShardRoutingState.INITIALIZING; import static org.opensearch.cluster.routing.ShardRoutingState.STARTED; @@ -41,26 +49,49 @@ public class TimeBoundBalancedShardsAllocatorTests extends OpenSearchAllocationTestCase { + private TestThreadPool threadPool; + private ClusterService clusterService; + private ClusterState state; + private final DiscoveryNode node1 = 
newNode("node1", "node1", Collections.singletonMap("zone", "1a")); private final DiscoveryNode node2 = newNode("node2", "node2", Collections.singletonMap("zone", "1b")); private final DiscoveryNode node3 = newNode("node3", "node3", Collections.singletonMap("zone", "1c")); - public void testAllUnassignedShardsAllocatedWhenNoTimeOut() { + @After + @Override + public void tearDown() throws Exception { + super.tearDown(); + if (threadPool != null) { + final boolean terminated = terminate(threadPool); + assert terminated; + } + if (clusterService != null) { + clusterService.close(); + } + } + + public void setupStateAndService(Metadata metadata, RoutingTable routingTable) { + state = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) + .metadata(metadata) + .routingTable(routingTable) + .nodes(DiscoveryNodes.builder().add(node1).add(node2).add(node3)) + .build(); + threadPool = new TestThreadPool(getTestName()); + clusterService = ClusterServiceUtils.createClusterService(state, threadPool); + } + + public void testAllUnassignedShardsAllocatedWhenNoTimeOutAndRerouteNotScheduled() { int numberOfIndices = 2; int numberOfShards = 5; int numberOfReplicas = 1; int totalPrimaryCount = numberOfIndices * numberOfShards; int totalShardCount = numberOfIndices * (numberOfShards * (numberOfReplicas + 1)); Settings.Builder settings = Settings.builder(); - // passing total shard count for timed out latch such that no shard times out - BalancedShardsAllocator allocator = new TestBalancedShardsAllocator(settings.build(), new CountDownLatch(totalShardCount)); + // passing sufficiently high count for timeout latch to simulate no time out + BalancedShardsAllocator allocator = new TestBalancedShardsAllocator(settings.build(), new CountDownLatch(Integer.MAX_VALUE)); Metadata metadata = buildMetadata(Metadata.builder(), numberOfIndices, numberOfShards, numberOfReplicas); RoutingTable routingTable = buildRoutingTable(metadata); - ClusterState state = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) - .metadata(metadata) - .routingTable(routingTable) - .nodes(DiscoveryNodes.builder().add(node1).add(node2).add(node3)) - .build(); + setupStateAndService(metadata, routingTable); RoutingAllocation allocation = new RoutingAllocation( yesAllocationDeciders(), new RoutingNodes(state, false), @@ -69,6 +100,18 @@ public void testAllUnassignedShardsAllocatedWhenNoTimeOut() { null, System.nanoTime() ); + AtomicBoolean rerouteScheduled = new AtomicBoolean(false); + final RerouteService rerouteService = (reason, priority, listener) -> { + if (randomBoolean()) { + listener.onFailure(new OpenSearchException("simulated")); + } else { + listener.onResponse(clusterService.state()); + } + assertEquals("reroute after balanced shards allocator timed out", reason); + assertEquals(Priority.HIGH, priority); + rerouteScheduled.compareAndSet(false, true); + }; + allocator.setRerouteService(rerouteService); allocator.allocate(allocation); List initializingShards = allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING); int node1Recoveries = allocation.routingNodes().getInitialPrimariesIncomingRecoveries(node1.getId()); @@ -77,9 +120,10 @@ public void testAllUnassignedShardsAllocatedWhenNoTimeOut() { assertEquals(totalShardCount, initializingShards.size()); assertEquals(0, allocation.routingNodes().unassigned().ignored().size()); assertEquals(totalPrimaryCount, node1Recoveries + node2Recoveries + node3Recoveries); + assertFalse(rerouteScheduled.get()); } - public 
void testAllUnassignedShardsIgnoredWhenTimedOut() { + public void testAllUnassignedShardsIgnoredWhenTimedOutAndRerouteScheduled() { int numberOfIndices = 2; int numberOfShards = 5; int numberOfReplicas = 1; @@ -89,11 +133,7 @@ public void testAllUnassignedShardsIgnoredWhenTimedOut() { BalancedShardsAllocator allocator = new TestBalancedShardsAllocator(settings.build(), new CountDownLatch(0)); Metadata metadata = buildMetadata(Metadata.builder(), numberOfIndices, numberOfShards, numberOfReplicas); RoutingTable routingTable = buildRoutingTable(metadata); - ClusterState state = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) - .metadata(metadata) - .routingTable(routingTable) - .nodes(DiscoveryNodes.builder().add(node1).add(node2).add(node3)) - .build(); + setupStateAndService(metadata, routingTable); RoutingAllocation allocation = new RoutingAllocation( yesAllocationDeciders(), new RoutingNodes(state, false), @@ -102,6 +142,18 @@ public void testAllUnassignedShardsIgnoredWhenTimedOut() { null, System.nanoTime() ); + AtomicBoolean rerouteScheduled = new AtomicBoolean(false); + final RerouteService rerouteService = (reason, priority, listener) -> { + if (randomBoolean()) { + listener.onFailure(new OpenSearchException("simulated")); + } else { + listener.onResponse(clusterService.state()); + } + assertEquals("reroute after balanced shards allocator timed out", reason); + assertEquals(Priority.HIGH, priority); + rerouteScheduled.compareAndSet(false, true); + }; + allocator.setRerouteService(rerouteService); allocator.allocate(allocation); List initializingShards = allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING); int node1Recoveries = allocation.routingNodes().getInitialPrimariesIncomingRecoveries(node1.getId()); @@ -110,9 +162,10 @@ public void testAllUnassignedShardsIgnoredWhenTimedOut() { assertEquals(0, initializingShards.size()); assertEquals(totalShardCount, allocation.routingNodes().unassigned().ignored().size()); assertEquals(0, node1Recoveries + node2Recoveries + node3Recoveries); + assertTrue(rerouteScheduled.get()); } - public void testAllocatePartialPrimaryShardsUntilTimedOut() { + public void testAllocatePartialPrimaryShardsUntilTimedOutAndRerouteScheduled() { int numberOfIndices = 2; int numberOfShards = 5; int numberOfReplicas = 1; @@ -123,11 +176,7 @@ public void testAllocatePartialPrimaryShardsUntilTimedOut() { BalancedShardsAllocator allocator = new TestBalancedShardsAllocator(settings.build(), new CountDownLatch(shardsToAllocate)); Metadata metadata = buildMetadata(Metadata.builder(), numberOfIndices, numberOfShards, numberOfReplicas); RoutingTable routingTable = buildRoutingTable(metadata); - ClusterState state = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) - .metadata(metadata) - .routingTable(routingTable) - .nodes(DiscoveryNodes.builder().add(node1).add(node2).add(node3)) - .build(); + setupStateAndService(metadata, routingTable); RoutingAllocation allocation = new RoutingAllocation( yesAllocationDeciders(), new RoutingNodes(state, false), @@ -136,6 +185,18 @@ public void testAllocatePartialPrimaryShardsUntilTimedOut() { null, System.nanoTime() ); + AtomicBoolean rerouteScheduled = new AtomicBoolean(false); + final RerouteService rerouteService = (reason, priority, listener) -> { + if (randomBoolean()) { + listener.onFailure(new OpenSearchException("simulated")); + } else { + listener.onResponse(clusterService.state()); + } + assertEquals("reroute after balanced shards allocator 
timed out", reason); + assertEquals(Priority.HIGH, priority); + rerouteScheduled.compareAndSet(false, true); + }; + allocator.setRerouteService(rerouteService); allocator.allocate(allocation); List initializingShards = allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING); int node1Recoveries = allocation.routingNodes().getInitialPrimariesIncomingRecoveries(node1.getId()); @@ -144,9 +205,10 @@ public void testAllocatePartialPrimaryShardsUntilTimedOut() { assertEquals(shardsToAllocate, initializingShards.size()); assertEquals(totalShardCount - shardsToAllocate, allocation.routingNodes().unassigned().ignored().size()); assertEquals(shardsToAllocate, node1Recoveries + node2Recoveries + node3Recoveries); + assertTrue(rerouteScheduled.get()); } - public void testAllocateAllPrimaryShardsAndPartialReplicaShardsUntilTimedOut() { + public void testAllocateAllPrimaryShardsAndPartialReplicaShardsUntilTimedOutAndRerouteScheduled() { int numberOfIndices = 2; int numberOfShards = 5; int numberOfReplicas = 1; @@ -158,11 +220,7 @@ public void testAllocateAllPrimaryShardsAndPartialReplicaShardsUntilTimedOut() { BalancedShardsAllocator allocator = new TestBalancedShardsAllocator(settings.build(), new CountDownLatch(shardsToAllocate)); Metadata metadata = buildMetadata(Metadata.builder(), numberOfIndices, numberOfShards, numberOfReplicas); RoutingTable routingTable = buildRoutingTable(metadata); - ClusterState state = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) - .metadata(metadata) - .routingTable(routingTable) - .nodes(DiscoveryNodes.builder().add(node1).add(node2).add(node3)) - .build(); + setupStateAndService(metadata, routingTable); RoutingAllocation allocation = new RoutingAllocation( yesAllocationDeciders(), new RoutingNodes(state, false), @@ -171,6 +229,18 @@ public void testAllocateAllPrimaryShardsAndPartialReplicaShardsUntilTimedOut() { null, System.nanoTime() ); + AtomicBoolean rerouteScheduled = new AtomicBoolean(false); + final RerouteService rerouteService = (reason, priority, listener) -> { + if (randomBoolean()) { + listener.onFailure(new OpenSearchException("simulated")); + } else { + listener.onResponse(clusterService.state()); + } + assertEquals("reroute after balanced shards allocator timed out", reason); + assertEquals(Priority.HIGH, priority); + rerouteScheduled.compareAndSet(false, true); + }; + allocator.setRerouteService(rerouteService); allocator.allocate(allocation); List initializingShards = allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING); int node1Recoveries = allocation.routingNodes().getInitialPrimariesIncomingRecoveries(node1.getId()); @@ -179,20 +249,17 @@ public void testAllocateAllPrimaryShardsAndPartialReplicaShardsUntilTimedOut() { assertEquals(shardsToAllocate, initializingShards.size()); assertEquals(totalShardCount - shardsToAllocate, allocation.routingNodes().unassigned().ignored().size()); assertEquals(numberOfShards * numberOfIndices, node1Recoveries + node2Recoveries + node3Recoveries); + assertTrue(rerouteScheduled.get()); } - public void testAllShardsMoveWhenExcludedAndTimeoutNotBreached() { + public void testAllShardsMoveWhenExcludedAndTimeoutNotBreachedAndRerouteNotScheduled() { int numberOfIndices = 3; int numberOfShards = 5; int numberOfReplicas = 1; int totalShardCount = numberOfIndices * (numberOfShards * (numberOfReplicas + 1)); Metadata metadata = buildMetadata(Metadata.builder(), numberOfIndices, numberOfShards, numberOfReplicas); RoutingTable routingTable = 
buildRoutingTable(metadata); - ClusterState state = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) - .metadata(metadata) - .routingTable(routingTable) - .nodes(DiscoveryNodes.builder().add(node1).add(node2).add(node3)) - .build(); + setupStateAndService(metadata, routingTable); MockAllocationService allocationService = createAllocationService(); state = applyStartedShardsUntilNoChange(state, allocationService); // check all shards allocated @@ -200,8 +267,7 @@ public void testAllShardsMoveWhenExcludedAndTimeoutNotBreached() { assertEquals(totalShardCount, state.getRoutingNodes().shardsWithState(STARTED).size()); int node1ShardCount = state.getRoutingNodes().node("node1").size(); Settings settings = Settings.builder().put("cluster.routing.allocation.exclude.zone", "1a").build(); - int shardsToMove = 10 + 1000; // such that time out is never breached - BalancedShardsAllocator allocator = new TestBalancedShardsAllocator(settings, new CountDownLatch(shardsToMove)); + BalancedShardsAllocator allocator = new TestBalancedShardsAllocator(settings, new CountDownLatch(Integer.MAX_VALUE)); RoutingAllocation allocation = new RoutingAllocation( allocationDecidersForExcludeAPI(settings), new RoutingNodes(state, false), @@ -210,30 +276,39 @@ public void testAllShardsMoveWhenExcludedAndTimeoutNotBreached() { null, System.nanoTime() ); + AtomicBoolean rerouteScheduled = new AtomicBoolean(false); + final RerouteService rerouteService = (reason, priority, listener) -> { + if (randomBoolean()) { + listener.onFailure(new OpenSearchException("simulated")); + } else { + listener.onResponse(clusterService.state()); + } + assertEquals("reroute after balanced shards allocator timed out", reason); + assertEquals(Priority.HIGH, priority); + rerouteScheduled.compareAndSet(false, true); + }; + allocator.setRerouteService(rerouteService); allocator.allocate(allocation); List relocatingShards = allocation.routingNodes().shardsWithState(ShardRoutingState.RELOCATING); assertEquals(node1ShardCount, relocatingShards.size()); + assertFalse(rerouteScheduled.get()); } - public void testNoShardsMoveWhenExcludedAndTimeoutBreached() { + public void testNoShardsMoveWhenExcludedAndTimeoutBreachedAndRerouteScheduled() { int numberOfIndices = 3; int numberOfShards = 5; int numberOfReplicas = 1; int totalShardCount = numberOfIndices * (numberOfShards * (numberOfReplicas + 1)); Metadata metadata = buildMetadata(Metadata.builder(), numberOfIndices, numberOfShards, numberOfReplicas); RoutingTable routingTable = buildRoutingTable(metadata); - ClusterState state = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) - .metadata(metadata) - .routingTable(routingTable) - .nodes(DiscoveryNodes.builder().add(node1).add(node2).add(node3)) - .build(); + setupStateAndService(metadata, routingTable); MockAllocationService allocationService = createAllocationService(); state = applyStartedShardsUntilNoChange(state, allocationService); // check all shards allocated assertEquals(0, state.getRoutingNodes().shardsWithState(INITIALIZING).size()); assertEquals(totalShardCount, state.getRoutingNodes().shardsWithState(STARTED).size()); Settings settings = Settings.builder().put("cluster.routing.allocation.exclude.zone", "1a").build(); - int shardsToMove = 0; // such that time out is never breached + int shardsToMove = 0; // such that time out is breached BalancedShardsAllocator allocator = new TestBalancedShardsAllocator(settings, new CountDownLatch(shardsToMove)); RoutingAllocation allocation 
= new RoutingAllocation( allocationDecidersForExcludeAPI(settings), @@ -243,23 +318,32 @@ public void testNoShardsMoveWhenExcludedAndTimeoutBreached() { null, System.nanoTime() ); + AtomicBoolean rerouteScheduled = new AtomicBoolean(false); + final RerouteService rerouteService = (reason, priority, listener) -> { + if (randomBoolean()) { + listener.onFailure(new OpenSearchException("simulated")); + } else { + listener.onResponse(clusterService.state()); + } + assertEquals("reroute after balanced shards allocator timed out", reason); + assertEquals(Priority.HIGH, priority); + rerouteScheduled.compareAndSet(false, true); + }; + allocator.setRerouteService(rerouteService); allocator.allocate(allocation); List relocatingShards = allocation.routingNodes().shardsWithState(ShardRoutingState.RELOCATING); assertEquals(0, relocatingShards.size()); + assertTrue(rerouteScheduled.get()); } - public void testPartialShardsMoveWhenExcludedAndTimeoutBreached() { + public void testPartialShardsMoveWhenExcludedAndTimeoutBreachedAndRerouteScheduled() { int numberOfIndices = 3; int numberOfShards = 5; int numberOfReplicas = 1; int totalShardCount = numberOfIndices * (numberOfShards * (numberOfReplicas + 1)); Metadata metadata = buildMetadata(Metadata.builder(), numberOfIndices, numberOfShards, numberOfReplicas); RoutingTable routingTable = buildRoutingTable(metadata); - ClusterState state = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) - .metadata(metadata) - .routingTable(routingTable) - .nodes(DiscoveryNodes.builder().add(node1).add(node2).add(node3)) - .build(); + setupStateAndService(metadata, routingTable); MockAllocationService allocationService = createAllocationService(); state = applyStartedShardsUntilNoChange(state, allocationService); // check all shards allocated @@ -279,23 +363,32 @@ public void testPartialShardsMoveWhenExcludedAndTimeoutBreached() { null, System.nanoTime() ); + AtomicBoolean rerouteScheduled = new AtomicBoolean(false); + final RerouteService rerouteService = (reason, priority, listener) -> { + if (randomBoolean()) { + listener.onFailure(new OpenSearchException("simulated")); + } else { + listener.onResponse(clusterService.state()); + } + assertEquals("reroute after balanced shards allocator timed out", reason); + assertEquals(Priority.HIGH, priority); + rerouteScheduled.compareAndSet(false, true); + }; + allocator.setRerouteService(rerouteService); allocator.allocate(allocation); List relocatingShards = allocation.routingNodes().shardsWithState(ShardRoutingState.RELOCATING); assertEquals(shardsToMove / 3, relocatingShards.size()); + assertTrue(rerouteScheduled.get()); } - public void testClusterRebalancedWhenNotTimedOut() { + public void testClusterRebalancedWhenNotTimedOutAndRerouteNotScheduled() { int numberOfIndices = 1; int numberOfShards = 15; int numberOfReplicas = 1; int totalShardCount = numberOfIndices * (numberOfShards * (numberOfReplicas + 1)); Metadata metadata = buildMetadata(Metadata.builder(), numberOfIndices, numberOfShards, numberOfReplicas); RoutingTable routingTable = buildRoutingTable(metadata); - ClusterState state = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) - .metadata(metadata) - .routingTable(routingTable) - .nodes(DiscoveryNodes.builder().add(node1).add(node2).add(node3)) - .build(); + setupStateAndService(metadata, routingTable); MockAllocationService allocationService = createAllocationService( Settings.builder().put("cluster.routing.allocation.exclude.zone", "1a").build() ); // 
such that no shards are allocated to node1 @@ -306,8 +399,7 @@ public void testClusterRebalancedWhenNotTimedOut() { assertEquals(totalShardCount, state.getRoutingNodes().shardsWithState(STARTED).size()); assertEquals(0, node1ShardCount); Settings newSettings = Settings.builder().put("cluster.routing.allocation.exclude.zone", "").build(); - int shardsToMove = 1000; // such that time out is never breached - BalancedShardsAllocator allocator = new TestBalancedShardsAllocator(newSettings, new CountDownLatch(shardsToMove)); + BalancedShardsAllocator allocator = new TestBalancedShardsAllocator(newSettings, new CountDownLatch(Integer.MAX_VALUE)); RoutingAllocation allocation = new RoutingAllocation( allocationDecidersForExcludeAPI(newSettings), new RoutingNodes(state, false), @@ -316,23 +408,32 @@ public void testClusterRebalancedWhenNotTimedOut() { null, System.nanoTime() ); + AtomicBoolean rerouteScheduled = new AtomicBoolean(false); + final RerouteService rerouteService = (reason, priority, listener) -> { + if (randomBoolean()) { + listener.onFailure(new OpenSearchException("simulated")); + } else { + listener.onResponse(clusterService.state()); + } + assertEquals("reroute after balanced shards allocator timed out", reason); + assertEquals(Priority.HIGH, priority); + rerouteScheduled.compareAndSet(false, true); + }; + allocator.setRerouteService(rerouteService); allocator.allocate(allocation); List relocatingShards = allocation.routingNodes().shardsWithState(ShardRoutingState.RELOCATING); assertEquals(totalShardCount / 3, relocatingShards.size()); + assertFalse(rerouteScheduled.get()); } - public void testClusterNotRebalancedWhenTimedOut() { + public void testClusterNotRebalancedWhenTimedOutAndRerouteScheduled() { int numberOfIndices = 1; int numberOfShards = 15; int numberOfReplicas = 1; int totalShardCount = numberOfIndices * (numberOfShards * (numberOfReplicas + 1)); Metadata metadata = buildMetadata(Metadata.builder(), numberOfIndices, numberOfShards, numberOfReplicas); RoutingTable routingTable = buildRoutingTable(metadata); - ClusterState state = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) - .metadata(metadata) - .routingTable(routingTable) - .nodes(DiscoveryNodes.builder().add(node1).add(node2).add(node3)) - .build(); + setupStateAndService(metadata, routingTable); MockAllocationService allocationService = createAllocationService( Settings.builder().put("cluster.routing.allocation.exclude.zone", "1a").build() ); // such that no shards are allocated to node1 @@ -353,23 +454,32 @@ public void testClusterNotRebalancedWhenTimedOut() { null, System.nanoTime() ); + AtomicBoolean rerouteScheduled = new AtomicBoolean(false); + final RerouteService rerouteService = (reason, priority, listener) -> { + if (randomBoolean()) { + listener.onFailure(new OpenSearchException("simulated")); + } else { + listener.onResponse(clusterService.state()); + } + assertEquals("reroute after balanced shards allocator timed out", reason); + assertEquals(Priority.HIGH, priority); + rerouteScheduled.compareAndSet(false, true); + }; + allocator.setRerouteService(rerouteService); allocator.allocate(allocation); List relocatingShards = allocation.routingNodes().shardsWithState(ShardRoutingState.RELOCATING); assertEquals(0, relocatingShards.size()); + assertTrue(rerouteScheduled.get()); } - public void testClusterPartialRebalancedWhenTimedOut() { + public void testClusterPartialRebalancedWhenTimedOutAndRerouteScheduled() { int numberOfIndices = 1; int numberOfShards = 15; int 
numberOfReplicas = 1; int totalShardCount = numberOfIndices * (numberOfShards * (numberOfReplicas + 1)); Metadata metadata = buildMetadata(Metadata.builder(), numberOfIndices, numberOfShards, numberOfReplicas); RoutingTable routingTable = buildRoutingTable(metadata); - ClusterState state = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) - .metadata(metadata) - .routingTable(routingTable) - .nodes(DiscoveryNodes.builder().add(node1).add(node2).add(node3)) - .build(); + setupStateAndService(metadata, routingTable); MockAllocationService allocationService = createAllocationService( Settings.builder().put("cluster.routing.allocation.exclude.zone", "1a").build() ); // such that no shards are allocated to node1 @@ -404,9 +514,22 @@ public Decision canRebalance(ShardRouting shardRouting, RoutingAllocation alloca null, System.nanoTime() ); + AtomicBoolean rerouteScheduled = new AtomicBoolean(false); + final RerouteService rerouteService = (reason, priority, listener) -> { + if (randomBoolean()) { + listener.onFailure(new OpenSearchException("simulated")); + } else { + listener.onResponse(clusterService.state()); + } + assertEquals("reroute after balanced shards allocator timed out", reason); + assertEquals(Priority.HIGH, priority); + rerouteScheduled.compareAndSet(false, true); + }; + allocator.setRerouteService(rerouteService); allocator.allocate(allocation); List relocatingShards = allocation.routingNodes().shardsWithState(ShardRoutingState.RELOCATING); assertEquals(3, relocatingShards.size()); + assertTrue(rerouteScheduled.get()); } public void testAllocatorNeverTimedOutIfValueIsMinusOne() { diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/decider/FilterAllocationDeciderTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/decider/FilterAllocationDeciderTests.java index 2e303887e0f1b..f226c45553d57 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/allocation/decider/FilterAllocationDeciderTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/decider/FilterAllocationDeciderTests.java @@ -323,7 +323,7 @@ private ClusterState createInitialClusterState(AllocationService service, Settin return createInitialClusterState(service, indexSettings, Settings.EMPTY); } - private ClusterState createInitialClusterState(AllocationService service, Settings idxSettings, Settings clusterSettings) { + static ClusterState createInitialClusterState(AllocationService service, Settings idxSettings, Settings clusterSettings) { Metadata.Builder metadata = Metadata.builder(); metadata.persistentSettings(clusterSettings); final Settings.Builder indexSettings = settings(Version.CURRENT).put(idxSettings); diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/decider/SearchReplicaAllocationDeciderTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/decider/SearchReplicaAllocationDeciderTests.java new file mode 100644 index 0000000000000..8d4f4cdee26cc --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/decider/SearchReplicaAllocationDeciderTests.java @@ -0,0 +1,133 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.cluster.routing.allocation.decider; + +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.EmptyClusterInfoService; +import org.opensearch.cluster.OpenSearchAllocationTestCase; +import org.opensearch.cluster.routing.RecoverySource; +import org.opensearch.cluster.routing.RoutingTable; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.UnassignedInfo; +import org.opensearch.cluster.routing.allocation.AllocationService; +import org.opensearch.cluster.routing.allocation.RoutingAllocation; +import org.opensearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Settings; +import org.opensearch.snapshots.EmptySnapshotsInfoService; +import org.opensearch.test.gateway.TestGatewayAllocator; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; + +import static org.opensearch.cluster.routing.allocation.decider.SearchReplicaAllocationDecider.SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_SETTING; + +public class SearchReplicaAllocationDeciderTests extends OpenSearchAllocationTestCase { + + public void testSearchReplicaRoutingDedicatedIncludes() { + // we aren't using a settingsModule here so we need to set feature flag gated setting + Set> settings = new HashSet<>(ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + settings.add(SEARCH_REPLICA_ROUTING_INCLUDE_GROUP_SETTING); + ClusterSettings clusterSettings = new ClusterSettings(Settings.builder().build(), settings); + Settings initialSettings = Settings.builder() + .put("cluster.routing.allocation.search.replica.dedicated.include._id", "node1,node2") + .build(); + + SearchReplicaAllocationDecider filterAllocationDecider = new SearchReplicaAllocationDecider(initialSettings, clusterSettings); + AllocationDeciders allocationDeciders = new AllocationDeciders( + Arrays.asList( + filterAllocationDecider, + new SameShardAllocationDecider(Settings.EMPTY, clusterSettings), + new ReplicaAfterPrimaryActiveAllocationDecider() + ) + ); + AllocationService service = new AllocationService( + allocationDeciders, + new TestGatewayAllocator(), + new BalancedShardsAllocator(Settings.EMPTY), + EmptyClusterInfoService.INSTANCE, + EmptySnapshotsInfoService.INSTANCE + ); + ClusterState state = FilterAllocationDeciderTests.createInitialClusterState(service, Settings.EMPTY, Settings.EMPTY); + RoutingTable routingTable = state.routingTable(); + RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, state.getRoutingNodes(), state, null, null, 0); + allocation.debugDecision(true); + + ShardRouting searchReplica = ShardRouting.newUnassigned( + routingTable.index("sourceIndex").shard(0).shardId(), + false, + true, + RecoverySource.PeerRecoverySource.INSTANCE, + new UnassignedInfo(UnassignedInfo.Reason.NODE_LEFT, "") + ); + + ShardRouting regularReplica = ShardRouting.newUnassigned( + routingTable.index("sourceIndex").shard(0).shardId(), + false, + false, + RecoverySource.PeerRecoverySource.INSTANCE, + new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "") + ); + + ShardRouting primary = ShardRouting.newUnassigned( + routingTable.index("sourceIndex").shard(0).shardId(), + true, + false, + RecoverySource.PeerRecoverySource.INSTANCE, + new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "") + ); + + Decision.Single decision = (Decision.Single) filterAllocationDecider.canAllocate( + searchReplica, + 
state.getRoutingNodes().node("node2"), + allocation + ); + assertEquals(decision.toString(), Decision.Type.YES, decision.type()); + decision = (Decision.Single) filterAllocationDecider.canAllocate(searchReplica, state.getRoutingNodes().node("node1"), allocation); + assertEquals(decision.toString(), Decision.Type.YES, decision.type()); + + decision = (Decision.Single) filterAllocationDecider.canAllocate(regularReplica, state.getRoutingNodes().node("node2"), allocation); + assertEquals(decision.toString(), Decision.Type.NO, decision.type()); + decision = (Decision.Single) filterAllocationDecider.canAllocate(regularReplica, state.getRoutingNodes().node("node1"), allocation); + assertEquals(decision.toString(), Decision.Type.NO, decision.type()); + + decision = (Decision.Single) filterAllocationDecider.canAllocate(primary, state.getRoutingNodes().node("node1"), allocation); + assertEquals(decision.toString(), Decision.Type.NO, decision.type()); + decision = (Decision.Single) filterAllocationDecider.canAllocate(primary, state.getRoutingNodes().node("node2"), allocation); + assertEquals(decision.toString(), Decision.Type.NO, decision.type()); + + Settings updatedSettings = Settings.builder() + .put("cluster.routing.allocation.search.replica.dedicated.include._id", "node2") + .build(); + clusterSettings.applySettings(updatedSettings); + + decision = (Decision.Single) filterAllocationDecider.canAllocate(searchReplica, state.getRoutingNodes().node("node2"), allocation); + assertEquals(decision.toString(), Decision.Type.YES, decision.type()); + decision = (Decision.Single) filterAllocationDecider.canAllocate(searchReplica, state.getRoutingNodes().node("node1"), allocation); + assertEquals(decision.toString(), Decision.Type.NO, decision.type()); + decision = (Decision.Single) filterAllocationDecider.canRemain(searchReplica, state.getRoutingNodes().node("node1"), allocation); + assertEquals(decision.toString(), Decision.Type.NO, decision.type()); + + decision = (Decision.Single) filterAllocationDecider.canAllocate(regularReplica, state.getRoutingNodes().node("node2"), allocation); + assertEquals(decision.toString(), Decision.Type.NO, decision.type()); + decision = (Decision.Single) filterAllocationDecider.canAllocate(regularReplica, state.getRoutingNodes().node("node1"), allocation); + assertEquals(decision.toString(), Decision.Type.YES, decision.type()); + decision = (Decision.Single) filterAllocationDecider.canRemain(regularReplica, state.getRoutingNodes().node("node1"), allocation); + assertEquals(decision.toString(), Decision.Type.YES, decision.type()); + + decision = (Decision.Single) filterAllocationDecider.canAllocate(primary, state.getRoutingNodes().node("node1"), allocation); + assertEquals(decision.toString(), Decision.Type.YES, decision.type()); + decision = (Decision.Single) filterAllocationDecider.canAllocate(primary, state.getRoutingNodes().node("node2"), allocation); + assertEquals(decision.toString(), Decision.Type.NO, decision.type()); + decision = (Decision.Single) filterAllocationDecider.canRemain(primary, state.getRoutingNodes().node("node1"), allocation); + assertEquals(decision.toString(), Decision.Type.YES, decision.type()); + } +} diff --git a/server/src/test/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableServiceFactoryTests.java b/server/src/test/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableServiceFactoryTests.java index 86f4b9502d6ab..683942fd34a37 100644 --- 
a/server/src/test/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableServiceFactoryTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableServiceFactoryTests.java @@ -10,7 +10,6 @@ import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; -import org.opensearch.common.util.FeatureFlags; import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.fs.FsRepository; import org.opensearch.test.OpenSearchTestCase; @@ -20,7 +19,7 @@ import java.util.function.Supplier; -import static org.opensearch.common.util.FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL; +import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_PUBLICATION_SETTING_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY; public class RemoteRoutingTableServiceFactoryTests extends OpenSearchTestCase { @@ -50,9 +49,8 @@ public void testGetServiceWhenRemoteRoutingEnabled() { Settings settings = Settings.builder() .put("node.attr." + REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, "routing_repository") .put(FsRepository.REPOSITORIES_COMPRESS_SETTING.getKey(), false) + .put(REMOTE_PUBLICATION_SETTING_KEY, "true") .build(); - Settings nodeSettings = Settings.builder().put(REMOTE_PUBLICATION_EXPERIMENTAL, "true").build(); - FeatureFlags.initializeFeatureFlags(nodeSettings); RemoteRoutingTableService service = RemoteRoutingTableServiceFactory.getService( repositoriesService, settings, diff --git a/server/src/test/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableServiceTests.java b/server/src/test/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableServiceTests.java index 74254f1a1987f..63501f878d55d 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/remote/RemoteRoutingTableServiceTests.java @@ -13,14 +13,13 @@ import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.Diff; -import org.opensearch.cluster.DiffableUtils; import org.opensearch.cluster.coordination.CoordinationMetadata; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.routing.IndexRoutingTable; -import org.opensearch.cluster.routing.IndexShardRoutingTable; import org.opensearch.cluster.routing.RoutingTable; import org.opensearch.cluster.routing.RoutingTableIncrementalDiff; +import org.opensearch.cluster.routing.StringKeyDiffProvider; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.blobstore.BlobContainer; import org.opensearch.common.blobstore.BlobPath; @@ -29,7 +28,6 @@ import org.opensearch.common.compress.DeflateCompressor; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; -import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.util.TestCapturingListener; import org.opensearch.core.action.ActionListener; import org.opensearch.core.compress.Compressor; @@ -57,17 +55,16 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Base64; -import java.util.HashMap; import java.util.List; import java.util.Locale; -import java.util.Map; import java.util.concurrent.CountDownLatch; import java.util.function.Supplier; import org.mockito.Mockito; -import static 
org.opensearch.common.util.FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL; import static org.opensearch.gateway.remote.ClusterMetadataManifestTests.randomUploadedIndexMetadataList; +import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_PUBLICATION_SETTING_KEY; +import static org.opensearch.gateway.remote.RemoteClusterStateServiceTests.generateClusterStateWithOneIndex; import static org.opensearch.gateway.remote.RemoteClusterStateUtils.CLUSTER_STATE_PATH_TOKEN; import static org.opensearch.gateway.remote.RemoteClusterStateUtils.DELIMITER; import static org.opensearch.gateway.remote.RemoteClusterStateUtils.PATH_DELIMITER; @@ -116,6 +113,7 @@ public void setup() { Settings settings = Settings.builder() .put("node.attr." + REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, "routing_repository") + .put(REMOTE_PUBLICATION_SETTING_KEY, "true") .build(); clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); clusterService = mock(ClusterService.class); @@ -128,8 +126,6 @@ public void setup() { when(repositoriesService.repository("routing_repository")).thenReturn(blobStoreRepository); when(blobStoreRepository.blobStore()).thenReturn(blobStore); when(blobStore.blobContainer(any())).thenReturn(blobContainer); - Settings nodeSettings = Settings.builder().put(REMOTE_PUBLICATION_EXPERIMENTAL, "true").build(); - FeatureFlags.initializeFeatureFlags(nodeSettings); compressor = new NoneCompressor(); basePath = BlobPath.cleanPath().add("base-path"); when(blobStoreRepository.basePath()).thenReturn(basePath); @@ -187,10 +183,9 @@ public void testGetIndicesRoutingMapDiff() { RoutingTable routingTable = RoutingTable.builder().addAsNew(indexMetadata).build(); - DiffableUtils.MapDiff> diff = remoteRoutingTableService - .getIndicesRoutingMapDiff(routingTable, routingTable); - assertEquals(0, diff.getUpserts().size()); - assertEquals(0, diff.getDeletes().size()); + StringKeyDiffProvider diff = remoteRoutingTableService.getIndicesRoutingMapDiff(routingTable, routingTable); + assertEquals(0, diff.provideDiff().getUpserts().size()); + assertEquals(0, diff.provideDiff().getDeletes().size()); // Reversing order to check for equality without order. IndexRoutingTable indexRouting = routingTable.getIndicesRouting().get(indexName); @@ -201,8 +196,8 @@ public void testGetIndicesRoutingMapDiff() { RoutingTable routingTable2 = RoutingTable.builder().add(indexRoutingTableReversed).build(); diff = remoteRoutingTableService.getIndicesRoutingMapDiff(routingTable, routingTable2); - assertEquals(0, diff.getUpserts().size()); - assertEquals(0, diff.getDeletes().size()); + assertEquals(0, diff.provideDiff().getUpserts().size()); + assertEquals(0, diff.provideDiff().getDeletes().size()); } public void testGetChangedIndicesRouting() { @@ -220,7 +215,10 @@ public void testGetChangedIndicesRouting() { assertEquals( 0, - remoteRoutingTableService.getIndicesRoutingMapDiff(state.getRoutingTable(), state.getRoutingTable()).getUpserts().size() + remoteRoutingTableService.getIndicesRoutingMapDiff(state.getRoutingTable(), state.getRoutingTable()) + .provideDiff() + .getUpserts() + .size() ); // Reversing order to check for equality without order. 
@@ -234,7 +232,10 @@ public void testGetChangedIndicesRouting() { .build(); assertEquals( 0, - remoteRoutingTableService.getIndicesRoutingMapDiff(state.getRoutingTable(), newState.getRoutingTable()).getUpserts().size() + remoteRoutingTableService.getIndicesRoutingMapDiff(state.getRoutingTable(), newState.getRoutingTable()) + .provideDiff() + .getUpserts() + .size() ); } @@ -259,13 +260,12 @@ public void testGetIndicesRoutingMapDiffIndexAdded() { ).numberOfShards(noOfShards).numberOfReplicas(noOfReplicas).build(); RoutingTable routingTable2 = RoutingTable.builder(routingTable).addAsNew(indexMetadata2).build(); - DiffableUtils.MapDiff> diff = remoteRoutingTableService - .getIndicesRoutingMapDiff(routingTable, routingTable2); - assertEquals(1, diff.getUpserts().size()); - assertNotNull(diff.getUpserts().get(indexName2)); - assertEquals(noOfShards, diff.getUpserts().get(indexName2).getShards().size()); + StringKeyDiffProvider diff = remoteRoutingTableService.getIndicesRoutingMapDiff(routingTable, routingTable2); + assertEquals(1, diff.provideDiff().getUpserts().size()); + assertNotNull(diff.provideDiff().getUpserts().get(indexName2)); + assertEquals(noOfShards, diff.provideDiff().getUpserts().get(indexName2).getShards().size()); - assertEquals(0, diff.getDeletes().size()); + assertEquals(0, diff.provideDiff().getDeletes().size()); } public void testGetIndicesRoutingMapDiffShardChanged() { @@ -289,17 +289,19 @@ public void testGetIndicesRoutingMapDiffShardChanged() { ).numberOfShards(noOfShards + 1).numberOfReplicas(noOfReplicas).build(); RoutingTable routingTable2 = RoutingTable.builder().addAsNew(indexMetadata2).build(); - DiffableUtils.MapDiff> diff = remoteRoutingTableService - .getIndicesRoutingMapDiff(routingTable, routingTable2); - assertEquals(0, diff.getUpserts().size()); - assertEquals(1, diff.getDiffs().size()); - assertNotNull(diff.getDiffs().get(indexName)); - assertEquals(noOfShards + 1, diff.getDiffs().get(indexName).apply(routingTable.indicesRouting().get(indexName)).shards().size()); + StringKeyDiffProvider diff = remoteRoutingTableService.getIndicesRoutingMapDiff(routingTable, routingTable2); + assertEquals(0, diff.provideDiff().getUpserts().size()); + assertEquals(1, diff.provideDiff().getDiffs().size()); + assertNotNull(diff.provideDiff().getDiffs().get(indexName)); + assertEquals( + noOfShards + 1, + diff.provideDiff().getDiffs().get(indexName).apply(routingTable.indicesRouting().get(indexName)).shards().size() + ); assertEquals( noOfReplicas + 1, - diff.getDiffs().get(indexName).apply(routingTable.indicesRouting().get(indexName)).getShards().get(0).getSize() + diff.provideDiff().getDiffs().get(indexName).apply(routingTable.indicesRouting().get(indexName)).getShards().get(0).getSize() ); - assertEquals(0, diff.getDeletes().size()); + assertEquals(0, diff.provideDiff().getDeletes().size()); final IndexMetadata indexMetadata3 = new IndexMetadata.Builder(indexName).settings( Settings.builder() @@ -310,15 +312,18 @@ public void testGetIndicesRoutingMapDiffShardChanged() { RoutingTable routingTable3 = RoutingTable.builder().addAsNew(indexMetadata3).build(); diff = remoteRoutingTableService.getIndicesRoutingMapDiff(routingTable2, routingTable3); - assertEquals(0, diff.getUpserts().size()); - assertEquals(1, diff.getDiffs().size()); - assertNotNull(diff.getDiffs().get(indexName)); - assertEquals(noOfShards + 1, diff.getDiffs().get(indexName).apply(routingTable.indicesRouting().get(indexName)).shards().size()); + assertEquals(0, diff.provideDiff().getUpserts().size()); + 
assertEquals(1, diff.provideDiff().getDiffs().size()); + assertNotNull(diff.provideDiff().getDiffs().get(indexName)); + assertEquals( + noOfShards + 1, + diff.provideDiff().getDiffs().get(indexName).apply(routingTable.indicesRouting().get(indexName)).shards().size() + ); assertEquals( noOfReplicas + 2, - diff.getDiffs().get(indexName).apply(routingTable.indicesRouting().get(indexName)).getShards().get(0).getSize() + diff.provideDiff().getDiffs().get(indexName).apply(routingTable.indicesRouting().get(indexName)).getShards().get(0).getSize() ); - assertEquals(0, diff.getDeletes().size()); + assertEquals(0, diff.provideDiff().getDeletes().size()); } public void testGetIndicesRoutingMapDiffShardDetailChanged() { @@ -335,13 +340,15 @@ public void testGetIndicesRoutingMapDiffShardDetailChanged() { RoutingTable routingTable = RoutingTable.builder().addAsNew(indexMetadata).build(); RoutingTable routingTable2 = RoutingTable.builder().addAsRecovery(indexMetadata).build(); - DiffableUtils.MapDiff> diff = remoteRoutingTableService - .getIndicesRoutingMapDiff(routingTable, routingTable2); - assertEquals(1, diff.getDiffs().size()); - assertNotNull(diff.getDiffs().get(indexName)); - assertEquals(noOfShards, diff.getDiffs().get(indexName).apply(routingTable.indicesRouting().get(indexName)).shards().size()); - assertEquals(0, diff.getUpserts().size()); - assertEquals(0, diff.getDeletes().size()); + StringKeyDiffProvider diff = remoteRoutingTableService.getIndicesRoutingMapDiff(routingTable, routingTable2); + assertEquals(1, diff.provideDiff().getDiffs().size()); + assertNotNull(diff.provideDiff().getDiffs().get(indexName)); + assertEquals( + noOfShards, + diff.provideDiff().getDiffs().get(indexName).apply(routingTable.indicesRouting().get(indexName)).shards().size() + ); + assertEquals(0, diff.provideDiff().getUpserts().size()); + assertEquals(0, diff.provideDiff().getDeletes().size()); } public void testGetIndicesRoutingMapDiffIndexDeleted() { @@ -363,13 +370,12 @@ public void testGetIndicesRoutingMapDiffIndexDeleted() { ).numberOfShards(between(1, 1000)).numberOfReplicas(randomInt(10)).build(); RoutingTable routingTable2 = RoutingTable.builder().addAsNew(indexMetadata2).build(); - DiffableUtils.MapDiff> diff = remoteRoutingTableService - .getIndicesRoutingMapDiff(routingTable, routingTable2); - assertEquals(1, diff.getUpserts().size()); - assertNotNull(diff.getUpserts().get(indexName2)); + StringKeyDiffProvider diff = remoteRoutingTableService.getIndicesRoutingMapDiff(routingTable, routingTable2); + assertEquals(1, diff.provideDiff().getUpserts().size()); + assertNotNull(diff.provideDiff().getUpserts().get(indexName2)); - assertEquals(1, diff.getDeletes().size()); - assertEquals(indexName, diff.getDeletes().get(0)); + assertEquals(1, diff.provideDiff().getDeletes().size()); + assertEquals(indexName, diff.provideDiff().getDeletes().get(0)); } public void testGetAllUploadedIndicesRouting() { @@ -493,11 +499,17 @@ public void testIndicesRoutingDiffWhenIndexDeleted() { assertEquals( 1, - remoteRoutingTableService.getIndicesRoutingMapDiff(state.getRoutingTable(), updatedRoutingTable).getDeletes().size() + remoteRoutingTableService.getIndicesRoutingMapDiff(state.getRoutingTable(), updatedRoutingTable) + .provideDiff() + .getDeletes() + .size() ); assertEquals( indexNameToDelete, - remoteRoutingTableService.getIndicesRoutingMapDiff(state.getRoutingTable(), updatedRoutingTable).getDeletes().get(0) + remoteRoutingTableService.getIndicesRoutingMapDiff(state.getRoutingTable(), updatedRoutingTable) + .provideDiff() 
+ .getDeletes() + .get(0) ); } @@ -524,19 +536,29 @@ public void testIndicesRoutingDiffWhenIndexDeletedAndAdded() { assertEquals( 1, - remoteRoutingTableService.getIndicesRoutingMapDiff(state.getRoutingTable(), updatedRoutingTable).getDeletes().size() + remoteRoutingTableService.getIndicesRoutingMapDiff(state.getRoutingTable(), updatedRoutingTable) + .provideDiff() + .getDeletes() + .size() ); assertEquals( indexNameToDelete, - remoteRoutingTableService.getIndicesRoutingMapDiff(state.getRoutingTable(), updatedRoutingTable).getDeletes().get(0) + remoteRoutingTableService.getIndicesRoutingMapDiff(state.getRoutingTable(), updatedRoutingTable) + .provideDiff() + .getDeletes() + .get(0) ); assertEquals( 1, - remoteRoutingTableService.getIndicesRoutingMapDiff(state.getRoutingTable(), updatedRoutingTable).getUpserts().size() + remoteRoutingTableService.getIndicesRoutingMapDiff(state.getRoutingTable(), updatedRoutingTable) + .provideDiff() + .getUpserts() + .size() ); assertTrue( remoteRoutingTableService.getIndicesRoutingMapDiff(state.getRoutingTable(), updatedRoutingTable) + .provideDiff() .getUpserts() .containsKey(indexName) ); @@ -571,27 +593,17 @@ public void testGetAsyncIndexRoutingReadAction() throws Exception { public void testGetAsyncIndexRoutingTableDiffReadAction() throws Exception { String indexName = randomAlphaOfLength(randomIntBetween(1, 50)); - ClusterState currentState = createClusterState(indexName); - - // Get the IndexRoutingTable from the current state - IndexRoutingTable indexRoutingTable = currentState.routingTable().index(indexName); - Map shardRoutingTables = indexRoutingTable.getShards(); + ClusterState previousState = generateClusterStateWithOneIndex(indexName, 5, 1, false).build(); + ClusterState currentState = generateClusterStateWithOneIndex(indexName, 5, 2, true).build(); - RoutingTableIncrementalDiff.IndexRoutingTableIncrementalDiff indexRoutingTableDiff = - new RoutingTableIncrementalDiff.IndexRoutingTableIncrementalDiff(new ArrayList<>(shardRoutingTables.values())); - - // Create the map for RoutingTableIncrementalDiff - Map> diffs = new HashMap<>(); - diffs.put(indexName, indexRoutingTableDiff); - - RoutingTableIncrementalDiff diff = new RoutingTableIncrementalDiff(diffs); + RoutingTableIncrementalDiff diff = new RoutingTableIncrementalDiff(previousState.getRoutingTable(), currentState.getRoutingTable()); String uploadedFileName = String.format(Locale.ROOT, "routing-table-diff/" + indexName); when(blobContainer.readBlob(indexName)).thenReturn( REMOTE_ROUTING_TABLE_DIFF_FORMAT.serialize(diff, uploadedFileName, compressor).streamInput() ); - TestCapturingListener listener = new TestCapturingListener<>(); + TestCapturingListener> listener = new TestCapturingListener<>(); CountDownLatch latch = new CountDownLatch(1); remoteRoutingTableService.getAsyncIndexRoutingTableDiffReadAction( @@ -603,8 +615,11 @@ public void testGetAsyncIndexRoutingTableDiffReadAction() throws Exception { assertNull(listener.getFailure()); assertNotNull(listener.getResult()); - RoutingTableIncrementalDiff resultDiff = listener.getResult(); - assertEquals(diff.getDiffs().size(), resultDiff.getDiffs().size()); + Diff resultDiff = listener.getResult(); + assertEquals( + currentState.getRoutingTable().getIndicesRouting(), + resultDiff.apply(previousState.getRoutingTable()).getIndicesRouting() + ); } public void testGetAsyncIndexRoutingWriteAction() throws Exception { @@ -661,20 +676,8 @@ public void testGetAsyncIndexRoutingWriteAction() throws Exception { public void 
testGetAsyncIndexRoutingDiffWriteAction() throws Exception { String indexName = randomAlphaOfLength(randomIntBetween(1, 50)); - ClusterState currentState = createClusterState(indexName); - - // Get the IndexRoutingTable from the current state - IndexRoutingTable indexRoutingTable = currentState.routingTable().index(indexName); - Map shardRoutingTables = indexRoutingTable.getShards(); - - RoutingTableIncrementalDiff.IndexRoutingTableIncrementalDiff indexRoutingTableDiff = - new RoutingTableIncrementalDiff.IndexRoutingTableIncrementalDiff(new ArrayList<>(shardRoutingTables.values())); - - // Create the map for RoutingTableIncrementalDiff - Map> diffs = new HashMap<>(); - diffs.put(indexName, indexRoutingTableDiff); - - // RoutingTableIncrementalDiff diff = new RoutingTableIncrementalDiff(diffs); + ClusterState previousState = generateClusterStateWithOneIndex(indexName, 5, 1, false).build(); + ClusterState currentState = generateClusterStateWithOneIndex(indexName, 5, 2, true).build(); Iterable remotePath = new BlobPath().add("base-path") .add( @@ -699,7 +702,7 @@ public void testGetAsyncIndexRoutingDiffWriteAction() throws Exception { currentState.metadata().clusterUUID(), currentState.term(), currentState.version(), - diffs, + new RoutingTableIncrementalDiff(previousState.getRoutingTable(), currentState.getRoutingTable()), new LatchedActionListener<>(listener, latch) ); latch.await(); @@ -717,7 +720,7 @@ public void testGetAsyncIndexRoutingDiffWriteAction() throws Exception { assertEquals(4, fileNameTokens.length); assertEquals(ROUTING_TABLE_DIFF_METADATA_PREFIX, fileNameTokens[0]); assertEquals(RemoteStoreUtils.invertLong(1L), fileNameTokens[1]); - assertEquals(RemoteStoreUtils.invertLong(2L), fileNameTokens[2]); + assertEquals(RemoteStoreUtils.invertLong(1L), fileNameTokens[2]); assertThat(RemoteStoreUtils.invertLong(fileNameTokens[3]), lessThanOrEqualTo(System.currentTimeMillis())); } diff --git a/server/src/test/java/org/opensearch/cluster/service/ClusterManagerTaskThrottlerTests.java b/server/src/test/java/org/opensearch/cluster/service/ClusterManagerTaskThrottlerTests.java index e25a0e0b2c3bf..3bd9333dc4168 100644 --- a/server/src/test/java/org/opensearch/cluster/service/ClusterManagerTaskThrottlerTests.java +++ b/server/src/test/java/org/opensearch/cluster/service/ClusterManagerTaskThrottlerTests.java @@ -30,6 +30,8 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Optional; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import static org.opensearch.test.ClusterServiceUtils.setState; @@ -69,7 +71,7 @@ public static void afterClass() { public void testDefaults() { ClusterSettings clusterSettings = new ClusterSettings(Settings.builder().build(), ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); ClusterManagerTaskThrottler throttler = new ClusterManagerTaskThrottler(Settings.EMPTY, clusterSettings, () -> { - return clusterService.getMasterService().getMinNodeVersion(); + return clusterService.getClusterManagerService().getMinNodeVersion(); }, new ClusterManagerThrottlingStats()); throttler.registerClusterManagerTask("put-mapping", true); throttler.registerClusterManagerTask("create-index", true); @@ -108,7 +110,7 @@ public void testValidateSettingsForDifferentVersion() { } } - public void testValidateSettingsForTaskWihtoutRetryOnDataNode() { + public void testValidateSettingsForTaskWithoutRetryOnDataNode() { DiscoveryNode clusterManagerNode = getClusterManagerNode(Version.V_2_5_0); DiscoveryNode dataNode = 
getDataNode(Version.V_2_5_0); setState( @@ -139,7 +141,7 @@ public void testUpdateSettingsForNullValue() { ClusterSettings clusterSettings = new ClusterSettings(Settings.builder().build(), ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); ClusterManagerTaskThrottler throttler = new ClusterManagerTaskThrottler(Settings.EMPTY, clusterSettings, () -> { - return clusterService.getMasterService().getMinNodeVersion(); + return clusterService.getClusterManagerService().getMinNodeVersion(); }, new ClusterManagerThrottlingStats()); throttler.registerClusterManagerTask("put-mapping", true); @@ -173,7 +175,7 @@ public void testSettingsOnBootstrap() { .put("cluster_manager.throttling.retry.max.delay", maxDelay + "s") .build(); ClusterManagerTaskThrottler throttler = new ClusterManagerTaskThrottler(initialSettings, clusterSettings, () -> { - return clusterService.getMasterService().getMinNodeVersion(); + return clusterService.getClusterManagerService().getMinNodeVersion(); }, new ClusterManagerThrottlingStats()); throttler.registerClusterManagerTask("put-mapping", true); @@ -187,7 +189,7 @@ public void testSettingsOnBootstrap() { public void testUpdateRetryDelaySetting() { ClusterSettings clusterSettings = new ClusterSettings(Settings.builder().build(), ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); ClusterManagerTaskThrottler throttler = new ClusterManagerTaskThrottler(Settings.EMPTY, clusterSettings, () -> { - return clusterService.getMasterService().getMinNodeVersion(); + return clusterService.getClusterManagerService().getMinNodeVersion(); }, new ClusterManagerThrottlingStats()); // verify defaults @@ -217,7 +219,7 @@ public void testValidateSettingsForUnknownTask() { ClusterSettings clusterSettings = new ClusterSettings(Settings.builder().build(), ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); ClusterManagerTaskThrottler throttler = new ClusterManagerTaskThrottler(Settings.EMPTY, clusterSettings, () -> { - return clusterService.getMasterService().getMinNodeVersion(); + return clusterService.getClusterManagerService().getMinNodeVersion(); }, new ClusterManagerThrottlingStats()); // set some limit for update snapshot tasks @@ -236,7 +238,7 @@ public void testUpdateThrottlingLimitForBasicSanity() { ClusterSettings clusterSettings = new ClusterSettings(Settings.builder().build(), ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); ClusterManagerTaskThrottler throttler = new ClusterManagerTaskThrottler(Settings.EMPTY, clusterSettings, () -> { - return clusterService.getMasterService().getMinNodeVersion(); + return clusterService.getClusterManagerService().getMinNodeVersion(); }, new ClusterManagerThrottlingStats()); throttler.registerClusterManagerTask("put-mapping", true); @@ -263,7 +265,7 @@ public void testValidateSettingForLimit() { ClusterSettings clusterSettings = new ClusterSettings(Settings.builder().build(), ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); ClusterManagerTaskThrottler throttler = new ClusterManagerTaskThrottler(Settings.EMPTY, clusterSettings, () -> { - return clusterService.getMasterService().getMinNodeVersion(); + return clusterService.getClusterManagerService().getMinNodeVersion(); }, new ClusterManagerThrottlingStats()); throttler.registerClusterManagerTask("put-mapping", true); @@ -274,7 +276,7 @@ public void testValidateSettingForLimit() { public void testUpdateLimit() { ClusterSettings clusterSettings = new ClusterSettings(Settings.builder().build(), ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); ClusterManagerTaskThrottler throttler = new ClusterManagerTaskThrottler(Settings.EMPTY, 
clusterSettings, () -> { - return clusterService.getMasterService().getMinNodeVersion(); + return clusterService.getClusterManagerService().getMinNodeVersion(); }, new ClusterManagerThrottlingStats()); throttler.registerClusterManagerTask("put-mapping", true); @@ -309,7 +311,7 @@ public void testThrottlingForDisabledThrottlingTask() { String taskKey = "test"; ClusterSettings clusterSettings = new ClusterSettings(Settings.builder().build(), ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); ClusterManagerTaskThrottler throttler = new ClusterManagerTaskThrottler(Settings.EMPTY, clusterSettings, () -> { - return clusterService.getMasterService().getMinNodeVersion(); + return clusterService.getClusterManagerService().getMinNodeVersion(); }, throttlingStats); ClusterManagerTaskThrottler.ThrottlingKey throttlingKey = throttler.registerClusterManagerTask(taskKey, false); @@ -321,6 +323,9 @@ public void testThrottlingForDisabledThrottlingTask() { // Asserting that there was not any throttling for it assertEquals(0L, throttlingStats.getThrottlingCount(taskKey)); + + // Asserting value in tasksCount map to make sure it gets updated even when throttling is disabled + assertEquals(Optional.of(10L).get(), throttler.tasksCount.get(taskKey)); } public void testThrottlingForInitialStaticSettingAndVersionCheck() { @@ -339,7 +344,7 @@ public void testThrottlingForInitialStaticSettingAndVersionCheck() { .put("cluster_manager.throttling.thresholds.put-mapping.value", put_mapping_threshold_value) .build(); ClusterManagerTaskThrottler throttler = new ClusterManagerTaskThrottler(initialSettings, clusterSettings, () -> { - return clusterService.getMasterService().getMinNodeVersion(); + return clusterService.getClusterManagerService().getMinNodeVersion(); }, throttlingStats); ClusterManagerTaskThrottler.ThrottlingKey throttlingKey = throttler.registerClusterManagerTask("put-mapping", true); @@ -367,7 +372,7 @@ public void testThrottling() { String taskKey = "test"; ClusterSettings clusterSettings = new ClusterSettings(Settings.builder().build(), ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); ClusterManagerTaskThrottler throttler = new ClusterManagerTaskThrottler(Settings.EMPTY, clusterSettings, () -> { - return clusterService.getMasterService().getMinNodeVersion(); + return clusterService.getClusterManagerService().getMinNodeVersion(); }, throttlingStats); ClusterManagerTaskThrottler.ThrottlingKey throttlingKey = throttler.registerClusterManagerTask(taskKey, true); @@ -406,6 +411,164 @@ public void testThrottling() { throttler.onBeginSubmit(getMockUpdateTaskList(taskKey, throttlingKey, 1)); } + public void testThrottlingWithLock() { + ClusterManagerThrottlingStats throttlingStats = new ClusterManagerThrottlingStats(); + String taskKey = "test"; + ClusterSettings clusterSettings = new ClusterSettings(Settings.builder().build(), ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + ClusterManagerTaskThrottler throttler = new ClusterManagerTaskThrottler(Settings.EMPTY, clusterSettings, () -> { + return clusterService.getClusterManagerService().getMinNodeVersion(); + }, throttlingStats); + ClusterManagerTaskThrottler.ThrottlingKey throttlingKey = throttler.registerClusterManagerTask(taskKey, true); + + throttler.updateLimit(taskKey, 5); + + // adding 3 tasks + throttler.onBeginSubmit(getMockUpdateTaskList(taskKey, throttlingKey, 3)); + + // adding 3 more tasks, these tasks should be throttled + // taskCount in Queue: 3 Threshold: 5 + assertThrows( + ClusterManagerThrottlingException.class, + () -> 
throttler.onBeginSubmit(getMockUpdateTaskList(taskKey, throttlingKey, 3)) + ); + assertEquals(3L, throttlingStats.getThrottlingCount(taskKey)); + + // remove one task + throttler.onBeginProcessing(getMockUpdateTaskList(taskKey, throttlingKey, 1)); + + // add 3 tasks should pass now. + // taskCount in Queue: 2 Threshold: 5 + throttler.onBeginSubmit(getMockUpdateTaskList(taskKey, throttlingKey, 3)); + + final CountDownLatch latch = new CountDownLatch(1); + Thread threadToLock = null; + try { + // Taking lock on tasksCount will not impact throttling behaviour now. + threadToLock = new Thread(() -> { + throttler.tasksCount.computeIfPresent(taskKey, (key, count) -> { + try { + latch.await(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + return 10L; + }); + }); + threadToLock.start(); + + // adding one task will throttle + // taskCount in Queue: 5 Threshold: 5 + final ClusterManagerThrottlingException exception = assertThrows( + ClusterManagerThrottlingException.class, + () -> throttler.onBeginSubmit(getMockUpdateTaskList(taskKey, throttlingKey, 1)) + ); + assertEquals("Throttling Exception : Limit exceeded for test", exception.getMessage()); + assertEquals(Optional.of(5L).get(), throttler.tasksCount.get(taskKey)); + assertEquals(4L, throttlingStats.getThrottlingCount(taskKey)); + } finally { + if (threadToLock != null) { + latch.countDown(); + // Wait to complete and then assert on new tasksCount that got modified by threadToLock Thread + try { + threadToLock.join(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + } + assertEquals(Optional.of(10L).get(), throttler.tasksCount.get(taskKey)); + } + + public void testThrottlingWithMultipleOnBeginSubmitsThreadsWithLock() { + ClusterManagerThrottlingStats throttlingStats = new ClusterManagerThrottlingStats(); + String taskKey = "test"; + ClusterSettings clusterSettings = new ClusterSettings(Settings.builder().build(), ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + ClusterManagerTaskThrottler throttler = new ClusterManagerTaskThrottler(Settings.EMPTY, clusterSettings, () -> { + return clusterService.getClusterManagerService().getMinNodeVersion(); + }, throttlingStats); + ClusterManagerTaskThrottler.ThrottlingKey throttlingKey = throttler.registerClusterManagerTask(taskKey, true); + + throttler.updateLimit(taskKey, 5); + + // adding 3 tasks + throttler.onBeginSubmit(getMockUpdateTaskList(taskKey, throttlingKey, 3)); + + // adding 3 more tasks, these tasks should be throttled + // taskCount in Queue: 3 Threshold: 5 + assertThrows( + ClusterManagerThrottlingException.class, + () -> throttler.onBeginSubmit(getMockUpdateTaskList(taskKey, throttlingKey, 3)) + ); + assertEquals(3L, throttlingStats.getThrottlingCount(taskKey)); + + // remove one task + throttler.onBeginProcessing(getMockUpdateTaskList(taskKey, throttlingKey, 1)); + + // add 3 tasks should pass now. + // taskCount in Queue: 2 Threshold: 5 + throttler.onBeginSubmit(getMockUpdateTaskList(taskKey, throttlingKey, 3)); + + final CountDownLatch latch = new CountDownLatch(1); + Thread threadToLock = null; + List submittingThreads = new ArrayList<>(); + + try { + // Taking lock on tasksCount will not impact throttling behaviour now. 
+ threadToLock = new Thread(() -> { + throttler.tasksCount.computeIfPresent(taskKey, (key, count) -> { + try { + latch.await(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + return 10L; + }); + }); + threadToLock.start(); + + final CountDownLatch latch2 = new CountDownLatch(10); + for (int i = 0; i < 10; i++) { + Thread submittingThread = new Thread(() -> { + // adding one task will throttle + // taskCount in Queue: 5 Threshold: 5 + final ClusterManagerThrottlingException exception = assertThrows( + ClusterManagerThrottlingException.class, + () -> throttler.onBeginSubmit(getMockUpdateTaskList(taskKey, throttlingKey, 1)) + ); + assertEquals("Throttling Exception : Limit exceeded for test", exception.getMessage()); + assertEquals(Optional.of(5L).get(), throttler.tasksCount.get(taskKey)); + latch2.countDown(); + }); + submittingThread.start(); + submittingThreads.add(submittingThread); + } + try { + latch2.await(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + assertEquals(13L, throttlingStats.getThrottlingCount(taskKey)); + } finally { + if (threadToLock != null) { + latch.countDown(); + try { + // Wait to complete and then assert on new tasksCount that got modified by threadToLock Thread + threadToLock.join(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + for (Thread submittingThread : submittingThreads) { + try { + submittingThread.join(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + } + assertEquals(Optional.of(10L).get(), throttler.tasksCount.get(taskKey)); + } + private List getMockUpdateTaskList( String taskKey, ClusterManagerTaskThrottler.ThrottlingKey throttlingKey, diff --git a/server/src/test/java/org/opensearch/cluster/service/MasterServiceTests.java b/server/src/test/java/org/opensearch/cluster/service/MasterServiceTests.java index 7562dfc2e9d33..db9abe0310e40 100644 --- a/server/src/test/java/org/opensearch/cluster/service/MasterServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/service/MasterServiceTests.java @@ -534,8 +534,8 @@ public void onFailure(String source, Exception e) { }); assertBusy(mockAppender::assertAllExpectationsMatched); // verify stats values after state is published - assertEquals(1, clusterManagerService.getClusterStateStats().getUpdateSuccess()); - assertEquals(0, clusterManagerService.getClusterStateStats().getUpdateFailed()); + assertBusy(() -> assertEquals(1, clusterManagerService.getClusterStateStats().getUpdateSuccess())); + assertBusy(() -> assertEquals(0, clusterManagerService.getClusterStateStats().getUpdateFailed())); } } } @@ -699,8 +699,8 @@ public void onFailure(String source, Exception e) { }); assertBusy(mockAppender::assertAllExpectationsMatched); // verify stats values after state is published - assertEquals(1, clusterManagerService.getClusterStateStats().getUpdateSuccess()); - assertEquals(0, clusterManagerService.getClusterStateStats().getUpdateFailed()); + assertBusy(() -> assertEquals(1, clusterManagerService.getClusterStateStats().getUpdateSuccess())); + assertBusy(() -> assertEquals(0, clusterManagerService.getClusterStateStats().getUpdateFailed())); } } } diff --git a/server/src/test/java/org/opensearch/common/settings/ScopedSettingsTests.java b/server/src/test/java/org/opensearch/common/settings/ScopedSettingsTests.java index 0c5cece4249ef..7780481c9deff 100644 --- a/server/src/test/java/org/opensearch/common/settings/ScopedSettingsTests.java +++ 
b/server/src/test/java/org/opensearch/common/settings/ScopedSettingsTests.java @@ -61,6 +61,9 @@ import java.util.function.Function; import java.util.stream.Collectors; +import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE; +import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_AUTO; import static org.hamcrest.CoreMatchers.containsString; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.startsWith; @@ -1478,4 +1481,107 @@ public void testAddSettingsUpdateConsumer() { if (i == 42) throw new AssertionError("empty key"); })); } + + public void testGetOrNullWhenSettingIsNotSet() { + Setting<Integer> testSetting = Setting.intSetting("foo.bar", 1, Property.Dynamic, Property.NodeScope); + Setting<Integer> testSetting2 = Setting.intSetting("foo.bar.baz", 1, Property.Dynamic, Property.NodeScope); + AbstractScopedSettings clusterSettings = new ClusterSettings( + Settings.EMPTY, + new HashSet<>(Arrays.asList(testSetting, testSetting2)) + ); + clusterSettings.registerSetting(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE); + clusterSettings.registerSetting(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING); + assertNull(clusterSettings.getOrNull(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING)); + + } + + public void testGetOrNullWhenSettingIsSet() { + Setting<Integer> testSetting = Setting.intSetting("foo.bar", 1, Property.Dynamic, Property.NodeScope); + Setting<Integer> testSetting2 = Setting.intSetting("foo.bar.baz", 1, Property.Dynamic, Property.NodeScope); + AbstractScopedSettings clusterSettings = new ClusterSettings( + Settings.EMPTY, + new HashSet<>(Arrays.asList(testSetting, testSetting2)) + ); + clusterSettings.registerSetting(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE); + clusterSettings.applySettings( + Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_AUTO).build() + ); + assertEquals(clusterSettings.getOrNull(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE), "auto"); + + } + + public void testGetOrNullWhenSettingIsSetInNodeSettings() { + Setting<Integer> testSetting = Setting.intSetting("foo.bar", 1, Property.Dynamic, Property.NodeScope); + Setting<Integer> testSetting2 = Setting.intSetting("foo.bar.baz", 1, Property.Dynamic, Property.NodeScope); + Settings concurrentSearchModeSetting = Settings.builder() + .put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), CONCURRENT_SEGMENT_SEARCH_MODE_AUTO) + .build(); + AbstractScopedSettings clusterSettings = new ClusterSettings( + concurrentSearchModeSetting, + new HashSet<>(Arrays.asList(testSetting, testSetting2, CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE)) + ); + assertEquals(clusterSettings.getOrNull(CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE), "auto"); + + } + + public void testGetOrNullWhenSettingScopeDoesntMatch() { + Setting<Integer> testSetting = Setting.intSetting("foo.bar", 1, Property.Dynamic, Property.NodeScope); + Setting<Integer> testSetting2 = Setting.intSetting("foo.bar.baz", 1, Property.Dynamic, Property.NodeScope); + AbstractScopedSettings clusterSettings = new ClusterSettings( + Settings.EMPTY, + new HashSet<>(Arrays.asList(testSetting, testSetting2)) + ); + Setting<Integer> failedSetting = Setting.intSetting("foo.bar.scope.fail", 1, Property.Dynamic, Property.IndexScope); + clusterSettings.registerSetting(failedSetting); + try { + clusterSettings.getOrNull(failedSetting); + fail("setting scope doesn't match"); + } catch (SettingsException ex) { + assertEquals("settings scope doesn't match the setting scope [NodeScope] not in [[Dynamic, IndexScope]]", ex.getMessage()); + } + + } + + public void testGetOrNullWhenSettingIsNotRegistered() { + Setting<Integer> testSetting = Setting.intSetting("foo.bar", 1, Property.Dynamic, Property.NodeScope); + Setting<Integer> testSetting2 = Setting.intSetting("foo.bar.baz", 1, Property.Dynamic, Property.NodeScope); + AbstractScopedSettings clusterSettings = new ClusterSettings( + Settings.EMPTY, + new HashSet<>(Arrays.asList(testSetting, testSetting2)) + ); + Setting<Integer> failedSetting = Setting.intSetting("foo.bar.register.fail", 1, Property.Dynamic, Property.NodeScope); + + try { + clusterSettings.getOrNull(failedSetting); + fail("setting is not registered"); + } catch (SettingsException ex) { + assertEquals("setting foo.bar.register.fail has not been registered", ex.getMessage()); + } + + } + + public void testGetOrNullWhenSettingIsRegisteredWithFallback() { + Setting<Integer> fallbackSetting = Setting.intSetting("foo.bar", 10, Property.Dynamic, Property.NodeScope); + AbstractScopedSettings clusterSettings = new ClusterSettings(Settings.EMPTY, new HashSet<>(Arrays.asList(fallbackSetting))); + clusterSettings.registerSetting(fallbackSetting); + clusterSettings.applySettings(Settings.builder().put(fallbackSetting.getKey(), 100).build()); + Setting<Integer> settingWithFallback = Setting.intSetting("foo.fallback", fallbackSetting, 1, Property.Dynamic, Property.NodeScope); + clusterSettings.registerSetting(settingWithFallback); + + assertEquals(clusterSettings.getOrNull(settingWithFallback), Integer.valueOf(100)); + + } + + public void testGetOrNullWhenSettingIsRegisteredNodeSettingFallback() { + Setting<Integer> fallbackSetting = Setting.intSetting("foo.bar", 10, Property.Dynamic, Property.NodeScope); + Settings settings = Settings.builder().put(fallbackSetting.getKey(), 100).build(); + AbstractScopedSettings clusterSettings = new ClusterSettings(settings, new HashSet<>(Arrays.asList(fallbackSetting))); + + Setting<Integer> settingWithFallback = Setting.intSetting("foo.fallback", fallbackSetting, 1, Property.Dynamic, Property.NodeScope); + clusterSettings.registerSetting(settingWithFallback); + + assertEquals(clusterSettings.getOrNull(settingWithFallback), Integer.valueOf(100)); + + } + }
diff --git a/server/src/test/java/org/opensearch/common/settings/SettingsModuleTests.java b/server/src/test/java/org/opensearch/common/settings/SettingsModuleTests.java index 66c9801d16598..d504c3af90679 100644 --- a/server/src/test/java/org/opensearch/common/settings/SettingsModuleTests.java +++ b/server/src/test/java/org/opensearch/common/settings/SettingsModuleTests.java @@ -290,20 +290,27 @@ public void testConcurrentSegmentSearchClusterSettings() { Settings settings = Settings.builder().put(SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), settingValue).build(); SettingsModule settingsModule = new SettingsModule(settings); assertEquals(settingValue, SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.get(settingsModule.getSettings())); + assertSettingDeprecationsAndWarnings(new Setting[] { SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING }); } public void testConcurrentSegmentSearchIndexSettings() { Settings.Builder target = Settings.builder().put(Settings.EMPTY); Settings.Builder update = Settings.builder(); - + boolean settingValue = randomBoolean(); SettingsModule module = new SettingsModule(Settings.EMPTY); IndexScopedSettings indexScopedSettings = module.getIndexScopedSettings(); indexScopedSettings.updateDynamicSettings( - 
Settings.builder().put(IndexSettings.INDEX_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), true).build(), + Settings.builder().put(IndexSettings.INDEX_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), settingValue).build(), target, update, "node" ); + // apply the setting update + module.getIndexScopedSettings() + .applySettings(Settings.builder().put(IndexSettings.INDEX_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), settingValue).build()); + // assert value + assertEquals(settingValue, module.getIndexScopedSettings().get(IndexSettings.INDEX_CONCURRENT_SEGMENT_SEARCH_SETTING)); + assertSettingDeprecationsAndWarnings(new Setting[] { IndexSettings.INDEX_CONCURRENT_SEGMENT_SEARCH_SETTING }); } public void testMaxSliceCountClusterSettingsForConcurrentSearch() { diff --git a/server/src/test/java/org/opensearch/gateway/GatewayAllocatorTests.java b/server/src/test/java/org/opensearch/gateway/GatewayAllocatorTests.java index c7eae77d6deba..ebc2e59fa5a30 100644 --- a/server/src/test/java/org/opensearch/gateway/GatewayAllocatorTests.java +++ b/server/src/test/java/org/opensearch/gateway/GatewayAllocatorTests.java @@ -10,6 +10,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.opensearch.OpenSearchException; import org.opensearch.Version; import org.opensearch.action.support.nodes.BaseNodeResponse; import org.opensearch.cluster.ClusterInfo; @@ -22,6 +23,7 @@ import org.opensearch.cluster.routing.IndexRoutingTable; import org.opensearch.cluster.routing.IndexShardRoutingTable; import org.opensearch.cluster.routing.RecoverySource; +import org.opensearch.cluster.routing.RerouteService; import org.opensearch.cluster.routing.RoutingNodes; import org.opensearch.cluster.routing.RoutingTable; import org.opensearch.cluster.routing.ShardRouting; @@ -30,6 +32,8 @@ import org.opensearch.cluster.routing.UnassignedInfo; import org.opensearch.cluster.routing.allocation.RoutingAllocation; import org.opensearch.cluster.routing.allocation.decider.AllocationDeciders; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.Priority; import org.opensearch.common.collect.Tuple; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; @@ -37,7 +41,9 @@ import org.opensearch.common.util.set.Sets; import org.opensearch.core.index.shard.ShardId; import org.opensearch.snapshots.SnapshotShardSizeInfo; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.gateway.TestShardBatchGatewayAllocator; +import org.opensearch.threadpool.TestThreadPool; import org.junit.Before; import java.util.ArrayList; @@ -47,13 +53,13 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.CountDownLatch; -import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import static org.opensearch.gateway.ShardsBatchGatewayAllocator.PRIMARY_BATCH_ALLOCATOR_TIMEOUT_SETTING; import static org.opensearch.gateway.ShardsBatchGatewayAllocator.PRIMARY_BATCH_ALLOCATOR_TIMEOUT_SETTING_KEY; import static org.opensearch.gateway.ShardsBatchGatewayAllocator.REPLICA_BATCH_ALLOCATOR_TIMEOUT_SETTING; import static org.opensearch.gateway.ShardsBatchGatewayAllocator.REPLICA_BATCH_ALLOCATOR_TIMEOUT_SETTING_KEY; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; public class GatewayAllocatorTests extends OpenSearchAllocationTestCase { @@ -426,22 +432,62 @@ public void testReplicaAllocatorTimeout() { assertEquals(-1, REPLICA_BATCH_ALLOCATOR_TIMEOUT_SETTING.get(build).getMillis()); } - public void 
testCollectTimedOutShards() throws InterruptedException { + public void testCollectTimedOutShardsAndScheduleReroute_Success() throws InterruptedException { createIndexAndUpdateClusterState(2, 5, 2); - CountDownLatch latch = new CountDownLatch(10); - testShardsBatchGatewayAllocator = new TestShardBatchGatewayAllocator(latch); + TestThreadPool threadPool = new TestThreadPool(getTestName()); + ClusterService clusterService = ClusterServiceUtils.createClusterService(clusterState, threadPool); + final CountDownLatch rerouteLatch = new CountDownLatch(2); + final RerouteService rerouteService = (reason, priority, listener) -> { + listener.onResponse(clusterService.state()); + assertThat(rerouteLatch.getCount(), greaterThanOrEqualTo(0L)); + assertEquals("reroute after existing shards allocator timed out", reason); + assertEquals(Priority.HIGH, priority); + rerouteLatch.countDown(); + }; + CountDownLatch timedOutShardsLatch = new CountDownLatch(20); + testShardsBatchGatewayAllocator = new TestShardBatchGatewayAllocator(timedOutShardsLatch, 1000, rerouteService); testShardsBatchGatewayAllocator.setPrimaryBatchAllocatorTimeout(TimeValue.ZERO); testShardsBatchGatewayAllocator.setReplicaBatchAllocatorTimeout(TimeValue.ZERO); BatchRunnableExecutor executor = testShardsBatchGatewayAllocator.allocateAllUnassignedShards(testAllocation, true); executor.run(); - assertTrue(latch.await(1, TimeUnit.MINUTES)); - latch = new CountDownLatch(10); - testShardsBatchGatewayAllocator = new TestShardBatchGatewayAllocator(latch); + assertEquals(timedOutShardsLatch.getCount(), 10); + assertEquals(1, rerouteLatch.getCount()); + executor = testShardsBatchGatewayAllocator.allocateAllUnassignedShards(testAllocation, false); + executor.run(); + assertEquals(timedOutShardsLatch.getCount(), 0); + assertEquals(0, rerouteLatch.getCount()); // even with failure it doesn't leak any listeners + final boolean terminated = terminate(threadPool); + assert terminated; + clusterService.close(); + } + + public void testCollectTimedOutShardsAndScheduleReroute_Failure() throws InterruptedException { + createIndexAndUpdateClusterState(2, 5, 2); + TestThreadPool threadPool = new TestThreadPool(getTestName()); + ClusterService clusterService = ClusterServiceUtils.createClusterService(clusterState, threadPool); + final CountDownLatch rerouteLatch = new CountDownLatch(2); + final RerouteService rerouteService = (reason, priority, listener) -> { + listener.onFailure(new OpenSearchException("simulated")); + assertThat(rerouteLatch.getCount(), greaterThanOrEqualTo(0L)); + assertEquals("reroute after existing shards allocator timed out", reason); + assertEquals(Priority.HIGH, priority); + rerouteLatch.countDown(); + }; + CountDownLatch timedOutShardsLatch = new CountDownLatch(20); + testShardsBatchGatewayAllocator = new TestShardBatchGatewayAllocator(timedOutShardsLatch, 1000, rerouteService); testShardsBatchGatewayAllocator.setPrimaryBatchAllocatorTimeout(TimeValue.ZERO); testShardsBatchGatewayAllocator.setReplicaBatchAllocatorTimeout(TimeValue.ZERO); + BatchRunnableExecutor executor = testShardsBatchGatewayAllocator.allocateAllUnassignedShards(testAllocation, true); + executor.run(); + assertEquals(timedOutShardsLatch.getCount(), 10); + assertEquals(1, rerouteLatch.getCount()); executor = testShardsBatchGatewayAllocator.allocateAllUnassignedShards(testAllocation, false); executor.run(); - assertTrue(latch.await(1, TimeUnit.MINUTES)); + assertEquals(timedOutShardsLatch.getCount(), 0); + assertEquals(0, rerouteLatch.getCount()); // even with failure 
it doesn't leak any listeners + final boolean terminated = terminate(threadPool); + assert terminated; + clusterService.close(); } private void createIndexAndUpdateClusterState(int count, int numberOfShards, int numberOfReplicas) { diff --git a/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java b/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java index 8e8d80c870ddf..5ea5241762753 100644 --- a/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java +++ b/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java @@ -68,11 +68,12 @@ import org.opensearch.gateway.PersistedClusterStateService.Writer; import org.opensearch.gateway.remote.ClusterMetadataManifest; import org.opensearch.gateway.remote.RemoteClusterStateService; -import org.opensearch.gateway.remote.RemotePersistenceStats; +import org.opensearch.gateway.remote.RemoteUploadStats; import org.opensearch.gateway.remote.model.RemoteClusterStateManifestInfo; import org.opensearch.index.recovery.RemoteStoreRestoreService; import org.opensearch.index.recovery.RemoteStoreRestoreService.RemoteRestoreResult; import org.opensearch.index.remote.RemoteIndexPathUploader; +import org.opensearch.indices.DefaultRemoteStoreSettings; import org.opensearch.node.Node; import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.fs.FsRepository; @@ -98,6 +99,8 @@ import org.mockito.Mockito; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_INDEX_UUID; +import static org.opensearch.gateway.remote.ClusterMetadataManifest.CODEC_V1; +import static org.opensearch.gateway.remote.ClusterMetadataManifest.MANIFEST_CURRENT_CODEC_VERSION; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT; @@ -109,8 +112,10 @@ import static org.hamcrest.Matchers.nullValue; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyBoolean; -import static org.mockito.Mockito.doCallRealMethod; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verifyNoInteractions; @@ -206,11 +211,32 @@ public void testSetCurrentTerm() throws IOException { } private ClusterState createClusterState(long version, Metadata metadata) { - return ClusterState.builder(clusterName) - .nodes(DiscoveryNodes.builder().add(localNode).localNodeId(localNode.getId()).build()) - .version(version) - .metadata(metadata) + return createClusterState(version, metadata, false); + } + + private ClusterState createClusterState(long version, Metadata metadata, boolean isClusterManagerNode) { + DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder().add(localNode).localNodeId(localNode.getId()); + if (isClusterManagerNode) { + nodesBuilder.clusterManagerNodeId(localNode.getId()); + } + return ClusterState.builder(clusterName).nodes(nodesBuilder.build()).version(version).metadata(metadata).build(); + } + + private ClusterState createClusterStateWithNodes(long 
version, Metadata metadata) { + DiscoveryNode oldNode = new DiscoveryNode( + "node2", + buildNewFakeTransportAddress(), + Collections.emptyMap(), + Sets.newHashSet(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE), + Version.V_2_13_0 + ); + DiscoveryNodes discoveryNodes = DiscoveryNodes.builder() + .add(localNode) + .localNodeId(localNode.getId()) + .clusterManagerNodeId(localNode.getId()) + .add(oldNode) .build(); + return ClusterState.builder(clusterName).nodes(discoveryNodes).version(version).metadata(metadata).build(); } private CoordinationMetadata createCoordinationMetadata(long term) { @@ -489,7 +515,15 @@ public void testDataOnlyNodePersistence() throws Exception { clusterService, () -> 0L, threadPool, - List.of(new RemoteIndexPathUploader(threadPool, settings, repositoriesServiceSupplier, clusterSettings)), + List.of( + new RemoteIndexPathUploader( + threadPool, + settings, + repositoriesServiceSupplier, + clusterSettings, + DefaultRemoteStoreSettings.INSTANCE + ) + ), writableRegistry() ); } else { @@ -725,7 +759,7 @@ public void testRemotePersistedState() throws IOException { final RemoteClusterStateService remoteClusterStateService = Mockito.mock(RemoteClusterStateService.class); final ClusterMetadataManifest manifest = ClusterMetadataManifest.builder().clusterTerm(1L).stateVersion(5L).build(); final String previousClusterUUID = "prev-cluster-uuid"; - Mockito.when(remoteClusterStateService.writeFullMetadata(Mockito.any(), Mockito.any())) + Mockito.when(remoteClusterStateService.writeFullMetadata(Mockito.any(), Mockito.any(), eq(MANIFEST_CURRENT_CODEC_VERSION))) .thenReturn(new RemoteClusterStateManifestInfo(manifest, "path/to/manifest")); Mockito.when(remoteClusterStateService.writeIncrementalMetadata(Mockito.any(), Mockito.any(), Mockito.any())) @@ -738,31 +772,34 @@ public void testRemotePersistedState() throws IOException { final long clusterTerm = randomNonNegativeLong(); final ClusterState clusterState = createClusterState( randomNonNegativeLong(), - Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(clusterTerm).build()).build() + Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(clusterTerm).build()).build(), + true ); remotePersistedState.setLastAcceptedState(clusterState); - Mockito.verify(remoteClusterStateService).writeFullMetadata(clusterState, previousClusterUUID); + Mockito.verify(remoteClusterStateService).writeFullMetadata(clusterState, previousClusterUUID, MANIFEST_CURRENT_CODEC_VERSION); assertThat(remotePersistedState.getLastAcceptedState(), equalTo(clusterState)); assertThat(remotePersistedState.getCurrentTerm(), equalTo(clusterTerm)); final ClusterState secondClusterState = createClusterState( randomNonNegativeLong(), - Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(clusterTerm).build()).build() + Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(clusterTerm).build()).build(), + true ); remotePersistedState.setLastAcceptedState(secondClusterState); - Mockito.verify(remoteClusterStateService, times(1)).writeFullMetadata(secondClusterState, previousClusterUUID); + Mockito.verify(remoteClusterStateService, times(1)) + .writeFullMetadata(secondClusterState, previousClusterUUID, MANIFEST_CURRENT_CODEC_VERSION); assertThat(remotePersistedState.getLastAcceptedState(), equalTo(secondClusterState)); assertThat(remotePersistedState.getCurrentTerm(), equalTo(clusterTerm)); - when(remoteClusterStateService.markLastStateAsCommitted(Mockito.any(), Mockito.any())).thenReturn( + 
when(remoteClusterStateService.markLastStateAsCommitted(Mockito.any(), Mockito.any(), eq(false))).thenReturn( new RemoteClusterStateManifestInfo(manifest, "path/to/manifest") ); remotePersistedState.markLastAcceptedStateAsCommitted(); - Mockito.verify(remoteClusterStateService, times(1)).markLastStateAsCommitted(Mockito.any(), Mockito.any()); + Mockito.verify(remoteClusterStateService, times(1)).markLastStateAsCommitted(Mockito.any(), Mockito.any(), eq(false)); assertThat(remotePersistedState.getLastAcceptedState(), equalTo(secondClusterState)); assertThat(remotePersistedState.getCurrentTerm(), equalTo(clusterTerm)); @@ -776,6 +813,123 @@ public void testRemotePersistedState() throws IOException { assertThat(remotePersistedState.getLastAcceptedState().metadata().clusterUUIDCommitted(), equalTo(true)); } + public void testRemotePersistedStateWithDifferentNodeConfiguration() throws IOException { + final RemoteClusterStateService remoteClusterStateService = Mockito.mock(RemoteClusterStateService.class); + final String previousClusterUUID = "prev-cluster-uuid"; + final ClusterMetadataManifest manifest = ClusterMetadataManifest.builder() + .clusterTerm(1L) + .stateVersion(5L) + .codecVersion(CODEC_V1) + .opensearchVersion(Version.CURRENT) + .build(); + Mockito.when(remoteClusterStateService.writeFullMetadata(Mockito.any(), Mockito.any(), eq(CODEC_V1))) + .thenReturn(new RemoteClusterStateManifestInfo(manifest, "path/to/manifest2")); + + CoordinationState.PersistedState remotePersistedState = new RemotePersistedState(remoteClusterStateService, previousClusterUUID); + + ClusterState clusterState1 = createClusterStateWithNodes( + randomNonNegativeLong(), + Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(1L).build()).build() + ); + remotePersistedState.setLastAcceptedState(clusterState1); + + Mockito.verify(remoteClusterStateService).writeFullMetadata(clusterState1, previousClusterUUID, CODEC_V1); + + ClusterState clusterState2 = createClusterState( + randomNonNegativeLong(), + Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(1L).build()).build(), + true + ); + final ClusterMetadataManifest manifest2 = ClusterMetadataManifest.builder() + .clusterTerm(1L) + .stateVersion(5L) + .codecVersion(MANIFEST_CURRENT_CODEC_VERSION) + .opensearchVersion(Version.CURRENT) + .build(); + Mockito.when(remoteClusterStateService.writeFullMetadata(Mockito.any(), Mockito.any(), eq(MANIFEST_CURRENT_CODEC_VERSION))) + .thenReturn(new RemoteClusterStateManifestInfo(manifest2, "path/to/manifest")); + remotePersistedState.setLastAcceptedState(clusterState2); + Mockito.verify(remoteClusterStateService).writeFullMetadata(clusterState2, previousClusterUUID, MANIFEST_CURRENT_CODEC_VERSION); + + ClusterState clusterState3 = createClusterState( + randomNonNegativeLong(), + Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(1L).build()).build(), + true + ); + Mockito.when(remoteClusterStateService.writeIncrementalMetadata(Mockito.any(), Mockito.any(), Mockito.any())) + .thenReturn(new RemoteClusterStateManifestInfo(manifest2, "path/to/manifest3")); + remotePersistedState.setLastAcceptedState(clusterState3); + Mockito.verify(remoteClusterStateService).writeIncrementalMetadata(clusterState2, clusterState3, manifest2); + + } + + public void testRemotePersistentState_FollowerNode() throws IOException { + final RemoteClusterStateService remoteClusterStateService = Mockito.mock(RemoteClusterStateService.class); + final ClusterMetadataManifest manifest = 
ClusterMetadataManifest.builder() + .clusterTerm(1L) + .stateVersion(5L) + .committed(false) + .build(); + final String previousClusterUUID = "prev-cluster-uuid"; + RemotePersistedState remotePersistedState = new RemotePersistedState(remoteClusterStateService, previousClusterUUID); + + assertNull(remotePersistedState.getLastAcceptedState()); + assertNull(remotePersistedState.getLastAcceptedManifest()); + assertEquals(0, remotePersistedState.getCurrentTerm()); + + final long clusterTerm = randomNonNegativeLong(); + final ClusterState clusterState = createClusterState( + randomNonNegativeLong(), + Metadata.builder() + .coordinationMetadata(CoordinationMetadata.builder().term(clusterTerm).build()) + .clusterUUIDCommitted(true) + .build(), + false + ); + + remotePersistedState.setLastAcceptedState(clusterState); + remotePersistedState.setLastAcceptedManifest(manifest); + Mockito.verify(remoteClusterStateService, never()) + .writeFullMetadata(clusterState, previousClusterUUID, MANIFEST_CURRENT_CODEC_VERSION); + + assertEquals(clusterState, remotePersistedState.getLastAcceptedState()); + assertEquals(clusterTerm, remotePersistedState.getCurrentTerm()); + assertEquals(manifest, remotePersistedState.getLastAcceptedManifest()); + + final ClusterState secondClusterState = createClusterState( + randomNonNegativeLong(), + Metadata.builder() + .coordinationMetadata(CoordinationMetadata.builder().term(clusterTerm).build()) + .clusterUUIDCommitted(false) + .build(), + false + ); + + remotePersistedState.setLastAcceptedState(secondClusterState); + Mockito.verify(remoteClusterStateService, never()) + .writeFullMetadata(secondClusterState, previousClusterUUID, MANIFEST_CURRENT_CODEC_VERSION); + + assertEquals(secondClusterState, remotePersistedState.getLastAcceptedState()); + assertEquals(clusterTerm, remotePersistedState.getCurrentTerm()); + assertFalse(remotePersistedState.getLastAcceptedManifest().isCommitted()); + + remotePersistedState.markLastAcceptedStateAsCommitted(); + Mockito.verify(remoteClusterStateService, never()).markLastStateAsCommitted(Mockito.any(), Mockito.any(), eq(false)); + + assertEquals(secondClusterState, remotePersistedState.getLastAcceptedState()); + assertEquals(clusterTerm, remotePersistedState.getCurrentTerm()); + assertFalse(remotePersistedState.getLastAcceptedState().metadata().clusterUUIDCommitted()); + assertTrue(remotePersistedState.getLastAcceptedManifest().isCommitted()); + + final ClusterState thirdClusterState = ClusterState.builder(secondClusterState) + .metadata(Metadata.builder(secondClusterState.getMetadata()).clusterUUID(randomAlphaOfLength(10)).build()) + .build(); + remotePersistedState.setLastAcceptedState(thirdClusterState); + remotePersistedState.markLastAcceptedStateAsCommitted(); + assertTrue(remotePersistedState.getLastAcceptedState().metadata().clusterUUIDCommitted()); + assertTrue(remotePersistedState.getLastAcceptedManifest().isCommitted()); + } + public void testRemotePersistedStateNotCommitted() throws IOException { final RemoteClusterStateService remoteClusterStateService = Mockito.mock(RemoteClusterStateService.class); final String previousClusterUUID = "prev-cluster-uuid"; @@ -786,7 +940,7 @@ public void testRemotePersistedStateNotCommitted() throws IOException { .build(); Mockito.when(remoteClusterStateService.getLatestClusterMetadataManifest(Mockito.any(), Mockito.any())) .thenReturn(Optional.of(manifest)); - Mockito.when(remoteClusterStateService.writeFullMetadata(Mockito.any(), Mockito.any())) + 
Mockito.when(remoteClusterStateService.writeFullMetadata(Mockito.any(), Mockito.any(), eq(MANIFEST_CURRENT_CODEC_VERSION))) .thenReturn(new RemoteClusterStateManifestInfo(manifest, "path/to/manifest")); Mockito.when(remoteClusterStateService.writeIncrementalMetadata(Mockito.any(), Mockito.any(), Mockito.any())) @@ -802,7 +956,8 @@ public void testRemotePersistedStateNotCommitted() throws IOException { final long clusterTerm = randomNonNegativeLong(); ClusterState clusterState = createClusterState( randomNonNegativeLong(), - Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(clusterTerm).build()).build() + Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(clusterTerm).build()).build(), + true ); clusterState = ClusterState.builder(clusterState) .metadata(Metadata.builder(clusterState.getMetadata()).clusterUUID(randomAlphaOfLength(10)).clusterUUIDCommitted(false).build()) @@ -811,44 +966,54 @@ public void testRemotePersistedStateNotCommitted() throws IOException { remotePersistedState.setLastAcceptedState(clusterState); ArgumentCaptor previousClusterUUIDCaptor = ArgumentCaptor.forClass(String.class); ArgumentCaptor clusterStateCaptor = ArgumentCaptor.forClass(ClusterState.class); - Mockito.verify(remoteClusterStateService).writeFullMetadata(clusterStateCaptor.capture(), previousClusterUUIDCaptor.capture()); + Mockito.verify(remoteClusterStateService) + .writeFullMetadata(clusterStateCaptor.capture(), previousClusterUUIDCaptor.capture(), eq(MANIFEST_CURRENT_CODEC_VERSION)); assertEquals(previousClusterUUID, previousClusterUUIDCaptor.getValue()); } public void testRemotePersistedStateExceptionOnFullStateUpload() throws IOException { final RemoteClusterStateService remoteClusterStateService = Mockito.mock(RemoteClusterStateService.class); final String previousClusterUUID = "prev-cluster-uuid"; - Mockito.doThrow(IOException.class).when(remoteClusterStateService).writeFullMetadata(Mockito.any(), Mockito.any()); + Mockito.doThrow(IOException.class) + .when(remoteClusterStateService) + .writeFullMetadata(Mockito.any(), Mockito.any(), eq(MANIFEST_CURRENT_CODEC_VERSION)); CoordinationState.PersistedState remotePersistedState = new RemotePersistedState(remoteClusterStateService, previousClusterUUID); final long clusterTerm = randomNonNegativeLong(); final ClusterState clusterState = createClusterState( randomNonNegativeLong(), - Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(clusterTerm).build()).build() + Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(clusterTerm).build()).build(), + true ); assertThrows(OpenSearchException.class, () -> remotePersistedState.setLastAcceptedState(clusterState)); } public void testRemotePersistedStateFailureStats() throws IOException { - RemotePersistenceStats remoteStateStats = new RemotePersistenceStats(); + RemoteUploadStats remoteStateStats = new RemoteUploadStats(); final RemoteClusterStateService remoteClusterStateService = Mockito.mock(RemoteClusterStateService.class); final String previousClusterUUID = "prev-cluster-uuid"; - Mockito.doThrow(IOException.class).when(remoteClusterStateService).writeFullMetadata(Mockito.any(), Mockito.any()); - when(remoteClusterStateService.getStats()).thenReturn(remoteStateStats); - doCallRealMethod().when(remoteClusterStateService).writeMetadataFailed(); + Mockito.doThrow(IOException.class) + .when(remoteClusterStateService) + .writeFullMetadata(Mockito.any(), Mockito.any(), eq(MANIFEST_CURRENT_CODEC_VERSION)); + 
when(remoteClusterStateService.getUploadStats()).thenReturn(remoteStateStats); + doAnswer((i) -> { + remoteStateStats.stateFailed(); + return null; + }).when(remoteClusterStateService).writeMetadataFailed(); CoordinationState.PersistedState remotePersistedState = new RemotePersistedState(remoteClusterStateService, previousClusterUUID); final long clusterTerm = randomNonNegativeLong(); final ClusterState clusterState = createClusterState( randomNonNegativeLong(), - Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(clusterTerm).build()).build() + Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(clusterTerm).build()).build(), + true ); assertThrows(OpenSearchException.class, () -> remotePersistedState.setLastAcceptedState(clusterState)); - assertEquals(1, remoteClusterStateService.getStats().getFailedCount()); - assertEquals(0, remoteClusterStateService.getStats().getSuccessCount()); + assertEquals(1, remoteClusterStateService.getUploadStats().getFailedCount()); + assertEquals(0, remoteClusterStateService.getUploadStats().getSuccessCount()); } public void testGatewayForRemoteState() throws IOException { @@ -944,7 +1109,8 @@ public void testGatewayForRemoteStateForNodeReplacement() throws IOException { false ) .clusterUUID(randomAlphaOfLength(10)) - .build() + .build(), + false ); when(remoteClusterStateService.getLastKnownUUIDFromRemote(clusterName.value())).thenReturn( previousState.metadata().clusterUUID() @@ -990,7 +1156,8 @@ public void testGatewayForRemoteStateForNodeReboot() throws IOException { .coordinationMetadata(CoordinationMetadata.builder().term(randomLong()).build()) .put(indexMetadata, false) .clusterUUID(randomAlphaOfLength(10)) - .build() + .build(), + false ); gateway = newGatewayForRemoteState( remoteClusterStateService, @@ -1036,7 +1203,8 @@ public void testGatewayForRemoteStateForInitialBootstrapBlocksApplied() throws I .put(indexMetadata, false) .clusterUUID(ClusterState.UNKNOWN_UUID) .persistentSettings(Settings.builder().put(Metadata.SETTING_READ_ONLY_SETTING.getKey(), true).build()) - .build() + .build(), + false ) ).nodes(DiscoveryNodes.EMPTY_NODES).build(); diff --git a/server/src/test/java/org/opensearch/gateway/remote/ClusterMetadataManifestTests.java b/server/src/test/java/org/opensearch/gateway/remote/ClusterMetadataManifestTests.java index 8a6dd6bc96e72..3f9aa1245cab3 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/ClusterMetadataManifestTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/ClusterMetadataManifestTests.java @@ -9,15 +9,24 @@ package org.opensearch.gateway.remote; import org.opensearch.Version; +import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; -import org.opensearch.cluster.DiffableUtils; +import org.opensearch.cluster.coordination.CoordinationMetadata; import org.opensearch.cluster.metadata.IndexGraveyard; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.IndexTemplateMetadata; +import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.metadata.RepositoriesMetadata; +import org.opensearch.cluster.metadata.TemplatesMetadata; import org.opensearch.cluster.metadata.WeightedRoutingMetadata; import org.opensearch.cluster.routing.IndexRoutingTable; +import org.opensearch.cluster.routing.RoutingTable; +import org.opensearch.cluster.routing.StringKeyDiffProvider; +import org.opensearch.common.settings.Settings; import org.opensearch.common.xcontent.json.JsonXContent; 
import org.opensearch.core.common.bytes.BytesReference; import org.opensearch.core.common.io.stream.NamedWriteableRegistry; +import org.opensearch.core.index.Index; import org.opensearch.core.xcontent.ToXContent; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; @@ -39,6 +48,9 @@ import static org.opensearch.gateway.remote.ClusterMetadataManifest.CODEC_V0; import static org.opensearch.gateway.remote.ClusterMetadataManifest.CODEC_V1; +import static org.opensearch.gateway.remote.ClusterMetadataManifest.CODEC_V2; +import static org.opensearch.gateway.remote.ClusterMetadataManifest.CODEC_V3; +import static org.opensearch.gateway.remote.ClusterMetadataManifest.CODEC_V4; import static org.opensearch.gateway.remote.RemoteClusterStateAttributesManager.CLUSTER_BLOCKS; import static org.opensearch.gateway.remote.RemoteClusterStateAttributesManager.DISCOVERY_NODES; import static org.opensearch.gateway.remote.model.RemoteCoordinationMetadata.COORDINATION_METADATA; @@ -115,7 +127,7 @@ public void testClusterMetadataManifestXContent() throws IOException { .opensearchVersion(Version.CURRENT) .nodeId("test-node-id") .committed(false) - .codecVersion(ClusterMetadataManifest.CODEC_V2) + .codecVersion(CODEC_V2) .indices(Collections.singletonList(uploadedIndexMetadata)) .previousClusterUUID("prev-cluster-uuid") .clusterUUIDCommitted(true) @@ -162,7 +174,7 @@ public void testClusterMetadataManifestSerializationEqualsHashCode() { .opensearchVersion(Version.CURRENT) .nodeId("B10RX1f5RJenMQvYccCgSQ") .committed(true) - .codecVersion(ClusterMetadataManifest.CODEC_V3) + .codecVersion(ClusterMetadataManifest.CODEC_V4) .indices(randomUploadedIndexMetadataList()) .previousClusterUUID("yfObdx8KSMKKrXf8UyHhM") .clusterUUIDCommitted(true) @@ -197,10 +209,12 @@ public void testClusterMetadataManifestSerializationEqualsHashCode() { new ClusterStateDiffManifest( RemoteClusterStateServiceTests.generateClusterStateWithOneIndex().build(), ClusterState.EMPTY_STATE, + ClusterMetadataManifest.CODEC_V3, null, "indicesRoutingDiffPath" ) ) + .checksum(new ClusterStateChecksum(createClusterState())) .build(); { // Mutate Cluster Term EqualsHashCodeTestUtils.checkEqualsAndHashCode( @@ -482,6 +496,22 @@ public void testClusterMetadataManifestSerializationEqualsHashCode() { } ); } + { + // Mutate checksum + EqualsHashCodeTestUtils.checkEqualsAndHashCode( + initialManifest, + orig -> OpenSearchTestCase.copyWriteable( + orig, + new NamedWriteableRegistry(Collections.emptyList()), + ClusterMetadataManifest::new + ), + manifest -> { + ClusterMetadataManifest.Builder builder = ClusterMetadataManifest.builder(manifest); + builder.checksum(null); + return builder.build(); + } + ); + } } public void testClusterMetadataManifestXContentV2() throws IOException { @@ -495,7 +525,7 @@ public void testClusterMetadataManifestXContentV2() throws IOException { .opensearchVersion(Version.CURRENT) .nodeId("test-node-id") .committed(false) - .codecVersion(ClusterMetadataManifest.CODEC_V2) + .codecVersion(CODEC_V2) .indices(Collections.singletonList(uploadedIndexMetadata)) .previousClusterUUID("prev-cluster-uuid") .clusterUUIDCommitted(true) @@ -531,6 +561,7 @@ public void testClusterMetadataManifestXContentV2() throws IOException { new ClusterStateDiffManifest( RemoteClusterStateServiceTests.generateClusterStateWithOneIndex().build(), ClusterState.EMPTY_STATE, + CODEC_V2, null, null ) @@ -550,9 +581,7 @@ public void testClusterMetadataManifestXContentV2() throws IOException { public void 
testClusterMetadataManifestXContentV3() throws IOException { UploadedIndexMetadata uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "test-uuid", "/test/upload/path"); UploadedMetadataAttribute uploadedMetadataAttribute = new UploadedMetadataAttribute("attribute_name", "testing_attribute"); - final DiffableUtils.MapDiff> routingTableIncrementalDiff = Mockito.mock( - DiffableUtils.MapDiff.class - ); + final StringKeyDiffProvider routingTableIncrementalDiff = Mockito.mock(StringKeyDiffProvider.class); ClusterMetadataManifest originalManifest = ClusterMetadataManifest.builder() .clusterTerm(1L) .stateVersion(1L) @@ -561,7 +590,7 @@ public void testClusterMetadataManifestXContentV3() throws IOException { .opensearchVersion(Version.CURRENT) .nodeId("test-node-id") .committed(false) - .codecVersion(ClusterMetadataManifest.CODEC_V3) + .codecVersion(CODEC_V3) .indices(Collections.singletonList(uploadedIndexMetadata)) .previousClusterUUID("prev-cluster-uuid") .clusterUUIDCommitted(true) @@ -597,6 +626,7 @@ public void testClusterMetadataManifestXContentV3() throws IOException { new ClusterStateDiffManifest( RemoteClusterStateServiceTests.generateClusterStateWithOneIndex().build(), ClusterState.EMPTY_STATE, + CODEC_V3, routingTableIncrementalDiff, uploadedMetadataAttribute.getUploadedFilename() ) @@ -613,6 +643,73 @@ public void testClusterMetadataManifestXContentV3() throws IOException { } } + public void testClusterMetadataManifestXContentV4() throws IOException { + UploadedIndexMetadata uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "test-uuid", "/test/upload/path"); + UploadedMetadataAttribute uploadedMetadataAttribute = new UploadedMetadataAttribute("attribute_name", "testing_attribute"); + final StringKeyDiffProvider routingTableIncrementalDiff = Mockito.mock(StringKeyDiffProvider.class); + ClusterStateChecksum checksum = new ClusterStateChecksum(createClusterState()); + ClusterMetadataManifest originalManifest = ClusterMetadataManifest.builder() + .clusterTerm(1L) + .stateVersion(1L) + .clusterUUID("test-cluster-uuid") + .stateUUID("test-state-uuid") + .opensearchVersion(Version.CURRENT) + .nodeId("test-node-id") + .committed(false) + .codecVersion(ClusterMetadataManifest.CODEC_V4) + .indices(Collections.singletonList(uploadedIndexMetadata)) + .previousClusterUUID("prev-cluster-uuid") + .clusterUUIDCommitted(true) + .coordinationMetadata(uploadedMetadataAttribute) + .settingMetadata(uploadedMetadataAttribute) + .templatesMetadata(uploadedMetadataAttribute) + .customMetadataMap( + Collections.unmodifiableList( + Arrays.asList( + new UploadedMetadataAttribute( + CUSTOM_METADATA + CUSTOM_DELIMITER + RepositoriesMetadata.TYPE, + "custom--repositories-file" + ), + new UploadedMetadataAttribute( + CUSTOM_METADATA + CUSTOM_DELIMITER + IndexGraveyard.TYPE, + "custom--index_graveyard-file" + ), + new UploadedMetadataAttribute( + CUSTOM_METADATA + CUSTOM_DELIMITER + WeightedRoutingMetadata.TYPE, + "custom--weighted_routing_netadata-file" + ) + ) + ).stream().collect(Collectors.toMap(UploadedMetadataAttribute::getAttributeName, Function.identity())) + ) + .routingTableVersion(1L) + .indicesRouting(Collections.singletonList(uploadedIndexMetadata)) + .discoveryNodesMetadata(uploadedMetadataAttribute) + .clusterBlocksMetadata(uploadedMetadataAttribute) + .transientSettingsMetadata(uploadedMetadataAttribute) + .hashesOfConsistentSettings(uploadedMetadataAttribute) + .clusterStateCustomMetadataMap(Collections.emptyMap()) + .diffManifest( + new ClusterStateDiffManifest( + 
RemoteClusterStateServiceTests.generateClusterStateWithOneIndex().build(), + ClusterState.EMPTY_STATE, + CODEC_V4, + routingTableIncrementalDiff, + uploadedMetadataAttribute.getUploadedFilename() + ) + ) + .checksum(checksum) + .build(); + final XContentBuilder builder = JsonXContent.contentBuilder(); + builder.startObject(); + originalManifest.toXContent(builder, ToXContent.EMPTY_PARAMS); + builder.endObject(); + + try (XContentParser parser = createParser(JsonXContent.jsonXContent, BytesReference.bytes(builder))) { + final ClusterMetadataManifest fromXContentManifest = ClusterMetadataManifest.fromXContent(parser); + assertEquals(originalManifest, fromXContentManifest); + } + } + public void testClusterMetadataManifestXContentV2WithoutEphemeral() throws IOException { UploadedIndexMetadata uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "test-uuid", "/test/upload/path"); UploadedMetadataAttribute uploadedMetadataAttribute = new UploadedMetadataAttribute("attribute_name", "testing_attribute"); @@ -630,7 +727,7 @@ public void testClusterMetadataManifestXContentV2WithoutEphemeral() throws IOExc .opensearchVersion(Version.CURRENT) .nodeId("test-node-id") .committed(false) - .codecVersion(ClusterMetadataManifest.CODEC_V2) + .codecVersion(CODEC_V2) .indices(Collections.singletonList(uploadedIndexMetadata)) .previousClusterUUID("prev-cluster-uuid") .clusterUUIDCommitted(true) @@ -712,6 +809,17 @@ public void testUploadedIndexMetadataWithoutComponentPrefix() throws IOException } } + public void testGetCodecForVersion() { + assertEquals(-1, ClusterMetadataManifest.getCodecForVersion(Version.fromString("1.3.0"))); + assertEquals(-1, ClusterMetadataManifest.getCodecForVersion(Version.V_2_1_0)); + assertEquals(CODEC_V0, ClusterMetadataManifest.getCodecForVersion(Version.V_2_10_0)); + assertEquals(CODEC_V1, ClusterMetadataManifest.getCodecForVersion(Version.V_2_12_0)); + assertEquals(CODEC_V1, ClusterMetadataManifest.getCodecForVersion(Version.V_2_13_0)); + assertEquals(CODEC_V2, ClusterMetadataManifest.getCodecForVersion(Version.V_2_15_0)); + assertEquals(CODEC_V3, ClusterMetadataManifest.getCodecForVersion(Version.V_2_16_0)); + assertEquals(CODEC_V4, ClusterMetadataManifest.getCodecForVersion(Version.V_2_17_0)); + } + private UploadedIndexMetadata randomlyChangingUploadedIndexMetadata(UploadedIndexMetadata uploadedIndexMetadata) { switch (randomInt(2)) { case 0: @@ -736,4 +844,40 @@ private UploadedIndexMetadata randomlyChangingUploadedIndexMetadata(UploadedInde return uploadedIndexMetadata; } + static ClusterState createClusterState() { + final Index index = new Index("test-index", "index-uuid"); + final Settings idxSettings = Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetadata.SETTING_INDEX_UUID, index.getUUID()) + .build(); + final IndexMetadata indexMetadata = new IndexMetadata.Builder(index.getName()).settings(idxSettings) + .numberOfShards(1) + .numberOfReplicas(0) + .build(); + final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder().term(1L).build(); + final Settings settings = Settings.builder().put("mock-settings", true).build(); + final TemplatesMetadata templatesMetadata = TemplatesMetadata.builder() + .put(IndexTemplateMetadata.builder("template1").settings(idxSettings).patterns(List.of("test*")).build()) + .build(); + final RemoteClusterStateTestUtils.CustomMetadata1 customMetadata = new RemoteClusterStateTestUtils.CustomMetadata1( + "custom-metadata-1" + ); + return 
ClusterState.builder(ClusterName.DEFAULT) + .version(1L) + .stateUUID("state-uuid") + .metadata( + Metadata.builder() + .version(randomNonNegativeLong()) + .put(indexMetadata, true) + .clusterUUID("cluster-uuid") + .coordinationMetadata(coordinationMetadata) + .persistentSettings(settings) + .templates(templatesMetadata) + .hashesOfConsistentSettings(Map.of("key1", "value1", "key2", "value2")) + .putCustom(customMetadata.getWriteableName(), customMetadata) + .build() + ) + .routingTable(RoutingTable.builder().addAsNew(indexMetadata).version(1L).build()) + .build(); + } } diff --git a/server/src/test/java/org/opensearch/gateway/remote/ClusterStateChecksumTests.java b/server/src/test/java/org/opensearch/gateway/remote/ClusterStateChecksumTests.java new file mode 100644 index 0000000000000..0203e56dd2d5c --- /dev/null +++ b/server/src/test/java/org/opensearch/gateway/remote/ClusterStateChecksumTests.java @@ -0,0 +1,222 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +import org.opensearch.Version; +import org.opensearch.cluster.ClusterName; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.block.ClusterBlock; +import org.opensearch.cluster.block.ClusterBlockLevel; +import org.opensearch.cluster.block.ClusterBlocks; +import org.opensearch.cluster.coordination.CoordinationMetadata; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.IndexTemplateMetadata; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.metadata.TemplatesMetadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.RoutingTable; +import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.xcontent.json.JsonXContent; +import org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.transport.TransportAddress; +import org.opensearch.core.index.Index; +import org.opensearch.core.rest.RestStatus; +import org.opensearch.core.xcontent.ToXContent; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; +import java.util.EnumSet; +import java.util.List; +import java.util.Map; + +public class ClusterStateChecksumTests extends OpenSearchTestCase { + + public void testClusterStateChecksumEmptyClusterState() { + ClusterStateChecksum checksum = new ClusterStateChecksum(ClusterState.EMPTY_STATE); + assertNotNull(checksum); + } + + public void testClusterStateChecksum() { + ClusterStateChecksum checksum = new ClusterStateChecksum(generateClusterState()); + assertNotNull(checksum); + assertTrue(checksum.routingTableChecksum != 0); + assertTrue(checksum.nodesChecksum != 0); + assertTrue(checksum.blocksChecksum != 0); + assertTrue(checksum.clusterStateCustomsChecksum != 0); + assertTrue(checksum.coordinationMetadataChecksum != 0); + assertTrue(checksum.settingMetadataChecksum != 0); + assertTrue(checksum.transientSettingsMetadataChecksum != 0); + assertTrue(checksum.templatesMetadataChecksum != 0); + assertTrue(checksum.customMetadataMapChecksum != 0); + 
assertTrue(checksum.hashesOfConsistentSettingsChecksum != 0); + assertTrue(checksum.indicesChecksum != 0); + assertTrue(checksum.clusterStateChecksum != 0); + } + + public void testClusterStateMatchChecksum() { + ClusterStateChecksum checksum = new ClusterStateChecksum(generateClusterState()); + ClusterStateChecksum newChecksum = new ClusterStateChecksum(generateClusterState()); + assertNotNull(checksum); + assertNotNull(newChecksum); + assertEquals(checksum.routingTableChecksum, newChecksum.routingTableChecksum); + assertEquals(checksum.nodesChecksum, newChecksum.nodesChecksum); + assertEquals(checksum.blocksChecksum, newChecksum.blocksChecksum); + assertEquals(checksum.clusterStateCustomsChecksum, newChecksum.clusterStateCustomsChecksum); + assertEquals(checksum.coordinationMetadataChecksum, newChecksum.coordinationMetadataChecksum); + assertEquals(checksum.settingMetadataChecksum, newChecksum.settingMetadataChecksum); + assertEquals(checksum.transientSettingsMetadataChecksum, newChecksum.transientSettingsMetadataChecksum); + assertEquals(checksum.templatesMetadataChecksum, newChecksum.templatesMetadataChecksum); + assertEquals(checksum.customMetadataMapChecksum, newChecksum.customMetadataMapChecksum); + assertEquals(checksum.hashesOfConsistentSettingsChecksum, newChecksum.hashesOfConsistentSettingsChecksum); + assertEquals(checksum.indicesChecksum, newChecksum.indicesChecksum); + assertEquals(checksum.clusterStateChecksum, newChecksum.clusterStateChecksum); + } + + public void testXContentConversion() throws IOException { + ClusterStateChecksum checksum = new ClusterStateChecksum(generateClusterState()); + final XContentBuilder builder = JsonXContent.contentBuilder(); + builder.startObject(); + checksum.toXContent(builder, ToXContent.EMPTY_PARAMS); + builder.endObject(); + + try (XContentParser parser = createParser(JsonXContent.jsonXContent, BytesReference.bytes(builder))) { + final ClusterStateChecksum parsedChecksum = ClusterStateChecksum.fromXContent(parser); + assertEquals(checksum, parsedChecksum); + } + } + + public void testSerialization() throws IOException { + ClusterStateChecksum checksum = new ClusterStateChecksum(generateClusterState()); + BytesStreamOutput output = new BytesStreamOutput(); + checksum.writeTo(output); + + try (StreamInput in = output.bytes().streamInput()) { + ClusterStateChecksum deserializedChecksum = new ClusterStateChecksum(in); + assertEquals(checksum, deserializedChecksum); + } + } + + public void testGetMismatchEntities() { + ClusterState clsState1 = generateClusterState(); + ClusterStateChecksum checksum = new ClusterStateChecksum(clsState1); + assertTrue(checksum.getMismatchEntities(checksum).isEmpty()); + + ClusterStateChecksum checksum2 = new ClusterStateChecksum(clsState1); + assertTrue(checksum.getMismatchEntities(checksum2).isEmpty()); + + ClusterState clsState2 = ClusterState.builder(ClusterName.DEFAULT) + .routingTable(RoutingTable.builder().build()) + .nodes(DiscoveryNodes.builder().build()) + .blocks(ClusterBlocks.builder().build()) + .customs(Map.of()) + .metadata(Metadata.EMPTY_METADATA) + .build(); + ClusterStateChecksum checksum3 = new ClusterStateChecksum(clsState2); + List mismatches = checksum.getMismatchEntities(checksum3); + assertFalse(mismatches.isEmpty()); + assertEquals(11, mismatches.size()); + assertEquals(ClusterStateChecksum.ROUTING_TABLE_CS, mismatches.get(0)); + assertEquals(ClusterStateChecksum.NODES_CS, mismatches.get(1)); + assertEquals(ClusterStateChecksum.BLOCKS_CS, mismatches.get(2)); + 
assertEquals(ClusterStateChecksum.CUSTOMS_CS, mismatches.get(3)); + assertEquals(ClusterStateChecksum.COORDINATION_MD_CS, mismatches.get(4)); + assertEquals(ClusterStateChecksum.SETTINGS_MD_CS, mismatches.get(5)); + assertEquals(ClusterStateChecksum.TRANSIENT_SETTINGS_MD_CS, mismatches.get(6)); + assertEquals(ClusterStateChecksum.TEMPLATES_MD_CS, mismatches.get(7)); + assertEquals(ClusterStateChecksum.CUSTOM_MD_CS, mismatches.get(8)); + assertEquals(ClusterStateChecksum.HASHES_MD_CS, mismatches.get(9)); + assertEquals(ClusterStateChecksum.INDICES_CS, mismatches.get(10)); + } + + public void testGetMismatchEntitiesUnorderedInput() { + ClusterState state1 = generateClusterState(); + DiscoveryNode node1 = DiscoveryNode.createLocal(Settings.EMPTY, new TransportAddress(TransportAddress.META_ADDRESS, 9200), "node1"); + DiscoveryNode node2 = DiscoveryNode.createLocal(Settings.EMPTY, new TransportAddress(TransportAddress.META_ADDRESS, 9201), "node2"); + DiscoveryNode node3 = DiscoveryNode.createLocal(Settings.EMPTY, new TransportAddress(TransportAddress.META_ADDRESS, 9202), "node3"); + + DiscoveryNodes nodes1 = DiscoveryNodes.builder().clusterManagerNodeId("test-node").add(node1).add(node2).add(node3).build(); + DiscoveryNodes nodes2 = DiscoveryNodes.builder().clusterManagerNodeId("test-node").add(node2).add(node3).build(); + nodes2 = nodes2.newNode(node1); + ClusterState state2 = ClusterState.builder(state1).nodes(nodes1).build(); + ClusterState state3 = ClusterState.builder(state1).nodes(nodes2).build(); + + ClusterStateChecksum checksum1 = new ClusterStateChecksum(state2); + ClusterStateChecksum checksum2 = new ClusterStateChecksum(state3); + assertEquals(checksum2, checksum1); + } + + private ClusterState generateClusterState() { + final Index index = new Index("test-index", "index-uuid"); + final Settings idxSettings = Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetadata.SETTING_INDEX_UUID, index.getUUID()) + .put(IndexMetadata.INDEX_READ_ONLY_SETTING.getKey(), true) + .build(); + final IndexMetadata indexMetadata = new IndexMetadata.Builder(index.getName()).settings(idxSettings) + .numberOfShards(1) + .numberOfReplicas(0) + .build(); + final Index index2 = new Index("test-index2", "index-uuid2"); + final Settings idxSettings2 = Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetadata.SETTING_INDEX_UUID, index2.getUUID()) + .put(IndexMetadata.INDEX_READ_ONLY_SETTING.getKey(), true) + .build(); + final IndexMetadata indexMetadata2 = new IndexMetadata.Builder(index2.getName()).settings(idxSettings2) + .numberOfShards(3) + .numberOfReplicas(2) + .build(); + final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder().term(1L).build(); + final Settings settings = Settings.builder().put("mock-settings", true).build(); + final TemplatesMetadata templatesMetadata = TemplatesMetadata.builder() + .put(IndexTemplateMetadata.builder("template1").settings(idxSettings).patterns(List.of("test*")).build()) + .build(); + final RemoteClusterStateTestUtils.CustomMetadata1 customMetadata1 = new RemoteClusterStateTestUtils.CustomMetadata1( + "custom-metadata-1" + ); + RemoteClusterStateTestUtils.TestClusterStateCustom1 clusterStateCustom1 = new RemoteClusterStateTestUtils.TestClusterStateCustom1( + "custom-1" + ); + return ClusterState.builder(ClusterName.DEFAULT) + .version(1L) + .stateUUID("state-uuid") + .metadata( + Metadata.builder() + .version(1L) + .put(indexMetadata, true) + 
.clusterUUID("cluster-uuid") + .coordinationMetadata(coordinationMetadata) + .persistentSettings(settings) + .transientSettings(settings) + .templates(templatesMetadata) + .hashesOfConsistentSettings(Map.of("key1", "value1", "key2", "value2")) + .putCustom(customMetadata1.getWriteableName(), customMetadata1) + .indices(Map.of(indexMetadata.getIndex().getName(), indexMetadata, indexMetadata2.getIndex().getName(), indexMetadata2)) + .build() + ) + .nodes(DiscoveryNodes.builder().clusterManagerNodeId("test-node").build()) + .blocks( + ClusterBlocks.builder() + .addBlocks(indexMetadata) + .addGlobalBlock(new ClusterBlock(1, "block", true, true, true, RestStatus.ACCEPTED, EnumSet.of(ClusterBlockLevel.READ))) + .addGlobalBlock( + new ClusterBlock(2, "block-name", false, true, true, RestStatus.OK, EnumSet.of(ClusterBlockLevel.WRITE)) + ) + .build() + ) + .customs(Map.of(clusterStateCustom1.getWriteableName(), clusterStateCustom1)) + .routingTable(RoutingTable.builder().addAsNew(indexMetadata).addAsNew(indexMetadata2).version(1L).build()) + .build(); + } +} diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateAttributesManagerTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateAttributesManagerTests.java index 4ef459e6657a1..67b1528466a9e 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateAttributesManagerTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateAttributesManagerTests.java @@ -140,7 +140,12 @@ public void testGetAsyncReadRunnable_DiscoveryNodes() throws IOException, Interr DiscoveryNodes discoveryNodes = getDiscoveryNodes(); String fileName = randomAlphaOfLength(10); when(blobStoreTransferService.downloadBlob(anyIterable(), anyString())).thenReturn( - DISCOVERY_NODES_FORMAT.serialize(discoveryNodes, fileName, compressor).streamInput() + DISCOVERY_NODES_FORMAT.serialize( + (out, discoveryNode) -> discoveryNode.writeToWithAttribute(out), + discoveryNodes, + fileName, + compressor + ).streamInput() ); RemoteDiscoveryNodes remoteObjForDownload = new RemoteDiscoveryNodes(fileName, "cluster-uuid", compressor); CountDownLatch latch = new CountDownLatch(1); diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerTests.java index 920a48f02b99a..8e114c9a26534 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateCleanupManagerTests.java @@ -144,7 +144,7 @@ public void setup() { remoteManifestManager = mock(RemoteManifestManager.class); remoteClusterStateService = mock(RemoteClusterStateService.class); when(remoteClusterStateService.getRemoteManifestManager()).thenReturn(remoteManifestManager); - when(remoteClusterStateService.getStats()).thenReturn(new RemotePersistenceStats()); + when(remoteClusterStateService.getRemoteStateStats()).thenReturn(new RemotePersistenceStats()); when(remoteClusterStateService.getThreadpool()).thenReturn(threadPool); when(remoteClusterStateService.getBlobStore()).thenReturn(blobStore); when(remoteClusterStateService.getBlobStoreRepository()).thenReturn(blobStoreRepository); @@ -503,7 +503,7 @@ public void testRemoteStateCleanupFailureStats() throws IOException { assertBusy(() -> { // wait for stats to get updated assertTrue(remoteClusterStateCleanupManager.getStats() != null); - 
assertEquals(0, remoteClusterStateCleanupManager.getStats().getSuccessCount()); + assertEquals(0, remoteClusterStateCleanupManager.getStats().getUploadStats().getSuccessCount()); assertEquals(1, remoteClusterStateCleanupManager.getStats().getCleanupAttemptFailedCount()); }); } catch (Exception e) { diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index 1871f4d08ba43..b11d5e48bec06 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -24,8 +24,8 @@ import org.opensearch.cluster.metadata.TemplatesMetadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; -import org.opensearch.cluster.routing.IndexRoutingTable; import org.opensearch.cluster.routing.RoutingTable; +import org.opensearch.cluster.routing.StringKeyDiffProvider; import org.opensearch.cluster.routing.remote.InternalRemoteRoutingTableService; import org.opensearch.cluster.routing.remote.NoopRemoteRoutingTableService; import org.opensearch.cluster.service.ClusterService; @@ -46,7 +46,6 @@ import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.common.util.FeatureFlags; -import org.opensearch.core.ParseField; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.core.common.bytes.BytesReference; @@ -62,6 +61,8 @@ import org.opensearch.gateway.remote.model.RemoteReadResult; import org.opensearch.gateway.remote.model.RemoteTransientSettingsMetadata; import org.opensearch.index.remote.RemoteIndexPathUploader; +import org.opensearch.index.remote.RemoteStoreUtils; +import org.opensearch.indices.DefaultRemoteStoreSettings; import org.opensearch.indices.IndicesModule; import org.opensearch.repositories.FilterRepository; import org.opensearch.repositories.RepositoriesService; @@ -106,11 +107,12 @@ import static java.util.Collections.emptyList; import static java.util.Collections.emptyMap; import static java.util.stream.Collectors.toList; -import static org.opensearch.common.util.FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL; import static org.opensearch.gateway.remote.ClusterMetadataManifest.CODEC_V1; import static org.opensearch.gateway.remote.ClusterMetadataManifest.CODEC_V2; +import static org.opensearch.gateway.remote.ClusterMetadataManifest.MANIFEST_CURRENT_CODEC_VERSION; import static org.opensearch.gateway.remote.RemoteClusterStateAttributesManager.CLUSTER_BLOCKS; import static org.opensearch.gateway.remote.RemoteClusterStateAttributesManager.CLUSTER_STATE_ATTRIBUTE; +import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_PUBLICATION_SETTING_KEY; import static org.opensearch.gateway.remote.RemoteClusterStateTestUtils.CustomMetadata1; import static org.opensearch.gateway.remote.RemoteClusterStateTestUtils.CustomMetadata2; import static org.opensearch.gateway.remote.RemoteClusterStateTestUtils.CustomMetadata3; @@ -124,7 +126,6 @@ import static org.opensearch.gateway.remote.RemoteGlobalMetadataManager.GLOBAL_METADATA_UPLOAD_TIMEOUT_DEFAULT; import static org.opensearch.gateway.remote.model.RemoteClusterBlocks.CLUSTER_BLOCKS_FORMAT; import static org.opensearch.gateway.remote.model.RemoteClusterBlocksTests.randomClusterBlocks; -import static 
org.opensearch.gateway.remote.model.RemoteClusterMetadataManifest.MANIFEST_CURRENT_CODEC_VERSION; import static org.opensearch.gateway.remote.model.RemoteClusterStateCustoms.CLUSTER_STATE_CUSTOM; import static org.opensearch.gateway.remote.model.RemoteCoordinationMetadata.COORDINATION_METADATA; import static org.opensearch.gateway.remote.model.RemoteCoordinationMetadata.COORDINATION_METADATA_FORMAT; @@ -150,6 +151,7 @@ import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.isRemoteRoutingTableEnabled; import static org.hamcrest.Matchers.anEmptyMap; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.is; @@ -165,6 +167,7 @@ import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doNothing; +import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.spy; @@ -252,7 +255,15 @@ public void setup() { clusterService, () -> 0L, threadPool, - List.of(new RemoteIndexPathUploader(threadPool, settings, repositoriesServiceSupplier, clusterSettings)), + List.of( + new RemoteIndexPathUploader( + threadPool, + settings, + repositoriesServiceSupplier, + clusterSettings, + DefaultRemoteStoreSettings.INSTANCE + ) + ), namedWriteableRegistry ); } @@ -262,8 +273,6 @@ public void teardown() throws Exception { super.tearDown(); remoteClusterStateService.close(); publicationEnabled = false; - Settings nodeSettings = Settings.builder().build(); - FeatureFlags.initializeFeatureFlags(nodeSettings); threadPool.shutdown(); } @@ -271,7 +280,8 @@ public void testFailWriteFullMetadataNonClusterManagerNode() throws IOException final ClusterState clusterState = generateClusterStateWithOneIndex().build(); final RemoteClusterStateManifestInfo manifestDetails = remoteClusterStateService.writeFullMetadata( clusterState, - randomAlphaOfLength(10) + randomAlphaOfLength(10), + MANIFEST_CURRENT_CODEC_VERSION ); Assert.assertThat(manifestDetails, nullValue()); } @@ -288,7 +298,15 @@ public void testFailInitializationWhenRemoteStateDisabled() { clusterService, () -> 0L, threadPool, - List.of(new RemoteIndexPathUploader(threadPool, settings, repositoriesServiceSupplier, clusterSettings)), + List.of( + new RemoteIndexPathUploader( + threadPool, + settings, + repositoriesServiceSupplier, + clusterSettings, + DefaultRemoteStoreSettings.INSTANCE + ) + ), writableRegistry() ) ); @@ -309,8 +327,11 @@ public void testWriteFullMetadataSuccess() throws IOException { final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); mockBlobStoreObjects(); remoteClusterStateService.start(); - final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata(clusterState, "prev-cluster-uuid") - .getClusterMetadataManifest(); + final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata( + clusterState, + "prev-cluster-uuid", + MANIFEST_CURRENT_CODEC_VERSION + ).getClusterMetadataManifest(); final UploadedIndexMetadata uploadedIndexMetadata = new 
UploadedIndexMetadata("test-index", "index-uuid", "metadata-filename"); List indices = List.of(uploadedIndexMetadata); @@ -348,8 +369,7 @@ public void testWriteFullMetadataSuccess() throws IOException { public void testWriteFullMetadataSuccessPublicationEnabled() throws IOException { // TODO Make the publication flag parameterized publicationEnabled = true; - Settings nodeSettings = Settings.builder().put(REMOTE_PUBLICATION_EXPERIMENTAL, publicationEnabled).build(); - FeatureFlags.initializeFeatureFlags(nodeSettings); + settings = Settings.builder().put(settings).put(REMOTE_PUBLICATION_SETTING_KEY, publicationEnabled).build(); remoteClusterStateService = new RemoteClusterStateService( "test-node-id", repositoriesServiceSupplier, @@ -357,7 +377,15 @@ public void testWriteFullMetadataSuccessPublicationEnabled() throws IOException clusterService, () -> 0L, threadPool, - List.of(new RemoteIndexPathUploader(threadPool, settings, repositoriesServiceSupplier, clusterSettings)), + List.of( + new RemoteIndexPathUploader( + threadPool, + settings, + repositoriesServiceSupplier, + clusterSettings, + DefaultRemoteStoreSettings.INSTANCE + ) + ), writableRegistry() ); final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()) @@ -370,8 +398,11 @@ public void testWriteFullMetadataSuccessPublicationEnabled() throws IOException .build(); mockBlobStoreObjects(); remoteClusterStateService.start(); - final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata(clusterState, "prev-cluster-uuid") - .getClusterMetadataManifest(); + final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata( + clusterState, + "prev-cluster-uuid", + MANIFEST_CURRENT_CODEC_VERSION + ).getClusterMetadataManifest(); final UploadedIndexMetadata uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "index-uuid", "metadata-filename"); List indices = List.of(uploadedIndexMetadata); @@ -419,8 +450,11 @@ public void testWriteFullMetadataInParallelSuccess() throws IOException { }).when(container).asyncBlobUpload(writeContextArgumentCaptor.capture(), actionListenerArgumentCaptor.capture()); remoteClusterStateService.start(); - final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata(clusterState, "prev-cluster-uuid") - .getClusterMetadataManifest(); + final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata( + clusterState, + "prev-cluster-uuid", + MANIFEST_CURRENT_CODEC_VERSION + ).getClusterMetadataManifest(); final UploadedIndexMetadata uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "index-uuid", "metadata-filename"); List indices = List.of(uploadedIndexMetadata); @@ -449,8 +483,8 @@ public void testWriteFullMetadataInParallelSuccess() throws IOException { assertThat(manifest.getStateUUID(), is(expectedManifest.getStateUUID())); assertThat(manifest.getPreviousClusterUUID(), is(expectedManifest.getPreviousClusterUUID())); - assertEquals(7, actionListenerArgumentCaptor.getAllValues().size()); - assertEquals(7, writeContextArgumentCaptor.getAllValues().size()); + assertEquals(8, actionListenerArgumentCaptor.getAllValues().size()); + assertEquals(8, writeContextArgumentCaptor.getAllValues().size()); byte[] writtenBytes = capturedWriteContext.get("metadata") .getStreamProvider(Integer.MAX_VALUE) @@ -497,7 +531,7 @@ public void run() { remoteClusterStateService.start(); assertThrows( RemoteStateTransferException.class, - () -> 
remoteClusterStateService.writeFullMetadata(clusterState, randomAlphaOfLength(10)) + () -> remoteClusterStateService.writeFullMetadata(clusterState, randomAlphaOfLength(10), MANIFEST_CURRENT_CODEC_VERSION) ); } @@ -536,14 +570,14 @@ public void testTimeoutWhileWritingManifestFile() throws IOException { anyMap(), anyBoolean(), anyList(), - anyMap() + any() ) ).thenReturn(new RemoteClusterStateUtils.UploadedMetadataResults()); RemoteStateTransferException ex = expectThrows( RemoteStateTransferException.class, - () -> spiedService.writeFullMetadata(clusterState, randomAlphaOfLength(10)) + () -> spiedService.writeFullMetadata(clusterState, randomAlphaOfLength(10), MANIFEST_CURRENT_CODEC_VERSION) ); - assertTrue(ex.getMessage().contains("Timed out waiting for transfer of following metadata to complete")); + assertTrue(ex.getMessage().contains("Timed out waiting for transfer")); } public void testWriteFullMetadataInParallelFailureForIndexMetadata() throws IOException { @@ -563,9 +597,9 @@ public void testWriteFullMetadataInParallelFailureForIndexMetadata() throws IOEx remoteClusterStateService.start(); assertThrows( RemoteStateTransferException.class, - () -> remoteClusterStateService.writeFullMetadata(clusterState, randomAlphaOfLength(10)) + () -> remoteClusterStateService.writeFullMetadata(clusterState, randomAlphaOfLength(10), MANIFEST_CURRENT_CODEC_VERSION) ); - assertEquals(0, remoteClusterStateService.getStats().getSuccessCount()); + assertEquals(0, remoteClusterStateService.getUploadStats().getSuccessCount()); } public void testFailWriteIncrementalMetadataNonClusterManagerNode() throws IOException { @@ -574,20 +608,20 @@ public void testFailWriteIncrementalMetadataNonClusterManagerNode() throws IOExc final RemoteClusterStateManifestInfo manifestDetails = remoteClusterStateService.writeIncrementalMetadata( clusterState, clusterState, - null + ClusterMetadataManifest.builder().build() ); Assert.assertThat(manifestDetails, nullValue()); - assertEquals(0, remoteClusterStateService.getStats().getSuccessCount()); + assertEquals(0, remoteClusterStateService.getUploadStats().getSuccessCount()); } - public void testFailWriteIncrementalMetadataWhenTermChanged() { + public void testFailWriteIncrementalMetadataWhenManifestNull() { final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder().term(2L).build(); final ClusterState previousClusterState = ClusterState.builder(ClusterName.DEFAULT) .metadata(Metadata.builder().coordinationMetadata(coordinationMetadata)) .build(); assertThrows( - AssertionError.class, + IllegalArgumentException.class, () -> remoteClusterStateService.writeIncrementalMetadata(previousClusterState, clusterState, null) ); } @@ -686,8 +720,8 @@ public void testWriteIncrementalMetadataSuccess() throws IOException { eq(false), eq(Collections.emptyMap()), eq(false), - eq(Collections.emptyList()), - eq(Collections.emptyMap()) + anyList(), + Mockito.any(StringKeyDiffProvider.class) ); assertThat(manifestInfo.getManifestFileName(), notNullValue()); @@ -707,13 +741,12 @@ public void testWriteIncrementalMetadataSuccess() throws IOException { assertThat(manifest.getTemplatesMetadata(), notNullValue()); assertThat(manifest.getCoordinationMetadata(), notNullValue()); assertThat(manifest.getCustomMetadataMap().size(), is(2)); - assertThat(manifest.getIndicesRouting().size(), is(0)); + assertThat(manifest.getIndicesRouting().size(), is(1)); } public void 
testWriteIncrementalMetadataSuccessWhenPublicationEnabled() throws IOException { publicationEnabled = true; - Settings nodeSettings = Settings.builder().put(REMOTE_PUBLICATION_EXPERIMENTAL, publicationEnabled).build(); - FeatureFlags.initializeFeatureFlags(nodeSettings); + settings = Settings.builder().put(settings).put(REMOTE_PUBLICATION_SETTING_KEY, true).build(); remoteClusterStateService = new RemoteClusterStateService( "test-node-id", repositoriesServiceSupplier, @@ -721,7 +754,15 @@ public void testWriteIncrementalMetadataSuccessWhenPublicationEnabled() throws I clusterService, () -> 0L, threadPool, - List.of(new RemoteIndexPathUploader(threadPool, settings, repositoriesServiceSupplier, clusterSettings)), + List.of( + new RemoteIndexPathUploader( + threadPool, + settings, + repositoriesServiceSupplier, + clusterSettings, + DefaultRemoteStoreSettings.INSTANCE + ) + ), writableRegistry() ); final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); @@ -768,7 +809,7 @@ public void testWriteIncrementalMetadataSuccessWhenPublicationEnabled() throws I eq(Collections.emptyMap()), eq(true), anyList(), - eq(Collections.emptyMap()) + Mockito.any(StringKeyDiffProvider.class) ); assertThat(manifestInfo.getManifestFileName(), notNullValue()); @@ -851,6 +892,10 @@ public void testGetClusterStateForManifest_IncludeEphemeral() throws IOException when(mockedResult.getComponent()).thenReturn(COORDINATION_METADATA); RemoteClusterStateService mockService = spy(remoteClusterStateService); mockService.getClusterStateForManifest(ClusterName.DEFAULT.value(), manifest, NODE_ID, true); + + assertNotNull(remoteClusterStateService.getFullDownloadStats()); + assertEquals(1, remoteClusterStateService.getFullDownloadStats().getSuccessCount()); + assertEquals(0, remoteClusterStateService.getFullDownloadStats().getFailedCount()); verify(mockService, times(1)).readClusterStateInParallel( any(), eq(manifest), @@ -1172,7 +1217,12 @@ public void testGetClusterStateUsingDiff() throws IOException { diffManifestBuilder.discoveryNodesUpdated(true); manifestBuilder.discoveryNodesMetadata(new UploadedMetadataAttribute(DISCOVERY_NODES, DISCOVERY_NODES_FILENAME)); when(blobContainer.readBlob(DISCOVERY_NODES_FILENAME)).thenAnswer(invocationOnMock -> { - BytesReference bytes = DISCOVERY_NODES_FORMAT.serialize(nodesBuilder.build(), DISCOVERY_NODES_FILENAME, compressor); + BytesReference bytes = DISCOVERY_NODES_FORMAT.serialize( + (out, nodes) -> nodes.writeToWithAttribute(out), + nodesBuilder.build(), + DISCOVERY_NODES_FILENAME, + compressor + ); return new ByteArrayInputStream(bytes.streamInput().readAllBytes()); }); } @@ -1242,11 +1292,6 @@ public void testGetClusterStateUsingDiff() throws IOException { ClusterState.Custom originalClusterStateCustom = clusterState.customs().get(clusterStateCustomName); assertNotEquals(originalClusterStateCustom, updateClusterStateCustom); }); - diffManifest.getIndicesRoutingUpdated().forEach(indexName -> { - IndexRoutingTable updatedIndexRoutingTable = updatedClusterState.getRoutingTable().getIndicesRouting().get(indexName); - IndexRoutingTable originalIndexingRoutingTable = clusterState.getRoutingTable().getIndicesRouting().get(indexName); - assertNotEquals(originalIndexingRoutingTable, updatedIndexRoutingTable); - }); diffManifest.getIndicesDeleted() .forEach(indexName -> { assertFalse(updatedClusterState.metadata().getIndices().containsKey(indexName)); }); diffManifest.getCustomMetadataDeleted().forEach(customMetadataName -> { @@ 
-1255,9 +1300,6 @@ public void testGetClusterStateUsingDiff() throws IOException { diffManifest.getClusterStateCustomDeleted().forEach(clusterStateCustomName -> { assertFalse(updatedClusterState.customs().containsKey(clusterStateCustomName)); }); - diffManifest.getIndicesRoutingDeleted().forEach(indexName -> { - assertFalse(updatedClusterState.getRoutingTable().getIndicesRouting().containsKey(indexName)); - }); } public void testReadClusterStateInParallel_TimedOut() throws IOException { @@ -1801,8 +1843,8 @@ private void verifyCodecMigrationManifest(int previousCodec) throws IOException // global metadata is updated assertThat(manifestAfterUpdate.hasMetadataAttributesFiles(), is(true)); - // Manifest file with codec version with 1 is updated. - assertThat(manifestAfterUpdate.getCodecVersion(), is(MANIFEST_CURRENT_CODEC_VERSION)); + // During incremental update, codec version will not change. + assertThat(manifestAfterUpdate.getCodecVersion(), is(previousCodec)); } public void testWriteIncrementalGlobalMetadataFromCodecV0Success() throws IOException { @@ -1837,7 +1879,7 @@ private void verifyWriteIncrementalGlobalMetadataFromOlderCodecSuccess(ClusterMe ).getClusterMetadataManifest(); final ClusterMetadataManifest expectedManifest = ClusterMetadataManifest.builder() - .codecVersion(MANIFEST_CURRENT_CODEC_VERSION) + .codecVersion(previousManifest.getCodecVersion()) .indices(Collections.emptyList()) .clusterTerm(1L) .stateVersion(1L) @@ -2026,8 +2068,11 @@ public void testCustomMetadataDeletedUpdatedAndAdded() throws IOException { // Initial cluster state with index. final ClusterState initialClusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); remoteClusterStateService.start(); - final ClusterMetadataManifest initialManifest = remoteClusterStateService.writeFullMetadata(initialClusterState, "_na_") - .getClusterMetadataManifest(); + final ClusterMetadataManifest initialManifest = remoteClusterStateService.writeFullMetadata( + initialClusterState, + "_na_", + MANIFEST_CURRENT_CODEC_VERSION + ).getClusterMetadataManifest(); ClusterState clusterState1 = ClusterState.builder(initialClusterState) .metadata( @@ -2105,8 +2150,11 @@ public void testIndexMetadataDeletedUpdatedAndAdded() throws IOException { // Initial cluster state with index. final ClusterState initialClusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); remoteClusterStateService.start(); - final ClusterMetadataManifest initialManifest = remoteClusterStateService.writeFullMetadata(initialClusterState, "_na_") - .getClusterMetadataManifest(); + final ClusterMetadataManifest initialManifest = remoteClusterStateService.writeFullMetadata( + initialClusterState, + "_na_", + MANIFEST_CURRENT_CODEC_VERSION + ).getClusterMetadataManifest(); String initialIndex = "test-index"; Index index1 = new Index("test-index-1", "index-uuid-1"); Index index2 = new Index("test-index-2", "index-uuid-2"); @@ -2184,8 +2232,11 @@ private void verifyMetadataAttributeOnlyUpdated( // Initial cluster state with index. 
final ClusterState initialClusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); remoteClusterStateService.start(); - final ClusterMetadataManifest initialManifest = remoteClusterStateService.writeFullMetadata(initialClusterState, "_na_") - .getClusterMetadataManifest(); + final ClusterMetadataManifest initialManifest = remoteClusterStateService.writeFullMetadata( + initialClusterState, + "_na_", + MANIFEST_CURRENT_CODEC_VERSION + ).getClusterMetadataManifest(); ClusterState newClusterState = clusterStateUpdater.apply(initialClusterState); @@ -2198,8 +2249,11 @@ private void verifyMetadataAttributeOnlyUpdated( initialManifest ).getClusterMetadataManifest(); } else { - manifestAfterMetadataUpdate = remoteClusterStateService.writeFullMetadata(newClusterState, initialClusterState.stateUUID()) - .getClusterMetadataManifest(); + manifestAfterMetadataUpdate = remoteClusterStateService.writeFullMetadata( + newClusterState, + initialClusterState.stateUUID(), + MANIFEST_CURRENT_CODEC_VERSION + ).getClusterMetadataManifest(); } assertions.accept(initialManifest, manifestAfterMetadataUpdate); @@ -2268,6 +2322,73 @@ public void testReadLatestMetadataManifestSuccessButNoIndexMetadata() throws IOE ); } + public void testReadLatestClusterStateFromCache() throws IOException { + final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); + remoteClusterStateService.start(); + final ClusterMetadataManifest expectedManifest = ClusterMetadataManifest.builder() + .indices(List.of()) + .clusterTerm(1L) + .stateVersion(12) + .stateUUID("state-uuid") + .clusterUUID("cluster-uuid") + .codecVersion(MANIFEST_CURRENT_CODEC_VERSION) + .coordinationMetadata(new ClusterMetadataManifest.UploadedMetadataAttribute(COORDINATION_METADATA, "mock-coordination-file")) + .settingMetadata(new ClusterMetadataManifest.UploadedMetadataAttribute(SETTING_METADATA, "mock-setting-file")) + .templatesMetadata(new ClusterMetadataManifest.UploadedMetadataAttribute(TEMPLATES_METADATA, "mock-templates-file")) + .put( + IndexGraveyard.TYPE, + new ClusterMetadataManifest.UploadedMetadataAttribute(IndexGraveyard.TYPE, "mock-custom-" + IndexGraveyard.TYPE + "-file") + ) + .nodeId("nodeA") + .opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) + .previousClusterUUID("prev-cluster-uuid") + .routingTableVersion(1) + .indicesRouting(List.of()) + .build(); + + Metadata expectedMetadata = Metadata.builder() + .clusterUUID("cluster-uuid") + .version(12) + .coordinationMetadata(CoordinationMetadata.builder().term(1).build()) + .persistentSettings(Settings.builder().put("readonly", true).build()) + .build(); + mockBlobContainerForGlobalMetadata(mockBlobStoreObjects(), expectedManifest, expectedMetadata); + + ClusterState state = remoteClusterStateService.getLatestClusterState( + clusterState.getClusterName().value(), + clusterState.metadata().clusterUUID(), + true + ); + + ClusterState stateFromCache = remoteClusterStateService.getRemoteClusterStateCache() + .getState(clusterState.getClusterName().value(), expectedManifest); + assertEquals(stateFromCache.getMetadata(), state.getMetadata()); + + final ClusterMetadataManifest notExistMetadata = ClusterMetadataManifest.builder() + .indices(List.of()) + .clusterTerm(1L) + .stateVersion(13) + .stateUUID("state-uuid") + .clusterUUID("cluster-uuid") + .codecVersion(MANIFEST_CURRENT_CODEC_VERSION) + .coordinationMetadata(new 
ClusterMetadataManifest.UploadedMetadataAttribute(COORDINATION_METADATA, "mock-coordination-file")) + .settingMetadata(new ClusterMetadataManifest.UploadedMetadataAttribute(SETTING_METADATA, "mock-setting-file")) + .templatesMetadata(new ClusterMetadataManifest.UploadedMetadataAttribute(TEMPLATES_METADATA, "mock-templates-file")) + .put( + IndexGraveyard.TYPE, + new ClusterMetadataManifest.UploadedMetadataAttribute(IndexGraveyard.TYPE, "mock-custom-" + IndexGraveyard.TYPE + "-file") + ) + .nodeId("nodeA") + .opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) + .previousClusterUUID("prev-cluster-uuid") + .routingTableVersion(1) + .indicesRouting(List.of()) + .build(); + ClusterState notInCacheState = remoteClusterStateService.getRemoteClusterStateCache() + .getState(clusterState.getClusterName().value(), notExistMetadata); + assertNull(notInCacheState); + } + public void testReadLatestMetadataManifestSuccessButIndexMetadataFetchIOException() throws IOException { final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); final UploadedIndexMetadata uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "index-uuid", "metadata-filename__2"); @@ -2334,9 +2455,46 @@ public void testReadLatestMetadataManifestSuccess() throws IOException { assertThat(manifest.getStateUUID(), is(expectedManifest.getStateUUID())); } + public void testReadLatestMetadataManifestSuccessByTermVersion() throws IOException { + final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); + final UploadedIndexMetadata uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "index-uuid", "metadata-filename"); + final List indices = List.of(uploadedIndexMetadata); + + final ClusterMetadataManifest expectedManifest = ClusterMetadataManifest.builder() + .indices(indices) + .clusterTerm(1L) + .stateVersion(1L) + .stateUUID("state-uuid") + .clusterUUID("cluster-uuid") + .nodeId("nodeA") + .opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) + .codecVersion(CODEC_V2) + .previousClusterUUID("prev-cluster-uuid") + .build(); + + mockBlobContainer(mockBlobStoreObjects(), expectedManifest, new HashMap<>(), CODEC_V2, 1, 1); + remoteClusterStateService.start(); + final ClusterMetadataManifest manifest = remoteClusterStateService.getClusterMetadataManifestByTermVersion( + clusterState.getClusterName().value(), + clusterState.metadata().clusterUUID(), + 1, + 1 + ).get(); + + assertThat(manifest.getIndices().size(), is(1)); + assertThat(manifest.getIndices().get(0).getIndexName(), is(uploadedIndexMetadata.getIndexName())); + assertThat(manifest.getIndices().get(0).getIndexUUID(), is(uploadedIndexMetadata.getIndexUUID())); + assertThat(manifest.getIndices().get(0).getUploadedFilename(), notNullValue()); + assertThat(manifest.getClusterTerm(), is(expectedManifest.getClusterTerm())); + assertThat(manifest.getStateVersion(), is(expectedManifest.getStateVersion())); + assertThat(manifest.getClusterUUID(), is(expectedManifest.getClusterUUID())); + assertThat(manifest.getStateUUID(), is(expectedManifest.getStateUUID())); + } + public void testReadGlobalMetadata() throws IOException { - when(blobStoreRepository.getNamedXContentRegistry()).thenReturn(new NamedXContentRegistry( - List.of(new NamedXContentRegistry.Entry(Metadata.Custom.class, new ParseField(IndexGraveyard.TYPE), IndexGraveyard::fromXContent)))); + // when(blobStoreRepository.getNamedXContentRegistry()).thenReturn(new 
NamedXContentRegistry( + // List.of(new NamedXContentRegistry.Entry(Metadata.Custom.class, new ParseField(IndexGraveyard.TYPE), + // IndexGraveyard::fromXContent)))); final ClusterState clusterState = generateClusterStateWithGlobalMetadata().nodes(nodesWithLocalNodeClusterManager()).build(); remoteClusterStateService.start(); @@ -2351,7 +2509,10 @@ public void testReadGlobalMetadata() throws IOException { .coordinationMetadata(new ClusterMetadataManifest.UploadedMetadataAttribute(COORDINATION_METADATA, "mock-coordination-file")) .settingMetadata(new ClusterMetadataManifest.UploadedMetadataAttribute(SETTING_METADATA, "mock-setting-file")) .templatesMetadata(new ClusterMetadataManifest.UploadedMetadataAttribute(TEMPLATES_METADATA, "mock-templates-file")) - .put(IndexGraveyard.TYPE, new ClusterMetadataManifest.UploadedMetadataAttribute(IndexGraveyard.TYPE, "mock-custom-" +IndexGraveyard.TYPE+ "-file")) + .put( + IndexGraveyard.TYPE, + new ClusterMetadataManifest.UploadedMetadataAttribute(IndexGraveyard.TYPE, "mock-custom-" + IndexGraveyard.TYPE + "-file") + ) .nodeId("nodeA") .opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) .previousClusterUUID("prev-cluster-uuid") @@ -2359,7 +2520,10 @@ public void testReadGlobalMetadata() throws IOException { .indicesRouting(List.of()) .build(); - Metadata expectedMetadata = Metadata.builder().clusterUUID("cluster-uuid").persistentSettings(Settings.builder().put("readonly", true).build()).build(); + Metadata expectedMetadata = Metadata.builder() + .clusterUUID("cluster-uuid") + .persistentSettings(Settings.builder().put("readonly", true).build()) + .build(); mockBlobContainerForGlobalMetadata(mockBlobStoreObjects(), expectedManifest, expectedMetadata); ClusterState newClusterState = remoteClusterStateService.getLatestClusterState( @@ -2466,7 +2630,7 @@ public void testMarkLastStateAsCommittedSuccess() throws IOException { List indices = List.of(uploadedIndexMetadata); final ClusterMetadataManifest previousManifest = ClusterMetadataManifest.builder().indices(indices).build(); - final ClusterMetadataManifest manifest = remoteClusterStateService.markLastStateAsCommitted(clusterState, previousManifest) + final ClusterMetadataManifest manifest = remoteClusterStateService.markLastStateAsCommitted(clusterState, previousManifest, false) .getClusterMetadataManifest(); final ClusterMetadataManifest expectedManifest = ClusterMetadataManifest.builder() @@ -2568,21 +2732,24 @@ public void testGetValidPreviousClusterUUIDWhenLastUUIDUncommitted() throws IOEx assertThat(previousClusterUUID, equalTo("cluster-uuid2")); } - public void testRemoteStateStats() throws IOException { + public void testRemoteStateUploadStats() throws IOException { final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); mockBlobStoreObjects(); remoteClusterStateService.start(); - final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata(clusterState, "prev-cluster-uuid") - .getClusterMetadataManifest(); + final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata( + clusterState, + "prev-cluster-uuid", + MANIFEST_CURRENT_CODEC_VERSION + ).getClusterMetadataManifest(); - assertTrue(remoteClusterStateService.getStats() != null); - assertEquals(1, remoteClusterStateService.getStats().getSuccessCount()); - assertEquals(0, remoteClusterStateService.getStats().getCleanupAttemptFailedCount()); - assertEquals(0, remoteClusterStateService.getStats().getFailedCount()); + 
assertTrue(remoteClusterStateService.getUploadStats() != null); + assertEquals(1, remoteClusterStateService.getUploadStats().getSuccessCount()); + assertEquals(0, remoteClusterStateService.getRemoteStateStats().getCleanupAttemptFailedCount()); + assertEquals(0, remoteClusterStateService.getUploadStats().getFailedCount()); } public void testRemoteRoutingTableNotInitializedWhenDisabled() { - if (publicationEnabled) { + if (isRemoteRoutingTableEnabled(settings)) { assertTrue(remoteClusterStateService.getRemoteRoutingTableService() instanceof InternalRemoteRoutingTableService); } else { assertTrue(remoteClusterStateService.getRemoteRoutingTableService() instanceof NoopRemoteRoutingTableService); @@ -2597,7 +2764,7 @@ public void testRemoteRoutingTableInitializedWhenEnabled() { .build(); clusterSettings.applySettings(newSettings); - Settings nodeSettings = Settings.builder().put(REMOTE_PUBLICATION_EXPERIMENTAL, "true").build(); + Settings nodeSettings = Settings.builder().put(REMOTE_PUBLICATION_SETTING_KEY, "true").build(); FeatureFlags.initializeFeatureFlags(nodeSettings); remoteClusterStateService = new RemoteClusterStateService( @@ -2607,7 +2774,15 @@ public void testRemoteRoutingTableInitializedWhenEnabled() { clusterService, () -> 0L, threadPool, - List.of(new RemoteIndexPathUploader(threadPool, newSettings, repositoriesServiceSupplier, clusterSettings)), + List.of( + new RemoteIndexPathUploader( + threadPool, + newSettings, + repositoriesServiceSupplier, + clusterSettings, + DefaultRemoteStoreSettings.INSTANCE + ) + ), writableRegistry() ); assertTrue(remoteClusterStateService.getRemoteRoutingTableService() instanceof InternalRemoteRoutingTableService); @@ -2620,8 +2795,11 @@ public void testWriteFullMetadataSuccessWithRoutingTable() throws IOException { final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); remoteClusterStateService.start(); - final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata(clusterState, "prev-cluster-uuid") - .getClusterMetadataManifest(); + final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata( + clusterState, + "prev-cluster-uuid", + MANIFEST_CURRENT_CODEC_VERSION + ).getClusterMetadataManifest(); final UploadedIndexMetadata uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "index-uuid", "metadata-filename"); final UploadedIndexMetadata uploadedIndiceRoutingMetadata = new UploadedIndexMetadata( "test-index", @@ -2670,8 +2848,11 @@ public void testWriteFullMetadataInParallelSuccessWithRoutingTable() throws IOEx when((blobStoreRepository.basePath())).thenReturn(BlobPath.cleanPath().add("base-path")); remoteClusterStateService.start(); - final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata(clusterState, "prev-cluster-uuid") - .getClusterMetadataManifest(); + final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata( + clusterState, + "prev-cluster-uuid", + MANIFEST_CURRENT_CODEC_VERSION + ).getClusterMetadataManifest(); final UploadedIndexMetadata uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "index-uuid", "metadata-filename"); final UploadedIndexMetadata uploadedIndiceRoutingMetadata = new UploadedIndexMetadata( @@ -2823,9 +3004,7 @@ public void testWriteIncrementalMetadataSuccessWithRoutingTableDiffNull() throws INDEX_ROUTING_METADATA_PREFIX ); indices.add(uploadedIndiceRoutingMetadata); - final ClusterState previousClusterState = 
generateClusterStateWithOneIndex("test-index2", 5, 1, false).nodes( - nodesWithLocalNodeClusterManager() - ).build(); + final ClusterState previousClusterState = clusterState; final ClusterMetadataManifest previousManifest = ClusterMetadataManifest.builder().indices(indices).build(); when((blobStoreRepository.basePath())).thenReturn(BlobPath.cleanPath().add("base-path")); @@ -2854,9 +3033,6 @@ public void testWriteIncrementalMetadataSuccessWithRoutingTableDiffNull() throws assertThat(manifest.getClusterUUID(), is(expectedManifest.getClusterUUID())); assertThat(manifest.getStateUUID(), is(expectedManifest.getStateUUID())); assertThat(manifest.getRoutingTableVersion(), is(expectedManifest.getRoutingTableVersion())); - assertThat(manifest.getIndicesRouting().get(0).getIndexName(), is(uploadedIndiceRoutingMetadata.getIndexName())); - assertThat(manifest.getIndicesRouting().get(0).getIndexUUID(), is(uploadedIndiceRoutingMetadata.getIndexUUID())); - assertThat(manifest.getIndicesRouting().get(0).getUploadedFilename(), notNullValue()); assertThat(manifest.getDiffManifest().getIndicesRoutingDiffPath(), nullValue()); } @@ -2865,11 +3041,10 @@ private void initializeRoutingTable() { .put("node.attr." + REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, "routing_repository") .put("node.attr." + REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, "remote_store_repository") .put(RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true) + .put(REMOTE_PUBLICATION_SETTING_KEY, "true") .build(); clusterSettings.applySettings(newSettings); - Settings nodeSettings = Settings.builder().put(REMOTE_PUBLICATION_EXPERIMENTAL, "true").build(); - FeatureFlags.initializeFeatureFlags(nodeSettings); remoteClusterStateService = new RemoteClusterStateService( "test-node-id", repositoriesServiceSupplier, @@ -2877,94 +3052,777 @@ private void initializeRoutingTable() { clusterService, () -> 0L, threadPool, - List.of(new RemoteIndexPathUploader(threadPool, newSettings, repositoriesServiceSupplier, clusterSettings)), + List.of( + new RemoteIndexPathUploader( + threadPool, + newSettings, + repositoriesServiceSupplier, + clusterSettings, + DefaultRemoteStoreSettings.INSTANCE + ) + ), writableRegistry() ); } - private void mockObjectsForGettingPreviousClusterUUID(Map clusterUUIDsPointers) throws IOException { - mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, false, Collections.emptyMap()); - } + private void initializeWithChecksumEnabled(RemoteClusterStateService.RemoteClusterStateValidationMode mode) { + Settings newSettings = Settings.builder() + .put("node.attr." + REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, "routing_repository") + .put("node.attr." 
+ REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, "remote_store_repository") + .put(RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true) + .put(RemoteClusterStateService.REMOTE_CLUSTER_STATE_CHECKSUM_VALIDATION_MODE_SETTING.getKey(), mode.name()) + .put(REMOTE_PUBLICATION_SETTING_KEY, true) + .build(); + clusterSettings.applySettings(newSettings); - private void mockObjectsForGettingPreviousClusterUUID( - Map clusterUUIDsPointers, - Map clusterUUIDCommitted - ) throws IOException { - mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, false, clusterUUIDCommitted); + remoteClusterStateService = new RemoteClusterStateService( + "test-node-id", + repositoriesServiceSupplier, + newSettings, + clusterService, + () -> 0L, + threadPool, + List.of( + new RemoteIndexPathUploader( + threadPool, + newSettings, + repositoriesServiceSupplier, + clusterSettings, + DefaultRemoteStoreSettings.INSTANCE + ) + ), + writableRegistry() + ); } - private void mockObjectsForGettingPreviousClusterUUID( - Map clusterUUIDsPointers, - boolean differGlobalMetadata, - Map clusterUUIDCommitted - ) throws IOException { - final BlobPath blobPath = mock(BlobPath.class); - when((blobStoreRepository.basePath())).thenReturn(blobPath); - when(blobPath.add(anyString())).thenReturn(blobPath); - when(blobPath.buildAsString()).thenReturn("/blob/path/"); - BlobContainer blobContainer1 = mock(BlobContainer.class); - BlobContainer blobContainer2 = mock(BlobContainer.class); - BlobContainer blobContainer3 = mock(BlobContainer.class); - BlobContainer uuidBlobContainer = mock(BlobContainer.class); - when(blobContainer1.path()).thenReturn(blobPath); - when(blobContainer2.path()).thenReturn(blobPath); - when(blobContainer3.path()).thenReturn(blobPath); + public void testWriteFullMetadataSuccessWithChecksumValidationEnabled() throws IOException { + initializeWithChecksumEnabled(RemoteClusterStateService.RemoteClusterStateValidationMode.FAILURE); + mockBlobStoreObjects(); + when((blobStoreRepository.basePath())).thenReturn(BlobPath.cleanPath().add("base-path")); - mockBlobContainerForClusterUUIDs(uuidBlobContainer, clusterUUIDsPointers.keySet()); - List uploadedIndexMetadataList1 = List.of( - new UploadedIndexMetadata("index1", "index-uuid1", "key1__2"), - new UploadedIndexMetadata("index2", "index-uuid2", "key2__2") - ); - Map customMetadataMap = new HashMap<>(); - final ClusterMetadataManifest clusterManifest1 = generateClusterMetadataManifest( - "cluster-uuid1", - clusterUUIDsPointers.get("cluster-uuid1"), - randomAlphaOfLength(10), - uploadedIndexMetadataList1, - customMetadataMap, - new UploadedMetadataAttribute(COORDINATION_METADATA, "key3"), - new UploadedMetadataAttribute(SETTING_METADATA, "key4"), - new UploadedMetadataAttribute(TEMPLATES_METADATA, "key5"), - clusterUUIDCommitted.getOrDefault("cluster-uuid1", true) + final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); + remoteClusterStateService.start(); + final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata( + clusterState, + "prev-cluster-uuid", + MANIFEST_CURRENT_CODEC_VERSION + ).getClusterMetadataManifest(); + final UploadedIndexMetadata uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "index-uuid", "metadata-filename"); + final UploadedIndexMetadata uploadedIndiceRoutingMetadata = new UploadedIndexMetadata( + "test-index", + "index-uuid", + "routing-filename", + INDEX_ROUTING_METADATA_PREFIX ); - Settings 
indexSettings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT).build(); - IndexMetadata indexMetadata1 = IndexMetadata.builder("index1") - .settings(indexSettings) - .numberOfShards(1) - .numberOfReplicas(1) - .build(); - IndexMetadata indexMetadata2 = IndexMetadata.builder("index2") - .settings(indexSettings) - .numberOfShards(1) - .numberOfReplicas(1) - .build(); - Metadata metadata1 = Metadata.builder() - .persistentSettings(Settings.builder().put(Metadata.SETTING_READ_ONLY_SETTING.getKey(), true).build()) + final ClusterMetadataManifest expectedManifest = ClusterMetadataManifest.builder() + .indices(List.of(uploadedIndexMetadata)) + .clusterTerm(1L) + .stateVersion(1L) + .stateUUID("state-uuid") + .clusterUUID("cluster-uuid") + .previousClusterUUID("prev-cluster-uuid") + .routingTableVersion(1L) + .indicesRouting(List.of(uploadedIndiceRoutingMetadata)) + .checksum(new ClusterStateChecksum(clusterState)) .build(); - Map indexMetadataMap1 = Map.of("index-uuid1", indexMetadata1, "index-uuid2", indexMetadata2); - mockBlobContainerForGlobalMetadata(blobContainer1, clusterManifest1, metadata1); - mockBlobContainer(blobContainer1, clusterManifest1, indexMetadataMap1, MANIFEST_CURRENT_CODEC_VERSION); - List uploadedIndexMetadataList2 = List.of( - new UploadedIndexMetadata("index1", "index-uuid1", "key1__2"), - new UploadedIndexMetadata("index2", "index-uuid2", "key2__2") - ); - final ClusterMetadataManifest clusterManifest2 = generateClusterMetadataManifest( - "cluster-uuid2", - clusterUUIDsPointers.get("cluster-uuid2"), - randomAlphaOfLength(10), - uploadedIndexMetadataList2, - customMetadataMap, - new UploadedMetadataAttribute(COORDINATION_METADATA, "key3"), - new UploadedMetadataAttribute(SETTING_METADATA, "key4"), - new UploadedMetadataAttribute(TEMPLATES_METADATA, "key5"), - clusterUUIDCommitted.getOrDefault("cluster-uuid2", true) - ); - IndexMetadata indexMetadata3 = IndexMetadata.builder("index1") - .settings(indexSettings) - .numberOfShards(1) - .numberOfReplicas(1) - .build(); + assertThat(manifest.getIndices().size(), is(1)); + assertThat(manifest.getClusterTerm(), is(expectedManifest.getClusterTerm())); + assertThat(manifest.getStateVersion(), is(expectedManifest.getStateVersion())); + assertThat(manifest.getClusterUUID(), is(expectedManifest.getClusterUUID())); + assertThat(manifest.getStateUUID(), is(expectedManifest.getStateUUID())); + assertThat(manifest.getPreviousClusterUUID(), is(expectedManifest.getPreviousClusterUUID())); + assertThat(manifest.getRoutingTableVersion(), is(expectedManifest.getRoutingTableVersion())); + assertThat(manifest.getIndicesRouting().get(0).getIndexName(), is(uploadedIndiceRoutingMetadata.getIndexName())); + assertThat(manifest.getIndicesRouting().get(0).getIndexUUID(), is(uploadedIndiceRoutingMetadata.getIndexUUID())); + assertThat(manifest.getIndicesRouting().get(0).getUploadedFilename(), notNullValue()); + assertThat(manifest.getClusterStateChecksum(), is(expectedManifest.getClusterStateChecksum())); + } + + public void testWriteFullMetadataSuccessWithChecksumValidationModeNone() throws IOException { + initializeWithChecksumEnabled(RemoteClusterStateService.RemoteClusterStateValidationMode.NONE); + mockBlobStoreObjects(); + when((blobStoreRepository.basePath())).thenReturn(BlobPath.cleanPath().add("base-path")); + + final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); + remoteClusterStateService.start(); + final ClusterMetadataManifest manifest = 
remoteClusterStateService.writeFullMetadata( + clusterState, + "prev-cluster-uuid", + MANIFEST_CURRENT_CODEC_VERSION + ).getClusterMetadataManifest(); + final UploadedIndexMetadata uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "index-uuid", "metadata-filename"); + final UploadedIndexMetadata uploadedIndiceRoutingMetadata = new UploadedIndexMetadata( + "test-index", + "index-uuid", + "routing-filename", + INDEX_ROUTING_METADATA_PREFIX + ); + final ClusterMetadataManifest expectedManifest = ClusterMetadataManifest.builder() + .indices(List.of(uploadedIndexMetadata)) + .clusterTerm(1L) + .stateVersion(1L) + .stateUUID("state-uuid") + .clusterUUID("cluster-uuid") + .previousClusterUUID("prev-cluster-uuid") + .routingTableVersion(1L) + .indicesRouting(List.of(uploadedIndiceRoutingMetadata)) + .build(); + + assertThat(manifest.getIndices().size(), is(1)); + assertThat(manifest.getClusterTerm(), is(expectedManifest.getClusterTerm())); + assertThat(manifest.getStateVersion(), is(expectedManifest.getStateVersion())); + assertThat(manifest.getClusterUUID(), is(expectedManifest.getClusterUUID())); + assertThat(manifest.getStateUUID(), is(expectedManifest.getStateUUID())); + assertThat(manifest.getPreviousClusterUUID(), is(expectedManifest.getPreviousClusterUUID())); + assertThat(manifest.getRoutingTableVersion(), is(expectedManifest.getRoutingTableVersion())); + assertThat(manifest.getIndicesRouting().get(0).getIndexName(), is(uploadedIndiceRoutingMetadata.getIndexName())); + assertThat(manifest.getIndicesRouting().get(0).getIndexUUID(), is(uploadedIndiceRoutingMetadata.getIndexUUID())); + assertThat(manifest.getIndicesRouting().get(0).getUploadedFilename(), notNullValue()); + assertNull(manifest.getClusterStateChecksum()); + } + + public void testWriteIncrementalMetadataSuccessWithChecksumValidationEnabled() throws IOException { + initializeWithChecksumEnabled(RemoteClusterStateService.RemoteClusterStateValidationMode.FAILURE); + final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); + mockBlobStoreObjects(); + final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder().term(1L).build(); + final ClusterState previousClusterState = ClusterState.builder(ClusterName.DEFAULT) + .metadata(Metadata.builder().coordinationMetadata(coordinationMetadata)) + .build(); + + final ClusterMetadataManifest previousManifest = ClusterMetadataManifest.builder() + .indices(Collections.emptyList()) + .checksum(new ClusterStateChecksum(clusterState)) + .build(); + when((blobStoreRepository.basePath())).thenReturn(BlobPath.cleanPath().add("base-path")); + + remoteClusterStateService.start(); + final ClusterMetadataManifest manifest = remoteClusterStateService.writeIncrementalMetadata( + previousClusterState, + clusterState, + previousManifest + ).getClusterMetadataManifest(); + final UploadedIndexMetadata uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "index-uuid", "metadata-filename"); + final UploadedIndexMetadata uploadedIndiceRoutingMetadata = new UploadedIndexMetadata( + "test-index", + "index-uuid", + "routing-filename", + INDEX_ROUTING_METADATA_PREFIX + ); + final ClusterMetadataManifest expectedManifest = ClusterMetadataManifest.builder() + .indices(List.of(uploadedIndexMetadata)) + .clusterTerm(1L) + .stateVersion(1L) + .stateUUID("state-uuid") + .clusterUUID("cluster-uuid") + .previousClusterUUID("prev-cluster-uuid") + .routingTableVersion(1) + 
.indicesRouting(List.of(uploadedIndiceRoutingMetadata)) + .checksum(new ClusterStateChecksum(clusterState)) + .build(); + + assertThat(manifest.getIndices().size(), is(1)); + assertThat(manifest.getClusterTerm(), is(expectedManifest.getClusterTerm())); + assertThat(manifest.getStateVersion(), is(expectedManifest.getStateVersion())); + assertThat(manifest.getClusterUUID(), is(expectedManifest.getClusterUUID())); + assertThat(manifest.getStateUUID(), is(expectedManifest.getStateUUID())); + assertThat(manifest.getRoutingTableVersion(), is(expectedManifest.getRoutingTableVersion())); + assertThat(manifest.getIndicesRouting().get(0).getIndexName(), is(uploadedIndiceRoutingMetadata.getIndexName())); + assertThat(manifest.getIndicesRouting().get(0).getIndexUUID(), is(uploadedIndiceRoutingMetadata.getIndexUUID())); + assertThat(manifest.getIndicesRouting().get(0).getUploadedFilename(), notNullValue()); + assertThat(manifest.getClusterStateChecksum(), is(expectedManifest.getClusterStateChecksum())); + } + + public void testWriteIncrementalMetadataSuccessWithChecksumValidationModeNone() throws IOException { + initializeWithChecksumEnabled(RemoteClusterStateService.RemoteClusterStateValidationMode.NONE); + final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); + mockBlobStoreObjects(); + final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder().term(1L).build(); + final ClusterState previousClusterState = ClusterState.builder(ClusterName.DEFAULT) + .metadata(Metadata.builder().coordinationMetadata(coordinationMetadata)) + .build(); + + final ClusterMetadataManifest previousManifest = ClusterMetadataManifest.builder() + .indices(Collections.emptyList()) + .checksum(new ClusterStateChecksum(clusterState)) + .build(); + when((blobStoreRepository.basePath())).thenReturn(BlobPath.cleanPath().add("base-path")); + + remoteClusterStateService.start(); + final ClusterMetadataManifest manifest = remoteClusterStateService.writeIncrementalMetadata( + previousClusterState, + clusterState, + previousManifest + ).getClusterMetadataManifest(); + final UploadedIndexMetadata uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "index-uuid", "metadata-filename"); + final UploadedIndexMetadata uploadedIndiceRoutingMetadata = new UploadedIndexMetadata( + "test-index", + "index-uuid", + "routing-filename", + INDEX_ROUTING_METADATA_PREFIX + ); + final ClusterMetadataManifest expectedManifest = ClusterMetadataManifest.builder() + .indices(List.of(uploadedIndexMetadata)) + .clusterTerm(1L) + .stateVersion(1L) + .stateUUID("state-uuid") + .clusterUUID("cluster-uuid") + .previousClusterUUID("prev-cluster-uuid") + .routingTableVersion(1) + .indicesRouting(List.of(uploadedIndiceRoutingMetadata)) + .checksum(new ClusterStateChecksum(clusterState)) + .build(); + + assertThat(manifest.getIndices().size(), is(1)); + assertThat(manifest.getClusterTerm(), is(expectedManifest.getClusterTerm())); + assertThat(manifest.getStateVersion(), is(expectedManifest.getStateVersion())); + assertThat(manifest.getClusterUUID(), is(expectedManifest.getClusterUUID())); + assertThat(manifest.getStateUUID(), is(expectedManifest.getStateUUID())); + assertThat(manifest.getRoutingTableVersion(), is(expectedManifest.getRoutingTableVersion())); + assertThat(manifest.getIndicesRouting().get(0).getIndexName(), is(uploadedIndiceRoutingMetadata.getIndexName())); + assertThat(manifest.getIndicesRouting().get(0).getIndexUUID(), 
is(uploadedIndiceRoutingMetadata.getIndexUUID())); + assertThat(manifest.getIndicesRouting().get(0).getUploadedFilename(), notNullValue()); + assertNull(manifest.getClusterStateChecksum()); + } + + public void testGetClusterStateForManifestWithChecksumValidationEnabledWithNullChecksum() throws IOException { + initializeWithChecksumEnabled(RemoteClusterStateService.RemoteClusterStateValidationMode.FAILURE); + ClusterMetadataManifest manifest = generateClusterMetadataManifestWithAllAttributes().build(); + mockBlobStoreObjects(); + remoteClusterStateService.start(); + RemoteReadResult mockedResult = mock(RemoteReadResult.class); + RemoteIndexMetadataManager mockedIndexManager = mock(RemoteIndexMetadataManager.class); + RemoteGlobalMetadataManager mockedGlobalMetadataManager = mock(RemoteGlobalMetadataManager.class); + RemoteClusterStateAttributesManager mockedClusterStateAttributeManager = mock(RemoteClusterStateAttributesManager.class); + remoteClusterStateService.setRemoteIndexMetadataManager(mockedIndexManager); + remoteClusterStateService.setRemoteGlobalMetadataManager(mockedGlobalMetadataManager); + remoteClusterStateService.setRemoteClusterStateAttributesManager(mockedClusterStateAttributeManager); + ArgumentCaptor> listenerArgumentCaptor = ArgumentCaptor.forClass( + LatchedActionListener.class + ); + doAnswer(invocation -> { + listenerArgumentCaptor.getValue().onResponse(mockedResult); + return null; + }).when(mockedIndexManager).readAsync(any(), any(), listenerArgumentCaptor.capture()); + doAnswer(invocation -> { + listenerArgumentCaptor.getValue().onResponse(mockedResult); + return null; + }).when(mockedGlobalMetadataManager).readAsync(any(), any(), listenerArgumentCaptor.capture()); + doAnswer(invocation -> { + listenerArgumentCaptor.getValue().onResponse(mockedResult); + return null; + }).when(mockedClusterStateAttributeManager).readAsync(anyString(), any(), listenerArgumentCaptor.capture()); + when(mockedResult.getComponent()).thenReturn(COORDINATION_METADATA); + RemoteClusterStateService mockService = spy(remoteClusterStateService); + mockService.getClusterStateForManifest(ClusterName.DEFAULT.value(), manifest, NODE_ID, true); + verify(mockService, times(1)).readClusterStateInParallel( + any(), + eq(manifest), + eq(manifest.getClusterUUID()), + eq(NODE_ID), + eq(manifest.getIndices()), + eq(manifest.getCustomMetadataMap()), + eq(true), + eq(true), + eq(true), + eq(true), + eq(true), + eq(true), + eq(manifest.getIndicesRouting()), + eq(true), + eq(manifest.getClusterStateCustomMap()), + eq(false), + eq(true) + ); + verify(mockService, times(0)).validateClusterStateFromChecksum( + any(ClusterMetadataManifest.class), + any(ClusterState.class), + anyString(), + anyString(), + anyBoolean() + ); + } + + public void testGetClusterStateForManifestWithChecksumValidationEnabled() throws IOException { + initializeWithChecksumEnabled(RemoteClusterStateService.RemoteClusterStateValidationMode.FAILURE); + ClusterState clusterState = generateClusterStateWithAllAttributes().build(); + ClusterMetadataManifest manifest = generateClusterMetadataManifestWithAllAttributes().checksum( + new ClusterStateChecksum(clusterState) + ).build(); + remoteClusterStateService.start(); + RemoteClusterStateService mockService = spy(remoteClusterStateService); + doReturn(clusterState).when(mockService) + .readClusterStateInParallel( + any(), + eq(manifest), + eq(manifest.getClusterUUID()), + eq(NODE_ID), + eq(manifest.getIndices()), + eq(manifest.getCustomMetadataMap()), + eq(true), + eq(true), + eq(true), + 
eq(true), + eq(true), + eq(true), + eq(manifest.getIndicesRouting()), + eq(true), + eq(manifest.getClusterStateCustomMap()), + eq(false), + eq(true) + ); + mockService.getClusterStateForManifest(ClusterName.DEFAULT.value(), manifest, NODE_ID, true); + verify(mockService, times(1)).validateClusterStateFromChecksum(manifest, clusterState, ClusterName.DEFAULT.value(), NODE_ID, true); + } + + public void testGetClusterStateForManifestWithChecksumValidationModeNone() throws IOException { + initializeWithChecksumEnabled(RemoteClusterStateService.RemoteClusterStateValidationMode.NONE); + ClusterState clusterState = generateClusterStateWithAllAttributes().build(); + ClusterMetadataManifest manifest = generateClusterMetadataManifestWithAllAttributes().checksum( + new ClusterStateChecksum(clusterState) + ).build(); + remoteClusterStateService.start(); + RemoteClusterStateService mockService = spy(remoteClusterStateService); + doReturn(clusterState).when(mockService) + .readClusterStateInParallel( + any(), + eq(manifest), + eq(manifest.getClusterUUID()), + eq(NODE_ID), + eq(manifest.getIndices()), + eq(manifest.getCustomMetadataMap()), + eq(true), + eq(true), + eq(true), + eq(true), + eq(true), + eq(true), + eq(manifest.getIndicesRouting()), + eq(true), + eq(manifest.getClusterStateCustomMap()), + eq(false), + eq(true) + ); + mockService.getClusterStateForManifest(ClusterName.DEFAULT.value(), manifest, NODE_ID, true); + verify(mockService, times(0)).validateClusterStateFromChecksum(any(), any(), anyString(), anyString(), anyBoolean()); + } + + public void testGetClusterStateForManifestWithChecksumValidationEnabledWithMismatch() throws IOException { + initializeWithChecksumEnabled(RemoteClusterStateService.RemoteClusterStateValidationMode.FAILURE); + ClusterState clusterState = generateClusterStateWithAllAttributes().build(); + ClusterMetadataManifest manifest = generateClusterMetadataManifestWithAllAttributes().checksum( + new ClusterStateChecksum(clusterState) + ).build(); + remoteClusterStateService.start(); + RemoteClusterStateService mockService = spy(remoteClusterStateService); + ClusterState clusterStateWrong = ClusterState.builder(clusterState).routingTable(RoutingTable.EMPTY_ROUTING_TABLE).build(); + doReturn(clusterStateWrong).when(mockService) + .readClusterStateInParallel( + any(), + eq(manifest), + eq(manifest.getClusterUUID()), + eq(NODE_ID), + eq(manifest.getIndices()), + eq(manifest.getCustomMetadataMap()), + eq(true), + eq(true), + eq(true), + eq(true), + eq(true), + eq(true), + eq(manifest.getIndicesRouting()), + eq(true), + eq(manifest.getClusterStateCustomMap()), + eq(false), + eq(true) + ); + expectThrows( + IllegalStateException.class, + () -> mockService.getClusterStateForManifest(ClusterName.DEFAULT.value(), manifest, NODE_ID, true) + ); + verify(mockService, times(1)).validateClusterStateFromChecksum( + manifest, + clusterStateWrong, + ClusterName.DEFAULT.value(), + NODE_ID, + true + ); + } + + public void testGetClusterStateForManifestWithChecksumValidationDebugWithMismatch() throws IOException { + initializeWithChecksumEnabled( + randomFrom( + Arrays.asList( + RemoteClusterStateService.RemoteClusterStateValidationMode.DEBUG, + RemoteClusterStateService.RemoteClusterStateValidationMode.TRACE + ) + ) + ); + ClusterState clusterState = generateClusterStateWithAllAttributes().build(); + ClusterMetadataManifest manifest = generateClusterMetadataManifestWithAllAttributes().checksum( + new ClusterStateChecksum(clusterState) + ).build(); + remoteClusterStateService.start(); + 
RemoteClusterStateService mockService = spy(remoteClusterStateService); + ClusterState clusterStateWrong = ClusterState.builder(clusterState).routingTable(RoutingTable.EMPTY_ROUTING_TABLE).build(); + doReturn(clusterStateWrong).when(mockService) + .readClusterStateInParallel( + any(), + eq(manifest), + eq(manifest.getClusterUUID()), + eq(NODE_ID), + eq(manifest.getIndices()), + eq(manifest.getCustomMetadataMap()), + eq(true), + eq(true), + eq(true), + eq(true), + eq(true), + eq(true), + eq(manifest.getIndicesRouting()), + eq(true), + eq(manifest.getClusterStateCustomMap()), + eq(false), + eq(true) + ); + mockService.getClusterStateForManifest(ClusterName.DEFAULT.value(), manifest, NODE_ID, true); + verify(mockService, times(1)).validateClusterStateFromChecksum( + manifest, + clusterStateWrong, + ClusterName.DEFAULT.value(), + NODE_ID, + true + ); + } + + public void testGetClusterStateUsingDiffWithChecksum() throws IOException { + initializeWithChecksumEnabled(RemoteClusterStateService.RemoteClusterStateValidationMode.FAILURE); + ClusterState clusterState = generateClusterStateWithAllAttributes().build(); + ClusterMetadataManifest manifest = generateClusterMetadataManifestWithAllAttributes().checksum( + new ClusterStateChecksum(clusterState) + ).diffManifest(ClusterStateDiffManifest.builder().build()).build(); + + remoteClusterStateService.start(); + RemoteClusterStateService mockService = spy(remoteClusterStateService); + + doReturn(clusterState).when(mockService) + .readClusterStateInParallel( + any(), + eq(manifest), + eq(manifest.getClusterUUID()), + eq(NODE_ID), + eq(emptyList()), + eq(emptyMap()), + anyBoolean(), + anyBoolean(), + anyBoolean(), + anyBoolean(), + anyBoolean(), + anyBoolean(), + eq(emptyList()), + anyBoolean(), + eq(emptyMap()), + anyBoolean(), + anyBoolean() + ); + mockService.getClusterStateUsingDiff(manifest, clusterState, NODE_ID); + + verify(mockService, times(1)).validateClusterStateFromChecksum( + eq(manifest), + any(ClusterState.class), + eq(ClusterName.DEFAULT.value()), + eq(NODE_ID), + eq(false) + ); + } + + public void testGetClusterStateUsingDiffWithChecksumModeNone() throws IOException { + initializeWithChecksumEnabled(RemoteClusterStateService.RemoteClusterStateValidationMode.NONE); + ClusterState clusterState = generateClusterStateWithAllAttributes().build(); + ClusterMetadataManifest manifest = generateClusterMetadataManifestWithAllAttributes().checksum( + new ClusterStateChecksum(clusterState) + ).diffManifest(ClusterStateDiffManifest.builder().build()).build(); + + remoteClusterStateService.start(); + RemoteClusterStateService mockService = spy(remoteClusterStateService); + + doReturn(clusterState).when(mockService) + .readClusterStateInParallel( + any(), + eq(manifest), + eq(manifest.getClusterUUID()), + eq(NODE_ID), + eq(emptyList()), + eq(emptyMap()), + anyBoolean(), + anyBoolean(), + anyBoolean(), + anyBoolean(), + anyBoolean(), + anyBoolean(), + eq(emptyList()), + anyBoolean(), + eq(emptyMap()), + anyBoolean(), + anyBoolean() + ); + mockService.getClusterStateUsingDiff(manifest, clusterState, NODE_ID); + + verify(mockService, times(0)).validateClusterStateFromChecksum( + eq(manifest), + any(ClusterState.class), + eq(ClusterName.DEFAULT.value()), + eq(NODE_ID), + eq(false) + ); + } + + public void testGetClusterStateUsingDiffWithChecksumModeDebugMismatch() throws IOException { + initializeWithChecksumEnabled(RemoteClusterStateService.RemoteClusterStateValidationMode.DEBUG); + ClusterState clusterState = 
generateClusterStateWithAllAttributes().build(); + ClusterMetadataManifest manifest = generateClusterMetadataManifestWithAllAttributes().checksum( + new ClusterStateChecksum(clusterState) + ).diffManifest(ClusterStateDiffManifest.builder().build()).build(); + + remoteClusterStateService.start(); + RemoteClusterStateService mockService = spy(remoteClusterStateService); + ClusterState clusterStateWrong = ClusterState.builder(clusterState).routingTable(RoutingTable.EMPTY_ROUTING_TABLE).build(); + doReturn(clusterStateWrong).when(mockService) + .readClusterStateInParallel( + any(), + eq(manifest), + eq(manifest.getClusterUUID()), + eq(NODE_ID), + eq(emptyList()), + eq(emptyMap()), + anyBoolean(), + anyBoolean(), + anyBoolean(), + anyBoolean(), + anyBoolean(), + anyBoolean(), + eq(emptyList()), + anyBoolean(), + eq(emptyMap()), + anyBoolean(), + anyBoolean() + ); + mockService.getClusterStateUsingDiff(manifest, clusterState, NODE_ID); + verify(mockService, times(1)).validateClusterStateFromChecksum( + eq(manifest), + any(ClusterState.class), + eq(ClusterName.DEFAULT.value()), + eq(NODE_ID), + eq(false) + ); + } + + public void testGetClusterStateUsingDiffWithChecksumModeTraceMismatch() throws IOException { + initializeWithChecksumEnabled(RemoteClusterStateService.RemoteClusterStateValidationMode.TRACE); + ClusterState clusterState = generateClusterStateWithAllAttributes().build(); + ClusterMetadataManifest manifest = generateClusterMetadataManifestWithAllAttributes().checksum( + new ClusterStateChecksum(clusterState) + ).diffManifest(ClusterStateDiffManifest.builder().build()).build(); + + remoteClusterStateService.start(); + RemoteClusterStateService mockService = spy(remoteClusterStateService); + ClusterState clusterStateWrong = ClusterState.builder(clusterState).routingTable(RoutingTable.EMPTY_ROUTING_TABLE).build(); + doReturn(clusterStateWrong).when(mockService) + .readClusterStateInParallel( + any(), + eq(manifest), + eq(manifest.getClusterUUID()), + eq(NODE_ID), + eq(emptyList()), + eq(emptyMap()), + anyBoolean(), + anyBoolean(), + anyBoolean(), + anyBoolean(), + anyBoolean(), + anyBoolean(), + eq(emptyList()), + anyBoolean(), + eq(emptyMap()), + anyBoolean(), + anyBoolean() + ); + doReturn(clusterState).when(mockService) + .readClusterStateInParallel( + any(), + eq(manifest), + eq(manifest.getClusterUUID()), + eq(NODE_ID), + eq(manifest.getIndices()), + eq(manifest.getCustomMetadataMap()), + eq(true), + eq(true), + eq(true), + eq(true), + eq(true), + eq(true), + eq(manifest.getIndicesRouting()), + eq(true), + eq(manifest.getClusterStateCustomMap()), + eq(false), + eq(true) + ); + + mockService.getClusterStateUsingDiff(manifest, clusterState, NODE_ID); + verify(mockService, times(1)).validateClusterStateFromChecksum( + eq(manifest), + any(ClusterState.class), + eq(ClusterName.DEFAULT.value()), + eq(NODE_ID), + eq(false) + ); + } + + public void testGetClusterStateUsingDiffWithChecksumMismatch() throws IOException { + initializeWithChecksumEnabled(RemoteClusterStateService.RemoteClusterStateValidationMode.FAILURE); + ClusterState clusterState = generateClusterStateWithAllAttributes().build(); + ClusterMetadataManifest manifest = generateClusterMetadataManifestWithAllAttributes().checksum( + new ClusterStateChecksum(clusterState) + ).diffManifest(ClusterStateDiffManifest.builder().build()).build(); + + remoteClusterStateService.start(); + RemoteClusterStateService mockService = spy(remoteClusterStateService); + ClusterState clusterStateWrong = 
ClusterState.builder(clusterState).routingTable(RoutingTable.EMPTY_ROUTING_TABLE).build(); + doReturn(clusterStateWrong).when(mockService) + .readClusterStateInParallel( + any(), + eq(manifest), + eq(manifest.getClusterUUID()), + eq(NODE_ID), + eq(emptyList()), + eq(emptyMap()), + anyBoolean(), + anyBoolean(), + anyBoolean(), + anyBoolean(), + anyBoolean(), + anyBoolean(), + eq(emptyList()), + anyBoolean(), + eq(emptyMap()), + anyBoolean(), + anyBoolean() + ); + doReturn(clusterState).when(mockService) + .readClusterStateInParallel( + any(), + eq(manifest), + eq(manifest.getClusterUUID()), + eq(NODE_ID), + eq(manifest.getIndices()), + eq(manifest.getCustomMetadataMap()), + eq(true), + eq(true), + eq(true), + eq(true), + eq(true), + eq(true), + eq(manifest.getIndicesRouting()), + eq(true), + eq(manifest.getClusterStateCustomMap()), + eq(false), + eq(true) + ); + + expectThrows(IllegalStateException.class, () -> mockService.getClusterStateUsingDiff(manifest, clusterState, NODE_ID)); + verify(mockService, times(1)).validateClusterStateFromChecksum( + eq(manifest), + any(ClusterState.class), + eq(ClusterName.DEFAULT.value()), + eq(NODE_ID), + eq(false) + ); + } + + private void mockObjectsForGettingPreviousClusterUUID(Map clusterUUIDsPointers) throws IOException { + mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, false, Collections.emptyMap()); + } + + private void mockObjectsForGettingPreviousClusterUUID( + Map clusterUUIDsPointers, + Map clusterUUIDCommitted + ) throws IOException { + mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, false, clusterUUIDCommitted); + } + + private void mockObjectsForGettingPreviousClusterUUID( + Map clusterUUIDsPointers, + boolean differGlobalMetadata, + Map clusterUUIDCommitted + ) throws IOException { + final BlobPath blobPath = mock(BlobPath.class); + when((blobStoreRepository.basePath())).thenReturn(blobPath); + when(blobPath.add(anyString())).thenReturn(blobPath); + when(blobPath.buildAsString()).thenReturn("/blob/path/"); + BlobContainer blobContainer1 = mock(BlobContainer.class); + BlobContainer blobContainer2 = mock(BlobContainer.class); + BlobContainer blobContainer3 = mock(BlobContainer.class); + BlobContainer uuidBlobContainer = mock(BlobContainer.class); + when(blobContainer1.path()).thenReturn(blobPath); + when(blobContainer2.path()).thenReturn(blobPath); + when(blobContainer3.path()).thenReturn(blobPath); + + mockBlobContainerForClusterUUIDs(uuidBlobContainer, clusterUUIDsPointers.keySet()); + List uploadedIndexMetadataList1 = List.of( + new UploadedIndexMetadata("index1", "index-uuid1", "key1__2"), + new UploadedIndexMetadata("index2", "index-uuid2", "key2__2") + ); + Map customMetadataMap = new HashMap<>(); + final ClusterMetadataManifest clusterManifest1 = generateClusterMetadataManifest( + "cluster-uuid1", + clusterUUIDsPointers.get("cluster-uuid1"), + randomAlphaOfLength(10), + uploadedIndexMetadataList1, + customMetadataMap, + new UploadedMetadataAttribute(COORDINATION_METADATA, "key3"), + new UploadedMetadataAttribute(SETTING_METADATA, "key4"), + new UploadedMetadataAttribute(TEMPLATES_METADATA, "key5"), + clusterUUIDCommitted.getOrDefault("cluster-uuid1", true) + ); + Settings indexSettings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT).build(); + IndexMetadata indexMetadata1 = IndexMetadata.builder("index1") + .settings(indexSettings) + .numberOfShards(1) + .numberOfReplicas(1) + .build(); + IndexMetadata indexMetadata2 = IndexMetadata.builder("index2") + 
.settings(indexSettings) + .numberOfShards(1) + .numberOfReplicas(1) + .build(); + Metadata metadata1 = Metadata.builder() + .persistentSettings(Settings.builder().put(Metadata.SETTING_READ_ONLY_SETTING.getKey(), true).build()) + .build(); + Map indexMetadataMap1 = Map.of("index-uuid1", indexMetadata1, "index-uuid2", indexMetadata2); + mockBlobContainerForGlobalMetadata(blobContainer1, clusterManifest1, metadata1); + mockBlobContainer(blobContainer1, clusterManifest1, indexMetadataMap1, MANIFEST_CURRENT_CODEC_VERSION); + + List uploadedIndexMetadataList2 = List.of( + new UploadedIndexMetadata("index1", "index-uuid1", "key1__2"), + new UploadedIndexMetadata("index2", "index-uuid2", "key2__2") + ); + final ClusterMetadataManifest clusterManifest2 = generateClusterMetadataManifest( + "cluster-uuid2", + clusterUUIDsPointers.get("cluster-uuid2"), + randomAlphaOfLength(10), + uploadedIndexMetadataList2, + customMetadataMap, + new UploadedMetadataAttribute(COORDINATION_METADATA, "key3"), + new UploadedMetadataAttribute(SETTING_METADATA, "key4"), + new UploadedMetadataAttribute(TEMPLATES_METADATA, "key5"), + clusterUUIDCommitted.getOrDefault("cluster-uuid2", true) + ); + IndexMetadata indexMetadata3 = IndexMetadata.builder("index1") + .settings(indexSettings) + .numberOfShards(1) + .numberOfReplicas(1) + .build(); IndexMetadata indexMetadata4 = IndexMetadata.builder("index2") .settings(indexSettings) .numberOfShards(1) @@ -3191,6 +4049,55 @@ private void mockBlobContainer( }); } + private void mockBlobContainer( + BlobContainer blobContainer, + ClusterMetadataManifest clusterMetadataManifest, + Map indexMetadataMap, + int codecVersion, + long term, + long version + ) throws IOException { + String manifestFileName = codecVersion >= CODEC_V1 + ? "manifest__manifestFileName__abcd__abcd__abcd__" + codecVersion + : "manifestFileName"; + BlobMetadata blobMetadata = new PlainBlobMetadata(manifestFileName, 1); + + String manifestPrefix = String.join(DELIMITER, "manifest", RemoteStoreUtils.invertLong(term), RemoteStoreUtils.invertLong(version)) + + DELIMITER; + when(blobContainer.listBlobsByPrefixInSortedOrder(manifestPrefix, 1, BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC)).thenReturn( + Arrays.asList(blobMetadata) + ); + + BytesReference bytes = RemoteClusterMetadataManifest.CLUSTER_METADATA_MANIFEST_FORMAT.serialize( + clusterMetadataManifest, + manifestFileName, + blobStoreRepository.getCompressor(), + FORMAT_PARAMS + ); + when(blobContainer.readBlob(manifestFileName)).thenReturn(new ByteArrayInputStream(bytes.streamInput().readAllBytes())); + + clusterMetadataManifest.getIndices().forEach(uploadedIndexMetadata -> { + try { + IndexMetadata indexMetadata = indexMetadataMap.get(uploadedIndexMetadata.getIndexUUID()); + if (indexMetadata == null) { + return; + } + String fileName = uploadedIndexMetadata.getUploadedFilename(); + when(blobContainer.readBlob(getFormattedIndexFileName(fileName))).thenAnswer((invocationOnMock) -> { + BytesReference bytesIndexMetadata = INDEX_METADATA_FORMAT.serialize( + indexMetadata, + fileName, + blobStoreRepository.getCompressor(), + FORMAT_PARAMS + ); + return new ByteArrayInputStream(bytesIndexMetadata.streamInput().readAllBytes()); + }); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } + private void mockBlobContainerForGlobalMetadata( BlobContainer blobContainer, ClusterMetadataManifest clusterMetadataManifest, @@ -3431,6 +4338,8 @@ static ClusterMetadataManifest.Builder generateClusterMetadataManifestWithAllAtt return 
ClusterMetadataManifest.builder() .codecVersion(CODEC_V2) .clusterUUID("cluster-uuid") + .stateVersion(1L) + .stateUUID("state-uuid") .indices(List.of(new UploadedIndexMetadata("test-index", "test-index-uuid", "test-index-file__2"))) .customMetadataMap( Map.of( @@ -3456,7 +4365,8 @@ static ClusterMetadataManifest.Builder generateClusterMetadataManifestWithAllAtt "custom_2", new UploadedMetadataAttribute("custom_2", "test-cluster-state-custom2-file__1") ) - ); + ) + .routingTableVersion(1L); } public static DiscoveryNodes nodesWithLocalNodeClusterManager() { diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteIndexMetadataManagerTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteIndexMetadataManagerTests.java index 76c5792677ea0..ac7f9b655e877 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteIndexMetadataManagerTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteIndexMetadataManagerTests.java @@ -26,6 +26,7 @@ import org.opensearch.core.compress.NoneCompressor; import org.opensearch.gateway.remote.model.RemoteIndexMetadata; import org.opensearch.gateway.remote.model.RemoteReadResult; +import org.opensearch.index.remote.RemoteStoreEnums; import org.opensearch.index.remote.RemoteStoreUtils; import org.opensearch.index.translog.transfer.BlobStoreTransferService; import org.opensearch.repositories.blobstore.BlobStoreRepository; @@ -43,6 +44,8 @@ import static org.opensearch.gateway.remote.RemoteClusterStateUtils.PATH_DELIMITER; import static org.opensearch.gateway.remote.model.RemoteIndexMetadata.INDEX; import static org.opensearch.gateway.remote.model.RemoteIndexMetadata.INDEX_METADATA_FORMAT; +import static org.opensearch.index.remote.RemoteStoreEnums.PathHashAlgorithm.FNV_1A_BASE64; +import static org.opensearch.index.remote.RemoteStoreEnums.PathType.HASHED_PREFIX; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyIterable; import static org.mockito.ArgumentMatchers.anyString; @@ -58,11 +61,12 @@ public class RemoteIndexMetadataManagerTests extends OpenSearchTestCase { private BlobStoreRepository blobStoreRepository; private BlobStoreTransferService blobStoreTransferService; private Compressor compressor; + private ClusterSettings clusterSettings; private final ThreadPool threadPool = new TestThreadPool(getClass().getName()); @Before public void setup() { - ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); blobStoreRepository = mock(BlobStoreRepository.class); BlobPath blobPath = new BlobPath().add("random-path"); when((blobStoreRepository.basePath())).thenReturn(blobPath); @@ -100,7 +104,7 @@ public void testGetAsyncWriteRunnable_Success() throws Exception { remoteIndexMetadataManager.writeAsync( INDEX, - new RemoteIndexMetadata(indexMetadata, "cluster-uuid", compressor, null), + new RemoteIndexMetadata(indexMetadata, "cluster-uuid", compressor, null, null, null, null), new LatchedActionListener<>(listener, latch) ); latch.await(); @@ -132,7 +136,7 @@ public void testGetAsyncWriteRunnable_IOFailure() throws Exception { remoteIndexMetadataManager.writeAsync( INDEX, - new RemoteIndexMetadata(indexMetadata, "cluster-uuid", compressor, null), + new RemoteIndexMetadata(indexMetadata, "cluster-uuid", compressor, null, null, null, null), new LatchedActionListener<>(listener, latch) ); latch.await(); @@ -182,6 
+186,31 @@ public void testGetAsyncReadRunnable_IOFailure() throws Exception { assertTrue(listener.getFailure() instanceof RemoteStateTransferException); } + public void testRemoteIndexMetadataPathTypeSetting() { + // Assert the default is HASHED_PREFIX + assertEquals(HASHED_PREFIX.toString(), remoteIndexMetadataManager.getPathTypeSetting().toString()); + + Settings newSettings = Settings.builder() + .put("cluster.remote_store.index_metadata.path_type", RemoteStoreEnums.PathType.FIXED.toString()) + .build(); + clusterSettings.applySettings(newSettings); + assertEquals(RemoteStoreEnums.PathType.FIXED.toString(), remoteIndexMetadataManager.getPathTypeSetting().toString()); + } + + public void testRemoteIndexMetadataHashAlgoSetting() { + // Assert the default is FNV_1A_BASE64 + assertEquals(FNV_1A_BASE64.toString(), remoteIndexMetadataManager.getPathHashAlgoSetting().toString()); + + Settings newSettings = Settings.builder() + .put("cluster.remote_store.index_metadata.path_hash_algo", RemoteStoreEnums.PathHashAlgorithm.FNV_1A_COMPOSITE_1.toString()) + .build(); + clusterSettings.applySettings(newSettings); + assertEquals( + RemoteStoreEnums.PathHashAlgorithm.FNV_1A_COMPOSITE_1.toString(), + remoteIndexMetadataManager.getPathHashAlgoSetting().toString() + ); + } + private IndexMetadata getIndexMetadata(String name, @Nullable Boolean writeIndex, String... aliases) { IndexMetadata.Builder builder = IndexMetadata.builder(name) .settings( diff --git a/server/src/test/java/org/opensearch/gateway/remote/model/ClusterStateDiffManifestTests.java b/server/src/test/java/org/opensearch/gateway/remote/model/ClusterStateDiffManifestTests.java index f89619a09cd52..e716822939a58 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/model/ClusterStateDiffManifestTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/model/ClusterStateDiffManifestTests.java @@ -10,7 +10,6 @@ import org.opensearch.Version; import org.opensearch.cluster.ClusterState; -import org.opensearch.cluster.DiffableUtils; import org.opensearch.cluster.coordination.CoordinationMetadata; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.IndexTemplateMetadata; @@ -19,6 +18,8 @@ import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.cluster.routing.IndexRoutingTable; +import org.opensearch.cluster.routing.RoutingTableIncrementalDiff; +import org.opensearch.cluster.routing.StringKeyDiffProvider; import org.opensearch.common.settings.Settings; import org.opensearch.common.xcontent.json.JsonXContent; import org.opensearch.core.common.bytes.BytesReference; @@ -42,9 +43,9 @@ import static java.util.stream.Collectors.toList; import static org.opensearch.Version.CURRENT; import static org.opensearch.cluster.ClusterState.EMPTY_STATE; -import static org.opensearch.cluster.routing.remote.RemoteRoutingTableService.CUSTOM_ROUTING_TABLE_DIFFABLE_VALUE_SERIALIZER; import static org.opensearch.core.common.transport.TransportAddress.META_ADDRESS; import static org.opensearch.gateway.remote.ClusterMetadataManifest.CODEC_V3; +import static org.opensearch.gateway.remote.ClusterMetadataManifest.CODEC_V4; import static org.opensearch.gateway.remote.RemoteClusterStateServiceTests.generateClusterStateWithOneIndex; import static org.opensearch.gateway.remote.RemoteClusterStateServiceTests.nodesWithLocalNodeClusterManager; import static org.opensearch.gateway.remote.model.RemoteClusterBlocksTests.randomClusterBlocks; @@ -120,7 +121,7 
@@ public void testClusterStateDiffManifestXContent() throws IOException { diffManifest.toXContent(builder, ToXContent.EMPTY_PARAMS); builder.endObject(); try (XContentParser parser = createParser(JsonXContent.jsonXContent, BytesReference.bytes(builder))) { - final ClusterStateDiffManifest parsedManifest = ClusterStateDiffManifest.fromXContent(parser, CODEC_V3); + final ClusterStateDiffManifest parsedManifest = ClusterStateDiffManifest.fromXContent(parser, CODEC_V4); assertEquals(diffManifest, parsedManifest); } } @@ -132,7 +133,7 @@ public void testClusterStateWithRoutingTableDiffInDiffManifestXContent() throws ClusterState updatedState = generateClusterStateWithOneIndex("test-index", 5, 2, false).nodes(nodesWithLocalNodeClusterManager()) .build(); - ClusterStateDiffManifest diffManifest = verifyRoutingTableDiffManifest(initialState, updatedState); + ClusterStateDiffManifest diffManifest = verifyRoutingTableDiffManifest(initialState, updatedState, CODEC_V3); final XContentBuilder builder = JsonXContent.contentBuilder(); builder.startObject(); diffManifest.toXContent(builder, ToXContent.EMPTY_PARAMS); @@ -143,14 +144,14 @@ public void testClusterStateWithRoutingTableDiffInDiffManifestXContent() throws } } - public void testClusterStateWithRoutingTableDiffInDiffManifestXContent1() throws IOException { + public void testClusterStateWithRoutingTableDiffInDiffManifestXContentWithDeletes() throws IOException { ClusterState initialState = generateClusterStateWithOneIndex("test-index", 5, 1, true).nodes(nodesWithLocalNodeClusterManager()) .build(); ClusterState updatedState = generateClusterStateWithOneIndex("test-index-1", 5, 2, false).nodes(nodesWithLocalNodeClusterManager()) .build(); - ClusterStateDiffManifest diffManifest = verifyRoutingTableDiffManifest(initialState, updatedState); + ClusterStateDiffManifest diffManifest = verifyRoutingTableDiffManifest(initialState, updatedState, CODEC_V3); final XContentBuilder builder = JsonXContent.contentBuilder(); builder.startObject(); diffManifest.toXContent(builder, ToXContent.EMPTY_PARAMS); @@ -161,26 +162,60 @@ public void testClusterStateWithRoutingTableDiffInDiffManifestXContent1() throws } } - private ClusterStateDiffManifest verifyRoutingTableDiffManifest(ClusterState previousState, ClusterState currentState) { - // Create initial and updated IndexRoutingTable maps - Map initialRoutingTableMap = previousState.getRoutingTable().indicesRouting(); - Map updatedRoutingTableMap = currentState.getRoutingTable().indicesRouting(); + public void testClusterStateWithRoutingTableDiffInDiffManifestXContentV4() throws IOException { + ClusterState initialState = generateClusterStateWithOneIndex("test-index", 5, 1, true).nodes(nodesWithLocalNodeClusterManager()) + .build(); - DiffableUtils.MapDiff> routingTableIncrementalDiff = DiffableUtils.diff( - initialRoutingTableMap, - updatedRoutingTableMap, - DiffableUtils.getStringKeySerializer(), - CUSTOM_ROUTING_TABLE_DIFFABLE_VALUE_SERIALIZER + ClusterState updatedState = generateClusterStateWithOneIndex("test-index", 5, 2, false).nodes(nodesWithLocalNodeClusterManager()) + .build(); + + ClusterStateDiffManifest diffManifest = verifyRoutingTableDiffManifest(initialState, updatedState, CODEC_V4); + final XContentBuilder builder = JsonXContent.contentBuilder(); + builder.startObject(); + diffManifest.toXContent(builder, ToXContent.EMPTY_PARAMS); + builder.endObject(); + try (XContentParser parser = createParser(JsonXContent.jsonXContent, BytesReference.bytes(builder))) { + final ClusterStateDiffManifest 
parsedManifest = ClusterStateDiffManifest.fromXContent(parser, CODEC_V4); + assertEquals(diffManifest, parsedManifest); + } + } + + public void testClusterStateWithRoutingTableDiffInDiffManifestXContentWithDeletesV4() throws IOException { + ClusterState initialState = generateClusterStateWithOneIndex("test-index", 5, 1, true).nodes(nodesWithLocalNodeClusterManager()) + .build(); + + ClusterState updatedState = generateClusterStateWithOneIndex("test-index-1", 5, 2, false).nodes(nodesWithLocalNodeClusterManager()) + .build(); + + ClusterStateDiffManifest diffManifest = verifyRoutingTableDiffManifest(initialState, updatedState, CODEC_V4); + final XContentBuilder builder = JsonXContent.contentBuilder(); + builder.startObject(); + diffManifest.toXContent(builder, ToXContent.EMPTY_PARAMS); + builder.endObject(); + try (XContentParser parser = createParser(JsonXContent.jsonXContent, BytesReference.bytes(builder))) { + final ClusterStateDiffManifest parsedManifest = ClusterStateDiffManifest.fromXContent(parser, CODEC_V4); + assertEquals(diffManifest, parsedManifest); + } + } + + private ClusterStateDiffManifest verifyRoutingTableDiffManifest( + ClusterState previousState, + ClusterState currentState, + int codecVersion + ) { + // Create initial and updated IndexRoutingTable maps + StringKeyDiffProvider routingTableDiff = new RoutingTableIncrementalDiff( + previousState.getRoutingTable(), + currentState.getRoutingTable() ); ClusterStateDiffManifest manifest = new ClusterStateDiffManifest( currentState, previousState, - routingTableIncrementalDiff, + codecVersion, + routingTableDiff, "indicesRoutingDiffPath" ); assertEquals("indicesRoutingDiffPath", manifest.getIndicesRoutingDiffPath()); - assertEquals(routingTableIncrementalDiff.getUpserts().size(), manifest.getIndicesRoutingUpdated().size()); - assertEquals(routingTableIncrementalDiff.getDeletes().size(), manifest.getIndicesRoutingDeleted().size()); return manifest; } @@ -256,7 +291,7 @@ private ClusterStateDiffManifest updateAndVerifyState( } ClusterState updatedClusterState = clusterStateBuilder.metadata(metadataBuilder.build()).build(); - ClusterStateDiffManifest manifest = new ClusterStateDiffManifest(updatedClusterState, initialState, null, null); + ClusterStateDiffManifest manifest = new ClusterStateDiffManifest(updatedClusterState, initialState, CODEC_V4, null, null); assertEquals(indicesToAdd.stream().map(im -> im.getIndex().getName()).collect(toList()), manifest.getIndicesUpdated()); assertEquals(indicesToRemove, manifest.getIndicesDeleted()); assertEquals(new ArrayList<>(customsToAdd.keySet()), manifest.getCustomMetadataUpdated()); diff --git a/server/src/test/java/org/opensearch/gateway/remote/model/RemoteClusterMetadataManifestTests.java b/server/src/test/java/org/opensearch/gateway/remote/model/RemoteClusterMetadataManifestTests.java index de1befbecd924..f399dd2f3d7a5 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/model/RemoteClusterMetadataManifestTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/model/RemoteClusterMetadataManifestTests.java @@ -43,8 +43,8 @@ import static java.util.stream.Collectors.toList; import static org.opensearch.gateway.remote.ClusterMetadataManifest.CODEC_V0; import static org.opensearch.gateway.remote.ClusterMetadataManifest.CODEC_V2; +import static org.opensearch.gateway.remote.ClusterMetadataManifest.MANIFEST_CURRENT_CODEC_VERSION; import static org.opensearch.gateway.remote.model.RemoteClusterMetadataManifest.MANIFEST; -import static 
org.opensearch.gateway.remote.model.RemoteClusterMetadataManifest.MANIFEST_CURRENT_CODEC_VERSION; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.lessThanOrEqualTo; diff --git a/server/src/test/java/org/opensearch/gateway/remote/model/RemoteDiscoveryNodesTests.java b/server/src/test/java/org/opensearch/gateway/remote/model/RemoteDiscoveryNodesTests.java index f1bced2bdf855..1b988ee1f37ec 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/model/RemoteDiscoveryNodesTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/model/RemoteDiscoveryNodesTests.java @@ -143,7 +143,7 @@ public void testSerDe() throws IOException { public void testExceptionDuringSerialization() throws IOException { DiscoveryNodes nodes = mock(DiscoveryNodes.class); RemoteDiscoveryNodes remoteObjectForUpload = new RemoteDiscoveryNodes(nodes, METADATA_VERSION, clusterUUID, compressor); - doThrow(new IOException("mock-exception")).when(nodes).writeTo(any()); + doThrow(new IOException("mock-exception")).when(nodes).writeToWithAttribute(any()); IOException iea = assertThrows(IOException.class, remoteObjectForUpload::serialize); } diff --git a/server/src/test/java/org/opensearch/gateway/remote/model/RemoteIndexMetadataTests.java b/server/src/test/java/org/opensearch/gateway/remote/model/RemoteIndexMetadataTests.java index 7f9c3fdbae91b..9923f23875af0 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/model/RemoteIndexMetadataTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/model/RemoteIndexMetadataTests.java @@ -23,6 +23,8 @@ import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedMetadata; import org.opensearch.gateway.remote.RemoteClusterStateUtils; +import org.opensearch.index.remote.RemoteStoreEnums.PathHashAlgorithm; +import org.opensearch.index.remote.RemoteStoreEnums.PathType; import org.opensearch.index.remote.RemoteStoreUtils; import org.opensearch.index.translog.transfer.BlobStoreTransferService; import org.opensearch.indices.IndicesModule; @@ -93,7 +95,15 @@ public void tearDown() throws Exception { public void testClusterUUID() { IndexMetadata indexMetadata = getIndexMetadata(); - RemoteIndexMetadata remoteObjectForUpload = new RemoteIndexMetadata(indexMetadata, clusterUUID, compressor, namedXContentRegistry); + RemoteIndexMetadata remoteObjectForUpload = new RemoteIndexMetadata( + indexMetadata, + clusterUUID, + compressor, + namedXContentRegistry, + null, + null, + null + ); assertThat(remoteObjectForUpload.clusterUUID(), is(clusterUUID)); RemoteIndexMetadata remoteObjectForDownload = new RemoteIndexMetadata( @@ -107,7 +117,15 @@ public void testClusterUUID() { public void testFullBlobName() { IndexMetadata indexMetadata = getIndexMetadata(); - RemoteIndexMetadata remoteObjectForUpload = new RemoteIndexMetadata(indexMetadata, clusterUUID, compressor, namedXContentRegistry); + RemoteIndexMetadata remoteObjectForUpload = new RemoteIndexMetadata( + indexMetadata, + clusterUUID, + compressor, + namedXContentRegistry, + null, + null, + null + ); assertThat(remoteObjectForUpload.getFullBlobName(), nullValue()); RemoteIndexMetadata remoteObjectForDownload = new RemoteIndexMetadata( @@ -121,7 +139,15 @@ public void testFullBlobName() { public void testBlobFileName() { IndexMetadata indexMetadata = getIndexMetadata(); - RemoteIndexMetadata remoteObjectForUpload = new RemoteIndexMetadata(indexMetadata, clusterUUID, 
compressor, namedXContentRegistry); + RemoteIndexMetadata remoteObjectForUpload = new RemoteIndexMetadata( + indexMetadata, + clusterUUID, + compressor, + namedXContentRegistry, + null, + null, + null + ); assertThat(remoteObjectForUpload.getBlobFileName(), nullValue()); RemoteIndexMetadata remoteObjectForDownload = new RemoteIndexMetadata( @@ -135,7 +161,15 @@ public void testBlobFileName() { public void testBlobPathParameters() { IndexMetadata indexMetadata = getIndexMetadata(); - RemoteIndexMetadata remoteObjectForUpload = new RemoteIndexMetadata(indexMetadata, clusterUUID, compressor, namedXContentRegistry); + RemoteIndexMetadata remoteObjectForUpload = new RemoteIndexMetadata( + indexMetadata, + clusterUUID, + compressor, + namedXContentRegistry, + null, + null, + null + ); BlobPathParameters params = remoteObjectForUpload.getBlobPathParameters(); assertThat(params.getPathTokens(), is(List.of(INDEX, indexMetadata.getIndexUUID()))); assertThat(params.getFilePrefix(), is("metadata")); @@ -143,7 +177,15 @@ public void testBlobPathParameters() { public void testGenerateBlobFileName() { IndexMetadata indexMetadata = getIndexMetadata(); - RemoteIndexMetadata remoteObjectForUpload = new RemoteIndexMetadata(indexMetadata, clusterUUID, compressor, namedXContentRegistry); + RemoteIndexMetadata remoteObjectForUpload = new RemoteIndexMetadata( + indexMetadata, + clusterUUID, + compressor, + namedXContentRegistry, + null, + null, + null + ); String blobFileName = remoteObjectForUpload.generateBlobFileName(); String[] nameTokens = blobFileName.split(RemoteClusterStateUtils.DELIMITER); assertThat(nameTokens[0], is("metadata")); @@ -154,7 +196,15 @@ public void testGenerateBlobFileName() { public void testGetUploadedMetadata() throws IOException { IndexMetadata indexMetadata = getIndexMetadata(); - RemoteIndexMetadata remoteObjectForUpload = new RemoteIndexMetadata(indexMetadata, clusterUUID, compressor, namedXContentRegistry); + RemoteIndexMetadata remoteObjectForUpload = new RemoteIndexMetadata( + indexMetadata, + clusterUUID, + compressor, + namedXContentRegistry, + null, + null, + null + ); assertThrows(AssertionError.class, remoteObjectForUpload::getUploadedMetadata); remoteObjectForUpload.setFullBlobName(new BlobPath().add(TEST_BLOB_PATH)); UploadedMetadata uploadedMetadata = remoteObjectForUpload.getUploadedMetadata(); @@ -163,7 +213,15 @@ public void testGetUploadedMetadata() throws IOException { public void testSerDe() throws IOException { IndexMetadata indexMetadata = getIndexMetadata(); - RemoteIndexMetadata remoteObjectForUpload = new RemoteIndexMetadata(indexMetadata, clusterUUID, compressor, namedXContentRegistry); + RemoteIndexMetadata remoteObjectForUpload = new RemoteIndexMetadata( + indexMetadata, + clusterUUID, + compressor, + namedXContentRegistry, + null, + null, + null + ); try (InputStream inputStream = remoteObjectForUpload.serialize()) { assertThat(inputStream.available(), greaterThan(0)); IndexMetadata readIndexMetadata = remoteObjectForUpload.deserialize(inputStream); @@ -171,6 +229,25 @@ public void testSerDe() throws IOException { } } + public void testPrefixedPath() { + IndexMetadata indexMetadata = getIndexMetadata(); + String fixedPrefix = "*"; + RemoteIndexMetadata remoteObjectForUpload = new RemoteIndexMetadata( + indexMetadata, + clusterUUID, + compressor, + namedXContentRegistry, + PathType.HASHED_PREFIX, + PathHashAlgorithm.FNV_1A_COMPOSITE_1, + fixedPrefix + ); + String testPath = "test-path"; + String expectedPath = fixedPrefix + 
"410100110100101/test-path/index-uuid/"; + BlobPath prefixedPath = remoteObjectForUpload.getPrefixedPath(BlobPath.cleanPath().add(testPath)); + assertThat(prefixedPath.buildAsString(), is(expectedPath)); + + } + private IndexMetadata getIndexMetadata() { final Index index = new Index("test-index", "index-uuid"); final Settings idxSettings = Settings.builder() diff --git a/server/src/test/java/org/opensearch/gateway/remote/model/RemotePinnedTimestampsTests.java b/server/src/test/java/org/opensearch/gateway/remote/model/RemotePinnedTimestampsTests.java deleted file mode 100644 index 309263a634265..0000000000000 --- a/server/src/test/java/org/opensearch/gateway/remote/model/RemotePinnedTimestampsTests.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.gateway.remote.model; - -import org.opensearch.common.compress.DeflateCompressor; -import org.opensearch.common.io.stream.BytesStreamOutput; -import org.opensearch.core.common.io.stream.BytesStreamInput; -import org.opensearch.core.common.io.stream.StreamInput; -import org.opensearch.core.compress.Compressor; -import org.opensearch.test.OpenSearchTestCase; -import org.junit.Before; - -import java.io.IOException; -import java.io.InputStream; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -public class RemotePinnedTimestampsTests extends OpenSearchTestCase { - - private RemotePinnedTimestamps remotePinnedTimestamps; - - @Before - public void setup() { - Compressor compressor = new DeflateCompressor(); - remotePinnedTimestamps = new RemotePinnedTimestamps("testClusterUUID", compressor); - } - - public void testGenerateBlobFileName() { - String fileName = remotePinnedTimestamps.generateBlobFileName(); - assertTrue(fileName.startsWith(RemotePinnedTimestamps.PINNED_TIMESTAMPS)); - assertEquals(fileName, remotePinnedTimestamps.getBlobFileName()); - } - - public void testSerializeAndDeserialize() throws IOException { - RemotePinnedTimestamps.PinnedTimestamps pinnedTimestamps = new RemotePinnedTimestamps.PinnedTimestamps(new HashMap<>()); - pinnedTimestamps.pin(1000L, "entity1"); - pinnedTimestamps.pin(2000L, "entity2"); - remotePinnedTimestamps.setPinnedTimestamps(pinnedTimestamps); - - InputStream serialized = remotePinnedTimestamps.serialize(); - RemotePinnedTimestamps.PinnedTimestamps deserialized = remotePinnedTimestamps.deserialize(serialized); - - assertEquals(pinnedTimestamps.getPinnedTimestampPinningEntityMap(), deserialized.getPinnedTimestampPinningEntityMap()); - } - - public void testSetAndGetPinnedTimestamps() { - RemotePinnedTimestamps.PinnedTimestamps pinnedTimestamps = new RemotePinnedTimestamps.PinnedTimestamps(new HashMap<>()); - remotePinnedTimestamps.setPinnedTimestamps(pinnedTimestamps); - assertEquals(pinnedTimestamps, remotePinnedTimestamps.getPinnedTimestamps()); - } - - public void testPinnedTimestampsPin() { - RemotePinnedTimestamps.PinnedTimestamps pinnedTimestamps = new RemotePinnedTimestamps.PinnedTimestamps(new HashMap<>()); - pinnedTimestamps.pin(1000L, "entity1"); - pinnedTimestamps.pin(1000L, "entity2"); - pinnedTimestamps.pin(2000L, "entity3"); - - Map> expected = new HashMap<>(); - expected.put(1000L, Arrays.asList("entity1", "entity2")); - expected.put(2000L, List.of("entity3")); - - assertEquals(expected, 
pinnedTimestamps.getPinnedTimestampPinningEntityMap()); - } - - public void testPinnedTimestampsUnpin() { - RemotePinnedTimestamps.PinnedTimestamps pinnedTimestamps = new RemotePinnedTimestamps.PinnedTimestamps(new HashMap<>()); - pinnedTimestamps.pin(1000L, "entity1"); - pinnedTimestamps.pin(1000L, "entity2"); - pinnedTimestamps.pin(2000L, "entity3"); - - pinnedTimestamps.unpin(1000L, "entity1"); - pinnedTimestamps.unpin(2000L, "entity3"); - - Map> expected = new HashMap<>(); - expected.put(1000L, List.of("entity2")); - - assertEquals(expected, pinnedTimestamps.getPinnedTimestampPinningEntityMap()); - } - - public void testPinnedTimestampsReadFromAndWriteTo() throws IOException { - RemotePinnedTimestamps.PinnedTimestamps original = new RemotePinnedTimestamps.PinnedTimestamps(new HashMap<>()); - original.pin(1000L, "entity1"); - original.pin(2000L, "entity2"); - - BytesStreamOutput out = new BytesStreamOutput(); - original.writeTo(out); - - StreamInput in = new BytesStreamInput(out.bytes().toBytesRef().bytes); - RemotePinnedTimestamps.PinnedTimestamps deserialized = RemotePinnedTimestamps.PinnedTimestamps.readFrom(in); - - assertEquals(original.getPinnedTimestampPinningEntityMap(), deserialized.getPinnedTimestampPinningEntityMap()); - } -} diff --git a/server/src/test/java/org/opensearch/gateway/remote/model/RemoteTemplatesMetadataTests.java b/server/src/test/java/org/opensearch/gateway/remote/model/RemoteTemplatesMetadataTests.java index d7ecd2ad3f44a..ca978654c8825 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/model/RemoteTemplatesMetadataTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/model/RemoteTemplatesMetadataTests.java @@ -231,12 +231,22 @@ public static TemplatesMetadata getTemplatesMetadata() { return TemplatesMetadata.builder() .put( IndexTemplateMetadata.builder("template" + randomAlphaOfLength(3)) + .order(1234) .patterns(Arrays.asList("bar-*", "foo-*")) .settings( Settings.builder().put("index.random_index_setting_" + randomAlphaOfLength(3), randomAlphaOfLength(5)).build() ) .build() ) + .put( + IndexTemplateMetadata.builder("template" + randomAlphaOfLength(3)) + .order(5678) + .patterns(Arrays.asList("test-*")) + .settings( + Settings.builder().put("index.random_index_setting_" + randomAlphaOfLength(3), randomAlphaOfLength(5)).build() + ) + .build() + ) .build(); } } diff --git a/server/src/test/java/org/opensearch/gateway/remote/routingtable/RemoteIndexRoutingTableDiffTests.java b/server/src/test/java/org/opensearch/gateway/remote/routingtable/RemoteIndexRoutingTableDiffTests.java index 6ffa7fc5cded8..5dcc3127f2f5d 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/routingtable/RemoteIndexRoutingTableDiffTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/routingtable/RemoteIndexRoutingTableDiffTests.java @@ -8,11 +8,8 @@ package org.opensearch.gateway.remote.routingtable; -import org.opensearch.Version; +import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.Diff; -import org.opensearch.cluster.metadata.IndexMetadata; -import org.opensearch.cluster.metadata.Metadata; -import org.opensearch.cluster.routing.IndexRoutingTable; import org.opensearch.cluster.routing.RoutingTable; import org.opensearch.cluster.routing.RoutingTableIncrementalDiff; import org.opensearch.common.blobstore.BlobPath; @@ -35,14 +32,12 @@ import java.io.IOException; import java.io.InputStream; -import java.util.HashMap; import java.util.List; -import java.util.Map; +import static 
org.opensearch.gateway.remote.RemoteClusterStateServiceTests.generateClusterStateWithOneIndex; import static org.opensearch.gateway.remote.routingtable.RemoteRoutingTableDiff.ROUTING_TABLE_DIFF_FILE; import static org.opensearch.gateway.remote.routingtable.RemoteRoutingTableDiff.ROUTING_TABLE_DIFF_METADATA_PREFIX; import static org.opensearch.gateway.remote.routingtable.RemoteRoutingTableDiff.ROUTING_TABLE_DIFF_PATH_TOKEN; -import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.lessThanOrEqualTo; import static org.hamcrest.Matchers.nullValue; @@ -87,22 +82,14 @@ public void tearDown() throws Exception { } public void testClusterUUID() { - Map> diffs = new HashMap<>(); String indexName = randomAlphaOfLength(randomIntBetween(1, 50)); - int numberOfShards = randomIntBetween(1, 10); - int numberOfReplicas = randomIntBetween(1, 10); - - IndexMetadata indexMetadata = IndexMetadata.builder(indexName) - .settings(settings(Version.CURRENT)) - .numberOfShards(numberOfShards) - .numberOfReplicas(numberOfReplicas) - .build(); + ClusterState previousState = generateClusterStateWithOneIndex(indexName, 5, 1, false).build(); + ClusterState currentState = generateClusterStateWithOneIndex(indexName, 5, 2, true).build(); - IndexRoutingTable indexRoutingTable = IndexRoutingTable.builder(indexMetadata.getIndex()).initializeAsNew(indexMetadata).build(); - - diffs.put(indexName, indexRoutingTable.diff(indexRoutingTable)); - - RoutingTableIncrementalDiff routingTableIncrementalDiff = new RoutingTableIncrementalDiff(diffs); + RoutingTableIncrementalDiff routingTableIncrementalDiff = new RoutingTableIncrementalDiff( + previousState.getRoutingTable(), + currentState.getRoutingTable() + ); RemoteRoutingTableDiff remoteDiffForUpload = new RemoteRoutingTableDiff( routingTableIncrementalDiff, @@ -118,21 +105,14 @@ public void testClusterUUID() { } public void testFullBlobName() { - Map> diffs = new HashMap<>(); String indexName = randomAlphaOfLength(randomIntBetween(1, 50)); - int numberOfShards = randomIntBetween(1, 10); - int numberOfReplicas = randomIntBetween(1, 10); - - IndexMetadata indexMetadata = IndexMetadata.builder(indexName) - .settings(settings(Version.CURRENT)) - .numberOfShards(numberOfShards) - .numberOfReplicas(numberOfReplicas) - .build(); + ClusterState previousState = generateClusterStateWithOneIndex(indexName, 5, 1, false).build(); + ClusterState currentState = generateClusterStateWithOneIndex(indexName, 5, 2, true).build(); - IndexRoutingTable indexRoutingTable = IndexRoutingTable.builder(indexMetadata.getIndex()).initializeAsNew(indexMetadata).build(); - - diffs.put(indexName, indexRoutingTable.diff(indexRoutingTable)); - RoutingTableIncrementalDiff routingTableIncrementalDiff = new RoutingTableIncrementalDiff(diffs); + RoutingTableIncrementalDiff routingTableIncrementalDiff = new RoutingTableIncrementalDiff( + previousState.getRoutingTable(), + currentState.getRoutingTable() + ); RemoteRoutingTableDiff remoteDiffForUpload = new RemoteRoutingTableDiff( routingTableIncrementalDiff, @@ -148,21 +128,14 @@ public void testFullBlobName() { } public void testBlobFileName() { - Map> diffs = new HashMap<>(); String indexName = randomAlphaOfLength(randomIntBetween(1, 50)); - int numberOfShards = randomIntBetween(1, 10); - int numberOfReplicas = randomIntBetween(1, 10); - - IndexMetadata indexMetadata = IndexMetadata.builder(indexName) - .settings(settings(Version.CURRENT)) - .numberOfShards(numberOfShards) - 
.numberOfReplicas(numberOfReplicas) - .build(); - - IndexRoutingTable indexRoutingTable = IndexRoutingTable.builder(indexMetadata.getIndex()).initializeAsNew(indexMetadata).build(); + ClusterState previousState = generateClusterStateWithOneIndex(indexName, 5, 1, false).build(); + ClusterState currentState = generateClusterStateWithOneIndex(indexName, 5, 2, true).build(); - diffs.put(indexName, indexRoutingTable.diff(indexRoutingTable)); - RoutingTableIncrementalDiff routingTableIncrementalDiff = new RoutingTableIncrementalDiff(diffs); + RoutingTableIncrementalDiff routingTableIncrementalDiff = new RoutingTableIncrementalDiff( + previousState.getRoutingTable(), + currentState.getRoutingTable() + ); RemoteRoutingTableDiff remoteDiffForUpload = new RemoteRoutingTableDiff( routingTableIncrementalDiff, @@ -178,21 +151,14 @@ public void testBlobFileName() { } public void testBlobPathParameters() { - Map> diffs = new HashMap<>(); String indexName = randomAlphaOfLength(randomIntBetween(1, 50)); - int numberOfShards = randomIntBetween(1, 10); - int numberOfReplicas = randomIntBetween(1, 10); + ClusterState previousState = generateClusterStateWithOneIndex(indexName, 5, 1, false).build(); + ClusterState currentState = generateClusterStateWithOneIndex(indexName, 5, 2, true).build(); - IndexMetadata indexMetadata = IndexMetadata.builder(indexName) - .settings(settings(Version.CURRENT)) - .numberOfShards(numberOfShards) - .numberOfReplicas(numberOfReplicas) - .build(); - - IndexRoutingTable indexRoutingTable = IndexRoutingTable.builder(indexMetadata.getIndex()).initializeAsNew(indexMetadata).build(); - - diffs.put(indexName, indexRoutingTable.diff(indexRoutingTable)); - RoutingTableIncrementalDiff routingTableIncrementalDiff = new RoutingTableIncrementalDiff(diffs); + RoutingTableIncrementalDiff routingTableIncrementalDiff = new RoutingTableIncrementalDiff( + previousState.getRoutingTable(), + currentState.getRoutingTable() + ); RemoteRoutingTableDiff remoteDiffForUpload = new RemoteRoutingTableDiff( routingTableIncrementalDiff, @@ -210,21 +176,14 @@ public void testBlobPathParameters() { } public void testGenerateBlobFileName() { - Map> diffs = new HashMap<>(); String indexName = randomAlphaOfLength(randomIntBetween(1, 50)); - int numberOfShards = randomIntBetween(1, 10); - int numberOfReplicas = randomIntBetween(1, 10); - - IndexMetadata indexMetadata = IndexMetadata.builder(indexName) - .settings(settings(Version.CURRENT)) - .numberOfShards(numberOfShards) - .numberOfReplicas(numberOfReplicas) - .build(); + ClusterState previousState = generateClusterStateWithOneIndex(indexName, 5, 1, false).build(); + ClusterState currentState = generateClusterStateWithOneIndex(indexName, 5, 2, true).build(); - IndexRoutingTable indexRoutingTable = IndexRoutingTable.builder(indexMetadata.getIndex()).initializeAsNew(indexMetadata).build(); - - diffs.put(indexName, indexRoutingTable.diff(indexRoutingTable)); - RoutingTableIncrementalDiff routingTableIncrementalDiff = new RoutingTableIncrementalDiff(diffs); + RoutingTableIncrementalDiff routingTableIncrementalDiff = new RoutingTableIncrementalDiff( + previousState.getRoutingTable(), + currentState.getRoutingTable() + ); RemoteRoutingTableDiff remoteDiffForUpload = new RemoteRoutingTableDiff( routingTableIncrementalDiff, @@ -243,21 +202,14 @@ public void testGenerateBlobFileName() { } public void testGetUploadedMetadata() throws IOException { - Map> diffs = new HashMap<>(); String indexName = randomAlphaOfLength(randomIntBetween(1, 50)); - int numberOfShards = 
randomIntBetween(1, 10); - int numberOfReplicas = randomIntBetween(1, 10); - - IndexMetadata indexMetadata = IndexMetadata.builder(indexName) - .settings(settings(Version.CURRENT)) - .numberOfShards(numberOfShards) - .numberOfReplicas(numberOfReplicas) - .build(); - - IndexRoutingTable indexRoutingTable = IndexRoutingTable.builder(indexMetadata.getIndex()).initializeAsNew(indexMetadata).build(); + ClusterState previousState = generateClusterStateWithOneIndex(indexName, 5, 1, false).build(); + ClusterState currentState = generateClusterStateWithOneIndex(indexName, 5, 2, true).build(); - diffs.put(indexName, indexRoutingTable.diff(indexRoutingTable)); - RoutingTableIncrementalDiff routingTableIncrementalDiff = new RoutingTableIncrementalDiff(diffs); + RoutingTableIncrementalDiff routingTableIncrementalDiff = new RoutingTableIncrementalDiff( + previousState.getRoutingTable(), + currentState.getRoutingTable() + ); RemoteRoutingTableDiff remoteDiffForUpload = new RemoteRoutingTableDiff( routingTableIncrementalDiff, @@ -277,41 +229,35 @@ public void testStreamOperations() throws IOException { int numberOfShards = randomIntBetween(1, 10); int numberOfReplicas = randomIntBetween(1, 10); - Metadata metadata = Metadata.builder() - .put( - IndexMetadata.builder(indexName) - .settings(settings(Version.CURRENT)) - .numberOfShards(numberOfShards) - .numberOfReplicas(numberOfReplicas) - ) - .build(); - - RoutingTable initialRoutingTable = RoutingTable.builder().addAsNew(metadata.index(indexName)).build(); - Map> diffs = new HashMap<>(); - - initialRoutingTable.getIndicesRouting().values().forEach(indexRoutingTable -> { - diffs.put(indexName, indexRoutingTable.diff(indexRoutingTable)); - RoutingTableIncrementalDiff routingTableIncrementalDiff = new RoutingTableIncrementalDiff(diffs); - - RemoteRoutingTableDiff remoteDiffForUpload = new RemoteRoutingTableDiff( - routingTableIncrementalDiff, - clusterUUID, - compressor, - STATE_TERM, - STATE_VERSION - ); - - assertThrows(AssertionError.class, remoteDiffForUpload::getUploadedMetadata); - - try (InputStream inputStream = remoteDiffForUpload.serialize()) { - remoteDiffForUpload.setFullBlobName(BlobPath.cleanPath()); - assertThat(inputStream.available(), greaterThan(0)); - - routingTableIncrementalDiff = remoteDiffForUpload.deserialize(inputStream); - assertEquals(remoteDiffForUpload.getDiffs().size(), routingTableIncrementalDiff.getDiffs().size()); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); + ClusterState previousState = generateClusterStateWithOneIndex(indexName, numberOfShards, numberOfReplicas, false).build(); + ClusterState currentState = generateClusterStateWithOneIndex(indexName, numberOfShards, numberOfReplicas + 1, true).build(); + + RoutingTableIncrementalDiff routingTableIncrementalDiff = new RoutingTableIncrementalDiff( + previousState.getRoutingTable(), + currentState.getRoutingTable() + ); + + RemoteRoutingTableDiff remoteDiffForUpload = new RemoteRoutingTableDiff( + routingTableIncrementalDiff, + clusterUUID, + compressor, + STATE_TERM, + STATE_VERSION + ); + + // Serialize the remote diff + InputStream inputStream = remoteDiffForUpload.serialize(); + + // Create a new instance for deserialization + RemoteRoutingTableDiff remoteDiffForDownload = new RemoteRoutingTableDiff(TEST_BLOB_NAME, clusterUUID, compressor); + + // Deserialize the remote diff + Diff deserializedDiff = remoteDiffForDownload.deserialize(inputStream); + + // Assert that the indices routing table created from routingTableIncrementalDiff and 
deserializedDiff is equal + assertEquals( + routingTableIncrementalDiff.apply(previousState.getRoutingTable()).getIndicesRouting(), + deserializedDiff.apply(previousState.getRoutingTable()).getIndicesRouting() + ); } } diff --git a/server/src/test/java/org/opensearch/index/IndexModuleTests.java b/server/src/test/java/org/opensearch/index/IndexModuleTests.java index 4ce4936c047d9..bd86d3d396987 100644 --- a/server/src/test/java/org/opensearch/index/IndexModuleTests.java +++ b/server/src/test/java/org/opensearch/index/IndexModuleTests.java @@ -260,11 +260,12 @@ private IndexService newIndexService(IndexModule module) throws IOException { writableRegistry(), () -> false, null, - new RemoteSegmentStoreDirectoryFactory(() -> repositoriesService, threadPool), + new RemoteSegmentStoreDirectoryFactory(() -> repositoriesService, threadPool, ""), translogFactorySupplier, () -> IndexSettings.DEFAULT_REFRESH_INTERVAL, DefaultRecoverySettings.INSTANCE, - DefaultRemoteStoreSettings.INSTANCE + DefaultRemoteStoreSettings.INSTANCE, + s -> {} ); } diff --git a/server/src/test/java/org/opensearch/index/IndexServiceTests.java b/server/src/test/java/org/opensearch/index/IndexServiceTests.java index 14451ef21726e..5905e64cede1b 100644 --- a/server/src/test/java/org/opensearch/index/IndexServiceTests.java +++ b/server/src/test/java/org/opensearch/index/IndexServiceTests.java @@ -41,6 +41,7 @@ import org.opensearch.common.compress.CompressedXContent; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.core.index.Index; import org.opensearch.core.xcontent.MediaTypeRegistry; @@ -52,6 +53,7 @@ import org.opensearch.index.shard.IndexShardTestCase; import org.opensearch.index.translog.Translog; import org.opensearch.indices.IndicesService; +import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.plugins.Plugin; import org.opensearch.test.InternalSettingsPlugin; import org.opensearch.test.OpenSearchSingleNodeTestCase; @@ -591,6 +593,57 @@ public void testIndexSortBackwardCompatible() { } } + public void testReplicationTask() throws Exception { + // create with docrep - task should not schedule + IndexService indexService = createIndex( + "docrep_index", + Settings.builder().put(IndexMetadata.INDEX_REPLICATION_TYPE_SETTING.getKey(), ReplicationType.DOCUMENT).build() + ); + final Index index = indexService.index(); + ensureGreen(index.getName()); + IndexService.AsyncReplicationTask task = indexService.getReplicationTask(); + assertFalse(task.isScheduled()); + assertFalse(task.mustReschedule()); + + // create for segrep - task should schedule + indexService = createIndex( + "segrep_index", + Settings.builder() + .put(IndexMetadata.INDEX_REPLICATION_TYPE_SETTING.getKey(), ReplicationType.SEGMENT) + .put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), "5s") + .build() + ); + final Index srIndex = indexService.index(); + ensureGreen(srIndex.getName()); + task = indexService.getReplicationTask(); + assertTrue(task.isScheduled()); + assertTrue(task.mustReschedule()); + assertEquals(5000, task.getInterval().millis()); + + // test we can update the interval + client().admin() + .indices() + .prepareUpdateSettings("segrep_index") + .setSettings(Settings.builder().put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), "1s")) + .get(); + + IndexService.AsyncReplicationTask updatedTask = indexService.getReplicationTask(); + 
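+ // updating index.refresh_interval should swap in a fresh replication task: the assertions below verify the old task is closed and the new one is scheduled at the updated 1s interval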
assertNotSame(task, updatedTask); + assertFalse(task.isScheduled()); + assertTrue(task.isClosed()); + assertTrue(updatedTask.isScheduled()); + assertTrue(updatedTask.mustReschedule()); + assertEquals(1000, updatedTask.getInterval().millis()); + } + + @Override + protected Settings featureFlagSettings() { + return Settings.builder() + .put(super.featureFlagSettings()) + .put(FeatureFlags.READER_WRITER_SPLIT_EXPERIMENTAL_SETTING.getKey(), true) + .build(); + } + private static String createTestMapping(String type) { return " \"properties\": {\n" + " \"test\": {\n" diff --git a/server/src/test/java/org/opensearch/index/codec/composite/datacube/startree/StarTreeDocValuesFormatTests.java b/server/src/test/java/org/opensearch/index/codec/composite/datacube/startree/StarTreeDocValuesFormatTests.java deleted file mode 100644 index 54a9cc035d7a9..0000000000000 --- a/server/src/test/java/org/opensearch/index/codec/composite/datacube/startree/StarTreeDocValuesFormatTests.java +++ /dev/null @@ -1,176 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.index.codec.composite.datacube.startree; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.lucene99.Lucene99Codec; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.SortedNumericDocValuesField; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.store.Directory; -import org.apache.lucene.tests.index.BaseDocValuesFormatTestCase; -import org.apache.lucene.tests.index.RandomIndexWriter; -import org.apache.lucene.tests.util.LuceneTestCase; -import org.opensearch.Version; -import org.opensearch.cluster.ClusterModule; -import org.opensearch.cluster.metadata.IndexMetadata; -import org.opensearch.common.CheckedConsumer; -import org.opensearch.common.settings.Settings; -import org.opensearch.common.util.FeatureFlags; -import org.opensearch.common.xcontent.XContentFactory; -import org.opensearch.core.xcontent.NamedXContentRegistry; -import org.opensearch.core.xcontent.XContentBuilder; -import org.opensearch.index.MapperTestUtils; -import org.opensearch.index.codec.composite.composite99.Composite99Codec; -import org.opensearch.index.mapper.MapperService; -import org.opensearch.indices.IndicesModule; -import org.junit.After; -import org.junit.AfterClass; -import org.junit.BeforeClass; - -import java.io.IOException; -import java.util.Collections; - -import static org.opensearch.common.util.FeatureFlags.STAR_TREE_INDEX; - -/** - * Star tree doc values Lucene tests - */ -@LuceneTestCase.SuppressSysoutChecks(bugUrl = "we log a lot on purpose") -public class StarTreeDocValuesFormatTests extends BaseDocValuesFormatTestCase { - MapperService mapperService = null; - - @BeforeClass - public static void createMapper() throws Exception { - FeatureFlags.initializeFeatureFlags(Settings.builder().put(STAR_TREE_INDEX, "true").build()); - } - - @AfterClass - public static void clearMapper() { - FeatureFlags.initializeFeatureFlags(Settings.EMPTY); - } - - @After - public void teardown() throws IOException { - mapperService.close(); - } - - @Override - protected Codec getCodec() { - final Logger testLogger = LogManager.getLogger(StarTreeDocValuesFormatTests.class); - - try { - createMapperService(getExpandedMapping("status", 
"size")); - } catch (IOException e) { - throw new RuntimeException(e); - } - Codec codec = new Composite99Codec(Lucene99Codec.Mode.BEST_SPEED, mapperService, testLogger); - return codec; - } - - public void testStarTreeDocValues() throws IOException { - Directory directory = newDirectory(); - IndexWriterConfig conf = newIndexWriterConfig(null); - conf.setMergePolicy(newLogMergePolicy()); - RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf); - Document doc = new Document(); - doc.add(new SortedNumericDocValuesField("sndv", 1)); - doc.add(new SortedNumericDocValuesField("dv", 1)); - doc.add(new SortedNumericDocValuesField("field", 1)); - iw.addDocument(doc); - doc.add(new SortedNumericDocValuesField("sndv", 1)); - doc.add(new SortedNumericDocValuesField("dv", 1)); - doc.add(new SortedNumericDocValuesField("field", 1)); - iw.addDocument(doc); - iw.forceMerge(1); - doc.add(new SortedNumericDocValuesField("sndv", 2)); - doc.add(new SortedNumericDocValuesField("dv", 2)); - doc.add(new SortedNumericDocValuesField("field", 2)); - iw.addDocument(doc); - doc.add(new SortedNumericDocValuesField("sndv", 2)); - doc.add(new SortedNumericDocValuesField("dv", 2)); - doc.add(new SortedNumericDocValuesField("field", 2)); - iw.addDocument(doc); - iw.forceMerge(1); - iw.close(); - - // TODO : validate star tree structures that got created - directory.close(); - } - - private XContentBuilder getExpandedMapping(String dim, String metric) throws IOException { - return topMapping(b -> { - b.startObject("composite"); - b.startObject("startree"); - b.field("type", "star_tree"); - b.startObject("config"); - b.field("max_leaf_docs", 100); - b.startArray("ordered_dimensions"); - b.startObject(); - b.field("name", "sndv"); - b.endObject(); - b.startObject(); - b.field("name", "dv"); - b.endObject(); - b.endArray(); - b.startArray("metrics"); - b.startObject(); - b.field("name", "field"); - b.startArray("stats"); - b.value("sum"); - b.value("value_count"); - b.endArray(); - b.endObject(); - b.endArray(); - b.endObject(); - b.endObject(); - b.endObject(); - b.startObject("properties"); - b.startObject("sndv"); - b.field("type", "integer"); - b.endObject(); - b.startObject("dv"); - b.field("type", "integer"); - b.endObject(); - b.startObject("field"); - b.field("type", "integer"); - b.endObject(); - b.endObject(); - }); - } - - private XContentBuilder topMapping(CheckedConsumer buildFields) throws IOException { - XContentBuilder builder = XContentFactory.jsonBuilder().startObject().startObject("_doc"); - buildFields.accept(builder); - return builder.endObject().endObject(); - } - - private void createMapperService(XContentBuilder builder) throws IOException { - IndexMetadata indexMetadata = IndexMetadata.builder("test") - .settings( - Settings.builder() - .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) - .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) - .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) - ) - .putMapping(builder.toString()) - .build(); - IndicesModule indicesModule = new IndicesModule(Collections.emptyList()); - mapperService = MapperTestUtils.newMapperServiceWithHelperAnalyzer( - new NamedXContentRegistry(ClusterModule.getNamedXWriteables()), - createTempDir(), - Settings.EMPTY, - indicesModule, - "test" - ); - mapperService.merge(indexMetadata, MapperService.MergeReason.INDEX_TEMPLATE); - } -} diff --git a/server/src/test/java/org/opensearch/index/codec/composite99/datacube/startree/StarTreeDocValuesFormatTests.java 
b/server/src/test/java/org/opensearch/index/codec/composite99/datacube/startree/StarTreeDocValuesFormatTests.java new file mode 100644 index 0000000000000..1c267c67e60ed --- /dev/null +++ b/server/src/test/java/org/opensearch/index/codec/composite99/datacube/startree/StarTreeDocValuesFormatTests.java @@ -0,0 +1,289 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec.composite99.datacube.startree; + +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.lucene99.Lucene99Codec; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SegmentReader; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.BaseDocValuesFormatTestCase; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.apache.lucene.tests.util.TestUtil; +import org.opensearch.Version; +import org.opensearch.cluster.ClusterModule; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.CheckedConsumer; +import org.opensearch.common.lucene.Lucene; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; +import org.opensearch.common.xcontent.XContentFactory; +import org.opensearch.core.common.unit.ByteSizeUnit; +import org.opensearch.core.common.unit.ByteSizeValue; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.IndexSettings; +import org.opensearch.index.MapperTestUtils; +import org.opensearch.index.codec.composite.CompositeIndexFieldInfo; +import org.opensearch.index.codec.composite.CompositeIndexReader; +import org.opensearch.index.codec.composite.composite99.Composite99Codec; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeDocument; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeIndexSettings; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeTestUtils; +import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.mapper.NumberFieldMapper; +import org.opensearch.indices.IndicesModule; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.BeforeClass; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +import static org.opensearch.common.util.FeatureFlags.STAR_TREE_INDEX; +import static org.opensearch.index.compositeindex.datacube.startree.StarTreeTestUtils.assertStarTreeDocuments; + +/** + * Star tree doc values Lucene tests + */ +@LuceneTestCase.SuppressSysoutChecks(bugUrl = "we log a lot on purpose") +public class StarTreeDocValuesFormatTests extends BaseDocValuesFormatTestCase { + MapperService 
mapperService = null; + StarTreeFieldConfiguration.StarTreeBuildMode buildMode; + + public StarTreeDocValuesFormatTests(StarTreeFieldConfiguration.StarTreeBuildMode buildMode) { + this.buildMode = buildMode; + } + + @ParametersFactory + public static Collection parameters() { + List parameters = new ArrayList<>(); + parameters.add(new Object[] { StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP }); + parameters.add(new Object[] { StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP }); + return parameters; + } + + @BeforeClass + public static void createMapper() throws Exception { + FeatureFlags.initializeFeatureFlags(Settings.builder().put(STAR_TREE_INDEX, "true").build()); + } + + @AfterClass + public static void clearMapper() { + FeatureFlags.initializeFeatureFlags(Settings.EMPTY); + } + + @After + public void teardown() throws IOException { + mapperService.close(); + } + + @Override + protected Codec getCodec() { + final Logger testLogger = LogManager.getLogger(StarTreeDocValuesFormatTests.class); + + try { + createMapperService(getExpandedMapping()); + } catch (IOException e) { + throw new RuntimeException(e); + } + Codec codec = new Composite99Codec(Lucene99Codec.Mode.BEST_SPEED, mapperService, testLogger); + return codec; + } + + public void testStarTreeDocValues() throws IOException { + Directory directory = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(null); + conf.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf); + Document doc = new Document(); + doc.add(new SortedNumericDocValuesField("sndv", 1)); + doc.add(new SortedNumericDocValuesField("dv", 1)); + doc.add(new SortedNumericDocValuesField("field", -1)); + iw.addDocument(doc); + doc = new Document(); + doc.add(new SortedNumericDocValuesField("sndv", 1)); + doc.add(new SortedNumericDocValuesField("dv", 1)); + doc.add(new SortedNumericDocValuesField("field", -1)); + iw.addDocument(doc); + doc = new Document(); + iw.forceMerge(1); + doc.add(new SortedNumericDocValuesField("sndv", 2)); + doc.add(new SortedNumericDocValuesField("dv", 2)); + doc.add(new SortedNumericDocValuesField("field", -2)); + iw.addDocument(doc); + doc = new Document(); + doc.add(new SortedNumericDocValuesField("sndv", 2)); + doc.add(new SortedNumericDocValuesField("dv", 2)); + doc.add(new SortedNumericDocValuesField("field", -2)); + iw.addDocument(doc); + iw.forceMerge(1); + iw.close(); + + DirectoryReader ir = maybeWrapWithMergingReader(DirectoryReader.open(directory)); + TestUtil.checkReader(ir); + assertEquals(1, ir.leaves().size()); + + // Segment documents + /** + * sndv dv field + * [1, 1, -1] + * [1, 1, -1] + * [2, 2, -2] + * [2, 2, -2] + */ + // Star tree docuements + /** + * sndv dv | [ sum, value_count, min, max[field]] , [ sum, value_count, min, max[sndv]], doc_count + * [1, 1] | [-2.0, 2.0, -1.0, -1.0, 2.0, 2.0, 1.0, 1.0, 2.0] + * [2, 2] | [-4.0, 2.0, -2.0, -2.0, 4.0, 2.0, 2.0, 2.0, 2.0] + * [null, 1] | [-2.0, 2.0, -1.0, -1.0, 2.0, 2.0, 1.0, 1.0, 2.0] + * [null, 2] | [-4.0, 2.0, -2.0, -2.0, 4.0, 2.0, 2.0, 2.0, 2.0] + */ + StarTreeDocument[] expectedStarTreeDocuments = new StarTreeDocument[4]; + expectedStarTreeDocuments[0] = new StarTreeDocument( + new Long[] { 1L, 1L }, + new Double[] { -2.0, 2.0, -1.0, -1.0, 2.0, 2.0, 1.0, 1.0, 2.0 } + ); + expectedStarTreeDocuments[1] = new StarTreeDocument( + new Long[] { 2L, 2L }, + new Double[] { -4.0, 2.0, -2.0, -2.0, 4.0, 2.0, 2.0, 2.0, 2.0 } + ); + expectedStarTreeDocuments[2] = new StarTreeDocument( + new Long[] { null, 
1L }, + new Double[] { -2.0, 2.0, -1.0, -1.0, 2.0, 2.0, 1.0, 1.0, 2.0 } + ); + expectedStarTreeDocuments[3] = new StarTreeDocument( + new Long[] { null, 2L }, + new Double[] { -4.0, 2.0, -2.0, -2.0, 4.0, 2.0, 2.0, 2.0, 2.0 } + ); + + for (LeafReaderContext context : ir.leaves()) { + SegmentReader reader = Lucene.segmentReader(context.reader()); + CompositeIndexReader starTreeDocValuesReader = (CompositeIndexReader) reader.getDocValuesReader(); + List compositeIndexFields = starTreeDocValuesReader.getCompositeIndexFields(); + + for (CompositeIndexFieldInfo compositeIndexFieldInfo : compositeIndexFields) { + StarTreeValues starTreeValues = (StarTreeValues) starTreeDocValuesReader.getCompositeIndexValues(compositeIndexFieldInfo); + StarTreeDocument[] starTreeDocuments = StarTreeTestUtils.getSegmentsStarTreeDocuments( + List.of(starTreeValues), + List.of( + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.LONG, + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.LONG, + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.DOUBLE, + NumberFieldMapper.NumberType.LONG + ), + reader.maxDoc() + ); + assertStarTreeDocuments(starTreeDocuments, expectedStarTreeDocuments); + } + } + ir.close(); + directory.close(); + } + + private XContentBuilder getExpandedMapping() throws IOException { + return topMapping(b -> { + b.startObject("composite"); + b.startObject("startree"); + b.field("type", "star_tree"); + b.startObject("config"); + b.field("max_leaf_docs", 1); + b.startArray("ordered_dimensions"); + b.startObject(); + b.field("name", "sndv"); + b.endObject(); + b.startObject(); + b.field("name", "dv"); + b.endObject(); + b.endArray(); + b.startArray("metrics"); + b.startObject(); + b.field("name", "field"); + b.startArray("stats"); + b.value("sum"); + b.value("value_count"); + b.value("avg"); + b.value("min"); + b.value("max"); + b.endArray(); + b.endObject(); + b.startObject(); + b.field("name", "sndv"); + b.startArray("stats"); + b.value("sum"); + b.value("value_count"); + b.value("avg"); + b.value("min"); + b.value("max"); + b.endArray(); + b.endObject(); + b.endArray(); + b.endObject(); + b.endObject(); + b.endObject(); + b.startObject("properties"); + b.startObject("sndv"); + b.field("type", "integer"); + b.endObject(); + b.startObject("dv"); + b.field("type", "integer"); + b.endObject(); + b.startObject("field"); + b.field("type", "integer"); + b.endObject(); + b.endObject(); + }); + } + + private XContentBuilder topMapping(CheckedConsumer buildFields) throws IOException { + XContentBuilder builder = XContentFactory.jsonBuilder().startObject().startObject("_doc"); + buildFields.accept(builder); + return builder.endObject().endObject(); + } + + private void createMapperService(XContentBuilder builder) throws IOException { + Settings settings = Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + .put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), true) + .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue(512, ByteSizeUnit.MB)) + .build(); + IndexMetadata indexMetadata = IndexMetadata.builder("test").settings(settings).putMapping(builder.toString()).build(); + IndicesModule indicesModule = new IndicesModule(Collections.emptyList()); + mapperService = 
MapperTestUtils.newMapperServiceWithHelperAnalyzer( + new NamedXContentRegistry(ClusterModule.getNamedXWriteables()), + createTempDir(), + settings, + indicesModule, + "test" + ); + mapperService.merge(indexMetadata, MapperService.MergeReason.INDEX_TEMPLATE); + } +} diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeTestUtils.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeTestUtils.java new file mode 100644 index 0000000000000..b7395b993f67b --- /dev/null +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeTestUtils.java @@ -0,0 +1,321 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree; + +import org.apache.lucene.store.IndexInput; +import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.Metric; +import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetadata; +import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues; +import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeFactory; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNode; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNodeType; +import org.opensearch.index.compositeindex.datacube.startree.utils.SequentialDocValuesIterator; +import org.opensearch.index.mapper.CompositeMappedFieldType; +import org.opensearch.index.mapper.FieldValueConverter; + +import java.io.IOException; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Queue; + +import static org.opensearch.index.compositeindex.CompositeIndexConstants.COMPOSITE_FIELD_MARKER; +import static org.opensearch.index.compositeindex.datacube.startree.fileformats.StarTreeWriter.VERSION_CURRENT; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.fullyQualifiedFieldNameForStarTreeMetricsDocValues; +import static org.opensearch.index.mapper.CompositeMappedFieldType.CompositeFieldType.STAR_TREE; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +public class StarTreeTestUtils { + + public static StarTreeDocument[] getSegmentsStarTreeDocuments( + List starTreeValuesSubs, + List fieldValueConverters, + int numDocs + ) throws IOException { + List starTreeDocuments = new ArrayList<>(); + for (StarTreeValues starTreeValues : starTreeValuesSubs) { + List dimensionsSplitOrder = starTreeValues.getStarTreeField().getDimensionsOrder(); + SequentialDocValuesIterator[] dimensionReaders = new SequentialDocValuesIterator[dimensionsSplitOrder.size()]; + + for (int i = 0; i < dimensionsSplitOrder.size(); i++) { + String dimension = dimensionsSplitOrder.get(i).getField(); + dimensionReaders[i] = new SequentialDocValuesIterator(starTreeValues.getDimensionDocIdSetIterator(dimension)); + } + 
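+ // only base metrics are materialized as doc values; derived metrics such as avg are skipped below because they are computed from the stored base stats (sum, value_count) rather than persisted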
+ List metricReaders = new ArrayList<>(); + // get doc id set iterators for metrics + for (Metric metric : starTreeValues.getStarTreeField().getMetrics()) { + for (MetricStat metricStat : metric.getMetrics()) { + if (metricStat.isDerivedMetric()) { + continue; + } + String metricFullName = fullyQualifiedFieldNameForStarTreeMetricsDocValues( + starTreeValues.getStarTreeField().getName(), + metric.getField(), + metricStat.getTypeName() + ); + metricReaders.add(new SequentialDocValuesIterator(starTreeValues.getMetricDocIdSetIterator(metricFullName))); + + } + } + int currentDocId = 0; + while (currentDocId < numDocs) { + starTreeDocuments.add(getStarTreeDocument(currentDocId, dimensionReaders, metricReaders, fieldValueConverters)); + currentDocId++; + } + } + StarTreeDocument[] starTreeDocumentsArr = new StarTreeDocument[starTreeDocuments.size()]; + return starTreeDocuments.toArray(starTreeDocumentsArr); + } + + public static StarTreeDocument getStarTreeDocument( + int currentDocId, + SequentialDocValuesIterator[] dimensionReaders, + List metricReaders, + List fieldValueConverters + ) throws IOException { + Long[] dims = new Long[dimensionReaders.length]; + int i = 0; + for (SequentialDocValuesIterator dimensionDocValueIterator : dimensionReaders) { + dimensionDocValueIterator.nextDoc(currentDocId); + Long val = dimensionDocValueIterator.value(currentDocId); + dims[i] = val; + i++; + } + i = 0; + Object[] metrics = new Object[metricReaders.size()]; + for (SequentialDocValuesIterator metricDocValuesIterator : metricReaders) { + metricDocValuesIterator.nextDoc(currentDocId); + metrics[i] = toAggregatorValueType(metricDocValuesIterator.value(currentDocId), fieldValueConverters.get(i)); + i++; + } + return new StarTreeDocument(dims, metrics); + } + + public static Double toAggregatorValueType(Long value, FieldValueConverter fieldValueConverter) { + try { + return fieldValueConverter.toDoubleValue(value); + } catch (Exception e) { + throw new IllegalStateException("Cannot convert " + value + " to sortable aggregation type", e); + } + } + + public static void assertStarTreeDocuments(StarTreeDocument[] starTreeDocuments, StarTreeDocument[] expectedStarTreeDocuments) { + + assertNotNull(starTreeDocuments); + assertEquals(starTreeDocuments.length, expectedStarTreeDocuments.length); + + for (int i = 0; i < starTreeDocuments.length; i++) { + + StarTreeDocument resultStarTreeDocument = starTreeDocuments[i]; + StarTreeDocument expectedStarTreeDocument = expectedStarTreeDocuments[i]; + + assertNotNull(resultStarTreeDocument.dimensions); + assertNotNull(resultStarTreeDocument.metrics); + + assertEquals(expectedStarTreeDocument.dimensions.length, resultStarTreeDocument.dimensions.length); + assertEquals(expectedStarTreeDocument.metrics.length, resultStarTreeDocument.metrics.length); + + for (int di = 0; di < resultStarTreeDocument.dimensions.length; di++) { + assertEquals(expectedStarTreeDocument.dimensions[di], resultStarTreeDocument.dimensions[di]); + } + + for (int mi = 0; mi < resultStarTreeDocument.metrics.length; mi++) { + if (expectedStarTreeDocument.metrics[mi] instanceof Long) { + assertEquals(((Long) expectedStarTreeDocument.metrics[mi]).doubleValue(), resultStarTreeDocument.metrics[mi]); + } else { + assertEquals(expectedStarTreeDocument.metrics[mi], resultStarTreeDocument.metrics[mi]); + } + } + } + } + + public static void validateFileFormats( + IndexInput dataIn, + IndexInput metaIn, + InMemoryTreeNode rootNode, + StarTreeMetadata expectedStarTreeMetadata + ) throws IOException { + long 
magicMarker = metaIn.readLong(); + assertEquals(COMPOSITE_FIELD_MARKER, magicMarker); + int version = metaIn.readVInt(); + assertEquals(VERSION_CURRENT, version); + + String compositeFieldName = metaIn.readString(); + assertEquals(expectedStarTreeMetadata.getStarTreeFieldName(), compositeFieldName); + CompositeMappedFieldType.CompositeFieldType compositeFieldType = CompositeMappedFieldType.CompositeFieldType.fromName( + metaIn.readString() + ); + assertEquals(STAR_TREE, compositeFieldType); + StarTreeMetadata resultStarTreeMetadata = new StarTreeMetadata(metaIn, compositeFieldName, compositeFieldType, version); + assertStarTreeMetadata(expectedStarTreeMetadata, resultStarTreeMetadata); + + IndexInput starTreeIndexInput = dataIn.slice( + "star-tree data slice for respective star-tree fields", + resultStarTreeMetadata.getDataStartFilePointer(), + resultStarTreeMetadata.getDataLength() + ); + + StarTreeNode starTreeNode = StarTreeFactory.createStarTree(starTreeIndexInput, resultStarTreeMetadata); + Queue expectedTreeNodeQueue = new ArrayDeque<>(); + Queue resultTreeNodeQueue = new ArrayDeque<>(); + + expectedTreeNodeQueue.add(starTreeNode); + resultTreeNodeQueue.add(rootNode); + + while ((starTreeNode = expectedTreeNodeQueue.poll()) != null && (rootNode = resultTreeNodeQueue.poll()) != null) { + + // verify the star node + assertStarTreeNode(starTreeNode, rootNode); + + Iterator expectedChildrenIterator = starTreeNode.getChildrenIterator(); + + List sortedChildren = new ArrayList<>(); + if (rootNode.getChildren() != null) { + sortedChildren = new ArrayList<>(rootNode.getChildren().values()); + } + + if (starTreeNode.getChildDimensionId() != -1) { + assertFalse(sortedChildren.isEmpty()); + int childCount = 0; + boolean childStarNodeAsserted = false; + while (expectedChildrenIterator.hasNext()) { + StarTreeNode child = expectedChildrenIterator.next(); + InMemoryTreeNode resultChildNode = null; + if (!childStarNodeAsserted && rootNode.getChildStarNode() != null) { + // check if star tree node exists + resultChildNode = rootNode.getChildStarNode(); + assertNotNull(child); + assertNotNull(starTreeNode.getChildStarNode()); + assertStarTreeNode(child, resultChildNode); + childStarNodeAsserted = true; + } else { + resultChildNode = sortedChildren.get(childCount); + assertNotNull(child); + assertNotNull(resultChildNode); + if (child.getStarTreeNodeType() != StarTreeNodeType.NULL.getValue()) { + assertNotNull(starTreeNode.getChildForDimensionValue(child.getDimensionValue())); + } else { + assertNull(starTreeNode.getChildForDimensionValue(child.getDimensionValue())); + } + assertStarTreeNode(child, resultChildNode); + assertNotEquals(child.getStarTreeNodeType(), StarTreeNodeType.STAR.getValue()); + childCount++; + } + + expectedTreeNodeQueue.add(child); + resultTreeNodeQueue.add(resultChildNode); + } + + assertEquals(childCount, rootNode.getChildren().size()); + } else { + assertTrue(rootNode.getChildren().isEmpty()); + } + } + + assertTrue(expectedTreeNodeQueue.isEmpty()); + assertTrue(resultTreeNodeQueue.isEmpty()); + + } + + public static void assertStarTreeNode(StarTreeNode starTreeNode, InMemoryTreeNode treeNode) throws IOException { + assertEquals(starTreeNode.getDimensionId(), treeNode.getDimensionId()); + assertEquals(starTreeNode.getDimensionValue(), treeNode.getDimensionValue()); + assertEquals(starTreeNode.getStartDocId(), treeNode.getStartDocId()); + assertEquals(starTreeNode.getEndDocId(), treeNode.getEndDocId()); + assertEquals(starTreeNode.getChildDimensionId(), 
treeNode.getChildDimensionId()); + assertEquals(starTreeNode.getAggregatedDocId(), treeNode.getAggregatedDocId()); + + if (starTreeNode.getChildDimensionId() != -1) { + assertFalse(starTreeNode.isLeaf()); + if (treeNode.getChildren() != null) { + assertEquals( + starTreeNode.getNumChildren(), + treeNode.getChildren().values().size() + (treeNode.getChildStarNode() != null ? 1 : 0) + ); + } + } else { + assertTrue(starTreeNode.isLeaf()); + } + + } + + public static void assertStarTreeMetadata(StarTreeMetadata expectedStarTreeMetadata, StarTreeMetadata resultStarTreeMetadata) { + + assertEquals(expectedStarTreeMetadata.getCompositeFieldName(), resultStarTreeMetadata.getCompositeFieldName()); + assertEquals(expectedStarTreeMetadata.getCompositeFieldType(), resultStarTreeMetadata.getCompositeFieldType()); + assertEquals(expectedStarTreeMetadata.getDimensionFields().size(), resultStarTreeMetadata.getDimensionFields().size()); + for (int i = 0; i < expectedStarTreeMetadata.getDimensionFields().size(); i++) { + assertEquals(expectedStarTreeMetadata.getDimensionFields().get(i), resultStarTreeMetadata.getDimensionFields().get(i)); + } + assertEquals(expectedStarTreeMetadata.getMetrics().size(), resultStarTreeMetadata.getMetrics().size()); + + for (int i = 0; i < expectedStarTreeMetadata.getMetrics().size(); i++) { + + Metric expectedMetric = expectedStarTreeMetadata.getMetrics().get(i); + Metric resultMetric = resultStarTreeMetadata.getMetrics().get(i); + assertEquals(expectedMetric.getField(), resultMetric.getField()); + List metricStats = new ArrayList<>(); + for (MetricStat metricStat : expectedMetric.getMetrics()) { + if (metricStat.isDerivedMetric()) { + continue; + } + metricStats.add(metricStat); + } + Metric expectedMetricWithoutDerivedMetrics = new Metric(expectedMetric.getField(), metricStats); + metricStats = new ArrayList<>(); + for (MetricStat metricStat : resultMetric.getMetrics()) { + if (metricStat.isDerivedMetric()) { + continue; + } + metricStats.add(metricStat); + } + Metric resultantMetricWithoutDerivedMetrics = new Metric(resultMetric.getField(), metricStats); + + // assert base metrics are in order in metadata + for (int j = 0; j < expectedMetricWithoutDerivedMetrics.getMetrics().size(); j++) { + assertEquals( + expectedMetricWithoutDerivedMetrics.getMetrics().get(j), + resultantMetricWithoutDerivedMetrics.getMetrics().get(j) + ); + } + + // assert all metrics ( including derived metrics are present ) + for (int j = 0; j < expectedMetric.getMetrics().size(); j++) { + assertTrue(resultMetric.getMetrics().contains(expectedMetric.getMetrics().get(j))); + } + + } + + assertEquals(expectedStarTreeMetadata.getSegmentAggregatedDocCount(), resultStarTreeMetadata.getSegmentAggregatedDocCount()); + assertEquals(expectedStarTreeMetadata.getStarTreeDocCount(), resultStarTreeMetadata.getStarTreeDocCount()); + assertEquals(expectedStarTreeMetadata.getMaxLeafDocs(), resultStarTreeMetadata.getMaxLeafDocs()); + assertEquals( + expectedStarTreeMetadata.getSkipStarNodeCreationInDims().size(), + resultStarTreeMetadata.getSkipStarNodeCreationInDims().size() + ); + for (String skipDimension : expectedStarTreeMetadata.getSkipStarNodeCreationInDims()) { + assertTrue(resultStarTreeMetadata.getSkipStarNodeCreationInDims().contains(skipDimension)); + } + assertEquals(expectedStarTreeMetadata.getStarTreeBuildMode(), resultStarTreeMetadata.getStarTreeBuildMode()); + assertEquals(expectedStarTreeMetadata.getDataStartFilePointer(), resultStarTreeMetadata.getDataStartFilePointer()); + 
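+ // the data section is written as fixed-width star-tree nodes (each node occupying a fixed 33-byte slot), so its total length should be an exact multiple of that node size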
assertEquals(expectedStarTreeMetadata.getDataLength(), resultStarTreeMetadata.getDataLength()); + assertEquals(0, (resultStarTreeMetadata.getDataLength()) % 33); + } + +} diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/AbstractValueAggregatorTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/AbstractValueAggregatorTests.java index 36f75834abba8..00f9fcf0e269f 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/AbstractValueAggregatorTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/AbstractValueAggregatorTests.java @@ -10,7 +10,8 @@ import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; -import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericType; +import org.opensearch.index.mapper.FieldValueConverter; +import org.opensearch.index.mapper.NumberFieldMapper; import org.opensearch.test.OpenSearchTestCase; import org.junit.Before; @@ -21,27 +22,27 @@ public abstract class AbstractValueAggregatorTests extends OpenSearchTestCase { private ValueAggregator aggregator; - protected StarTreeNumericType starTreeNumericType; + protected FieldValueConverter fieldValueConverter; - public AbstractValueAggregatorTests(StarTreeNumericType starTreeNumericType) { - this.starTreeNumericType = starTreeNumericType; + public AbstractValueAggregatorTests(FieldValueConverter fieldValueConverter) { + this.fieldValueConverter = fieldValueConverter; } @Before public void setup() { - aggregator = getValueAggregator(starTreeNumericType); + aggregator = getValueAggregator(fieldValueConverter); } @ParametersFactory public static Collection parameters() { List parameters = new ArrayList<>(); - for (StarTreeNumericType starTreeNumericType : StarTreeNumericType.values()) { - parameters.add(new Object[] { starTreeNumericType }); + for (FieldValueConverter fieldValueConverter : NumberFieldMapper.NumberType.values()) { + parameters.add(new Object[] { fieldValueConverter }); } return parameters; } - public abstract ValueAggregator getValueAggregator(StarTreeNumericType starTreeNumericType); + public abstract ValueAggregator getValueAggregator(FieldValueConverter fieldValueConverter); public void testGetInitialAggregatedValueForSegmentDocNullValue() { assertEquals(aggregator.getIdentityMetricValue(), aggregator.getInitialAggregatedValueForSegmentDocValue(null)); @@ -61,6 +62,10 @@ public void testGetInitialAggregatedNullValue() { public void testGetInitialAggregatedValueForSegmentDocValue() { long randomLong = randomLong(); - assertEquals(starTreeNumericType.getDoubleValue(randomLong), aggregator.getInitialAggregatedValueForSegmentDocValue(randomLong)); + if (aggregator instanceof CountValueAggregator) { + assertEquals(CountValueAggregator.DEFAULT_INITIAL_VALUE, aggregator.getInitialAggregatedValueForSegmentDocValue(randomLong())); + } else { + assertEquals(fieldValueConverter.toDoubleValue(randomLong), aggregator.getInitialAggregatedValueForSegmentDocValue(randomLong)); + } } } diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregatorTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregatorTests.java index b270c1b1bc26c..db22ee9af18e2 100644 --- 
a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregatorTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/CountValueAggregatorTests.java @@ -8,14 +8,14 @@ package org.opensearch.index.compositeindex.datacube.startree.aggregators; -import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericType; +import org.opensearch.index.mapper.FieldValueConverter; public class CountValueAggregatorTests extends AbstractValueAggregatorTests { private CountValueAggregator aggregator; - public CountValueAggregatorTests(StarTreeNumericType starTreeNumericType) { - super(starTreeNumericType); + public CountValueAggregatorTests(FieldValueConverter fieldValueConverter) { + super(fieldValueConverter); } public void testMergeAggregatedValueAndSegmentValue() { @@ -52,7 +52,7 @@ public void testIdentityMetricValue() { } @Override - public ValueAggregator getValueAggregator(StarTreeNumericType starTreeNumericType) { + public ValueAggregator getValueAggregator(FieldValueConverter fieldValueConverter) { aggregator = new CountValueAggregator(); return aggregator; } diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/DocCountAggregatorTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/DocCountAggregatorTests.java index 2765629aa5950..3960412d65868 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/DocCountAggregatorTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/DocCountAggregatorTests.java @@ -8,7 +8,7 @@ package org.opensearch.index.compositeindex.datacube.startree.aggregators; -import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericType; +import org.opensearch.index.mapper.FieldValueConverter; /** * Unit tests for {@link DocCountAggregator}. 
@@ -17,8 +17,8 @@ public class DocCountAggregatorTests extends AbstractValueAggregatorTests {
 
     private DocCountAggregator aggregator;
 
-    public DocCountAggregatorTests(StarTreeNumericType starTreeNumericType) {
-        super(starTreeNumericType);
+    public DocCountAggregatorTests(FieldValueConverter fieldValueConverter) {
+        super(fieldValueConverter);
     }
 
     public void testMergeAggregatedValueAndSegmentValue() {
@@ -52,7 +52,7 @@ public void testGetInitialAggregatedValue() {
         assertEquals(randomLong, (long) aggregator.getInitialAggregatedValue(randomLong));
     }
 
-    public void testToStarTreeNumericTypeValue() {
+    public void testToAggregatedValueType() {
         long randomLong = randomLong();
         assertEquals(randomLong, (long) aggregator.toAggregatedValueType(randomLong));
     }
@@ -62,7 +62,7 @@ public void testIdentityMetricValue() {
     }
 
     @Override
-    public ValueAggregator getValueAggregator(StarTreeNumericType starTreeNumericType) {
+    public ValueAggregator getValueAggregator(FieldValueConverter fieldValueConverter) {
         aggregator = new DocCountAggregator();
         return aggregator;
     }
diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MaxValueAggregatorTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MaxValueAggregatorTests.java
index b103416251c46..b713a47ed5f62 100644
--- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MaxValueAggregatorTests.java
+++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MaxValueAggregatorTests.java
@@ -9,27 +9,28 @@
 package org.opensearch.index.compositeindex.datacube.startree.aggregators;
 
 import org.apache.lucene.util.NumericUtils;
-import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericType;
+import org.opensearch.index.mapper.FieldValueConverter;
+import org.opensearch.index.mapper.NumberFieldMapper;
 
 public class MaxValueAggregatorTests extends AbstractValueAggregatorTests {
 
     private MaxValueAggregator aggregator;
 
-    public MaxValueAggregatorTests(StarTreeNumericType starTreeNumericType) {
-        super(starTreeNumericType);
+    public MaxValueAggregatorTests(FieldValueConverter fieldValueConverter) {
+        super(fieldValueConverter);
     }
 
     public void testMergeAggregatedValueAndSegmentValue() {
         Long randomLong = randomLong();
         double randomDouble = randomDouble();
         assertEquals(
-            Math.max(starTreeNumericType.getDoubleValue(randomLong), randomDouble),
+            Math.max(fieldValueConverter.toDoubleValue(randomLong), randomDouble),
             aggregator.mergeAggregatedValueAndSegmentValue(randomDouble, randomLong),
             0.0
         );
-        assertEquals(starTreeNumericType.getDoubleValue(randomLong), aggregator.mergeAggregatedValueAndSegmentValue(null, randomLong), 0.0);
+        assertEquals(fieldValueConverter.toDoubleValue(randomLong), aggregator.mergeAggregatedValueAndSegmentValue(null, randomLong), 0.0);
         assertEquals(randomDouble, aggregator.mergeAggregatedValueAndSegmentValue(randomDouble, null), 0.0);
-        assertEquals(Math.max(2.0, starTreeNumericType.getDoubleValue(3L)), aggregator.mergeAggregatedValueAndSegmentValue(2.0, 3L), 0.0);
+        assertEquals(Math.max(2.0, fieldValueConverter.toDoubleValue(3L)), aggregator.mergeAggregatedValueAndSegmentValue(2.0, 3L), 0.0);
     }
 
     public void testMergeAggregatedValues() {
@@ -46,7 +47,7 @@ public void testGetInitialAggregatedValue() {
     }
 
     public void testToAggregatedValueType() {
-        MaxValueAggregator aggregator = new MaxValueAggregator(StarTreeNumericType.DOUBLE);
+        MaxValueAggregator aggregator =
new MaxValueAggregator(NumberFieldMapper.NumberType.DOUBLE); long randomLong = randomLong(); assertEquals(NumericUtils.sortableLongToDouble(randomLong), aggregator.toAggregatedValueType(randomLong), 0.0); } @@ -56,8 +57,8 @@ public void testIdentityMetricValue() { } @Override - public ValueAggregator getValueAggregator(StarTreeNumericType starTreeNumericType) { - aggregator = new MaxValueAggregator(starTreeNumericType); + public ValueAggregator getValueAggregator(FieldValueConverter fieldValueConverter) { + aggregator = new MaxValueAggregator(fieldValueConverter); return aggregator; } diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MetricAggregatorInfoTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MetricAggregatorInfoTests.java index 62671ffa03b82..5588cc1b366c9 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MetricAggregatorInfoTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MetricAggregatorInfoTests.java @@ -9,7 +9,7 @@ package org.opensearch.index.compositeindex.datacube.startree.aggregators; import org.opensearch.index.compositeindex.datacube.MetricStat; -import org.opensearch.index.fielddata.IndexNumericFieldData; +import org.opensearch.index.mapper.NumberFieldMapper; import org.opensearch.test.OpenSearchTestCase; public class MetricAggregatorInfoTests extends OpenSearchTestCase { @@ -19,7 +19,7 @@ public void testConstructor() { MetricStat.SUM, "column1", "star_tree_field", - IndexNumericFieldData.NumericType.DOUBLE + NumberFieldMapper.NumberType.DOUBLE ); assertEquals(MetricStat.SUM, pair.getMetricStat()); assertEquals("column1", pair.getField()); @@ -30,7 +30,7 @@ public void testCountStarConstructor() { MetricStat.VALUE_COUNT, "anything", "star_tree_field", - IndexNumericFieldData.NumericType.DOUBLE + NumberFieldMapper.NumberType.DOUBLE ); assertEquals(MetricStat.VALUE_COUNT, pair.getMetricStat()); assertEquals("anything", pair.getField()); @@ -41,7 +41,7 @@ public void testToFieldName() { MetricStat.SUM, "column2", "star_tree_field", - IndexNumericFieldData.NumericType.DOUBLE + NumberFieldMapper.NumberType.DOUBLE ); assertEquals("star_tree_field_column2_sum", pair.toFieldName()); } @@ -51,23 +51,20 @@ public void testEquals() { MetricStat.SUM, "column1", "star_tree_field", - IndexNumericFieldData.NumericType.DOUBLE + NumberFieldMapper.NumberType.DOUBLE ); MetricAggregatorInfo pair2 = new MetricAggregatorInfo( MetricStat.SUM, "column1", "star_tree_field", - IndexNumericFieldData.NumericType.DOUBLE + NumberFieldMapper.NumberType.DOUBLE ); assertEquals(pair1, pair2); assertNotEquals( pair1, - new MetricAggregatorInfo(MetricStat.VALUE_COUNT, "column1", "star_tree_field", IndexNumericFieldData.NumericType.DOUBLE) - ); - assertNotEquals( - pair1, - new MetricAggregatorInfo(MetricStat.SUM, "column2", "star_tree_field", IndexNumericFieldData.NumericType.DOUBLE) + new MetricAggregatorInfo(MetricStat.VALUE_COUNT, "column1", "star_tree_field", NumberFieldMapper.NumberType.DOUBLE) ); + assertNotEquals(pair1, new MetricAggregatorInfo(MetricStat.SUM, "column2", "star_tree_field", NumberFieldMapper.NumberType.DOUBLE)); } public void testHashCode() { @@ -75,13 +72,13 @@ public void testHashCode() { MetricStat.SUM, "column1", "star_tree_field", - IndexNumericFieldData.NumericType.DOUBLE + NumberFieldMapper.NumberType.DOUBLE ); MetricAggregatorInfo pair2 = new MetricAggregatorInfo( 
MetricStat.SUM, "column1", "star_tree_field", - IndexNumericFieldData.NumericType.DOUBLE + NumberFieldMapper.NumberType.DOUBLE ); assertEquals(pair1.hashCode(), pair2.hashCode()); } @@ -91,19 +88,19 @@ public void testCompareTo() { MetricStat.SUM, "column1", "star_tree_field", - IndexNumericFieldData.NumericType.DOUBLE + NumberFieldMapper.NumberType.DOUBLE ); MetricAggregatorInfo pair2 = new MetricAggregatorInfo( MetricStat.SUM, "column2", "star_tree_field", - IndexNumericFieldData.NumericType.DOUBLE + NumberFieldMapper.NumberType.DOUBLE ); MetricAggregatorInfo pair3 = new MetricAggregatorInfo( MetricStat.VALUE_COUNT, "column1", "star_tree_field", - IndexNumericFieldData.NumericType.DOUBLE + NumberFieldMapper.NumberType.DOUBLE ); assertTrue(pair1.compareTo(pair2) < 0); assertTrue(pair2.compareTo(pair1) > 0); diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MinValueAggregatorTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MinValueAggregatorTests.java index 013c60d8a1b91..ec406d71ff633 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MinValueAggregatorTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MinValueAggregatorTests.java @@ -9,26 +9,27 @@ package org.opensearch.index.compositeindex.datacube.startree.aggregators; import org.apache.lucene.util.NumericUtils; -import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericType; +import org.opensearch.index.mapper.FieldValueConverter; +import org.opensearch.index.mapper.NumberFieldMapper; public class MinValueAggregatorTests extends AbstractValueAggregatorTests { private MinValueAggregator aggregator; - public MinValueAggregatorTests(StarTreeNumericType starTreeNumericType) { - super(starTreeNumericType); + public MinValueAggregatorTests(FieldValueConverter fieldValueConverter) { + super(fieldValueConverter); } public void testMergeAggregatedValueAndSegmentValue() { Long randomLong = randomLong(); double randomDouble = randomDouble(); assertEquals( - Math.min(starTreeNumericType.getDoubleValue(randomLong), randomDouble), + Math.min(fieldValueConverter.toDoubleValue(randomLong), randomDouble), aggregator.mergeAggregatedValueAndSegmentValue(randomDouble, randomLong), 0.0 ); - assertEquals(starTreeNumericType.getDoubleValue(randomLong), aggregator.mergeAggregatedValueAndSegmentValue(null, randomLong), 0.0); + assertEquals(fieldValueConverter.toDoubleValue(randomLong), aggregator.mergeAggregatedValueAndSegmentValue(null, randomLong), 0.0); assertEquals(randomDouble, aggregator.mergeAggregatedValueAndSegmentValue(randomDouble, null), 0.0); - assertEquals(Math.min(2.0, starTreeNumericType.getDoubleValue(3L)), aggregator.mergeAggregatedValueAndSegmentValue(2.0, 3L), 0.0); + assertEquals(Math.min(2.0, fieldValueConverter.toDoubleValue(3L)), aggregator.mergeAggregatedValueAndSegmentValue(2.0, 3L), 0.0); } public void testMergeAggregatedValues() { @@ -45,7 +46,7 @@ public void testGetInitialAggregatedValue() { } public void testToAggregatedValueType() { - MinValueAggregator aggregator = new MinValueAggregator(StarTreeNumericType.DOUBLE); + MinValueAggregator aggregator = new MinValueAggregator(NumberFieldMapper.NumberType.DOUBLE); long randomLong = randomLong(); assertEquals(NumericUtils.sortableLongToDouble(randomLong), aggregator.toAggregatedValueType(randomLong), 0.0); } @@ -55,8 +56,8 @@ public void 
testIdentityMetricValue() { } @Override - public ValueAggregator getValueAggregator(StarTreeNumericType starTreeNumericType) { - aggregator = new MinValueAggregator(starTreeNumericType); + public ValueAggregator getValueAggregator(FieldValueConverter fieldValueConverter) { + aggregator = new MinValueAggregator(fieldValueConverter); return aggregator; } diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/StaticValueAggregatorTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/StaticValueAggregatorTests.java index 487bd54602503..2ba29a1165dc8 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/StaticValueAggregatorTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/StaticValueAggregatorTests.java @@ -9,7 +9,7 @@ package org.opensearch.index.compositeindex.datacube.startree.aggregators; import org.apache.lucene.util.NumericUtils; -import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericType; +import org.opensearch.index.mapper.NumberFieldMapper; import org.opensearch.search.aggregations.metrics.CompensatedSum; import org.opensearch.test.OpenSearchTestCase; @@ -39,7 +39,7 @@ public void testKahanSummation() { private static double getAggregatedValue(double[] numbers) { // explicitly took double to test for most precision // hard to run similar tests for different data types dynamically as inputs and precision vary - SumValueAggregator aggregator = new SumValueAggregator(StarTreeNumericType.DOUBLE); + SumValueAggregator aggregator = new SumValueAggregator(NumberFieldMapper.NumberType.DOUBLE); double aggregatedValue = aggregator.getInitialAggregatedValueForSegmentDocValue(NumericUtils.doubleToSortableLong(numbers[0])); aggregatedValue = aggregator.mergeAggregatedValueAndSegmentValue(aggregatedValue, NumericUtils.doubleToSortableLong(numbers[1])); aggregatedValue = aggregator.mergeAggregatedValueAndSegmentValue(aggregatedValue, NumericUtils.doubleToSortableLong(numbers[2])); @@ -65,7 +65,7 @@ private double normalSum(double[] numbers) { public void testMaxAggregatorExtremeValues() { double[] numbers = { Double.MAX_VALUE, Double.MIN_VALUE, 0.0, Double.MAX_VALUE + 1 }; double expected = Double.MAX_VALUE + 1; - MaxValueAggregator aggregator = new MaxValueAggregator(StarTreeNumericType.DOUBLE); + MaxValueAggregator aggregator = new MaxValueAggregator(NumberFieldMapper.NumberType.DOUBLE); double aggregatedValue = aggregator.getInitialAggregatedValueForSegmentDocValue(NumericUtils.doubleToSortableLong(numbers[0])); for (int i = 1; i < numbers.length; i++) { aggregatedValue = aggregator.mergeAggregatedValueAndSegmentValue( @@ -85,7 +85,7 @@ public void testMaxAggregatorExtremeValues_Infinity() { Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY }; double expected = Double.POSITIVE_INFINITY; - MaxValueAggregator aggregator = new MaxValueAggregator(StarTreeNumericType.DOUBLE); + MaxValueAggregator aggregator = new MaxValueAggregator(NumberFieldMapper.NumberType.DOUBLE); double aggregatedValue = aggregator.getInitialAggregatedValueForSegmentDocValue(NumericUtils.doubleToSortableLong(numbers[0])); for (int i = 1; i < numbers.length; i++) { aggregatedValue = aggregator.mergeAggregatedValueAndSegmentValue( @@ -99,7 +99,7 @@ public void testMaxAggregatorExtremeValues_Infinity() { public void testMinAggregatorExtremeValues() { double[] numbers = { Double.MAX_VALUE, 
Double.MIN_VALUE - 1, 0.0, Double.MAX_VALUE + 1 }; double expected = Double.MIN_VALUE - 1; - MinValueAggregator aggregator = new MinValueAggregator(StarTreeNumericType.DOUBLE); + MinValueAggregator aggregator = new MinValueAggregator(NumberFieldMapper.NumberType.DOUBLE); double aggregatedValue = aggregator.getInitialAggregatedValueForSegmentDocValue(NumericUtils.doubleToSortableLong(numbers[0])); for (int i = 1; i < numbers.length; i++) { aggregatedValue = aggregator.mergeAggregatedValueAndSegmentValue( @@ -119,7 +119,7 @@ public void testMinAggregatorExtremeValues_Infinity() { Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY }; double expected = Double.NEGATIVE_INFINITY; - MinValueAggregator aggregator = new MinValueAggregator(StarTreeNumericType.DOUBLE); + MinValueAggregator aggregator = new MinValueAggregator(NumberFieldMapper.NumberType.DOUBLE); double aggregatedValue = aggregator.getInitialAggregatedValueForSegmentDocValue(NumericUtils.doubleToSortableLong(numbers[0])); for (int i = 1; i < numbers.length; i++) { aggregatedValue = aggregator.mergeAggregatedValueAndSegmentValue( diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregatorTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregatorTests.java index 44c7f17a276b4..ef328fe7eafc2 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregatorTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/SumValueAggregatorTests.java @@ -8,19 +8,19 @@ package org.opensearch.index.compositeindex.datacube.startree.aggregators; -import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericType; +import org.opensearch.index.mapper.FieldValueConverter; public class SumValueAggregatorTests extends AbstractValueAggregatorTests { private SumValueAggregator aggregator; - public SumValueAggregatorTests(StarTreeNumericType starTreeNumericType) { - super(starTreeNumericType); + public SumValueAggregatorTests(FieldValueConverter fieldValueConverter) { + super(fieldValueConverter); } @Override - public ValueAggregator getValueAggregator(StarTreeNumericType starTreeNumericType) { - aggregator = new SumValueAggregator(starTreeNumericType); + public ValueAggregator getValueAggregator(FieldValueConverter fieldValueConverter) { + aggregator = new SumValueAggregator(fieldValueConverter); return aggregator; } @@ -29,7 +29,7 @@ public void testMergeAggregatedValueAndSegmentValue() { Long randomLong = randomLong(); aggregator.getInitialAggregatedValue(randomDouble); assertEquals( - randomDouble + starTreeNumericType.getDoubleValue(randomLong), + randomDouble + fieldValueConverter.toDoubleValue(randomLong), aggregator.mergeAggregatedValueAndSegmentValue(randomDouble, randomLong), 0.0 ); @@ -41,7 +41,7 @@ public void testMergeAggregatedValueAndSegmentValue_nullSegmentDocValue() { aggregator.getInitialAggregatedValue(randomDouble1); assertEquals(randomDouble1, aggregator.mergeAggregatedValueAndSegmentValue(randomDouble1, null), 0.0); assertEquals( - randomDouble1 + starTreeNumericType.getDoubleValue(randomLong), + randomDouble1 + fieldValueConverter.toDoubleValue(randomLong), aggregator.mergeAggregatedValueAndSegmentValue(randomDouble1, randomLong), 0.0 ); @@ -50,7 +50,7 @@ public void testMergeAggregatedValueAndSegmentValue_nullSegmentDocValue() { public void 
testMergeAggregatedValueAndSegmentValue_nullInitialDocValue() {
         Long randomLong = randomLong();
         aggregator.getInitialAggregatedValue(null);
-        assertEquals(starTreeNumericType.getDoubleValue(randomLong), aggregator.mergeAggregatedValueAndSegmentValue(null, randomLong), 0.0);
+        assertEquals(fieldValueConverter.toDoubleValue(randomLong), aggregator.mergeAggregatedValueAndSegmentValue(null, randomLong), 0.0);
     }
 
     public void testMergeAggregatedValues() {
diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregatorFactoryTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregatorFactoryTests.java
index 6572d75d5b738..96ab1b3a52e8b 100644
--- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregatorFactoryTests.java
+++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/ValueAggregatorFactoryTests.java
@@ -9,31 +9,31 @@
 package org.opensearch.index.compositeindex.datacube.startree.aggregators;
 
 import org.opensearch.index.compositeindex.datacube.MetricStat;
-import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericType;
+import org.opensearch.index.mapper.NumberFieldMapper;
 import org.opensearch.test.OpenSearchTestCase;
 
 public class ValueAggregatorFactoryTests extends OpenSearchTestCase {
 
     public void testGetValueAggregatorForSumType() {
-        ValueAggregator aggregator = ValueAggregatorFactory.getValueAggregator(MetricStat.SUM, StarTreeNumericType.LONG);
+        ValueAggregator aggregator = ValueAggregatorFactory.getValueAggregator(MetricStat.SUM, NumberFieldMapper.NumberType.LONG);
         assertNotNull(aggregator);
         assertEquals(SumValueAggregator.class, aggregator.getClass());
     }
 
     public void testGetValueAggregatorForMinType() {
-        ValueAggregator aggregator = ValueAggregatorFactory.getValueAggregator(MetricStat.MIN, StarTreeNumericType.LONG);
+        ValueAggregator aggregator = ValueAggregatorFactory.getValueAggregator(MetricStat.MIN, NumberFieldMapper.NumberType.LONG);
         assertNotNull(aggregator);
         assertEquals(MinValueAggregator.class, aggregator.getClass());
     }
 
     public void testGetValueAggregatorForMaxType() {
-        ValueAggregator aggregator = ValueAggregatorFactory.getValueAggregator(MetricStat.MAX, StarTreeNumericType.LONG);
+        ValueAggregator aggregator = ValueAggregatorFactory.getValueAggregator(MetricStat.MAX, NumberFieldMapper.NumberType.LONG);
         assertNotNull(aggregator);
         assertEquals(MaxValueAggregator.class, aggregator.getClass());
     }
 
     public void testGetValueAggregatorForCountType() {
-        ValueAggregator aggregator = ValueAggregatorFactory.getValueAggregator(MetricStat.VALUE_COUNT, StarTreeNumericType.LONG);
+        ValueAggregator aggregator = ValueAggregatorFactory.getValueAggregator(MetricStat.VALUE_COUNT, NumberFieldMapper.NumberType.LONG);
         assertNotNull(aggregator);
         assertEquals(CountValueAggregator.class, aggregator.getClass());
     }
@@ -41,7 +41,7 @@ public void testGetValueAggregatorForCountType() {
     public void testGetValueAggregatorForAvgType() {
         assertThrows(
             IllegalStateException.class,
-            () -> ValueAggregatorFactory.getValueAggregator(MetricStat.AVG, StarTreeNumericType.LONG)
+            () -> ValueAggregatorFactory.getValueAggregator(MetricStat.AVG, NumberFieldMapper.NumberType.LONG)
         );
     }
 
diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java
b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java index 33088e8ccbcb3..65adc43ea8bea 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java @@ -8,6 +8,7 @@ package org.opensearch.index.compositeindex.datacube.startree.builder; +import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.codecs.lucene99.Lucene99Codec; import org.apache.lucene.index.DocValues; @@ -15,8 +16,10 @@ import org.apache.lucene.index.EmptyDocValuesProducer; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.VectorEncoding; @@ -24,11 +27,18 @@ import org.apache.lucene.sandbox.document.HalfFloatPoint; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.Version; import org.opensearch.common.settings.Settings; -import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; +import org.opensearch.index.codec.composite.LuceneDocValuesConsumerFactory; +import org.opensearch.index.codec.composite.LuceneDocValuesProducerFactory; +import org.opensearch.index.codec.composite.composite99.Composite99Codec; +import org.opensearch.index.codec.composite.composite99.Composite99DocValuesFormat; +import org.opensearch.index.compositeindex.CompositeIndexConstants; import org.opensearch.index.compositeindex.datacube.Dimension; import org.opensearch.index.compositeindex.datacube.Metric; import org.opensearch.index.compositeindex.datacube.MetricStat; @@ -36,12 +46,16 @@ import org.opensearch.index.compositeindex.datacube.startree.StarTreeDocument; import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeTestUtils; +import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetadata; +import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues; import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode; import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNodeType; import org.opensearch.index.compositeindex.datacube.startree.utils.SequentialDocValuesIterator; import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils; import org.opensearch.index.mapper.ContentPath; import org.opensearch.index.mapper.DocumentMapper; +import org.opensearch.index.mapper.FieldValueConverter; import org.opensearch.index.mapper.Mapper; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.mapper.MappingLookup; @@ -65,8 +79,14 @@ import java.util.Queue; import java.util.Set; 
import java.util.UUID; - -import static org.opensearch.index.compositeindex.datacube.startree.builder.BaseStarTreeBuilder.NUM_SEGMENT_DOCS; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Supplier; + +import static org.opensearch.index.compositeindex.datacube.startree.StarTreeTestUtils.validateFileFormats; +import static org.opensearch.index.compositeindex.datacube.startree.fileformats.StarTreeWriter.VERSION_CURRENT; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.fullyQualifiedFieldNameForStarTreeDimensionsDocValues; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.fullyQualifiedFieldNameForStarTreeMetricsDocValues; +import static org.opensearch.index.mapper.CompositeMappedFieldType.CompositeFieldType.STAR_TREE; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -80,7 +100,12 @@ public abstract class AbstractStarTreeBuilderTests extends OpenSearchTestCase { protected StarTreeField compositeField; protected Map fieldProducerMap; protected SegmentWriteState writeState; - private BaseStarTreeBuilder builder; + protected BaseStarTreeBuilder builder; + protected IndexOutput dataOut; + protected IndexOutput metaOut; + protected DocValuesConsumer docValuesConsumer; + protected String dataFileName; + protected String metaFileName; @Before public void setup() throws IOException { @@ -107,7 +132,7 @@ public void setup() throws IOException { "test", dimensionsOrder, metrics, - new StarTreeFieldConfiguration(1, Set.of("field8"), StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP) + new StarTreeFieldConfiguration(1, Set.of("field8"), getBuildMode()) ); directory = newFSDirectory(createTempDir()); @@ -135,7 +160,21 @@ public void setup() throws IOException { ); fieldProducerMap.put(fields.get(i), docValuesProducer); } - writeState = getWriteState(5); + writeState = getWriteState(5, UUID.randomUUID().toString().substring(0, 16).getBytes(StandardCharsets.UTF_8)); + + dataFileName = IndexFileNames.segmentFileName( + writeState.segmentInfo.name, + writeState.segmentSuffix, + Composite99DocValuesFormat.DATA_EXTENSION + ); + dataOut = writeState.directory.createOutput(dataFileName, writeState.context); + + metaFileName = IndexFileNames.segmentFileName( + writeState.segmentInfo.name, + writeState.segmentSuffix, + Composite99DocValuesFormat.META_EXTENSION + ); + metaOut = writeState.directory.createOutput(metaFileName, writeState.context); mapperService = mock(MapperService.class); DocumentMapper documentMapper = mock(DocumentMapper.class); @@ -159,9 +198,89 @@ public void setup() throws IOException { null ); when(documentMapper.mappers()).thenReturn(fieldMappers); + docValuesConsumer = mock(DocValuesConsumer.class); } - private SegmentWriteState getWriteState(int numDocs) { + private SegmentReadState getReadState(int numDocs, List dimensionFields, List metrics) { + + int numMetrics = 0; + for (Metric metric : metrics) { + numMetrics += metric.getBaseMetrics().size(); + } + + FieldInfo[] fields = new FieldInfo[dimensionFields.size() + numMetrics]; + + int i = 0; + for (String dimension : dimensionFields) { + fields[i] = new FieldInfo( + fullyQualifiedFieldNameForStarTreeDimensionsDocValues(compositeField.getName(), dimension), + i, + false, + false, + true, + IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, + DocValuesType.SORTED_NUMERIC, + -1, + Collections.emptyMap(), + 0, + 0, + 0, + 0, + VectorEncoding.FLOAT32, + VectorSimilarityFunction.EUCLIDEAN, + false, 
+ false + ); + i++; + } + + for (Metric metric : metrics) { + for (MetricStat metricStat : metric.getBaseMetrics()) { + fields[i] = new FieldInfo( + fullyQualifiedFieldNameForStarTreeMetricsDocValues( + compositeField.getName(), + metric.getField(), + metricStat.getTypeName() + ), + i, + false, + false, + true, + IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, + DocValuesType.SORTED_NUMERIC, + -1, + Collections.emptyMap(), + 0, + 0, + 0, + 0, + VectorEncoding.FLOAT32, + VectorSimilarityFunction.EUCLIDEAN, + false, + false + ); + i++; + } + } + + SegmentInfo segmentInfo = new SegmentInfo( + directory, + Version.LATEST, + Version.LUCENE_9_11_0, + "test_segment", + numDocs, + false, + false, + new Lucene99Codec(), + new HashMap<>(), + writeState.segmentInfo.getId(), + new HashMap<>(), + null + ); + return new SegmentReadState(segmentInfo.dir, segmentInfo, new FieldInfos(fields), writeState.context); + } + + private SegmentWriteState getWriteState(int numDocs, byte[] id) { FieldInfos fieldInfos = new FieldInfos(fieldsInfo); SegmentInfo segmentInfo = new SegmentInfo( directory, @@ -173,7 +292,7 @@ private SegmentWriteState getWriteState(int numDocs) { false, new Lucene99Codec(), new HashMap<>(), - UUID.randomUUID().toString().substring(0, 16).getBytes(StandardCharsets.UTF_8), + id, new HashMap<>(), null ); @@ -181,6 +300,8 @@ private SegmentWriteState getWriteState(int numDocs) { } public abstract BaseStarTreeBuilder getStarTreeBuilder( + IndexOutput metaOut, + IndexOutput dataOut, StarTreeField starTreeField, SegmentWriteState segmentWriteState, MapperService mapperService @@ -233,7 +354,7 @@ public void test_sortAndAggregateStarTreeDocuments() throws IOException { SequentialDocValuesIterator[] dimsIterators = getDimensionIterators(segmentStarTreeDocuments); List metricsIterators = getMetricIterators(segmentStarTreeDocuments); - builder = getStarTreeBuilder(compositeField, writeState, mapperService); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateSegmentDocuments( dimsIterators, metricsIterators @@ -329,7 +450,7 @@ public void test_sortAndAggregateStarTreeDocuments_nullMetric() throws IOExcepti } SequentialDocValuesIterator[] dimsIterators = getDimensionIterators(segmentStarTreeDocuments); List metricsIterators = getMetricIterators(segmentStarTreeDocuments); - builder = getStarTreeBuilder(compositeField, writeState, mapperService); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateSegmentDocuments( dimsIterators, metricsIterators @@ -400,7 +521,7 @@ public void test_sortAndAggregateStarTreeDocuments_nullMetricField() throws IOEx } SequentialDocValuesIterator[] dimsIterators = getDimensionIterators(segmentStarTreeDocuments); List metricsIterators = getMetricIterators(segmentStarTreeDocuments); - builder = getStarTreeBuilder(compositeField, writeState, mapperService); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateSegmentDocuments( dimsIterators, metricsIterators @@ -422,30 +543,35 @@ public void test_sortAndAggregateStarTreeDocuments_nullMetricField() throws IOEx } } - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/14813") public void test_sortAndAggregateStarTreeDocuments_nullAndMinusOneInDimensionField() throws IOException { 
int noOfStarTreeDocuments = 5; StarTreeDocument[] starTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; // Setting second metric iterator as empty sorted numeric , indicating a metric field is null starTreeDocuments[0] = new StarTreeDocument( new Long[] { 2L, null, 3L, 4L }, - new Object[] { 12.0, null, randomDouble(), 8.0, 20.0 } + new Object[] { 12.0, null, randomDouble(), 8.0, 20.0, null } ); starTreeDocuments[1] = new StarTreeDocument( new Long[] { null, 4L, 2L, 1L }, - new Object[] { 10.0, null, randomDouble(), 12.0, 10.0 } + new Object[] { 10.0, null, randomDouble(), 12.0, 10.0, null } ); starTreeDocuments[2] = new StarTreeDocument( new Long[] { null, 4L, 2L, 1L }, - new Object[] { 14.0, null, randomDouble(), 6.0, 24.0 } + new Object[] { 14.0, null, randomDouble(), 6.0, 24.0, null } + ); + starTreeDocuments[3] = new StarTreeDocument( + new Long[] { 2L, null, 3L, 4L }, + new Object[] { 9.0, null, randomDouble(), 9.0, 12.0, 10L } + ); + starTreeDocuments[4] = new StarTreeDocument( + new Long[] { -1L, 4L, 2L, 1L }, + new Object[] { 11.0, null, randomDouble(), 8.0, 13.0, null } ); - starTreeDocuments[3] = new StarTreeDocument(new Long[] { 2L, null, 3L, 4L }, new Object[] { 9.0, null, randomDouble(), 9.0, 12.0 }); - starTreeDocuments[4] = new StarTreeDocument(new Long[] { -1L, 4L, 2L, 1L }, new Object[] { 11.0, null, randomDouble(), 8.0, 13.0 }); List inorderStarTreeDocuments = List.of( - new StarTreeDocument(new Long[] { 2L, null, 3L, 4L }, new Object[] { 21.0, 0.0, 2L }), - new StarTreeDocument(new Long[] { null, 4L, 2L, 1L }, new Object[] { 24.0, 0.0, 2L }), - new StarTreeDocument(new Long[] { -1L, 4L, 2L, 1L }, new Object[] { 11.0, 0.0, 1L }) + new StarTreeDocument(new Long[] { -1L, 4L, 2L, 1L }, new Object[] { 11.0, 0.0, 1L, 8.0, 13.0, 1L }), + new StarTreeDocument(new Long[] { 2L, null, 3L, 4L }, new Object[] { 21.0, 0.0, 2L, 8.0, 20.0, 11L }), + new StarTreeDocument(new Long[] { null, 4L, 2L, 1L }, new Object[] { 24.0, 0.0, 2L, 6.0, 24.0, 2L }) ); Iterator expectedStarTreeDocumentIterator = inorderStarTreeDocuments.iterator(); @@ -458,20 +584,22 @@ public void test_sortAndAggregateStarTreeDocuments_nullAndMinusOneInDimensionFie long metric3 = NumericUtils.doubleToSortableLong((Double) starTreeDocuments[i].metrics[2]); long metric4 = NumericUtils.doubleToSortableLong((Double) starTreeDocuments[i].metrics[3]); long metric5 = NumericUtils.doubleToSortableLong((Double) starTreeDocuments[i].metrics[4]); + Long metric6 = starTreeDocuments[i].metrics[5] != null ? 
(long) starTreeDocuments[i].metrics[5] : null; segmentStarTreeDocuments[i] = new StarTreeDocument( starTreeDocuments[i].dimensions, - new Object[] { metric1, metric2, metric3, metric4, metric5 } + new Object[] { metric1, metric2, metric3, metric4, metric5, metric6 } ); } SequentialDocValuesIterator[] dimsIterators = getDimensionIterators(segmentStarTreeDocuments); List metricsIterators = getMetricIterators(segmentStarTreeDocuments); - builder = getStarTreeBuilder(compositeField, writeState, mapperService); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateSegmentDocuments( dimsIterators, metricsIterators ); - for (StarTreeDocument resultStarTreeDocument : builder.getStarTreeDocuments()) { + while (segmentStarTreeDocumentIterator.hasNext()) { + StarTreeDocument resultStarTreeDocument = segmentStarTreeDocumentIterator.next(); StarTreeDocument expectedStarTreeDocument = expectedStarTreeDocumentIterator.next(); assertEquals(expectedStarTreeDocument.dimensions[0], resultStarTreeDocument.dimensions[0]); assertEquals(expectedStarTreeDocument.dimensions[1], resultStarTreeDocument.dimensions[1]); @@ -484,7 +612,10 @@ public void test_sortAndAggregateStarTreeDocuments_nullAndMinusOneInDimensionFie assertEquals(expectedStarTreeDocument.metrics[4], resultStarTreeDocument.metrics[4]); assertEquals(expectedStarTreeDocument.metrics[5], resultStarTreeDocument.metrics[5]); } - builder.build(segmentStarTreeDocumentIterator); + + assertFalse(expectedStarTreeDocumentIterator.hasNext()); + + builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); validateStarTree(builder.getRootNode(), 4, 1, builder.getStarTreeDocuments()); } @@ -542,7 +673,7 @@ public void test_sortAndAggregateStarTreeDocuments_nullDimensionsAndNullMetrics( } SequentialDocValuesIterator[] dimsIterators = getDimensionIterators(segmentStarTreeDocuments); List metricsIterators = getMetricIterators(segmentStarTreeDocuments); - builder = getStarTreeBuilder(compositeField, writeState, mapperService); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateSegmentDocuments( dimsIterators, metricsIterators @@ -562,7 +693,7 @@ public void test_sortAndAggregateStarTreeDocuments_nullDimensionsAndNullMetrics( assertEquals(expectedStarTreeDocument.metrics[4], resultStarTreeDocument.metrics[4]); assertEquals(expectedStarTreeDocument.metrics[5], resultStarTreeDocument.metrics[5]); } - builder.build(segmentStarTreeDocumentIterator); + builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); validateStarTree(builder.getRootNode(), 4, 1, builder.getStarTreeDocuments()); } @@ -619,7 +750,7 @@ public void test_sortAndAggregateStarTreeDocuments_nullDimensionsAndFewNullMetri } SequentialDocValuesIterator[] dimsIterators = getDimensionIterators(segmentStarTreeDocuments); List metricsIterators = getMetricIterators(segmentStarTreeDocuments); - builder = getStarTreeBuilder(compositeField, writeState, mapperService); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateSegmentDocuments( dimsIterators, metricsIterators @@ -639,7 +770,7 @@ public void test_sortAndAggregateStarTreeDocuments_nullDimensionsAndFewNullMetri assertEquals(expectedStarTreeDocument.metrics[4], 
resultStarTreeDocument.metrics[4]); assertEquals(expectedStarTreeDocument.metrics[5], resultStarTreeDocument.metrics[5]); } - builder.build(segmentStarTreeDocumentIterator); + builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); validateStarTree(builder.getRootNode(), 4, 1, builder.getStarTreeDocuments()); } @@ -691,7 +822,7 @@ public void test_sortAndAggregateStarTreeDocuments_emptyDimensions() throws IOEx } SequentialDocValuesIterator[] dimsIterators = getDimensionIterators(segmentStarTreeDocuments); List metricsIterators = getMetricIterators(segmentStarTreeDocuments); - builder = getStarTreeBuilder(compositeField, writeState, mapperService); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateSegmentDocuments( dimsIterators, metricsIterators @@ -760,7 +891,7 @@ public void test_sortAndAggregateStarTreeDocument_longMaxAndLongMinDimensions() SequentialDocValuesIterator[] dimsIterators = getDimensionIterators(segmentStarTreeDocuments); List metricsIterators = getMetricIterators(segmentStarTreeDocuments); - builder = getStarTreeBuilder(compositeField, writeState, mapperService); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateSegmentDocuments( dimsIterators, metricsIterators @@ -836,7 +967,7 @@ public void test_sortAndAggregateStarTreeDocument_DoubleMaxAndDoubleMinMetrics() SequentialDocValuesIterator[] dimsIterators = getDimensionIterators(segmentStarTreeDocuments); List metricsIterators = getMetricIterators(segmentStarTreeDocuments); - builder = getStarTreeBuilder(compositeField, writeState, mapperService); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateSegmentDocuments( dimsIterators, metricsIterators @@ -860,7 +991,7 @@ public void test_sortAndAggregateStarTreeDocument_DoubleMaxAndDoubleMinMetrics() } assertEquals(inorderStarTreeDocuments.size(), numOfAggregatedDocuments); - builder.build(segmentStarTreeDocumentIterator); + builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); validateStarTree(builder.getRootNode(), 3, 1, builder.getStarTreeDocuments()); } @@ -968,19 +1099,53 @@ public void test_build_halfFloatMetrics() throws IOException { SequentialDocValuesIterator[] dimsIterators = getDimensionIterators(segmentStarTreeDocuments); List metricsIterators = getMetricIterators(segmentStarTreeDocuments); - builder = getStarTreeBuilder(compositeField, writeState, mapperService); + this.docValuesConsumer = LuceneDocValuesConsumerFactory.getDocValuesConsumerForCompositeCodec( + writeState, + Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC, + Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite99DocValuesFormat.META_DOC_VALUES_CODEC, + Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION + ); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateSegmentDocuments( dimsIterators, metricsIterators ); - builder.build(segmentStarTreeDocumentIterator); + builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); List resultStarTreeDocuments = builder.getStarTreeDocuments(); assertEquals(7, resultStarTreeDocuments.size()); - Iterator 
expectedStarTreeDocumentIterator = getExpectedStarTreeDocumentIterator(); + Iterator expectedStarTreeDocumentIterator = getExpectedStarTreeDocumentIterator().iterator(); assertStarTreeDocuments(resultStarTreeDocuments, expectedStarTreeDocumentIterator); - builder.build(expectedStarTreeDocumentIterator); + + metaOut.close(); + dataOut.close(); + docValuesConsumer.close(); + + StarTreeMetadata starTreeMetadata = new StarTreeMetadata( + "test", + STAR_TREE, + mock(IndexInput.class), + VERSION_CURRENT, + builder.numStarTreeNodes, + getStarTreeDimensionNames(compositeField.getDimensionsOrder()), + compositeField.getMetrics(), + 2, + getExpectedStarTreeDocumentIterator().size(), + 1, + Set.of("field8"), + getBuildMode(), + 0, + 330 + ); + + validateStarTreeFileFormats( + builder.getRootNode(), + getExpectedStarTreeDocumentIterator().size(), + starTreeMetadata, + getExpectedStarTreeDocumentIterator() + ); } public void test_build_floatMetrics() throws IOException { @@ -1048,20 +1213,57 @@ public void test_build_floatMetrics() throws IOException { SequentialDocValuesIterator[] dimsIterators = getDimensionIterators(segmentStarTreeDocuments); List metricsIterators = getMetricIterators(segmentStarTreeDocuments); - builder = getStarTreeBuilder(compositeField, writeState, mapperService); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateSegmentDocuments( dimsIterators, metricsIterators ); - builder.build(segmentStarTreeDocumentIterator); + this.docValuesConsumer = LuceneDocValuesConsumerFactory.getDocValuesConsumerForCompositeCodec( + writeState, + Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC, + Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite99DocValuesFormat.META_DOC_VALUES_CODEC, + Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION + ); + builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); List resultStarTreeDocuments = builder.getStarTreeDocuments(); assertEquals(7, resultStarTreeDocuments.size()); - Iterator expectedStarTreeDocumentIterator = getExpectedStarTreeDocumentIterator(); + Iterator expectedStarTreeDocumentIterator = getExpectedStarTreeDocumentIterator().iterator(); assertStarTreeDocuments(resultStarTreeDocuments, expectedStarTreeDocumentIterator); + + metaOut.close(); + dataOut.close(); + docValuesConsumer.close(); + + StarTreeMetadata starTreeMetadata = new StarTreeMetadata( + "test", + STAR_TREE, + mock(IndexInput.class), + VERSION_CURRENT, + builder.numStarTreeNodes, + getStarTreeDimensionNames(compositeField.getDimensionsOrder()), + compositeField.getMetrics(), + 2, + getExpectedStarTreeDocumentIterator().size(), + 1, + Set.of("field8"), + getBuildMode(), + 0, + 330 + ); + + validateStarTreeFileFormats( + builder.getRootNode(), + getExpectedStarTreeDocumentIterator().size(), + starTreeMetadata, + getExpectedStarTreeDocumentIterator() + ); } + abstract StarTreeFieldConfiguration.StarTreeBuildMode getBuildMode(); + public void test_build_longMetrics() throws IOException { mapperService = mock(MapperService.class); @@ -1111,22 +1313,57 @@ public void test_build_longMetrics() throws IOException { SequentialDocValuesIterator[] dimsIterators = getDimensionIterators(segmentStarTreeDocuments); List metricsIterators = getMetricIterators(segmentStarTreeDocuments); - builder = getStarTreeBuilder(compositeField, writeState, mapperService); + this.docValuesConsumer = 
LuceneDocValuesConsumerFactory.getDocValuesConsumerForCompositeCodec( + writeState, + Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC, + Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite99DocValuesFormat.META_DOC_VALUES_CODEC, + Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION + ); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateSegmentDocuments( dimsIterators, metricsIterators ); - builder.build(segmentStarTreeDocumentIterator); + builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); List resultStarTreeDocuments = builder.getStarTreeDocuments(); assertEquals(7, resultStarTreeDocuments.size()); - Iterator expectedStarTreeDocumentIterator = getExpectedStarTreeDocumentIterator(); + Iterator expectedStarTreeDocumentIterator = getExpectedStarTreeDocumentIterator().iterator(); assertStarTreeDocuments(resultStarTreeDocuments, expectedStarTreeDocumentIterator); + + metaOut.close(); + dataOut.close(); + docValuesConsumer.close(); + + StarTreeMetadata starTreeMetadata = new StarTreeMetadata( + "test", + STAR_TREE, + mock(IndexInput.class), + VERSION_CURRENT, + builder.numStarTreeNodes, + getStarTreeDimensionNames(compositeField.getDimensionsOrder()), + compositeField.getMetrics(), + 2, + getExpectedStarTreeDocumentIterator().size(), + 1, + Set.of("field8"), + getBuildMode(), + 0, + 330 + ); + + validateStarTreeFileFormats( + builder.getRootNode(), + getExpectedStarTreeDocumentIterator().size(), + starTreeMetadata, + getExpectedStarTreeDocumentIterator() + ); } - private static Iterator getExpectedStarTreeDocumentIterator() { - List expectedStarTreeDocuments = List.of( + private static List getExpectedStarTreeDocumentIterator() { + return List.of( new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Object[] { 21.0, 14.0, 2L, 8.0, 20.0, 2L }), new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Object[] { 35.0, 34.0, 3L, 6.0, 24.0, 3L }), new StarTreeDocument(new Long[] { null, 4L, 2L, 1L }, new Object[] { 35.0, 34.0, 3L, 6.0, 24.0, 3L }), @@ -1135,7 +1372,220 @@ private static Iterator getExpectedStarTreeDocumentIterator() new StarTreeDocument(new Long[] { null, 4L, null, 4L }, new Object[] { 21.0, 14.0, 2L, 8.0, 20.0, 2L }), new StarTreeDocument(new Long[] { null, 4L, null, null }, new Object[] { 56.0, 48.0, 5L, 6.0, 24.0, 5L }) ); - return expectedStarTreeDocuments.iterator(); + } + + public void test_build_multipleStarTrees() throws IOException { + + int noOfStarTreeDocuments = 5; + StarTreeDocument[] starTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; + + starTreeDocuments[0] = new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Double[] { 12.0, 10.0, randomDouble(), 8.0, 20.0 }); + starTreeDocuments[1] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 10.0, 6.0, randomDouble(), 12.0, 10.0 }); + starTreeDocuments[2] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 14.0, 12.0, randomDouble(), 6.0, 24.0 }); + starTreeDocuments[3] = new StarTreeDocument(new Long[] { 2L, 4L, 3L, 4L }, new Double[] { 9.0, 4.0, randomDouble(), 9.0, 12.0 }); + starTreeDocuments[4] = new StarTreeDocument(new Long[] { 3L, 4L, 2L, 1L }, new Double[] { 11.0, 16.0, randomDouble(), 8.0, 13.0 }); + + StarTreeDocument[] segmentStarTreeDocuments = new StarTreeDocument[noOfStarTreeDocuments]; + for (int i = 0; i < noOfStarTreeDocuments; i++) { + long metric1 = NumericUtils.doubleToSortableLong((Double) 
starTreeDocuments[i].metrics[0]); + long metric2 = NumericUtils.doubleToSortableLong((Double) starTreeDocuments[i].metrics[1]); + long metric3 = NumericUtils.doubleToSortableLong((Double) starTreeDocuments[i].metrics[2]); + long metric4 = NumericUtils.doubleToSortableLong((Double) starTreeDocuments[i].metrics[3]); + long metric5 = NumericUtils.doubleToSortableLong((Double) starTreeDocuments[i].metrics[4]); + segmentStarTreeDocuments[i] = new StarTreeDocument( + starTreeDocuments[i].dimensions, + new Long[] { metric1, metric2, metric3, metric4, metric5 } + ); + } + + SequentialDocValuesIterator[] dimsIterators = getDimensionIterators(segmentStarTreeDocuments); + List metricsIterators = getMetricIterators(segmentStarTreeDocuments); + + metrics = List.of( + new Metric("field2", List.of(MetricStat.SUM)), + new Metric("field4", List.of(MetricStat.SUM)), + new Metric("field6", List.of(MetricStat.VALUE_COUNT)), + new Metric("field9", List.of(MetricStat.MIN)), + new Metric("field10", List.of(MetricStat.MAX)) + ); + + compositeField = new StarTreeField( + "test", + dimensionsOrder, + metrics, + new StarTreeFieldConfiguration(1, Set.of("field8"), getBuildMode()) + ); + + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); + Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateSegmentDocuments( + dimsIterators, + metricsIterators + ); + builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); + + List resultStarTreeDocuments = builder.getStarTreeDocuments(); + assertEquals(7, resultStarTreeDocuments.size()); + + Iterator expectedStarTreeDocumentIterator = getExpectedStarTreeDocumentIterator().iterator(); + assertStarTreeDocuments(resultStarTreeDocuments, expectedStarTreeDocumentIterator); + builder.close(); + + // building another tree in the same file + fields = List.of("fieldC", "fieldB", "fieldL", "fieldI"); + + dimensionsOrder = List.of(new NumericDimension("fieldC"), new NumericDimension("fieldB"), new NumericDimension("fieldL")); + metrics = List.of(new Metric("fieldI", List.of(MetricStat.SUM))); + + DocValuesProducer docValuesProducer = mock(DocValuesProducer.class); + + compositeField = new StarTreeField("test", dimensionsOrder, metrics, new StarTreeFieldConfiguration(1, Set.of(), getBuildMode())); + SegmentInfo segmentInfo = new SegmentInfo( + directory, + Version.LATEST, + Version.LUCENE_9_11_0, + "test_segment", + 7, + false, + false, + new Lucene99Codec(), + new HashMap<>(), + UUID.randomUUID().toString().substring(0, 16).getBytes(StandardCharsets.UTF_8), + new HashMap<>(), + null + ); + + fieldsInfo = new FieldInfo[fields.size()]; + fieldProducerMap = new HashMap<>(); + for (int i = 0; i < fieldsInfo.length; i++) { + fieldsInfo[i] = new FieldInfo( + fields.get(i), + i, + false, + false, + true, + IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, + DocValuesType.SORTED_NUMERIC, + -1, + Collections.emptyMap(), + 0, + 0, + 0, + 0, + VectorEncoding.FLOAT32, + VectorSimilarityFunction.EUCLIDEAN, + false, + false + ); + fieldProducerMap.put(fields.get(i), docValuesProducer); + } + FieldInfos fieldInfos = new FieldInfos(fieldsInfo); + writeState = new SegmentWriteState(InfoStream.getDefault(), segmentInfo.dir, segmentInfo, fieldInfos, null, newIOContext(random())); + + mapperService = mock(MapperService.class); + DocumentMapper documentMapper = mock(DocumentMapper.class); + when(mapperService.documentMapper()).thenReturn(documentMapper); + Settings settings = 
Settings.builder().put(settings(org.opensearch.Version.CURRENT).build()).build(); + NumberFieldMapper numberFieldMapper1 = new NumberFieldMapper.Builder("fieldI", NumberFieldMapper.NumberType.DOUBLE, false, true) + .build(new Mapper.BuilderContext(settings, new ContentPath())); + MappingLookup fieldMappers = new MappingLookup( + Set.of(numberFieldMapper1), + Collections.emptyList(), + Collections.emptyList(), + 0, + null + ); + when(documentMapper.mappers()).thenReturn(fieldMappers); + + InMemoryTreeNode rootNode1 = builder.getRootNode(); + + int noOfStarTreeDocuments2 = 7; + StarTreeDocument[] starTreeDocuments2 = new StarTreeDocument[noOfStarTreeDocuments2]; + starTreeDocuments2[0] = new StarTreeDocument(new Long[] { 1L, 11L, 21L }, new Double[] { 400.0 }); + starTreeDocuments2[1] = new StarTreeDocument(new Long[] { 1L, 12L, 22L }, new Double[] { 200.0 }); + starTreeDocuments2[2] = new StarTreeDocument(new Long[] { 2L, 13L, 23L }, new Double[] { 300.0 }); + starTreeDocuments2[3] = new StarTreeDocument(new Long[] { 2L, 13L, 21L }, new Double[] { 100.0 }); + starTreeDocuments2[4] = new StarTreeDocument(new Long[] { 3L, 11L, 21L }, new Double[] { 600.0 }); + starTreeDocuments2[5] = new StarTreeDocument(new Long[] { 3L, 12L, 23L }, new Double[] { 200.0 }); + starTreeDocuments2[6] = new StarTreeDocument(new Long[] { 3L, 12L, 21L }, new Double[] { 400.0 }); + + StarTreeDocument[] segmentStarTreeDocuments2 = new StarTreeDocument[noOfStarTreeDocuments2]; + for (int i = 0; i < noOfStarTreeDocuments2; i++) { + long metric1 = NumericUtils.doubleToSortableLong((Double) starTreeDocuments2[i].metrics[0]); + segmentStarTreeDocuments2[i] = new StarTreeDocument(starTreeDocuments2[i].dimensions, new Long[] { metric1 }); + } + + SequentialDocValuesIterator[] dimsIterators2 = getDimensionIterators(segmentStarTreeDocuments2); + List metricsIterators2 = getMetricIterators(segmentStarTreeDocuments2); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); + Iterator segmentStarTreeDocumentIterator2 = builder.sortAndAggregateSegmentDocuments( + dimsIterators2, + metricsIterators2 + ); + builder.build(segmentStarTreeDocumentIterator2, new AtomicInteger(), mock(DocValuesConsumer.class)); + InMemoryTreeNode rootNode2 = builder.getRootNode(); + + metaOut.close(); + dataOut.close(); + + StarTreeMetadata starTreeMetadata = new StarTreeMetadata( + "test", + STAR_TREE, + mock(IndexInput.class), + VERSION_CURRENT, + builder.numStarTreeNodes, + List.of("field1", "field3", "field5", "field8"), + List.of( + new Metric("field2", List.of(MetricStat.SUM)), + new Metric("field4", List.of(MetricStat.SUM)), + new Metric("field6", List.of(MetricStat.VALUE_COUNT)), + new Metric("field9", List.of(MetricStat.MIN)), + new Metric("field10", List.of(MetricStat.MAX)) + ), + 2, + getExpectedStarTreeDocumentIterator().size(), + 1, + Set.of("field8"), + getBuildMode(), + 0, + 330 + ); + + StarTreeMetadata starTreeMetadata2 = new StarTreeMetadata( + "test", + STAR_TREE, + mock(IndexInput.class), + VERSION_CURRENT, + builder.numStarTreeNodes, + List.of("fieldC", "fieldB", "fieldL"), + List.of(new Metric("fieldI", List.of(MetricStat.SUM))), + 7, + 27, + 1, + Set.of(), + getBuildMode(), + 330, + 1287 + ); + + List totalDimensionFields = new ArrayList<>(); + totalDimensionFields.addAll(starTreeMetadata.getDimensionFields()); + totalDimensionFields.addAll(starTreeMetadata2.getDimensionFields()); + + List metrics = new ArrayList<>(); + metrics.addAll(starTreeMetadata.getMetrics()); + 
metrics.addAll(starTreeMetadata2.getMetrics()); + + SegmentReadState readState = getReadState(3, totalDimensionFields, metrics); + + IndexInput dataIn = readState.directory.openInput(dataFileName, IOContext.DEFAULT); + IndexInput metaIn = readState.directory.openInput(metaFileName, IOContext.DEFAULT); + + validateFileFormats(dataIn, metaIn, rootNode1, starTreeMetadata); + validateFileFormats(dataIn, metaIn, rootNode2, starTreeMetadata2); + + dataIn.close(); + metaIn.close(); + } public void test_build() throws IOException { @@ -1180,18 +1630,53 @@ public void test_build() throws IOException { SequentialDocValuesIterator[] dimsIterators = getDimensionIterators(segmentStarTreeDocuments); List metricsIterators = getMetricIterators(segmentStarTreeDocuments); - builder = getStarTreeBuilder(compositeField, writeState, mapperService); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateSegmentDocuments( dimsIterators, metricsIterators ); - builder.build(segmentStarTreeDocumentIterator); + docValuesConsumer = LuceneDocValuesConsumerFactory.getDocValuesConsumerForCompositeCodec( + writeState, + Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC, + Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite99DocValuesFormat.META_DOC_VALUES_CODEC, + Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION + ); + builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); List resultStarTreeDocuments = builder.getStarTreeDocuments(); assertEquals(7, resultStarTreeDocuments.size()); - Iterator expectedStarTreeDocumentIterator = getExpectedStarTreeDocumentIterator(); + Iterator expectedStarTreeDocumentIterator = getExpectedStarTreeDocumentIterator().iterator(); assertStarTreeDocuments(resultStarTreeDocuments, expectedStarTreeDocumentIterator); + + metaOut.close(); + dataOut.close(); + docValuesConsumer.close(); + + StarTreeMetadata starTreeMetadata = new StarTreeMetadata( + "test", + STAR_TREE, + mock(IndexInput.class), + VERSION_CURRENT, + builder.numStarTreeNodes, + getStarTreeDimensionNames(compositeField.getDimensionsOrder()), + compositeField.getMetrics(), + 2, + getExpectedStarTreeDocumentIterator().size(), + 1, + Set.of("field8"), + getBuildMode(), + 0, + 330 + ); + + validateStarTreeFileFormats( + builder.getRootNode(), + getExpectedStarTreeDocumentIterator().size(), + starTreeMetadata, + getExpectedStarTreeDocumentIterator() + ); } private void assertStarTreeDocuments( @@ -1224,12 +1709,7 @@ public void test_build_starTreeDataset() throws IOException { DocValuesProducer docValuesProducer = mock(DocValuesProducer.class); - compositeField = new StarTreeField( - "test", - dimensionsOrder, - metrics, - new StarTreeFieldConfiguration(1, Set.of(), StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP) - ); + compositeField = new StarTreeField("test", dimensionsOrder, metrics, new StarTreeFieldConfiguration(1, Set.of(), getBuildMode())); SegmentInfo segmentInfo = new SegmentInfo( directory, Version.LATEST, @@ -1271,7 +1751,13 @@ public void test_build_starTreeDataset() throws IOException { } FieldInfos fieldInfos = new FieldInfos(fieldsInfo); writeState = new SegmentWriteState(InfoStream.getDefault(), segmentInfo.dir, segmentInfo, fieldInfos, null, newIOContext(random())); - + this.docValuesConsumer = LuceneDocValuesConsumerFactory.getDocValuesConsumerForCompositeCodec( + writeState, + Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC, + 
Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite99DocValuesFormat.META_DOC_VALUES_CODEC, + Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION + ); mapperService = mock(MapperService.class); DocumentMapper documentMapper = mock(DocumentMapper.class); when(mapperService.documentMapper()).thenReturn(documentMapper); @@ -1305,18 +1791,18 @@ public void test_build_starTreeDataset() throws IOException { SequentialDocValuesIterator[] dimsIterators = getDimensionIterators(segmentStarTreeDocuments); List metricsIterators = getMetricIterators(segmentStarTreeDocuments); - builder = getStarTreeBuilder(compositeField, writeState, mapperService); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); Iterator segmentStarTreeDocumentIterator = builder.sortAndAggregateSegmentDocuments( dimsIterators, metricsIterators ); - builder.build(segmentStarTreeDocumentIterator); + builder.build(segmentStarTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); List resultStarTreeDocuments = builder.getStarTreeDocuments(); - Iterator expectedStarTreeDocumentIterator = expectedStarTreeDocuments(); + Iterator expectedStarTreeDocumentIterator = expectedStarTreeDocuments().iterator(); Iterator resultStarTreeDocumentIterator = resultStarTreeDocuments.iterator(); Map> dimValueToDocIdMap = new HashMap<>(); - builder.rootNode.nodeType = StarTreeNodeType.STAR.getValue(); + builder.rootNode.setNodeType(StarTreeNodeType.STAR.getValue()); traverseStarTree(builder.rootNode, dimValueToDocIdMap, true); Map> expectedDimToValueMap = getExpectedDimToValueMap(); @@ -1342,7 +1828,89 @@ public void test_build_starTreeDataset() throws IOException { assertEquals(expectedStarTreeDocument.metrics[0], resultStarTreeDocument.metrics[0]); assertEquals(expectedStarTreeDocument.metrics[1], resultStarTreeDocument.metrics[1]); } + + metaOut.close(); + dataOut.close(); + docValuesConsumer.close(); + validateStarTree(builder.getRootNode(), 3, 1, builder.getStarTreeDocuments()); + + StarTreeMetadata starTreeMetadata = new StarTreeMetadata( + "test", + STAR_TREE, + mock(IndexInput.class), + VERSION_CURRENT, + builder.numStarTreeNodes, + getStarTreeDimensionNames(compositeField.getDimensionsOrder()), + compositeField.getMetrics(), + 7, + 27, + 1, + Set.of(), + getBuildMode(), + 0, + 1287 + ); + validateStarTreeFileFormats(builder.getRootNode(), 27, starTreeMetadata, expectedStarTreeDocuments()); + } + + private List getStarTreeDimensionNames(List dimensionsOrder) { + + List dimensionNames = new ArrayList<>(); + for (Dimension dimension : dimensionsOrder) { + dimensionNames.add(dimension.getField()); + } + return dimensionNames; + + } + + private void validateStarTreeFileFormats( + InMemoryTreeNode rootNode, + int numDocs, + StarTreeMetadata expectedStarTreeMetadata, + List expectedStarTreeDocuments + ) throws IOException { + + assertNotNull(rootNode.getChildren()); + assertFalse(rootNode.getChildren().isEmpty()); + SegmentReadState readState = getReadState( + numDocs, + expectedStarTreeMetadata.getDimensionFields(), + expectedStarTreeMetadata.getMetrics() + ); + + DocValuesProducer compositeDocValuesProducer = LuceneDocValuesProducerFactory.getDocValuesProducerForCompositeCodec( + Composite99Codec.COMPOSITE_INDEX_CODEC_NAME, + readState, + Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC, + Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite99DocValuesFormat.META_DOC_VALUES_CODEC, + Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION + ); + + IndexInput dataIn = 
readState.directory.openInput(dataFileName, IOContext.DEFAULT); + IndexInput metaIn = readState.directory.openInput(metaFileName, IOContext.DEFAULT); + + StarTreeValues starTreeValues = new StarTreeValues(expectedStarTreeMetadata, dataIn, compositeDocValuesProducer, readState); + assertEquals(expectedStarTreeMetadata.getStarTreeDocCount(), starTreeValues.getStarTreeDocumentCount()); + List fieldValueConverters = new ArrayList<>(); + builder.metricAggregatorInfos.forEach( + metricAggregatorInfo -> fieldValueConverters.add(metricAggregatorInfo.getValueAggregators().getAggregatedValueType()) + ); + StarTreeDocument[] starTreeDocuments = StarTreeTestUtils.getSegmentsStarTreeDocuments( + List.of(starTreeValues), + fieldValueConverters, + readState.segmentInfo.maxDoc() + ); + + StarTreeDocument[] expectedStarTreeDocumentsArray = expectedStarTreeDocuments.toArray(new StarTreeDocument[0]); + StarTreeTestUtils.assertStarTreeDocuments(starTreeDocuments, expectedStarTreeDocumentsArray); + + validateFileFormats(dataIn, metaIn, rootNode, expectedStarTreeMetadata); + + dataIn.close(); + metaIn.close(); + compositeDocValuesProducer.close(); } private static Map> getExpectedDimToValueMap() { @@ -1367,8 +1935,8 @@ private static Map> getExpectedDimToValueMap() { return expectedDimToValueMap; } - private Iterator expectedStarTreeDocuments() { - List expectedStarTreeDocuments = List.of( + private List expectedStarTreeDocuments() { + return List.of( new StarTreeDocument(new Long[] { 1L, 11L, 21L }, new Object[] { 400.0, 1L }), new StarTreeDocument(new Long[] { 1L, 12L, 22L }, new Object[] { 200.0, 1L }), new StarTreeDocument(new Long[] { 2L, 13L, 21L }, new Object[] { 100.0, 1L }), @@ -1398,7 +1966,6 @@ private Iterator expectedStarTreeDocuments() { new StarTreeDocument(new Long[] { 3L, 12L, null }, new Object[] { 600.0, 2L }) ); - return expectedStarTreeDocuments.iterator(); } public void testFlushFlow() throws IOException { @@ -1417,13 +1984,14 @@ public void testFlushFlow() throws IOException { ); List metricsWithField = List.of(0, 1, 2, 3, 4, 5); - StarTreeField sf = getStarTreeFieldWithMultipleMetrics(); + compositeField = getStarTreeFieldWithMultipleMetrics(); SortedNumericDocValues d1sndv = getSortedNumericMock(dimList, docsWithField); SortedNumericDocValues d2sndv = getSortedNumericMock(dimList2, docsWithField2); SortedNumericDocValues m1sndv = getSortedNumericMock(metricsList, metricsWithField); SortedNumericDocValues m2sndv = getSortedNumericMock(metricsList, metricsWithField); - builder = getStarTreeBuilder(sf, getWriteState(6), mapperService); + writeState = getWriteState(6, writeState.segmentInfo.getId()); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); SequentialDocValuesIterator[] dimDvs = { new SequentialDocValuesIterator(d1sndv), new SequentialDocValuesIterator(d2sndv) }; Iterator starTreeDocumentIterator = builder.sortAndAggregateSegmentDocuments( dimDvs, @@ -1438,19 +2006,62 @@ public void testFlushFlow() throws IOException { [5, 5] | [50.0, 1] [null, 2] | [20.0, 1] */ + this.docValuesConsumer = LuceneDocValuesConsumerFactory.getDocValuesConsumerForCompositeCodec( + writeState, + Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC, + Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite99DocValuesFormat.META_DOC_VALUES_CODEC, + Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION + ); + builder.build(starTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); + List starTreeDocuments = builder.getStarTreeDocuments(); int count = 
0; - while (starTreeDocumentIterator.hasNext()) { + for (StarTreeDocument starTreeDocument : starTreeDocuments) { count++; - StarTreeDocument starTreeDocument = starTreeDocumentIterator.next(); - assertEquals( - starTreeDocument.dimensions[0] != null ? starTreeDocument.dimensions[0] * 1 * 10.0 : 20.0, - starTreeDocument.metrics[0] - ); - assertEquals(1L, starTreeDocument.metrics[1]); + if (starTreeDocument.dimensions[1] != null) { + assertEquals( + starTreeDocument.dimensions[0] == null + ? starTreeDocument.dimensions[1] * 1 * 10.0 + : starTreeDocument.dimensions[0] * 10, + starTreeDocument.metrics[0] + ); + assertEquals(1L, starTreeDocument.metrics[1]); + } else { + assertEquals(150D, starTreeDocument.metrics[0]); + assertEquals(6L, starTreeDocument.metrics[1]); + } } - assertEquals(6, count); - builder.build(starTreeDocumentIterator); - validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); + assertEquals(13, count); + validateStarTree(builder.getRootNode(), 2, 1000, builder.getStarTreeDocuments()); + + metaOut.close(); + dataOut.close(); + docValuesConsumer.close(); + + StarTreeMetadata starTreeMetadata = new StarTreeMetadata( + "sf", + STAR_TREE, + mock(IndexInput.class), + VERSION_CURRENT, + builder.numStarTreeNodes, + List.of("field1", "field3"), + List.of(new Metric("field2", List.of(MetricStat.SUM, MetricStat.VALUE_COUNT, MetricStat.AVG))), + 6, + builder.numStarTreeDocs, + 1000, + Set.of(), + getBuildMode(), + 0, + 264 + ); + + validateStarTreeFileFormats( + builder.getRootNode(), + builder.getStarTreeDocuments().size(), + starTreeMetadata, + builder.getStarTreeDocuments() + ); + } public void testFlushFlowDimsReverse() throws IOException { @@ -1469,13 +2080,21 @@ public void testFlushFlowDimsReverse() throws IOException { ); List metricsWithField = List.of(0, 1, 2, 3, 4, 5); - StarTreeField sf = getStarTreeFieldWithMultipleMetrics(); + compositeField = getStarTreeFieldWithMultipleMetrics(); SortedNumericDocValues d1sndv = getSortedNumericMock(dimList, docsWithField); SortedNumericDocValues d2sndv = getSortedNumericMock(dimList2, docsWithField2); SortedNumericDocValues m1sndv = getSortedNumericMock(metricsList, metricsWithField); SortedNumericDocValues m2sndv = getSortedNumericMock(metricsList, metricsWithField); - builder = getStarTreeBuilder(sf, getWriteState(6), mapperService); + writeState = getWriteState(6, writeState.segmentInfo.getId()); + this.docValuesConsumer = LuceneDocValuesConsumerFactory.getDocValuesConsumerForCompositeCodec( + writeState, + Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC, + Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite99DocValuesFormat.META_DOC_VALUES_CODEC, + Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION + ); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); SequentialDocValuesIterator[] dimDvs = { new SequentialDocValuesIterator(d1sndv), new SequentialDocValuesIterator(d2sndv) }; Iterator starTreeDocumentIterator = builder.sortAndAggregateSegmentDocuments( dimDvs, @@ -1490,21 +2109,49 @@ public void testFlushFlowDimsReverse() throws IOException { [5, 5] | [50.0, 1] [null, 0] | [0.0, 1] */ - int count = 0; - while (starTreeDocumentIterator.hasNext()) { - count++; - StarTreeDocument starTreeDocument = starTreeDocumentIterator.next(); - if (starTreeDocument.dimensions[0] != null) { - assertEquals(count, (long) starTreeDocument.dimensions[0]); - } else { - assertEquals(6, count); + builder.appendDocumentsToStarTree(starTreeDocumentIterator); + 
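The StarTreeMetadata constructors used throughout these tests take a long positional argument list. The annotated copy below reflects one reading of those positions, inferred from the surrounding assertions and field-configuration values (the testFlushFlow instance is shown); the comments are interpretation, not the class's documented contract:

```java
StarTreeMetadata starTreeMetadata = new StarTreeMetadata(
    "sf",                         // star-tree field name
    STAR_TREE,                    // composite index field type
    mock(IndexInput.class),       // meta input (mocked in these tests)
    VERSION_CURRENT,              // file-format version
    builder.numStarTreeNodes,     // number of nodes written to the data file
    List.of("field1", "field3"),  // dimension field names, in order
    List.of(new Metric("field2",
        List.of(MetricStat.SUM, MetricStat.VALUE_COUNT, MetricStat.AVG))), // metrics
    6,                            // segment document count fed into the flush
    builder.numStarTreeDocs,      // star-tree document count after aggregation
    1000,                         // maxLeafDocs from the field configuration
    Set.of(),                     // dimensions for which star-node creation is skipped
    getBuildMode(),               // ON_HEAP or OFF_HEAP, depending on the concrete test class
    0,                            // data start file pointer
    264                           // data length in bytes
);
```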
assertEquals(6, builder.getStarTreeDocuments().size()); + builder.build(starTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); + int count = 0; + for (StarTreeDocument starTreeDocument : builder.getStarTreeDocuments()) { + if (count <= 6) { + count++; + if (starTreeDocument.dimensions[0] != null) { + assertEquals(count, (long) starTreeDocument.dimensions[0]); + } + assertEquals(starTreeDocument.dimensions[1] * 10.0, starTreeDocument.metrics[0]); + assertEquals(1L, starTreeDocument.metrics[1]); } - assertEquals(starTreeDocument.dimensions[1] * 10.0, starTreeDocument.metrics[0]); - assertEquals(1L, starTreeDocument.metrics[1]); } - assertEquals(6, count); - builder.build(starTreeDocumentIterator); - validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); + validateStarTree(builder.getRootNode(), 2, 1000, builder.getStarTreeDocuments()); + + metaOut.close(); + dataOut.close(); + docValuesConsumer.close(); + + StarTreeMetadata starTreeMetadata = new StarTreeMetadata( + "sf", + STAR_TREE, + mock(IndexInput.class), + VERSION_CURRENT, + builder.numStarTreeNodes, + List.of("field1", "field3"), + List.of(new Metric("field2", List.of(MetricStat.SUM, MetricStat.VALUE_COUNT, MetricStat.AVG))), + 6, + builder.numStarTreeDocs, + 1000, + Set.of(), + getBuildMode(), + 0, + 264 + ); + + validateStarTreeFileFormats( + builder.getRootNode(), + builder.getStarTreeDocuments().size(), + starTreeMetadata, + builder.getStarTreeDocuments() + ); } public void testFlushFlowBuild() throws IOException { @@ -1534,23 +2181,28 @@ public void testFlushFlowBuild() throws IOException { Metric m1 = new Metric("field2", List.of(MetricStat.SUM)); List dims = List.of(d1, d2); List metrics = List.of(m1); - StarTreeFieldConfiguration c = new StarTreeFieldConfiguration( - 1, - new HashSet<>(), - StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP - ); - StarTreeField sf = new StarTreeField("sf", dims, metrics, c); + StarTreeFieldConfiguration c = new StarTreeFieldConfiguration(1, new HashSet<>(), getBuildMode()); + compositeField = new StarTreeField("sf", dims, metrics, c); SortedNumericDocValues d1sndv = getSortedNumericMock(dimList, docsWithField); SortedNumericDocValues d2sndv = getSortedNumericMock(dimList2, docsWithField2); SortedNumericDocValues m1sndv = getSortedNumericMock(metricsList, metricsWithField); - builder = getStarTreeBuilder(sf, getWriteState(100), mapperService); + writeState = getWriteState(100, writeState.segmentInfo.getId()); + SegmentWriteState consumerWriteState = getWriteState(DocIdSetIterator.NO_MORE_DOCS, writeState.segmentInfo.getId()); + this.docValuesConsumer = LuceneDocValuesConsumerFactory.getDocValuesConsumerForCompositeCodec( + consumerWriteState, + Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC, + Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite99DocValuesFormat.META_DOC_VALUES_CODEC, + Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION + ); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); DocValuesProducer d1vp = getDocValuesProducer(d1sndv); DocValuesProducer d2vp = getDocValuesProducer(d2sndv); DocValuesProducer m1vp = getDocValuesProducer(m1sndv); Map fieldProducerMap = Map.of("field1", d1vp, "field3", d2vp, "field2", m1vp); - builder.build(fieldProducerMap); + builder.build(fieldProducerMap, new AtomicInteger(), docValuesConsumer); /** * Asserting following dim / metrics [ dim1, dim2 / Sum [ metric] ] [0, 0] | [0.0] @@ -1572,6 +2224,34 @@ public void testFlushFlowBuild() throws IOException { 
); } validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); + + metaOut.close(); + dataOut.close(); + docValuesConsumer.close(); + + StarTreeMetadata starTreeMetadata = new StarTreeMetadata( + "sf", + STAR_TREE, + mock(IndexInput.class), + VERSION_CURRENT, + builder.numStarTreeNodes, + List.of("field1", "field3"), + compositeField.getMetrics(), + 100, + builder.numStarTreeDocs, + 1, + Set.of(), + getBuildMode(), + 0, + 6699 + ); + + validateStarTreeFileFormats( + builder.getRootNode(), + builder.getStarTreeDocuments().size(), + starTreeMetadata, + builder.getStarTreeDocuments() + ); } private static DocValuesProducer getDocValuesProducer(SortedNumericDocValues sndv) { @@ -1583,20 +2263,16 @@ public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOExcepti }; } - private static StarTreeField getStarTreeFieldWithMultipleMetrics() { + private StarTreeField getStarTreeFieldWithMultipleMetrics() { Dimension d1 = new NumericDimension("field1"); Dimension d2 = new NumericDimension("field3"); Metric m1 = new Metric("field2", List.of(MetricStat.SUM)); Metric m2 = new Metric("field2", List.of(MetricStat.VALUE_COUNT)); + Metric m3 = new Metric("field2", List.of(MetricStat.AVG)); List dims = List.of(d1, d2); - List metrics = List.of(m1, m2); - StarTreeFieldConfiguration c = new StarTreeFieldConfiguration( - 1000, - new HashSet<>(), - StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP - ); - StarTreeField sf = new StarTreeField("sf", dims, metrics, c); - return sf; + List metrics = List.of(m1, m2, m3); + StarTreeFieldConfiguration c = new StarTreeFieldConfiguration(1000, new HashSet<>(), getBuildMode()); + return new StarTreeField("sf", dims, metrics, c); } public void testMergeFlow_randomNumberTypes() throws Exception { @@ -1631,8 +2307,6 @@ public void testMergeFlow_randomNumberTypes() throws Exception { ); when(documentMapper.mappers()).thenReturn(fieldMappers); testMergeFlowWithSum(); - builder.close(); - testMergeFlowWithCount(); } public void testMergeFlowWithSum() throws IOException { @@ -1653,12 +2327,12 @@ public void testMergeFlowWithSum() throws IOException { ); List metricsWithField = List.of(0, 1, 2, 3, 4, 5, 6); - StarTreeField sf = getStarTreeField(MetricStat.SUM); + compositeField = getStarTreeField(MetricStat.SUM); StarTreeValues starTreeValues = getStarTreeValues( getSortedNumericMock(dimList, docsWithField), getSortedNumericMock(dimList2, docsWithField2), getSortedNumericMock(metricsList, metricsWithField), - sf, + compositeField, "6" ); @@ -1666,10 +2340,18 @@ public void testMergeFlowWithSum() throws IOException { getSortedNumericMock(dimList, docsWithField), getSortedNumericMock(dimList2, docsWithField2), getSortedNumericMock(metricsList, metricsWithField), - sf, + compositeField, "6" ); - builder = getStarTreeBuilder(sf, getWriteState(6), mapperService); + writeState = getWriteState(6, writeState.segmentInfo.getId()); + this.docValuesConsumer = LuceneDocValuesConsumerFactory.getDocValuesConsumerForCompositeCodec( + writeState, + Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC, + Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite99DocValuesFormat.META_DOC_VALUES_CODEC, + Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION + ); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Sum [ metric] ] @@ -1682,18 +2364,49 @@ public void 
testMergeFlowWithSum() throws IOException { * ------------------ We only take non star docs * [6,-1] | [120.0] */ + builder.appendDocumentsToStarTree(starTreeDocumentIterator); + assertEquals(6, builder.getStarTreeDocuments().size()); + builder.build(starTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); int count = 0; - while (starTreeDocumentIterator.hasNext()) { + for (StarTreeDocument starTreeDocument : builder.getStarTreeDocuments()) { count++; - StarTreeDocument starTreeDocument = starTreeDocumentIterator.next(); - assertEquals( - starTreeDocument.dimensions[0] != null ? starTreeDocument.dimensions[0] * 2 * 10.0 : 40.0, - starTreeDocument.metrics[0] - ); + if (count <= 6) { + assertEquals( + starTreeDocument.dimensions[0] != null ? starTreeDocument.dimensions[0] * 2 * 10.0 : 40.0, + starTreeDocument.metrics[0] + ); + } } - assertEquals(6, count); - builder.build(starTreeDocumentIterator); - validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); + + validateStarTree(builder.getRootNode(), 2, 1000, builder.getStarTreeDocuments()); + + metaOut.close(); + dataOut.close(); + docValuesConsumer.close(); + + StarTreeMetadata starTreeMetadata = new StarTreeMetadata( + "sf", + STAR_TREE, + mock(IndexInput.class), + VERSION_CURRENT, + builder.numStarTreeNodes, + List.of("field1", "field3"), + compositeField.getMetrics(), + 6, + builder.numStarTreeDocs, + 1000, + Set.of(), + getBuildMode(), + 0, + 264 + ); + + validateStarTreeFileFormats( + builder.getRootNode(), + builder.getStarTreeDocuments().size(), + starTreeMetadata, + builder.getStarTreeDocuments() + ); } public void testMergeFlowWithCount() throws IOException { @@ -1705,12 +2418,12 @@ public void testMergeFlowWithCount() throws IOException { List metricsList = List.of(0L, 1L, 2L, 3L, 4L, 5L, 6L); List metricsWithField = List.of(0, 1, 2, 3, 4, 5, 6); - StarTreeField sf = getStarTreeField(MetricStat.VALUE_COUNT); + compositeField = getStarTreeField(MetricStat.VALUE_COUNT); StarTreeValues starTreeValues = getStarTreeValues( getSortedNumericMock(dimList, docsWithField), getSortedNumericMock(dimList2, docsWithField2), getSortedNumericMock(metricsList, metricsWithField), - sf, + compositeField, "6" ); @@ -1718,10 +2431,18 @@ public void testMergeFlowWithCount() throws IOException { getSortedNumericMock(dimList, docsWithField), getSortedNumericMock(dimList2, docsWithField2), getSortedNumericMock(metricsList, metricsWithField), - sf, + compositeField, "6" ); - builder = getStarTreeBuilder(sf, getWriteState(6), mapperService); + writeState = getWriteState(6, writeState.segmentInfo.getId()); + this.docValuesConsumer = LuceneDocValuesConsumerFactory.getDocValuesConsumerForCompositeCodec( + writeState, + Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC, + Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite99DocValuesFormat.META_DOC_VALUES_CODEC, + Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION + ); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -1734,15 +2455,46 @@ public void testMergeFlowWithCount() throws IOException { --------------- [6,-1] | [12] */ + builder.appendDocumentsToStarTree(starTreeDocumentIterator); + assertEquals(6, builder.getStarTreeDocuments().size()); + builder.build(starTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); int count = 0; - 
while (starTreeDocumentIterator.hasNext()) { + for (StarTreeDocument starTreeDocument : builder.getStarTreeDocuments()) { count++; - StarTreeDocument starTreeDocument = starTreeDocumentIterator.next(); - assertEquals(starTreeDocument.dimensions[0] != null ? starTreeDocument.dimensions[0] * 2 : 4, starTreeDocument.metrics[0]); + if (count <= 6) { + assertEquals(starTreeDocument.dimensions[0] != null ? starTreeDocument.dimensions[0] * 2 : 4, starTreeDocument.metrics[0]); + } } - assertEquals(6, count); - builder.build(starTreeDocumentIterator); - validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); + + validateStarTree(builder.getRootNode(), 2, 1000, builder.getStarTreeDocuments()); + + metaOut.close(); + dataOut.close(); + docValuesConsumer.close(); + + StarTreeMetadata starTreeMetadata = new StarTreeMetadata( + "sf", + STAR_TREE, + mock(IndexInput.class), + VERSION_CURRENT, + builder.numStarTreeNodes, + List.of("field1", "field3"), + compositeField.getMetrics(), + 6, + builder.numStarTreeDocs, + 1000, + Set.of(), + getBuildMode(), + 0, + 264 + ); + + validateStarTreeFileFormats( + builder.getRootNode(), + builder.getStarTreeDocuments().size(), + starTreeMetadata, + builder.getStarTreeDocuments() + ); } @@ -1756,14 +2508,27 @@ private StarTreeValues getStarTreeValues( SortedNumericDocValues d1sndv = dimList; SortedNumericDocValues d2sndv = dimList2; SortedNumericDocValues m1sndv = metricsList; - Map dimDocIdSetIterators = Map.of("field1", d1sndv, "field3", d2sndv); - Map metricDocIdSetIterators = Map.of("field2", m1sndv); + Map> dimDocIdSetIterators = Map.of("field1", () -> d1sndv, "field3", () -> d2sndv); + + Map> metricDocIdSetIterators = new LinkedHashMap<>(); + for (Metric metric : sf.getMetrics()) { + for (MetricStat metricStat : metric.getMetrics()) { + String metricFullName = fullyQualifiedFieldNameForStarTreeMetricsDocValues( + sf.getName(), + metric.getField(), + metricStat.getTypeName() + ); + metricDocIdSetIterators.put(metricFullName, () -> m1sndv); + } + } + StarTreeValues starTreeValues = new StarTreeValues( sf, null, dimDocIdSetIterators, metricDocIdSetIterators, - Map.of("numSegmentDocs", number) + Map.of(CompositeIndexConstants.SEGMENT_DOCS_COUNT, number), + null ); return starTreeValues; } @@ -1785,12 +2550,12 @@ public void testMergeFlowWithDifferentDocsFromSegments() throws IOException { List metricsList2 = List.of(5L, 6L, 7L, 8L, 9L); List metricsWithField2 = List.of(0, 1, 2, 3, 4); - StarTreeField sf = getStarTreeField(MetricStat.VALUE_COUNT); + compositeField = getStarTreeField(MetricStat.VALUE_COUNT); StarTreeValues starTreeValues = getStarTreeValues( getSortedNumericMock(dimList, docsWithField), getSortedNumericMock(dimList2, docsWithField2), getSortedNumericMock(metricsList, metricsWithField), - sf, + compositeField, "6" ); @@ -1798,10 +2563,18 @@ public void testMergeFlowWithDifferentDocsFromSegments() throws IOException { getSortedNumericMock(dimList3, docsWithField3), getSortedNumericMock(dimList4, docsWithField4), getSortedNumericMock(metricsList2, metricsWithField2), - sf, + compositeField, "4" ); - builder = getStarTreeBuilder(sf, getWriteState(4), mapperService); + writeState = getWriteState(4, writeState.segmentInfo.getId()); + this.docValuesConsumer = LuceneDocValuesConsumerFactory.getDocValuesConsumerForCompositeCodec( + writeState, + Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC, + Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite99DocValuesFormat.META_DOC_VALUES_CODEC, + 
Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION + ); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -1815,19 +2588,49 @@ public void testMergeFlowWithDifferentDocsFromSegments() throws IOException { [null, 2] | [2] [null, 7] | [7] */ + builder.appendDocumentsToStarTree(starTreeDocumentIterator); + assertEquals(9, builder.getStarTreeDocuments().size()); + builder.build(starTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); int count = 0; - while (starTreeDocumentIterator.hasNext()) { + for (StarTreeDocument starTreeDocument : builder.getStarTreeDocuments()) { count++; - StarTreeDocument starTreeDocument = starTreeDocumentIterator.next(); - if (Objects.equals(starTreeDocument.dimensions[0], 5L)) { - assertEquals(starTreeDocument.dimensions[0] * 2, starTreeDocument.metrics[0]); - } else { - assertEquals(starTreeDocument.dimensions[1], starTreeDocument.metrics[0]); + if (count <= 9) { + if (Objects.equals(starTreeDocument.dimensions[0], 5L)) { + assertEquals(starTreeDocument.dimensions[0] * 2, starTreeDocument.metrics[0]); + } else { + assertEquals(starTreeDocument.dimensions[1], starTreeDocument.metrics[0]); + } } } - assertEquals(9, count); - builder.build(starTreeDocumentIterator); - validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); + validateStarTree(builder.getRootNode(), 2, 1000, builder.getStarTreeDocuments()); + + metaOut.close(); + dataOut.close(); + docValuesConsumer.close(); + + StarTreeMetadata starTreeMetadata = new StarTreeMetadata( + "sf", + STAR_TREE, + mock(IndexInput.class), + VERSION_CURRENT, + builder.numStarTreeNodes, + List.of("field1", "field3"), + compositeField.getMetrics(), + 9, + builder.numStarTreeDocs, + 1000, + Set.of(), + getBuildMode(), + 0, + 330 + ); + + validateStarTreeFileFormats( + builder.getRootNode(), + builder.getStarTreeDocuments().size(), + starTreeMetadata, + builder.getStarTreeDocuments() + ); } public void testMergeFlowNumSegmentsDocs() throws IOException { @@ -1863,7 +2666,7 @@ public void testMergeFlowNumSegmentsDocs() throws IOException { sf, "4" ); - builder = getStarTreeBuilder(sf, getWriteState(4), mapperService); + builder = getStarTreeBuilder(metaOut, dataOut, sf, getWriteState(4, writeState.segmentInfo.getId()), mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -1907,12 +2710,12 @@ public void testMergeFlowWithMissingDocs() throws IOException { List metricsList2 = List.of(5L, 6L, 7L, 8L, 9L); List metricsWithField2 = List.of(0, 1, 2, 3, 4); - StarTreeField sf = getStarTreeField(MetricStat.VALUE_COUNT); + compositeField = getStarTreeField(MetricStat.VALUE_COUNT); StarTreeValues starTreeValues = getStarTreeValues( getSortedNumericMock(dimList, docsWithField), getSortedNumericMock(dimList2, docsWithField2), getSortedNumericMock(metricsList, metricsWithField), - sf, + compositeField, "6" ); @@ -1920,10 +2723,18 @@ public void testMergeFlowWithMissingDocs() throws IOException { getSortedNumericMock(dimList3, docsWithField3), getSortedNumericMock(dimList4, docsWithField4), getSortedNumericMock(metricsList2, metricsWithField2), - sf, + compositeField, "4" ); - builder = getStarTreeBuilder(sf, getWriteState(4), mapperService); + writeState = 
getWriteState(4, writeState.segmentInfo.getId()); + this.docValuesConsumer = LuceneDocValuesConsumerFactory.getDocValuesConsumerForCompositeCodec( + writeState, + Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC, + Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite99DocValuesFormat.META_DOC_VALUES_CODEC, + Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION + ); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -1938,18 +2749,49 @@ public void testMergeFlowWithMissingDocs() throws IOException { [null, 5] | [5] [null, 7] | [7] */ + builder.appendDocumentsToStarTree(starTreeDocumentIterator); + assertEquals(10, builder.getStarTreeDocuments().size()); + builder.build(starTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); int count = 0; - while (starTreeDocumentIterator.hasNext()) { + for (StarTreeDocument starTreeDocument : builder.getStarTreeDocuments()) { count++; - StarTreeDocument starTreeDocument = starTreeDocumentIterator.next(); - if (starTreeDocument.dimensions[0] == null) { - assertTrue(List.of(5L, 7L).contains(starTreeDocument.dimensions[1])); + if (count <= 10) { + if (starTreeDocument.dimensions[0] == null) { + assertTrue(List.of(5L, 7L).contains(starTreeDocument.dimensions[1])); + } + assertEquals(starTreeDocument.dimensions[1], starTreeDocument.metrics[0]); } - assertEquals(starTreeDocument.dimensions[1], starTreeDocument.metrics[0]); } - assertEquals(10, count); - builder.build(starTreeDocumentIterator); - validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); + + validateStarTree(builder.getRootNode(), 2, 1000, builder.getStarTreeDocuments()); + + metaOut.close(); + dataOut.close(); + docValuesConsumer.close(); + + StarTreeMetadata starTreeMetadata = new StarTreeMetadata( + "sf", + STAR_TREE, + mock(IndexInput.class), + VERSION_CURRENT, + builder.numStarTreeNodes, + List.of("field1", "field3"), + compositeField.getMetrics(), + 10, + builder.numStarTreeDocs, + 1000, + Set.of(), + getBuildMode(), + 0, + 363 + ); + + validateStarTreeFileFormats( + builder.getRootNode(), + builder.getStarTreeDocuments().size(), + starTreeMetadata, + builder.getStarTreeDocuments() + ); } public void testMergeFlowWithMissingDocsWithZero() throws IOException { @@ -1969,12 +2811,12 @@ public void testMergeFlowWithMissingDocsWithZero() throws IOException { List metricsList2 = List.of(5L, 6L, 7L, 8L, 9L); List metricsWithField2 = List.of(0, 1, 2, 3, 4); - StarTreeField sf = getStarTreeField(MetricStat.VALUE_COUNT); + compositeField = getStarTreeField(MetricStat.VALUE_COUNT); StarTreeValues starTreeValues = getStarTreeValues( getSortedNumericMock(dimList, docsWithField), getSortedNumericMock(dimList2, docsWithField2), getSortedNumericMock(metricsList, metricsWithField), - sf, + compositeField, "7" ); @@ -1982,10 +2824,19 @@ public void testMergeFlowWithMissingDocsWithZero() throws IOException { getSortedNumericMock(dimList3, docsWithField3), getSortedNumericMock(dimList4, docsWithField4), getSortedNumericMock(metricsList2, metricsWithField2), - sf, + compositeField, "4" ); - builder = getStarTreeBuilder(sf, getWriteState(4), mapperService); + writeState = getWriteState(4, writeState.segmentInfo.getId()); + SegmentWriteState consumerWriteState = getWriteState(DocIdSetIterator.NO_MORE_DOCS, writeState.segmentInfo.getId()); + 
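The merge-flow tests expect rows from the two segments to collapse onto a single star-tree document per dimension tuple, with the metric values combined (testMergeFlowWithSum, for instance, asserts dimensions[0] * 2 * 10.0 because both segments contribute identical sums). A self-contained sketch of that grouping idea, independent of the builder's actual merge code:

```java
import java.util.*;

public class MergeByDimensions {
    // Groups per-segment rows by their dimension tuple and sums the metric values.
    static Map<List<Long>, Double> merge(List<Map<List<Long>, Double>> segments) {
        Map<List<Long>, Double> merged = new LinkedHashMap<>();
        for (Map<List<Long>, Double> segment : segments) {
            segment.forEach((dims, sum) -> merged.merge(dims, sum, Double::sum));
        }
        return merged;
    }

    public static void main(String[] args) {
        // Two segments carrying identical (dim1, dim2) -> sum rows, as in testMergeFlowWithSum.
        Map<List<Long>, Double> segment = Map.of(
            List.of(1L, 1L), 10.0,
            List.of(2L, 2L), 20.0
        );
        // Each tuple survives once, with double the per-segment sum (e.g. [1, 1]=20.0, [2, 2]=40.0).
        System.out.println(merge(List.of(segment, segment)));
    }
}
```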
this.docValuesConsumer = LuceneDocValuesConsumerFactory.getDocValuesConsumerForCompositeCodec( + consumerWriteState, + Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC, + Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite99DocValuesFormat.META_DOC_VALUES_CODEC, + Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION + ); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -1996,23 +2847,54 @@ public void testMergeFlowWithMissingDocsWithZero() throws IOException { [null, 7] | [7] [null, null] | [12] */ + builder.appendDocumentsToStarTree(starTreeDocumentIterator); + assertEquals(6, builder.getStarTreeDocuments().size()); + builder.build(starTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); int count = 0; - while (starTreeDocumentIterator.hasNext()) { + for (StarTreeDocument starTreeDocument : builder.getStarTreeDocuments()) { count++; - StarTreeDocument starTreeDocument = starTreeDocumentIterator.next(); - if (starTreeDocument.dimensions[0] == null && starTreeDocument.dimensions[1] == null) { - assertEquals(12L, (long) starTreeDocument.metrics[0]); - } else if (starTreeDocument.dimensions[0] == null) { - assertEquals(7L, starTreeDocument.metrics[0]); - } else if (starTreeDocument.dimensions[0] == 0) { - assertEquals(9L, starTreeDocument.metrics[0]); - } else { - assertEquals(starTreeDocument.dimensions[1], starTreeDocument.metrics[0]); + if (count <= 6) { + if (starTreeDocument.dimensions[0] == null && starTreeDocument.dimensions[1] == null) { + assertEquals(12L, (long) starTreeDocument.metrics[0]); + } else if (starTreeDocument.dimensions[0] == null) { + assertEquals(7L, starTreeDocument.metrics[0]); + } else if (starTreeDocument.dimensions[0] == 0) { + assertEquals(9L, starTreeDocument.metrics[0]); + } else { + assertEquals(starTreeDocument.dimensions[1], starTreeDocument.metrics[0]); + } } } - assertEquals(6, count); - builder.build(starTreeDocumentIterator); - validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); + + validateStarTree(builder.getRootNode(), 2, 1000, builder.getStarTreeDocuments()); + + metaOut.close(); + dataOut.close(); + docValuesConsumer.close(); + + StarTreeMetadata starTreeMetadata = new StarTreeMetadata( + "sf", + STAR_TREE, + mock(IndexInput.class), + VERSION_CURRENT, + builder.numStarTreeNodes, + List.of("field1", "field3"), + compositeField.getMetrics(), + 6, + builder.numStarTreeDocs, + 1000, + Set.of(), + getBuildMode(), + 0, + 231 + ); + + validateStarTreeFileFormats( + builder.getRootNode(), + builder.getStarTreeDocuments().size(), + starTreeMetadata, + builder.getStarTreeDocuments() + ); } public void testMergeFlowWithMissingDocsWithZeroComplexCase() throws IOException { @@ -2032,12 +2914,12 @@ public void testMergeFlowWithMissingDocsWithZeroComplexCase() throws IOException List metricsList2 = List.of(5L, 6L, 7L, 8L, 9L); List metricsWithField2 = List.of(0, 1, 2, 3, 4); - StarTreeField sf = getStarTreeField(MetricStat.VALUE_COUNT); + compositeField = getStarTreeField(MetricStat.VALUE_COUNT); StarTreeValues starTreeValues = getStarTreeValues( getSortedNumericMock(dimList, docsWithField), getSortedNumericMock(dimList2, docsWithField2), getSortedNumericMock(metricsList, metricsWithField), - sf, + compositeField, "9" ); @@ -2045,10 +2927,18 @@ public void 
testMergeFlowWithMissingDocsWithZeroComplexCase() throws IOException getSortedNumericMock(dimList3, docsWithField3), getSortedNumericMock(dimList4, docsWithField4), getSortedNumericMock(metricsList2, metricsWithField2), - sf, + compositeField, "4" ); - builder = getStarTreeBuilder(sf, getWriteState(4), mapperService); + writeState = getWriteState(4, writeState.segmentInfo.getId()); + this.docValuesConsumer = LuceneDocValuesConsumerFactory.getDocValuesConsumerForCompositeCodec( + writeState, + Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC, + Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite99DocValuesFormat.META_DOC_VALUES_CODEC, + Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION + ); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -2060,26 +2950,57 @@ public void testMergeFlowWithMissingDocsWithZeroComplexCase() throws IOException [null, 7] | [7] [null, null] | [19] */ + builder.appendDocumentsToStarTree(starTreeDocumentIterator); + assertEquals(7, builder.getStarTreeDocuments().size()); + builder.build(starTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); int count = 0; - while (starTreeDocumentIterator.hasNext()) { + for (StarTreeDocument starTreeDocument : builder.getStarTreeDocuments()) { count++; - StarTreeDocument starTreeDocument = starTreeDocumentIterator.next(); - if (starTreeDocument.dimensions[0] == null && starTreeDocument.dimensions[1] == null) { - assertEquals(19L, (long) starTreeDocument.metrics[0]); - assertEquals(7, count); - } else if (starTreeDocument.dimensions[0] == null) { - assertEquals(7L, starTreeDocument.metrics[0]); - } else if (starTreeDocument.dimensions[1] == null) { - assertEquals(8L, starTreeDocument.metrics[0]); - } else if (starTreeDocument.dimensions[0] == 0) { - assertEquals(9L, starTreeDocument.metrics[0]); - } else { - assertEquals(starTreeDocument.dimensions[1], starTreeDocument.metrics[0]); + if (count <= 7) { + if (starTreeDocument.dimensions[0] == null && starTreeDocument.dimensions[1] == null) { + assertEquals(19L, (long) starTreeDocument.metrics[0]); + assertEquals(7, count); + } else if (starTreeDocument.dimensions[0] == null) { + assertEquals(7L, starTreeDocument.metrics[0]); + } else if (starTreeDocument.dimensions[1] == null) { + assertEquals(8L, starTreeDocument.metrics[0]); + } else if (starTreeDocument.dimensions[0] == 0) { + assertEquals(9L, starTreeDocument.metrics[0]); + } else { + assertEquals(starTreeDocument.dimensions[1], starTreeDocument.metrics[0]); + } } } - assertEquals(7, count); - builder.build(starTreeDocumentIterator); - validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); + + validateStarTree(builder.getRootNode(), 2, 1000, builder.getStarTreeDocuments()); + + metaOut.close(); + dataOut.close(); + docValuesConsumer.close(); + + StarTreeMetadata starTreeMetadata = new StarTreeMetadata( + "sf", + STAR_TREE, + mock(IndexInput.class), + VERSION_CURRENT, + builder.numStarTreeNodes, + List.of("field1", "field3"), + compositeField.getMetrics(), + 7, + builder.numStarTreeDocs, + 1000, + Set.of(), + getBuildMode(), + 0, + 231 + ); + + validateStarTreeFileFormats( + builder.getRootNode(), + builder.getStarTreeDocuments().size(), + starTreeMetadata, + builder.getStarTreeDocuments() + ); } public void testMergeFlowWithMissingDocsInSecondDim() throws 
IOException { @@ -2099,12 +3020,12 @@ public void testMergeFlowWithMissingDocsInSecondDim() throws IOException { List metricsList2 = List.of(5L, 6L, 7L, 8L, 9L); List metricsWithField2 = List.of(0, 1, 2, 3, 4); - StarTreeField sf = getStarTreeField(MetricStat.VALUE_COUNT); + compositeField = getStarTreeField(MetricStat.VALUE_COUNT); StarTreeValues starTreeValues = getStarTreeValues( getSortedNumericMock(dimList, docsWithField), getSortedNumericMock(dimList2, docsWithField2), getSortedNumericMock(metricsList, metricsWithField), - sf, + compositeField, "6" ); @@ -2112,10 +3033,18 @@ public void testMergeFlowWithMissingDocsInSecondDim() throws IOException { getSortedNumericMock(dimList3, docsWithField3), getSortedNumericMock(dimList4, docsWithField4), getSortedNumericMock(metricsList2, metricsWithField2), - sf, + compositeField, "4" ); - builder = getStarTreeBuilder(sf, getWriteState(4), mapperService); + writeState = getWriteState(4, writeState.segmentInfo.getId()); + this.docValuesConsumer = LuceneDocValuesConsumerFactory.getDocValuesConsumerForCompositeCodec( + writeState, + Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC, + Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite99DocValuesFormat.META_DOC_VALUES_CODEC, + Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION + ); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -2130,19 +3059,50 @@ public void testMergeFlowWithMissingDocsInSecondDim() throws IOException { [8, 8] | [8] [null, 7] | [7] */ + builder.appendDocumentsToStarTree(starTreeDocumentIterator); + assertEquals(10, builder.getStarTreeDocuments().size()); + builder.build(starTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); int count = 0; - while (starTreeDocumentIterator.hasNext()) { + for (StarTreeDocument starTreeDocument : builder.getStarTreeDocuments()) { count++; - StarTreeDocument starTreeDocument = starTreeDocumentIterator.next(); - if (starTreeDocument.dimensions[0] != null && starTreeDocument.dimensions[0] == 5) { - assertEquals(starTreeDocument.dimensions[0], starTreeDocument.metrics[0]); - } else { - assertEquals(starTreeDocument.dimensions[1], starTreeDocument.metrics[0]); + if (count <= 10) { + if (starTreeDocument.dimensions[0] != null && starTreeDocument.dimensions[0] == 5) { + assertEquals(starTreeDocument.dimensions[0], starTreeDocument.metrics[0]); + } else { + assertEquals(starTreeDocument.dimensions[1], starTreeDocument.metrics[0]); + } } } - assertEquals(10, count); - builder.build(starTreeDocumentIterator); - validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); + + validateStarTree(builder.getRootNode(), 2, 1000, builder.getStarTreeDocuments()); + + metaOut.close(); + dataOut.close(); + docValuesConsumer.close(); + + StarTreeMetadata starTreeMetadata = new StarTreeMetadata( + "sf", + STAR_TREE, + mock(IndexInput.class), + VERSION_CURRENT, + builder.numStarTreeNodes, + List.of("field1", "field3"), + compositeField.getMetrics(), + 10, + builder.numStarTreeDocs, + 1000, + Set.of(), + getBuildMode(), + 0, + 363 + ); + + validateStarTreeFileFormats( + builder.getRootNode(), + builder.getStarTreeDocuments().size(), + starTreeMetadata, + builder.getStarTreeDocuments() + ); } public void testMergeFlowWithDocsMissingAtTheEnd() throws IOException { @@ -2162,12 +3122,12 @@ public void 
testMergeFlowWithDocsMissingAtTheEnd() throws IOException { List metricsList2 = List.of(5L, 6L, 7L, 8L, 9L); List metricsWithField2 = List.of(0, 1, 2, 3, 4); - StarTreeField sf = getStarTreeField(MetricStat.VALUE_COUNT); + compositeField = getStarTreeField(MetricStat.VALUE_COUNT); StarTreeValues starTreeValues = getStarTreeValues( getSortedNumericMock(dimList, docsWithField), getSortedNumericMock(dimList2, docsWithField2), getSortedNumericMock(metricsList, metricsWithField), - sf, + compositeField, "6" ); @@ -2175,10 +3135,17 @@ public void testMergeFlowWithDocsMissingAtTheEnd() throws IOException { getSortedNumericMock(dimList3, docsWithField3), getSortedNumericMock(dimList4, docsWithField4), getSortedNumericMock(metricsList2, metricsWithField2), - sf, + compositeField, "4" ); - builder = getStarTreeBuilder(sf, writeState, mapperService); + this.docValuesConsumer = LuceneDocValuesConsumerFactory.getDocValuesConsumerForCompositeCodec( + writeState, + Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC, + Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite99DocValuesFormat.META_DOC_VALUES_CODEC, + Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION + ); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -2193,18 +3160,49 @@ public void testMergeFlowWithDocsMissingAtTheEnd() throws IOException { [null, 5] | [5] [null, 7] | [7] */ + builder.appendDocumentsToStarTree(starTreeDocumentIterator); + assertEquals(10, builder.getStarTreeDocuments().size()); + builder.build(starTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); int count = 0; - while (starTreeDocumentIterator.hasNext()) { + for (StarTreeDocument starTreeDocument : builder.getStarTreeDocuments()) { count++; - StarTreeDocument starTreeDocument = starTreeDocumentIterator.next(); - if (starTreeDocument.dimensions[0] == null) { - assertTrue(List.of(5L, 7L).contains(starTreeDocument.dimensions[1])); + if (count <= 10) { + if (starTreeDocument.dimensions[0] == null) { + assertTrue(List.of(5L, 7L).contains(starTreeDocument.dimensions[1])); + } + assertEquals(starTreeDocument.dimensions[1], starTreeDocument.metrics[0]); } - assertEquals(starTreeDocument.dimensions[1], starTreeDocument.metrics[0]); } - assertEquals(10, count); - builder.build(starTreeDocumentIterator); - validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); + + validateStarTree(builder.getRootNode(), 2, 1000, builder.getStarTreeDocuments()); + + metaOut.close(); + dataOut.close(); + docValuesConsumer.close(); + + StarTreeMetadata starTreeMetadata = new StarTreeMetadata( + "sf", + STAR_TREE, + mock(IndexInput.class), + VERSION_CURRENT, + builder.numStarTreeNodes, + List.of("field1", "field3"), + compositeField.getMetrics(), + 10, + builder.numStarTreeDocs, + 1000, + Set.of(), + getBuildMode(), + 0, + 363 + ); + + validateStarTreeFileFormats( + builder.getRootNode(), + builder.getStarTreeDocuments().size(), + starTreeMetadata, + builder.getStarTreeDocuments() + ); } public void testMergeFlowWithEmptyFieldsInOneSegment() throws IOException { @@ -2216,12 +3214,12 @@ public void testMergeFlowWithEmptyFieldsInOneSegment() throws IOException { List metricsList = List.of(0L, 1L, 2L, 3L, 4L, 5L, 6L); List metricsWithField = List.of(0, 1, 2, 3, 4, 5, 6); - StarTreeField sf = getStarTreeField(MetricStat.VALUE_COUNT); + 
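Every hard-coded StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP / OFF_HEAP in these tests is replaced with getBuildMode(), implying the shared test class now defers the choice to its subclasses. A sketch of that pattern, assuming getBuildMode() is an abstract hook (the enclosing class declarations are not part of this diff, so the names below are illustrative):

```java
// Names are illustrative; the diff does not show the enclosing class declarations.
abstract class BaseStarTreeBuilderTests {
    // Concrete test classes choose the mode once; every test reads it via getBuildMode().
    abstract StarTreeFieldConfiguration.StarTreeBuildMode getBuildMode();
}

class OnHeapStarTreeBuilderTests extends BaseStarTreeBuilderTests {
    @Override
    StarTreeFieldConfiguration.StarTreeBuildMode getBuildMode() {
        return StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP;
    }
}
```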
compositeField = getStarTreeField(MetricStat.VALUE_COUNT); StarTreeValues starTreeValues = getStarTreeValues( getSortedNumericMock(dimList, docsWithField), getSortedNumericMock(dimList2, docsWithField2), getSortedNumericMock(metricsList, metricsWithField), - sf, + compositeField, "6" ); @@ -2229,10 +3227,18 @@ public void testMergeFlowWithEmptyFieldsInOneSegment() throws IOException { DocValues.emptySortedNumeric(), DocValues.emptySortedNumeric(), DocValues.emptySortedNumeric(), - sf, + compositeField, "0" ); - builder = getStarTreeBuilder(sf, getWriteState(0), mapperService); + writeState = getWriteState(0, writeState.segmentInfo.getId()); + this.docValuesConsumer = LuceneDocValuesConsumerFactory.getDocValuesConsumerForCompositeCodec( + writeState, + Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC, + Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite99DocValuesFormat.META_DOC_VALUES_CODEC, + Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION + ); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** * Asserting following dim / metrics [ dim1, dim2 / Count [ metric] ] @@ -2243,18 +3249,48 @@ public void testMergeFlowWithEmptyFieldsInOneSegment() throws IOException { [4, 4] | [4] [null, 5] | [5] */ + builder.appendDocumentsToStarTree(starTreeDocumentIterator); + assertEquals(6, builder.getStarTreeDocuments().size()); + builder.build(starTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); int count = 0; - while (starTreeDocumentIterator.hasNext()) { + for (StarTreeDocument starTreeDocument : builder.getStarTreeDocuments()) { count++; - StarTreeDocument starTreeDocument = starTreeDocumentIterator.next(); - if (starTreeDocument.dimensions[0] == null) { - assertEquals(5L, (long) starTreeDocument.dimensions[1]); + if (count <= 6) { + if (starTreeDocument.dimensions[0] == null) { + assertEquals(5L, (long) starTreeDocument.dimensions[1]); + } + assertEquals(starTreeDocument.dimensions[1], starTreeDocument.metrics[0]); } - assertEquals(starTreeDocument.dimensions[1], starTreeDocument.metrics[0]); } - assertEquals(6, count); - builder.build(starTreeDocumentIterator); - validateStarTree(builder.getRootNode(), 2, 1, builder.getStarTreeDocuments()); + validateStarTree(builder.getRootNode(), 2, 1000, builder.getStarTreeDocuments()); + + metaOut.close(); + dataOut.close(); + docValuesConsumer.close(); + + StarTreeMetadata starTreeMetadata = new StarTreeMetadata( + "sf", + STAR_TREE, + mock(IndexInput.class), + VERSION_CURRENT, + builder.numStarTreeNodes, + List.of("field1", "field3"), + compositeField.getMetrics(), + 6, + builder.numStarTreeDocs, + 1000, + Set.of(), + getBuildMode(), + 0, + 264 + ); + + validateStarTreeFileFormats( + builder.getRootNode(), + builder.getStarTreeDocuments().size(), + starTreeMetadata, + builder.getStarTreeDocuments() + ); } public void testMergeFlowWithDuplicateDimensionValues() throws IOException { @@ -2307,7 +3343,7 @@ public void testMergeFlowWithDuplicateDimensionValues() throws IOException { docCountMetricsWithField.add(i); } - StarTreeField sf = getStarTreeFieldWithDocCount(1, true); + compositeField = getStarTreeFieldWithDocCount(1, true); StarTreeValues starTreeValues = getStarTreeValues( dimList1, docsWithField1, @@ -2321,7 +3357,7 @@ public void testMergeFlowWithDuplicateDimensionValues() throws IOException { metricsWithField, docCountMetricsList, docCountMetricsWithField, - sf + 
compositeField ); StarTreeValues starTreeValues2 = getStarTreeValues( @@ -2337,10 +3373,17 @@ public void testMergeFlowWithDuplicateDimensionValues() throws IOException { metricsWithField, docCountMetricsList, docCountMetricsWithField, - sf + compositeField + ); + this.docValuesConsumer = LuceneDocValuesConsumerFactory.getDocValuesConsumerForCompositeCodec( + writeState, + Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC, + Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite99DocValuesFormat.META_DOC_VALUES_CODEC, + Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION ); - builder = getStarTreeBuilder(sf, writeState, mapperService); - builder.build(builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2))); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); + builder.build(builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)), new AtomicInteger(), docValuesConsumer); List starTreeDocuments = builder.getStarTreeDocuments(); assertEquals(401, starTreeDocuments.size()); int count = 0; @@ -2371,7 +3414,34 @@ public void testMergeFlowWithDuplicateDimensionValues() throws IOException { count++; } assertEquals(401, count); - validateStarTree(builder.getRootNode(), 4, sf.getStarTreeConfig().maxLeafDocs(), builder.getStarTreeDocuments()); + validateStarTree(builder.getRootNode(), 4, compositeField.getStarTreeConfig().maxLeafDocs(), builder.getStarTreeDocuments()); + metaOut.close(); + dataOut.close(); + docValuesConsumer.close(); + + StarTreeMetadata starTreeMetadata = new StarTreeMetadata( + "sf", + STAR_TREE, + mock(IndexInput.class), + VERSION_CURRENT, + builder.numStarTreeNodes, + getStarTreeDimensionNames(compositeField.getDimensionsOrder()), + compositeField.getMetrics(), + 100, + builder.numStarTreeDocs, + 1, + Set.of(), + getBuildMode(), + 0, + 13365 + ); + + validateStarTreeFileFormats( + builder.getRootNode(), + builder.getStarTreeDocuments().size(), + starTreeMetadata, + builder.getStarTreeDocuments() + ); } public void testMergeFlowWithMaxLeafDocs() throws IOException { @@ -2430,7 +3500,7 @@ public void testMergeFlowWithMaxLeafDocs() throws IOException { metricsWithField1.add(i); } - StarTreeField sf = getStarTreeFieldWithDocCount(3, true); + compositeField = getStarTreeFieldWithDocCount(3, true); StarTreeValues starTreeValues = getStarTreeValues( dimList1, docsWithField1, @@ -2444,7 +3514,7 @@ public void testMergeFlowWithMaxLeafDocs() throws IOException { metricsWithField, metricsList1, metricsWithField1, - sf + compositeField ); StarTreeValues starTreeValues2 = getStarTreeValues( @@ -2460,11 +3530,18 @@ public void testMergeFlowWithMaxLeafDocs() throws IOException { metricsWithField, metricsList1, metricsWithField1, - sf + compositeField ); - builder = getStarTreeBuilder(sf, writeState, mapperService); - builder.build(builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2))); + this.docValuesConsumer = LuceneDocValuesConsumerFactory.getDocValuesConsumerForCompositeCodec( + writeState, + Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC, + Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite99DocValuesFormat.META_DOC_VALUES_CODEC, + Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION + ); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); + builder.build(builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)), new AtomicInteger(), docValuesConsumer); List starTreeDocuments = builder.getStarTreeDocuments(); /** 635 docs get generated @@ 
-2520,7 +3597,34 @@ public void testMergeFlowWithMaxLeafDocs() throws IOException { assertEquals(40L, starTreeDocument.metrics[1]); } } - validateStarTree(builder.getRootNode(), 4, sf.getStarTreeConfig().maxLeafDocs(), builder.getStarTreeDocuments()); + validateStarTree(builder.getRootNode(), 4, compositeField.getStarTreeConfig().maxLeafDocs(), builder.getStarTreeDocuments()); + metaOut.close(); + dataOut.close(); + docValuesConsumer.close(); + + StarTreeMetadata starTreeMetadata = new StarTreeMetadata( + "sf", + STAR_TREE, + mock(IndexInput.class), + VERSION_CURRENT, + builder.numStarTreeNodes, + getStarTreeDimensionNames(compositeField.getDimensionsOrder()), + compositeField.getMetrics(), + 100, + builder.numStarTreeDocs, + 3, + Set.of(), + getBuildMode(), + 0, + 23199 + ); + + validateStarTreeFileFormats( + builder.getRootNode(), + builder.getStarTreeDocuments().size(), + starTreeMetadata, + builder.getStarTreeDocuments() + ); } private StarTreeValues getStarTreeValues( @@ -2544,11 +3648,45 @@ private StarTreeValues getStarTreeValues( SortedNumericDocValues d4sndv = getSortedNumericMock(dimList4, docsWithField4); SortedNumericDocValues m1sndv = getSortedNumericMock(metricsList, metricsWithField); SortedNumericDocValues m2sndv = getSortedNumericMock(metricsList1, metricsWithField1); - Map dimDocIdSetIterators = Map.of("field1", d1sndv, "field3", d2sndv, "field5", d3sndv, "field8", d4sndv); - Map metricDocIdSetIterators = new LinkedHashMap<>(); - metricDocIdSetIterators.put("field2", m1sndv); - metricDocIdSetIterators.put("_doc_count", m2sndv); - StarTreeValues starTreeValues = new StarTreeValues(sf, null, dimDocIdSetIterators, metricDocIdSetIterators, getAttributes(500)); + Map> dimDocIdSetIterators = Map.of( + "field1", + () -> d1sndv, + "field3", + () -> d2sndv, + "field5", + () -> d3sndv, + "field8", + () -> d4sndv + ); + + Map> metricDocIdSetIterators = new LinkedHashMap<>(); + + metricDocIdSetIterators.put( + fullyQualifiedFieldNameForStarTreeMetricsDocValues( + sf.getName(), + "field2", + sf.getMetrics().get(0).getMetrics().get(0).getTypeName() + ), + () -> m1sndv + ); + metricDocIdSetIterators.put( + fullyQualifiedFieldNameForStarTreeMetricsDocValues( + sf.getName(), + "_doc_count", + sf.getMetrics().get(1).getMetrics().get(0).getTypeName() + ), + () -> m2sndv + ); + // metricDocIdSetIterators.put("field2", () -> m1sndv); + // metricDocIdSetIterators.put("_doc_count", () -> m2sndv); + StarTreeValues starTreeValues = new StarTreeValues( + sf, + null, + dimDocIdSetIterators, + metricDocIdSetIterators, + getAttributes(500), + null + ); return starTreeValues; } @@ -2606,7 +3744,7 @@ public void testMergeFlowWithDuplicateDimensionValueWithMaxLeafDocs() throws IOE metricsWithField.add(i); } - StarTreeField sf = getStarTreeFieldWithDocCount(3, true); + compositeField = getStarTreeFieldWithDocCount(3, true); StarTreeValues starTreeValues = getStarTreeValues( dimList1, docsWithField1, @@ -2620,7 +3758,7 @@ public void testMergeFlowWithDuplicateDimensionValueWithMaxLeafDocs() throws IOE metricsWithField, docCountMetricsList, docCountMetricsWithField, - sf + compositeField ); StarTreeValues starTreeValues2 = getStarTreeValues( @@ -2636,13 +3774,48 @@ public void testMergeFlowWithDuplicateDimensionValueWithMaxLeafDocs() throws IOE metricsWithField, docCountMetricsList, docCountMetricsWithField, - sf + compositeField ); - builder = getStarTreeBuilder(sf, writeState, mapperService); - builder.build(builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2))); + this.docValuesConsumer = 
LuceneDocValuesConsumerFactory.getDocValuesConsumerForCompositeCodec( + writeState, + Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC, + Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite99DocValuesFormat.META_DOC_VALUES_CODEC, + Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION + ); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); + builder.build(builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)), new AtomicInteger(), docValuesConsumer); List starTreeDocuments = builder.getStarTreeDocuments(); assertEquals(401, starTreeDocuments.size()); - validateStarTree(builder.getRootNode(), 4, sf.getStarTreeConfig().maxLeafDocs(), builder.getStarTreeDocuments()); + validateStarTree(builder.getRootNode(), 4, compositeField.getStarTreeConfig().maxLeafDocs(), builder.getStarTreeDocuments()); + + metaOut.close(); + dataOut.close(); + docValuesConsumer.close(); + + StarTreeMetadata starTreeMetadata = new StarTreeMetadata( + "sf", + STAR_TREE, + mock(IndexInput.class), + VERSION_CURRENT, + builder.numStarTreeNodes, + getStarTreeDimensionNames(compositeField.getDimensionsOrder()), + compositeField.getMetrics(), + 100, + builder.numStarTreeDocs, + compositeField.getStarTreeConfig().maxLeafDocs(), + Set.of(), + getBuildMode(), + 0, + 15345 + ); + + validateStarTreeFileFormats( + builder.getRootNode(), + builder.getStarTreeDocuments().size(), + starTreeMetadata, + builder.getStarTreeDocuments() + ); } public static long getLongFromDouble(double value) { @@ -2714,7 +3887,7 @@ public void testMergeFlowWithMaxLeafDocsAndStarTreeNodesAssertion() throws IOExc metricsList.add(1L); metricsWithField.add(i); } - StarTreeField sf = getStarTreeFieldWithDocCount(10, true); + compositeField = getStarTreeFieldWithDocCount(10, true); StarTreeValues starTreeValues = getStarTreeValues( dimList1, docsWithField1, @@ -2728,7 +3901,7 @@ public void testMergeFlowWithMaxLeafDocsAndStarTreeNodesAssertion() throws IOExc metricsWithField, metricsList1, metricsWithField1, - sf + compositeField ); StarTreeValues starTreeValues2 = getStarTreeValues( @@ -2744,10 +3917,17 @@ public void testMergeFlowWithMaxLeafDocsAndStarTreeNodesAssertion() throws IOExc metricsWithField, metricsList1, metricsWithField1, - sf + compositeField + ); + this.docValuesConsumer = LuceneDocValuesConsumerFactory.getDocValuesConsumerForCompositeCodec( + writeState, + Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC, + Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite99DocValuesFormat.META_DOC_VALUES_CODEC, + Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION ); - builder = getStarTreeBuilder(sf, writeState, mapperService); - builder.build(builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2))); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); + builder.build(builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)), new AtomicInteger(), docValuesConsumer); List starTreeDocuments = builder.getStarTreeDocuments(); Map> dimValueToDocIdMap = new HashMap<>(); traverseStarTree(builder.rootNode, dimValueToDocIdMap, true); @@ -2762,10 +3942,38 @@ public void testMergeFlowWithMaxLeafDocsAndStarTreeNodesAssertion() throws IOExc } } assertEquals(1041, starTreeDocuments.size()); - validateStarTree(builder.getRootNode(), 4, sf.getStarTreeConfig().maxLeafDocs(), builder.getStarTreeDocuments()); + validateStarTree(builder.getRootNode(), 4, compositeField.getStarTreeConfig().maxLeafDocs(), builder.getStarTreeDocuments()); + + 
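The maxLeafDocs assertions above (getStarTreeFieldWithDocCount(...) plus validateStarTree(root, dims, maxLeafDocs, docs)) exercise the rule that a node holding more than maxLeafDocs documents is split further on the next dimension. The following is a rough, self-contained illustration of that splitting rule only; it ignores star-node creation and is not the builder's implementation:

```java
import java.util.*;
import java.util.stream.Collectors;

public class MaxLeafDocsSplit {
    // docs: one Long[] of dimension values per document; depth: current dimension.
    // Counts the nodes a subtree needs when any node with more than maxLeafDocs
    // documents must be split on the next dimension.
    static int countNodes(List<Long[]> docs, int depth, int maxLeafDocs) {
        if (docs.size() <= maxLeafDocs || depth == docs.get(0).length) {
            return 1; // small enough, or no dimensions left: keep as a leaf
        }
        Map<Long, List<Long[]>> byValue = docs.stream()
            .collect(Collectors.groupingBy(d -> d[depth], LinkedHashMap::new, Collectors.toList()));
        int nodes = 1; // the node being split
        for (List<Long[]> child : byValue.values()) {
            nodes += countNodes(child, depth + 1, maxLeafDocs);
        }
        return nodes;
    }

    public static void main(String[] args) {
        List<Long[]> docs = List.of(
            new Long[] { 1L, 11L }, new Long[] { 1L, 12L },
            new Long[] { 1L, 13L }, new Long[] { 2L, 11L }
        );
        // With maxLeafDocs = 3 the three docs under dim1 = 1 still fit in one leaf;
        // with maxLeafDocs = 2 they are split again on dim2.
        System.out.println(countNodes(docs, 0, 3) + " vs " + countNodes(docs, 0, 2));
    }
}
```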
metaOut.close(); + dataOut.close(); + docValuesConsumer.close(); + + StarTreeMetadata starTreeMetadata = new StarTreeMetadata( + "sf", + STAR_TREE, + mock(IndexInput.class), + VERSION_CURRENT, + builder.numStarTreeNodes, + getStarTreeDimensionNames(compositeField.getDimensionsOrder()), + compositeField.getMetrics(), + 500, + builder.numStarTreeDocs, + compositeField.getStarTreeConfig().maxLeafDocs(), + Set.of(), + getBuildMode(), + 0, + 31779 + ); + + validateStarTreeFileFormats( + builder.getRootNode(), + builder.getStarTreeDocuments().size(), + starTreeMetadata, + builder.getStarTreeDocuments() + ); } - private static StarTreeField getStarTreeFieldWithDocCount(int maxLeafDocs, boolean includeDocCountMetric) { + private StarTreeField getStarTreeFieldWithDocCount(int maxLeafDocs, boolean includeDocCountMetric) { Dimension d1 = new NumericDimension("field1"); Dimension d2 = new NumericDimension("field3"); Dimension d3 = new NumericDimension("field5"); @@ -2777,11 +3985,7 @@ private static StarTreeField getStarTreeFieldWithDocCount(int maxLeafDocs, boole m2 = new Metric("_doc_count", List.of(MetricStat.DOC_COUNT)); } List metrics = m2 == null ? List.of(m1) : List.of(m1, m2); - StarTreeFieldConfiguration c = new StarTreeFieldConfiguration( - maxLeafDocs, - new HashSet<>(), - StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP - ); + StarTreeFieldConfiguration c = new StarTreeFieldConfiguration(maxLeafDocs, new HashSet<>(), getBuildMode()); StarTreeField sf = new StarTreeField("sf", dims, metrics, c); return sf; } @@ -2795,23 +3999,24 @@ private void traverseStarTree(InMemoryTreeNode root, Map docIds = new ArrayList<>(); while ((starTreeNode = queue.poll()) != null) { - int dimensionId = starTreeNode.dimensionId; + int dimensionId = starTreeNode.getDimensionId(); if (dimensionId > currentDimensionId) { currentDimensionId = dimensionId; } // store aggregated document of the node - int docId = starTreeNode.aggregatedDocId; + int docId = starTreeNode.getAggregatedDocId(); Map map = dimValueToDocIdMap.getOrDefault(dimensionId, new HashMap<>()); - if (starTreeNode.nodeType == StarTreeNodeType.STAR.getValue()) { + if (starTreeNode.getNodeType() == StarTreeNodeType.STAR.getValue()) { map.put(Long.MAX_VALUE, docId); } else { - map.put(starTreeNode.dimensionValue, docId); + map.put(starTreeNode.getDimensionValue(), docId); } dimValueToDocIdMap.put(dimensionId, map); - if (starTreeNode.children != null && (!traverStarNodes || starTreeNode.nodeType == StarTreeNodeType.STAR.getValue())) { - Iterator childrenIterator = starTreeNode.children.values().iterator(); + if (starTreeNode.getChildren() != null + && (!traverStarNodes || starTreeNode.getNodeType() == StarTreeNodeType.STAR.getValue())) { + Iterator childrenIterator = starTreeNode.getChildren().values().iterator(); while (childrenIterator.hasNext()) { InMemoryTreeNode childNode = childrenIterator.next(); queue.add(childNode); @@ -2863,57 +4068,103 @@ public void testMergeFlow() throws IOException { metricsWithField.add(i); } + List metricsListValueCount = new ArrayList<>(1000); + List metricsWithFieldValueCount = new ArrayList<>(1000); + for (int i = 0; i < 1000; i++) { + metricsListValueCount.add((long) i); + metricsWithFieldValueCount.add(i); + } + Dimension d1 = new NumericDimension("field1"); Dimension d2 = new NumericDimension("field3"); Dimension d3 = new NumericDimension("field5"); Dimension d4 = new NumericDimension("field8"); // Dimension d5 = new NumericDimension("field5"); - Metric m1 = new Metric("field2", List.of(MetricStat.SUM)); + 
Metric m1 = new Metric("field2", List.of(MetricStat.SUM, MetricStat.AVG, MetricStat.VALUE_COUNT)); Metric m2 = new Metric("_doc_count", List.of(MetricStat.DOC_COUNT)); List dims = List.of(d1, d2, d3, d4); List metrics = List.of(m1, m2); - StarTreeFieldConfiguration c = new StarTreeFieldConfiguration( - 1, - new HashSet<>(), - StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP - ); - StarTreeField sf = new StarTreeField("sf", dims, metrics, c); + StarTreeFieldConfiguration c = new StarTreeFieldConfiguration(1, new HashSet<>(), getBuildMode()); + compositeField = new StarTreeField("sf", dims, metrics, c); SortedNumericDocValues d1sndv = getSortedNumericMock(dimList1, docsWithField1); SortedNumericDocValues d2sndv = getSortedNumericMock(dimList2, docsWithField2); SortedNumericDocValues d3sndv = getSortedNumericMock(dimList3, docsWithField3); SortedNumericDocValues d4sndv = getSortedNumericMock(dimList4, docsWithField4); SortedNumericDocValues m1sndv = getSortedNumericMock(metricsList, metricsWithField); + SortedNumericDocValues valucountsndv = getSortedNumericMock(metricsListValueCount, metricsWithFieldValueCount); SortedNumericDocValues m2sndv = DocValues.emptySortedNumeric(); - Map dimDocIdSetIterators = Map.of("field1", d1sndv, "field3", d2sndv, "field5", d3sndv, "field8", d4sndv); - Map metricDocIdSetIterators = Map.of("field2", m1sndv, "_doc_count", m2sndv); - StarTreeValues starTreeValues = new StarTreeValues(sf, null, dimDocIdSetIterators, metricDocIdSetIterators, getAttributes(1000)); + Map> dimDocIdSetIterators = Map.of( + "field1", + () -> d1sndv, + "field3", + () -> d2sndv, + "field5", + () -> d3sndv, + "field8", + () -> d4sndv + ); + + Map> metricDocIdSetIterators = Map.of( + "sf_field2_sum_metric", + () -> m1sndv, + "sf_field2_value_count_metric", + () -> valucountsndv, + "sf__doc_count_doc_count_metric", + () -> m2sndv + ); + + StarTreeValues starTreeValues = new StarTreeValues( + compositeField, + null, + dimDocIdSetIterators, + metricDocIdSetIterators, + getAttributes(1000), + null + ); SortedNumericDocValues f2d1sndv = getSortedNumericMock(dimList1, docsWithField1); SortedNumericDocValues f2d2sndv = getSortedNumericMock(dimList2, docsWithField2); SortedNumericDocValues f2d3sndv = getSortedNumericMock(dimList3, docsWithField3); SortedNumericDocValues f2d4sndv = getSortedNumericMock(dimList4, docsWithField4); SortedNumericDocValues f2m1sndv = getSortedNumericMock(metricsList, metricsWithField); + SortedNumericDocValues f2valucountsndv = getSortedNumericMock(metricsListValueCount, metricsWithFieldValueCount); SortedNumericDocValues f2m2sndv = DocValues.emptySortedNumeric(); - Map f2dimDocIdSetIterators = Map.of( + Map> f2dimDocIdSetIterators = Map.of( "field1", - f2d1sndv, + () -> f2d1sndv, "field3", - f2d2sndv, + () -> f2d2sndv, "field5", - f2d3sndv, + () -> f2d3sndv, "field8", - f2d4sndv + () -> f2d4sndv + ); + + Map> f2metricDocIdSetIterators = Map.of( + "sf_field2_sum_metric", + () -> f2m1sndv, + "sf_field2_value_count_metric", + () -> f2valucountsndv, + "sf__doc_count_doc_count_metric", + () -> f2m2sndv ); - Map f2metricDocIdSetIterators = Map.of("field2", f2m1sndv, "_doc_count", f2m2sndv); StarTreeValues starTreeValues2 = new StarTreeValues( - sf, + compositeField, null, f2dimDocIdSetIterators, f2metricDocIdSetIterators, - getAttributes(1000) + getAttributes(1000), + null ); - builder = getStarTreeBuilder(sf, writeState, mapperService); + this.docValuesConsumer = LuceneDocValuesConsumerFactory.getDocValuesConsumerForCompositeCodec( + writeState, + 
Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC, + Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite99DocValuesFormat.META_DOC_VALUES_CODEC, + Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION + ); + builder = getStarTreeBuilder(metaOut, dataOut, compositeField, writeState, mapperService); Iterator starTreeDocumentIterator = builder.mergeStarTrees(List.of(starTreeValues, starTreeValues2)); /** [0, 0, 0, 0] | [0.0, 2] @@ -2929,10 +4180,38 @@ public void testMergeFlow() throws IOException { assertEquals(starTreeDocument.dimensions[0] * 20.0, starTreeDocument.metrics[0]); assertEquals(2L, starTreeDocument.metrics[1]); } - builder.build(starTreeDocumentIterator); + builder.build(starTreeDocumentIterator, new AtomicInteger(), docValuesConsumer); // Validate the star tree structure validateStarTree(builder.getRootNode(), 4, 1, builder.getStarTreeDocuments()); + + metaOut.close(); + dataOut.close(); + docValuesConsumer.close(); + + StarTreeMetadata starTreeMetadata = new StarTreeMetadata( + "sf", + STAR_TREE, + mock(IndexInput.class), + VERSION_CURRENT, + builder.numStarTreeNodes, + getStarTreeDimensionNames(compositeField.getDimensionsOrder()), + compositeField.getMetrics(), + 1000, + builder.numStarTreeDocs, + compositeField.getStarTreeConfig().maxLeafDocs(), + Set.of(), + getBuildMode(), + 0, + 132165 + ); + + validateStarTreeFileFormats( + builder.getRootNode(), + builder.getStarTreeDocuments().size(), + starTreeMetadata, + builder.getStarTreeDocuments() + ); } private void validateStarTree( @@ -2951,36 +4230,37 @@ private void validateStarTree( assertNotNull(node); // assert dimensions - if (node.dimensionId != StarTreeUtils.ALL) { - assertTrue(node.dimensionId >= 0 && node.dimensionId < totalDimensions); + if (node.getDimensionId() != StarTreeUtils.ALL) { + assertTrue(node.getDimensionId() >= 0 && node.getDimensionId() < totalDimensions); } - if (node.children != null && !node.children.isEmpty()) { - assertEquals(node.dimensionId + 1, node.childDimensionId); - assertTrue(node.childDimensionId < totalDimensions); + + if (node.getChildren() != null && !node.getChildren().isEmpty()) { + assertEquals(node.getDimensionId() + 1, node.getChildDimensionId()); + assertTrue(node.getChildDimensionId() < totalDimensions); InMemoryTreeNode starNode = null; Object[] nonStarNodeCumulativeMetrics = getMetrics(starTreeDocuments); - for (Map.Entry entry : node.children.entrySet()) { + for (Map.Entry entry : node.getChildren().entrySet()) { Long childDimensionValue = entry.getKey(); InMemoryTreeNode child = entry.getValue(); Object[] currMetrics = getMetrics(starTreeDocuments); - if (child.nodeType != StarTreeNodeType.STAR.getValue()) { + if (child.getNodeType() != StarTreeNodeType.STAR.getValue()) { // Validate dimension values in documents - for (int i = child.startDocId; i < child.endDocId; i++) { + for (int i = child.getStartDocId(); i < child.getEndDocId(); i++) { StarTreeDocument doc = starTreeDocuments.get(i); int j = 0; addMetrics(doc, currMetrics, j); - if (child.nodeType != StarTreeNodeType.STAR.getValue()) { - Long dimension = doc.dimensions[child.dimensionId]; + if (child.getNodeType() != StarTreeNodeType.STAR.getValue()) { + Long dimension = doc.dimensions[child.getDimensionId()]; assertEquals(childDimensionValue, dimension); if (dimension != null) { - assertEquals(child.dimensionValue, (long) dimension); + assertEquals(child.getDimensionValue(), (long) dimension); } else { // TODO : fix this ? 
- assertEquals(child.dimensionValue, StarTreeUtils.ALL); + assertEquals(child.getDimensionValue(), StarTreeUtils.ALL); } } } - Object[] aggregatedMetrics = starTreeDocuments.get(child.aggregatedDocId).metrics; + Object[] aggregatedMetrics = starTreeDocuments.get(child.getAggregatedDocId()).metrics; int j = 0; for (Object metric : currMetrics) { /* @@ -3006,13 +4286,13 @@ private void validateStarTree( // Add star node to queue if (starNode != null) { Object[] starNodeMetrics = getMetrics(starTreeDocuments); - for (int i = starNode.startDocId; i < starNode.endDocId; i++) { + for (int i = starNode.getStartDocId(); i < starNode.getEndDocId(); i++) { StarTreeDocument doc = starTreeDocuments.get(i); int j = 0; addMetrics(doc, starNodeMetrics, j); } int j = 0; - Object[] aggregatedMetrics = starTreeDocuments.get(starNode.aggregatedDocId).metrics; + Object[] aggregatedMetrics = starTreeDocuments.get(starNode.getAggregatedDocId()).metrics; for (Object nonStarNodeCumulativeMetric : nonStarNodeCumulativeMetrics) { assertEquals(nonStarNodeCumulativeMetric, starNodeMetrics[j]); assertEquals(starNodeMetrics[j], aggregatedMetrics[j]); @@ -3032,20 +4312,20 @@ private void validateStarTree( j++; } - assertEquals(-1L, starNode.dimensionValue); + assertEquals(-1L, starNode.getDimensionValue()); queue.offer(new Object[] { starNode, true }); } } else { - assertTrue(node.endDocId - node.startDocId <= maxLeafDocuments); + assertTrue(node.getEndDocId() - node.getStartDocId() <= maxLeafDocuments); } if (currentIsStarNode) { StarTreeDocument prevDoc = null; int docCount = 0; - int docId = node.startDocId; - int dimensionId = node.dimensionId; + int docId = node.getStartDocId(); + int dimensionId = node.getDimensionId(); - while (docId < node.endDocId) { + while (docId < node.getEndDocId()) { StarTreeDocument currentDoc = starTreeDocuments.get(docId); docCount++; @@ -3061,7 +4341,7 @@ private void validateStarTree( } // Verify that the number of generated star documents matches the range in the star node - assertEquals(node.endDocId - node.startDocId, docCount); + assertEquals(node.getEndDocId() - node.getStartDocId(), docCount); } } } @@ -3111,20 +4391,16 @@ private int compareDocuments(StarTreeDocument doc1, StarTreeDocument doc2, int s } Map getAttributes(int numSegmentDocs) { - return Map.of(String.valueOf(NUM_SEGMENT_DOCS), String.valueOf(numSegmentDocs)); + return Map.of(CompositeIndexConstants.SEGMENT_DOCS_COUNT, String.valueOf(numSegmentDocs)); } - private static StarTreeField getStarTreeField(MetricStat count) { + private StarTreeField getStarTreeField(MetricStat count) { Dimension d1 = new NumericDimension("field1"); Dimension d2 = new NumericDimension("field3"); Metric m1 = new Metric("field2", List.of(count)); List dims = List.of(d1, d2); List metrics = List.of(m1); - StarTreeFieldConfiguration c = new StarTreeFieldConfiguration( - 1000, - new HashSet<>(), - StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP - ); + StarTreeFieldConfiguration c = new StarTreeFieldConfiguration(1000, new HashSet<>(), getBuildMode()); return new StarTreeField("sf", dims, metrics, c); } @@ -3179,6 +4455,9 @@ public void tearDown() throws Exception { if (builder != null) { builder.close(); } + docValuesConsumer.close(); + metaOut.close(); + dataOut.close(); directory.close(); } } diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilderTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilderTests.java index 
51ebc02ea8243..8f750b09c75ee 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilderTests.java @@ -8,21 +8,24 @@ package org.opensearch.index.compositeindex.datacube.startree.builder; +import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.codecs.lucene99.Lucene99Codec; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.Version; import org.opensearch.common.settings.Settings; -import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; +import org.opensearch.index.codec.composite.composite99.Composite99DocValuesFormat; import org.opensearch.index.compositeindex.datacube.Dimension; import org.opensearch.index.compositeindex.datacube.Metric; import org.opensearch.index.compositeindex.datacube.MetricStat; @@ -31,8 +34,8 @@ import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; +import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues; import org.opensearch.index.compositeindex.datacube.startree.utils.SequentialDocValuesIterator; -import org.opensearch.index.fielddata.IndexNumericFieldData; import org.opensearch.index.mapper.ContentPath; import org.opensearch.index.mapper.DocumentMapper; import org.opensearch.index.mapper.Mapper; @@ -51,6 +54,7 @@ import java.util.Map; import java.util.Set; import java.util.UUID; +import java.util.concurrent.atomic.AtomicInteger; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -75,9 +79,12 @@ public class BaseStarTreeBuilderTests extends OpenSearchTestCase { private static List metrics; private static Directory directory; private static FieldInfo[] fieldsInfo; - private static SegmentWriteState state; + private static SegmentWriteState writeState; private static StarTreeField starTreeField; + private static IndexOutput dataOut; + private static IndexOutput metaOut; + @BeforeClass public static void setup() throws IOException { @@ -138,7 +145,21 @@ public static void setup() throws IOException { fieldProducerMap.put(fields.get(i), docValuesProducer); } FieldInfos fieldInfos = new FieldInfos(fieldsInfo); - state = new SegmentWriteState(InfoStream.getDefault(), segmentInfo.dir, segmentInfo, fieldInfos, null, newIOContext(random())); + writeState = new SegmentWriteState(InfoStream.getDefault(), segmentInfo.dir, segmentInfo, fieldInfos, null, newIOContext(random())); + + String dataFileName = IndexFileNames.segmentFileName( + writeState.segmentInfo.name, + writeState.segmentSuffix, + Composite99DocValuesFormat.DATA_EXTENSION + ); + dataOut = writeState.directory.createOutput(dataFileName, writeState.context); + + String 
metaFileName = IndexFileNames.segmentFileName( + writeState.segmentInfo.name, + writeState.segmentSuffix, + Composite99DocValuesFormat.META_EXTENSION + ); + metaOut = writeState.directory.createOutput(metaFileName, writeState.context); mapperService = mock(MapperService.class); DocumentMapper documentMapper = mock(DocumentMapper.class); @@ -157,9 +178,13 @@ public static void setup() throws IOException { ); when(documentMapper.mappers()).thenReturn(fieldMappers); - builder = new BaseStarTreeBuilder(starTreeField, state, mapperService) { + builder = new BaseStarTreeBuilder(metaOut, dataOut, starTreeField, writeState, mapperService) { @Override - public void build(List starTreeValuesSubs) throws IOException {} + public void build( + List starTreeValuesSubs, + AtomicInteger fieldNumberAcrossStarTrees, + DocValuesConsumer starTreeDocValuesConsumer + ) throws IOException {} @Override public void appendStarTreeDocument(StarTreeDocument starTreeDocument) throws IOException {} @@ -203,8 +228,8 @@ Iterator mergeStarTrees(List starTreeValues) t public void test_generateMetricAggregatorInfos() throws IOException { List metricAggregatorInfos = builder.generateMetricAggregatorInfos(mapperService); List expectedMetricAggregatorInfos = List.of( - new MetricAggregatorInfo(MetricStat.SUM, "field2", starTreeField.getName(), IndexNumericFieldData.NumericType.DOUBLE), - new MetricAggregatorInfo(MetricStat.SUM, "field4", starTreeField.getName(), IndexNumericFieldData.NumericType.DOUBLE) + new MetricAggregatorInfo(MetricStat.SUM, "field2", starTreeField.getName(), NumberFieldMapper.NumberType.DOUBLE), + new MetricAggregatorInfo(MetricStat.SUM, "field4", starTreeField.getName(), NumberFieldMapper.NumberType.DOUBLE) ); assertEquals(metricAggregatorInfos, expectedMetricAggregatorInfos); } @@ -224,6 +249,8 @@ public void test_reduceStarTreeDocuments() { @Override public void tearDown() throws Exception { super.tearDown(); + dataOut.close(); + metaOut.close(); directory.close(); } } diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilderTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilderTests.java index 92382b78f60c6..496558dbc2e83 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilderTests.java @@ -9,7 +9,9 @@ package org.opensearch.index.compositeindex.datacube.startree.builder; import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.store.IndexOutput; import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; import org.opensearch.index.mapper.MapperService; import java.io.IOException; @@ -17,10 +19,18 @@ public class OffHeapStarTreeBuilderTests extends AbstractStarTreeBuilderTests { @Override public BaseStarTreeBuilder getStarTreeBuilder( + IndexOutput metaOut, + IndexOutput dataOut, StarTreeField starTreeField, SegmentWriteState segmentWriteState, MapperService mapperService ) throws IOException { - return new OffHeapStarTreeBuilder(starTreeField, segmentWriteState, mapperService); + return new OffHeapStarTreeBuilder(metaOut, dataOut, starTreeField, segmentWriteState, mapperService); } + + @Override + StarTreeFieldConfiguration.StarTreeBuildMode getBuildMode() { + return 
StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP; + } + } diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilderTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilderTests.java index aed08b7727be7..55cf3bde3cea7 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilderTests.java @@ -9,16 +9,29 @@ package org.opensearch.index.compositeindex.datacube.startree.builder; import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.store.IndexOutput; import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; import org.opensearch.index.mapper.MapperService; +import java.io.IOException; + public class OnHeapStarTreeBuilderTests extends AbstractStarTreeBuilderTests { + @Override public BaseStarTreeBuilder getStarTreeBuilder( + IndexOutput metaOut, + IndexOutput dataOut, StarTreeField starTreeField, SegmentWriteState segmentWriteState, MapperService mapperService - ) { - return new OnHeapStarTreeBuilder(starTreeField, segmentWriteState, mapperService); + ) throws IOException { + return new OnHeapStarTreeBuilder(metaOut, dataOut, starTreeField, segmentWriteState, mapperService); + } + + @Override + StarTreeFieldConfiguration.StarTreeBuildMode getBuildMode() { + return StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP; } + } diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilderTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilderTests.java index 828bddfb8aa6e..94fe148eab6b3 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreesBuilderTests.java @@ -8,6 +8,7 @@ package org.opensearch.index.compositeindex.datacube.startree.builder; +import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.codecs.lucene99.Lucene99Codec; import org.apache.lucene.index.FieldInfo; @@ -15,6 +16,7 @@ import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.Version; import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; @@ -31,6 +33,7 @@ import java.util.Map; import java.util.Set; import java.util.UUID; +import java.util.concurrent.atomic.AtomicInteger; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.verifyNoInteractions; @@ -45,9 +48,13 @@ public class StarTreesBuilderTests extends OpenSearchTestCase { private StarTreeField starTreeField; private Map fieldProducerMap; private Directory directory; + private IndexOutput dataOut; + private IndexOutput metaOut; public void setUp() throws Exception { super.setUp(); + metaOut = mock(IndexOutput.class); + dataOut = mock(IndexOutput.class); mapperService = mock(MapperService.class); directory = newFSDirectory(createTempDir()); SegmentInfo segmentInfo = new 
SegmentInfo( @@ -88,19 +95,29 @@ public void setUp() throws Exception { public void test_buildWithNoStarTreeFields() throws IOException { when(mapperService.getCompositeFieldTypes()).thenReturn(new HashSet<>()); - StarTreesBuilder starTreesBuilder = new StarTreesBuilder(segmentWriteState, mapperService); - starTreesBuilder.build(fieldProducerMap); + StarTreesBuilder starTreesBuilder = new StarTreesBuilder(segmentWriteState, mapperService, new AtomicInteger()); + starTreesBuilder.build(metaOut, dataOut, fieldProducerMap, mock(DocValuesConsumer.class)); verifyNoInteractions(docValuesProducer); } public void test_getStarTreeBuilder() throws IOException { when(mapperService.getCompositeFieldTypes()).thenReturn(Set.of(starTreeFieldType)); - StarTreesBuilder starTreesBuilder = new StarTreesBuilder(segmentWriteState, mapperService); - StarTreeBuilder starTreeBuilder = starTreesBuilder.getStarTreeBuilder(starTreeField, segmentWriteState, mapperService); + StarTreesBuilder starTreesBuilder = new StarTreesBuilder(segmentWriteState, mapperService, new AtomicInteger()); + StarTreeBuilder starTreeBuilder = starTreesBuilder.getStarTreeBuilder(metaOut, dataOut, starTreeField, segmentWriteState, mapperService); assertTrue(starTreeBuilder instanceof OnHeapStarTreeBuilder); } + public void test_getStarTreeBuilder_illegalArgument() throws IOException { + when(mapperService.getCompositeFieldTypes()).thenReturn(Set.of(starTreeFieldType)); + StarTreeFieldConfiguration starTreeFieldConfiguration = new StarTreeFieldConfiguration(1, new HashSet<>(), StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP); + StarTreeField starTreeField = new StarTreeField("star_tree", new ArrayList<>(), new ArrayList<>(), starTreeFieldConfiguration); + StarTreesBuilder starTreesBuilder = new StarTreesBuilder(segmentWriteState, mapperService, new AtomicInteger()); + StarTreeBuilder starTreeBuilder = starTreesBuilder.getStarTreeBuilder(metaOut, dataOut, starTreeField, segmentWriteState, mapperService); + assertTrue(starTreeBuilder instanceof OffHeapStarTreeBuilder); + starTreeBuilder.close(); + } + public void test_closeWithNoStarTreeFields() throws IOException { StarTreeFieldConfiguration starTreeFieldConfiguration = new StarTreeFieldConfiguration( 1, @@ -110,7 +127,7 @@ public void test_closeWithNoStarTreeFields() throws IOException { StarTreeField starTreeField = new StarTreeField("star_tree", new ArrayList<>(), new ArrayList<>(), starTreeFieldConfiguration); starTreeFieldType = new StarTreeMapper.StarTreeFieldType("star_tree", starTreeField); when(mapperService.getCompositeFieldTypes()).thenReturn(Set.of(starTreeFieldType)); - StarTreesBuilder starTreesBuilder = new StarTreesBuilder(segmentWriteState, mapperService); + StarTreesBuilder starTreesBuilder = new StarTreesBuilder(segmentWriteState, mapperService, new AtomicInteger()); starTreesBuilder.close(); verifyNoInteractions(docValuesProducer); @@ -119,6 +136,8 @@ public void test_closeWithNoStarTreeFields() throws IOException { @Override public void tearDown() throws Exception { super.tearDown(); + metaOut.close(); + dataOut.close(); directory.close(); } } diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java index 4653ac8b08198..4d2aa5eaf78cf 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java 
+++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java @@ -23,7 +23,6 @@ import java.io.IOException; import java.util.ArrayDeque; -import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.Map; @@ -121,8 +120,8 @@ public void test_starTreeSearch() throws IOException { assertNotNull(inMemoryTreeNode); for (int i = 0; i < maxLevels - 1; i++) { - InMemoryTreeNode randomChildNode = randomFrom(inMemoryTreeNode.children.values()); - StarTreeNode randomStarTreeChildNode = starTreeNode.getChildForDimensionValue(randomChildNode.dimensionValue); + InMemoryTreeNode randomChildNode = randomFrom(inMemoryTreeNode.getChildren().values()); + StarTreeNode randomStarTreeChildNode = starTreeNode.getChildForDimensionValue(randomChildNode.getDimensionValue()); assertNotNull(randomStarTreeChildNode); assertStarTreeNode(randomStarTreeChildNode, randomChildNode); @@ -135,18 +134,18 @@ public void test_starTreeSearch() throws IOException { } private void assertStarTreeNode(StarTreeNode starTreeNode, InMemoryTreeNode treeNode) throws IOException { - assertEquals(starTreeNode.getDimensionId(), treeNode.dimensionId); - assertEquals(starTreeNode.getDimensionValue(), treeNode.dimensionValue); - assertEquals(starTreeNode.getStartDocId(), treeNode.startDocId); - assertEquals(starTreeNode.getEndDocId(), treeNode.endDocId); - assertEquals(starTreeNode.getChildDimensionId(), treeNode.childDimensionId); - assertEquals(starTreeNode.getAggregatedDocId(), treeNode.aggregatedDocId); - assertEquals(starTreeNode.getStarTreeNodeType(), treeNode.nodeType); + assertEquals(starTreeNode.getDimensionId(), treeNode.getDimensionId()); + assertEquals(starTreeNode.getDimensionValue(), treeNode.getDimensionValue()); + assertEquals(starTreeNode.getStartDocId(), treeNode.getStartDocId()); + assertEquals(starTreeNode.getEndDocId(), treeNode.getEndDocId()); + assertEquals(starTreeNode.getChildDimensionId(), treeNode.getChildDimensionId()); + assertEquals(starTreeNode.getAggregatedDocId(), treeNode.getAggregatedDocId()); + assertEquals(starTreeNode.getStarTreeNodeType(), treeNode.getNodeType()); if (starTreeNode.getChildDimensionId() != -1) { assertFalse(starTreeNode.isLeaf()); - if (treeNode.children != null) { - assertEquals(starTreeNode.getNumChildren(), treeNode.children.values().size()); + if (treeNode.getChildren() != null) { + assertEquals(starTreeNode.getNumChildren(), treeNode.getChildren().values().size()); } } else { assertTrue(starTreeNode.isLeaf()); @@ -156,16 +155,11 @@ private void assertStarTreeNode(StarTreeNode starTreeNode, InMemoryTreeNode tree public InMemoryTreeNode generateSampleTree(Map inMemoryTreeNodeMap) { // Create the root node - InMemoryTreeNode root = new InMemoryTreeNode(); - root.dimensionId = 0; - root.startDocId = randomInt(); - root.endDocId = randomInt(); - root.childDimensionId = 1; - root.aggregatedDocId = randomInt(); - root.nodeType = (byte) 0; - root.children = new HashMap<>(); + InMemoryTreeNode root = new InMemoryTreeNode(0, randomInt(), randomInt(), (byte) 0, -1); + root.setChildDimensionId(1); + root.setAggregatedDocId(randomInt()); - inMemoryTreeNodeMap.put(root.dimensionValue, root); + inMemoryTreeNodeMap.put(root.getDimensionValue(), root); // Generate the tree recursively generateTreeRecursively(root, 1, inMemoryTreeNodeMap); @@ -181,19 +175,14 @@ private void generateTreeRecursively(InMemoryTreeNode parent, int currentLevel, int numChildren = randomIntBetween(1, 10); for (int i = 0; 
i < numChildren; i++) { - InMemoryTreeNode child = new InMemoryTreeNode(); dimensionValue++; - child.dimensionId = currentLevel; - child.dimensionValue = dimensionValue; // Assign a unique dimension value for each child - child.startDocId = randomInt(); - child.endDocId = randomInt(); - child.childDimensionId = (currentLevel == this.maxLevels - 1) ? -1 : (currentLevel + 1); - child.aggregatedDocId = randomInt(); - child.nodeType = (byte) 0; - child.children = new HashMap<>(); - - parent.children.put(child.dimensionValue, child); - inMemoryTreeNodeMap.put(child.dimensionValue, child); + InMemoryTreeNode child = new InMemoryTreeNode(currentLevel, randomInt(), randomInt(), (byte) 0, dimensionValue); + + child.setChildDimensionId((currentLevel == this.maxLevels - 1) ? -1 : (currentLevel + 1)); + child.setAggregatedDocId(randomInt()); + + parent.addChildNode(child, child.getDimensionValue()); + inMemoryTreeNodeMap.put(child.getDimensionValue(), child); generateTreeRecursively(child, currentLevel + 1, inMemoryTreeNodeMap); } diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadataTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadataTests.java index 62bd74cc0b3fc..2127eeb9636d3 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadataTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadataTests.java @@ -31,8 +31,8 @@ import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; import org.opensearch.index.compositeindex.datacube.startree.fileformats.StarTreeWriter; -import org.opensearch.index.fielddata.IndexNumericFieldData; import org.opensearch.index.mapper.CompositeMappedFieldType; +import org.opensearch.index.mapper.NumberFieldMapper; import org.opensearch.test.OpenSearchTestCase; import org.junit.Before; @@ -62,6 +62,7 @@ public class StarTreeMetadataTests extends OpenSearchTestCase { private List metrics; private List metricAggregatorInfos = new ArrayList<>(); private int segmentDocumentCount; + private int numStarTreeDocs; private long dataFilePointer; private long dataFileLength; @@ -136,7 +137,7 @@ public void test_starTreeMetadata() throws IOException { metricType, metric.getField(), starTreeField.getName(), - IndexNumericFieldData.NumericType.DOUBLE + NumberFieldMapper.NumberType.DOUBLE ); metricAggregatorInfos.add(metricAggregatorInfo); } @@ -145,6 +146,7 @@ public void test_starTreeMetadata() throws IOException { dataFileLength = randomNonNegativeLong(); dataFilePointer = randomNonNegativeLong(); segmentDocumentCount = randomInt(Integer.MAX_VALUE); + numStarTreeDocs = randomInt(Integer.MAX_VALUE); metaOut = directory.createOutput("star-tree-metadata", IOContext.DEFAULT); StarTreeWriter starTreeWriter = new StarTreeWriter(); int numberOfNodes = randomInt(Integer.MAX_VALUE); @@ -154,6 +156,7 @@ public void test_starTreeMetadata() throws IOException { metricAggregatorInfos, numberOfNodes, segmentDocumentCount, + numStarTreeDocs, dataFilePointer, dataFileLength ); diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNodeTests.java 
b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNodeTests.java index 6f24728c24f30..08815d5ef55f5 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNodeTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNodeTests.java @@ -12,18 +12,18 @@ import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeTestUtils; import org.opensearch.index.compositeindex.datacube.startree.fileformats.StarTreeWriter; import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetadata; import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode; import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeFactory; -import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils; import org.opensearch.test.OpenSearchTestCase; import org.junit.Before; import java.io.IOException; -import java.util.HashMap; import java.util.Iterator; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.ALL; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -35,6 +35,7 @@ public class FixedLengthStarTreeNodeTests extends OpenSearchTestCase { InMemoryTreeNode node; InMemoryTreeNode starChild; InMemoryTreeNode nullChild; + InMemoryTreeNode childWithMinus1; FixedLengthStarTreeNode starTreeNode; @Before @@ -44,54 +45,42 @@ public void setup() throws IOException { dataOut = directory.createOutput("star-tree-data", IOContext.DEFAULT); StarTreeWriter starTreeWriter = new StarTreeWriter(); - node = new InMemoryTreeNode(); - node.dimensionId = 0; - node.startDocId = randomInt(); - node.endDocId = randomInt(); - node.childDimensionId = 1; - node.aggregatedDocId = randomInt(); - node.nodeType = randomFrom((byte) 0, (byte) -1, (byte) 2); - node.children = new HashMap<>(); - - starChild = new InMemoryTreeNode(); - starChild.dimensionId = node.dimensionId + 1; - starChild.dimensionValue = -1; - starChild.startDocId = randomInt(); - starChild.endDocId = randomInt(); - starChild.childDimensionId = -1; - starChild.aggregatedDocId = randomInt(); - starChild.nodeType = (byte) -2; - starChild.children = new HashMap<>(); - node.children.put(-1L, starChild); - - nullChild = new InMemoryTreeNode(); - nullChild.dimensionId = node.dimensionId + 1; - nullChild.dimensionValue = -1; - nullChild.startDocId = randomInt(); - nullChild.endDocId = randomInt(); - nullChild.childDimensionId = -1; - nullChild.aggregatedDocId = randomInt(); - nullChild.nodeType = (byte) -1; - nullChild.children = new HashMap<>(); - node.children.put(null, nullChild); + node = new InMemoryTreeNode(0, randomInt(), randomInt(), randomFrom((byte) 0, (byte) -1, (byte) 1), -1); + node.setChildDimensionId(1); + node.setAggregatedDocId(randomInt()); + + starChild = new InMemoryTreeNode(node.getDimensionId() + 1, randomInt(), randomInt(), (byte) -1, -1); + starChild.setChildDimensionId(-1); + starChild.setAggregatedDocId(randomInt()); + node.addChildNode(starChild, (long) ALL); + + childWithMinus1 = new InMemoryTreeNode(node.getDimensionId() + 1, randomInt(), randomInt(), (byte) 0, -1); + childWithMinus1.setChildDimensionId(-1); + childWithMinus1.setAggregatedDocId(randomInt()); + 
node.addChildNode(childWithMinus1, -1L); for (int i = 1; i < randomIntBetween(2, 5); i++) { - InMemoryTreeNode child = new InMemoryTreeNode(); - child.dimensionId = node.dimensionId + 1; - child.dimensionValue = node.dimensionValue + i; // Assign a unique dimension value for each child - child.startDocId = randomInt(); - child.endDocId = randomInt(); - child.childDimensionId = -1; - child.aggregatedDocId = randomInt(); - child.nodeType = (byte) 0; - child.children = new HashMap<>(); - node.children.put(child.dimensionValue, child); + InMemoryTreeNode child = new InMemoryTreeNode( + node.getDimensionId() + 1, + randomInt(), + randomInt(), + (byte) 0, + node.getDimensionValue() + i + ); + child.setChildDimensionId(-1); + child.setAggregatedDocId(randomInt()); + node.addChildNode(child, child.getDimensionValue()); } - long starTreeDataLength = starTreeWriter.writeStarTree(dataOut, node, 1 + node.children.size(), "star-tree"); + nullChild = new InMemoryTreeNode(node.getDimensionId() + 1, randomInt(), randomInt(), (byte) 1, -1); + nullChild.setChildDimensionId(-1); + nullChild.setAggregatedDocId(randomInt()); + node.addChildNode(nullChild, null); + + long starTreeDataLength = starTreeWriter.writeStarTree(dataOut, node, 2 + node.getChildren().size(), "star-tree"); // asserting on the actual length of the star tree data file - assertEquals(starTreeDataLength, 33L * node.children.size() + 33); + assertEquals(starTreeDataLength, 33L * node.getChildren().size() + 2 * 33); dataOut.close(); dataIn = directory.openInput("star-tree-data", IOContext.READONCE); @@ -119,27 +108,27 @@ public void testSerializableDataSize() { } public void testGetDimensionId() throws IOException { - assertEquals(node.dimensionId, starTreeNode.getDimensionId()); + assertEquals(node.getDimensionId(), starTreeNode.getDimensionId()); } public void testGetDimensionValue() throws IOException { - assertEquals(node.dimensionValue, starTreeNode.getDimensionValue()); + assertEquals(node.getDimensionValue(), starTreeNode.getDimensionValue()); } public void testGetStartDocId() throws IOException { - assertEquals(node.startDocId, starTreeNode.getStartDocId()); + assertEquals(node.getStartDocId(), starTreeNode.getStartDocId()); } public void testGetEndDocId() throws IOException { - assertEquals(node.endDocId, starTreeNode.getEndDocId()); + assertEquals(node.getEndDocId(), starTreeNode.getEndDocId()); } public void testGetAggregatedDocId() throws IOException { - assertEquals(node.aggregatedDocId, starTreeNode.getAggregatedDocId()); + assertEquals(node.getAggregatedDocId(), starTreeNode.getAggregatedDocId()); } public void testGetNumChildren() throws IOException { - assertEquals(node.children.size(), starTreeNode.getNumChildren()); + assertEquals(node.getChildren().size(), starTreeNode.getNumChildren() - 1); } public void testIsLeaf() { @@ -151,8 +140,7 @@ public void testGetStarTreeNodeType() throws IOException { } public void testGetChildForDimensionValue() throws IOException { - // TODO: Add a test to verify children with star node, null node and default node with default dimension value -1 - long dimensionValue = randomIntBetween(0, node.children.size() - 3); + long dimensionValue = randomIntBetween(-1, node.getChildren().size() - 3); FixedLengthStarTreeNode childNode = (FixedLengthStarTreeNode) starTreeNode.getChildForDimensionValue(dimensionValue); assertNotNull(childNode); assertEquals(dimensionValue, childNode.getDimensionValue()); @@ -169,11 +157,31 @@ public void testGetChildrenIterator() throws IOException { 
assertEquals(starTreeNode.getNumChildren(), count); } + public void testChildrenOrder() throws IOException { + Iterator iterator = starTreeNode.getChildrenIterator(); + int count = 0; + while (iterator.hasNext()) { + FixedLengthStarTreeNode child = iterator.next(); + count++; + if (count == 1) { + StarTreeTestUtils.assertStarTreeNode(child, starChild); + } else if (count == 2) { + StarTreeTestUtils.assertStarTreeNode(child, childWithMinus1); + } else if (count == starTreeNode.getNumChildren()) { + StarTreeTestUtils.assertStarTreeNode(child, nullChild); + } else { + StarTreeTestUtils.assertStarTreeNode(child, node.getChildren().get(child.getDimensionValue())); + } + assertNotNull(child); + } + assertEquals(starTreeNode.getNumChildren(), count); + } + public void testGetChildForStarNode() throws IOException { // Assuming the first child is a star node in our test data FixedLengthStarTreeNode starNode = (FixedLengthStarTreeNode) starTreeNode.getChildStarNode(); assertNotNull(starNode); - assertEquals(StarTreeUtils.ALL, starNode.getDimensionValue()); + assertEquals(ALL, starNode.getDimensionValue()); } public void testGetChildForNullNode() throws IOException { @@ -183,7 +191,7 @@ public void testGetChildForNullNode() throws IOException { public void testGetChildForInvalidDimensionValue() throws IOException { long invalidDimensionValue = Long.MAX_VALUE; - assertThrows(AssertionError.class, () -> starTreeNode.getChildForDimensionValue(invalidDimensionValue)); + assertNull(starTreeNode.getChildForDimensionValue(invalidDimensionValue)); } public void testOnlyRootNodePresent() throws IOException { @@ -193,14 +201,9 @@ public void testOnlyRootNodePresent() throws IOException { IndexOutput dataOut = directory.createOutput("star-tree-data-1", IOContext.DEFAULT); StarTreeWriter starTreeWriter = new StarTreeWriter(); - InMemoryTreeNode node = new InMemoryTreeNode(); - node.dimensionId = 0; - node.startDocId = randomInt(); - node.endDocId = randomInt(); - node.childDimensionId = 1; - node.aggregatedDocId = randomInt(); - node.nodeType = randomFrom((byte) 0, (byte) -1, (byte) 2); - node.children = new HashMap<>(); + InMemoryTreeNode node = new InMemoryTreeNode(0, randomInt(), randomInt(), randomFrom((byte) 0, (byte) -1, (byte) 2), -1); + node.setChildDimensionId(1); + node.setAggregatedDocId(randomInt()); long starTreeDataLength = starTreeWriter.writeStarTree(dataOut, node, 1, "star-tree"); diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeTypeTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeTypeTests.java index 81fb620da5af3..f1034e5d1d033 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeTypeTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeTypeTests.java @@ -14,12 +14,12 @@ public class StarTreeNodeTypeTests extends OpenSearchTestCase { public void testStarNodeType() { assertEquals("star", StarTreeNodeType.STAR.getName()); - assertEquals((byte) -2, StarTreeNodeType.STAR.getValue()); + assertEquals((byte) -1, StarTreeNodeType.STAR.getValue()); } public void testNullNodeType() { assertEquals("null", StarTreeNodeType.NULL.getName()); - assertEquals((byte) -1, StarTreeNodeType.NULL.getValue()); + assertEquals((byte) 1, StarTreeNodeType.NULL.getValue()); } public void testDefaultNodeType() { @@ -28,20 +28,20 @@ public void testDefaultNodeType() { } public void testFromValue() { - 
assertEquals(StarTreeNodeType.STAR, StarTreeNodeType.fromValue((byte) -2)); - assertEquals(StarTreeNodeType.NULL, StarTreeNodeType.fromValue((byte) -1)); + assertEquals(StarTreeNodeType.STAR, StarTreeNodeType.fromValue((byte) -1)); + assertEquals(StarTreeNodeType.NULL, StarTreeNodeType.fromValue((byte) 1)); assertEquals(StarTreeNodeType.DEFAULT, StarTreeNodeType.fromValue((byte) 0)); } public void testFromValueInvalid() { - IllegalStateException exception = expectThrows(IllegalStateException.class, () -> StarTreeNodeType.fromValue((byte) 1)); - assertEquals("Unrecognized value byte to determine star-tree node type: [1]", exception.getMessage()); + IllegalStateException exception = expectThrows(IllegalStateException.class, () -> StarTreeNodeType.fromValue((byte) 2)); + assertEquals("Unrecognized value byte to determine star-tree node type: [2]", exception.getMessage()); } public void testEnumValues() { StarTreeNodeType[] values = StarTreeNodeType.values(); assertEquals(3, values.length); - assertArrayEquals(new StarTreeNodeType[] { StarTreeNodeType.STAR, StarTreeNodeType.NULL, StarTreeNodeType.DEFAULT }, values); + assertArrayEquals(new StarTreeNodeType[] { StarTreeNodeType.STAR, StarTreeNodeType.DEFAULT, StarTreeNodeType.NULL }, values); } public void testEnumValueOf() { diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtilsTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtilsTests.java index a1d341615969e..9cca0b04e9ea4 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtilsTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtilsTests.java @@ -52,7 +52,7 @@ public void testGetFieldInfoList() { public void testGetFieldInfo() { String fieldName = UUID.randomUUID().toString(); int fieldNumber = randomInt(); - assertFieldInfos(StarTreeUtils.getFieldInfo(fieldName, fieldNumber), fieldName, fieldNumber); + assertFieldInfos(StarTreeUtils.getFieldInfo(fieldName, DocValuesType.SORTED_NUMERIC, fieldNumber), fieldName, fieldNumber); } diff --git a/server/src/test/java/org/opensearch/index/mapper/DateFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/DateFieldTypeTests.java index db5e1e419de93..259904fc143a1 100644 --- a/server/src/test/java/org/opensearch/index/mapper/DateFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/DateFieldTypeTests.java @@ -41,7 +41,6 @@ import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; import org.apache.lucene.search.Query; @@ -53,6 +52,7 @@ import org.opensearch.common.time.DateFormatters; import org.opensearch.common.time.DateMathParser; import org.opensearch.common.util.BigArrays; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.util.io.IOUtils; import org.opensearch.index.IndexSettings; import org.opensearch.index.fielddata.IndexNumericFieldData; @@ -65,12 +65,18 @@ import org.opensearch.index.query.DateRangeIncludingNowQuery; import org.opensearch.index.query.QueryRewriteContext; import org.opensearch.index.query.QueryShardContext; +import org.opensearch.search.approximate.ApproximateIndexOrDocValuesQuery; +import 
org.opensearch.search.approximate.ApproximatePointRangeQuery; import org.joda.time.DateTimeZone; import java.io.IOException; import java.time.ZoneOffset; import java.util.Collections; +import static org.hamcrest.CoreMatchers.is; +import static org.apache.lucene.document.LongPoint.pack; +import static org.junit.Assume.assumeThat; + public class DateFieldTypeTests extends FieldTypeTestCase { private static final long nowInMillis = 0; @@ -206,10 +212,26 @@ public void testTermQuery() { MappedFieldType ft = new DateFieldType("field"); String date = "2015-10-12T14:10:55"; long instant = DateFormatters.from(DateFieldMapper.getDefaultDateTimeFormatter().parse(date)).toInstant().toEpochMilli(); - Query expected = new IndexOrDocValuesQuery( + Query expected = new ApproximateIndexOrDocValuesQuery( LongPoint.newRangeQuery("field", instant, instant + 999), + new ApproximatePointRangeQuery( + "field", + pack(new long[] { instant }).bytes, + pack(new long[] { instant + 999 }).bytes, + new long[] { instant }.length + ) { + @Override + protected String toString(int dimension, byte[] value) { + return Long.toString(LongPoint.decodeDimension(value, 0)); + } + }, SortedNumericDocValuesField.newSlowRangeQuery("field", instant, instant + 999) ); + assumeThat( + "Using Approximate Range Query as default", + FeatureFlags.isEnabled(FeatureFlags.APPROXIMATE_POINT_RANGE_QUERY), + is(true) + ); assertEquals(expected, ft.termQuery(date, context)); MappedFieldType unsearchable = new DateFieldType( @@ -256,10 +278,26 @@ public void testRangeQuery() throws IOException { String date2 = "2016-04-28T11:33:52"; long instant1 = DateFormatters.from(DateFieldMapper.getDefaultDateTimeFormatter().parse(date1)).toInstant().toEpochMilli(); long instant2 = DateFormatters.from(DateFieldMapper.getDefaultDateTimeFormatter().parse(date2)).toInstant().toEpochMilli() + 999; - Query expected = new IndexOrDocValuesQuery( + Query expected = new ApproximateIndexOrDocValuesQuery( LongPoint.newRangeQuery("field", instant1, instant2), + new ApproximatePointRangeQuery( + "field", + pack(new long[] { instant1 }).bytes, + pack(new long[] { instant2 }).bytes, + new long[] { instant1 }.length + ) { + @Override + protected String toString(int dimension, byte[] value) { + return Long.toString(LongPoint.decodeDimension(value, 0)); + } + }, SortedNumericDocValuesField.newSlowRangeQuery("field", instant1, instant2) ); + assumeThat( + "Using Approximate Range Query as default", + FeatureFlags.isEnabled(FeatureFlags.APPROXIMATE_POINT_RANGE_QUERY), + is(true) + ); assertEquals( expected, ft.rangeQuery(date1, date2, true, true, null, null, null, context).rewrite(new IndexSearcher(new MultiReader())) @@ -268,11 +306,27 @@ public void testRangeQuery() throws IOException { instant1 = nowInMillis; instant2 = instant1 + 100; expected = new DateRangeIncludingNowQuery( - new IndexOrDocValuesQuery( + new ApproximateIndexOrDocValuesQuery( LongPoint.newRangeQuery("field", instant1, instant2), + new ApproximatePointRangeQuery( + "field", + pack(new long[] { instant1 }).bytes, + pack(new long[] { instant2 }).bytes, + new long[] { instant1 }.length + ) { + @Override + protected String toString(int dimension, byte[] value) { + return Long.toString(LongPoint.decodeDimension(value, 0)); + } + }, SortedNumericDocValuesField.newSlowRangeQuery("field", instant1, instant2) ) ); + assumeThat( + "Using Approximate Range Query as default", + FeatureFlags.isEnabled(FeatureFlags.APPROXIMATE_POINT_RANGE_QUERY), + is(true) + ); assertEquals(expected, ft.rangeQuery("now", instant2, 
true, true, null, null, null, context)); MappedFieldType unsearchable = new DateFieldType( @@ -329,13 +383,31 @@ public void testRangeQueryWithIndexSort() { long instant1 = DateFormatters.from(DateFieldMapper.getDefaultDateTimeFormatter().parse(date1)).toInstant().toEpochMilli(); long instant2 = DateFormatters.from(DateFieldMapper.getDefaultDateTimeFormatter().parse(date2)).toInstant().toEpochMilli() + 999; - Query pointQuery = LongPoint.newRangeQuery("field", instant1, instant2); Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery("field", instant1, instant2); Query expected = new IndexSortSortedNumericDocValuesRangeQuery( "field", instant1, instant2, - new IndexOrDocValuesQuery(pointQuery, dvQuery) + new ApproximateIndexOrDocValuesQuery( + LongPoint.newRangeQuery("field", instant1, instant2), + new ApproximatePointRangeQuery( + "field", + pack(new long[] { instant1 }).bytes, + pack(new long[] { instant2 }).bytes, + new long[] { instant1 }.length + ) { + @Override + protected String toString(int dimension, byte[] value) { + return Long.toString(LongPoint.decodeDimension(value, 0)); + } + }, + dvQuery + ) + ); + assumeThat( + "Using Approximate Range Query as default", + FeatureFlags.isEnabled(FeatureFlags.APPROXIMATE_POINT_RANGE_QUERY), + is(true) ); assertEquals(expected, ft.rangeQuery(date1, date2, true, true, null, null, null, context)); } diff --git a/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java index b10035f54a0c0..f291b864beb59 100644 --- a/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java @@ -136,7 +136,7 @@ public void testTermsQuery() { new TermInSetQuery("field", terms), new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, "field", terms) ); - assertEquals(expected, ft.termsQuery(Arrays.asList("foo", "bar"), null)); + assertEquals(expected, ft.termsQuery(Arrays.asList("foo", "bar"), MOCK_QSC_ENABLE_INDEX_DOC_VALUES)); MappedFieldType onlyIndexed = new KeywordFieldType("field", true, false, Collections.emptyMap()); Query expectedIndex = new TermInSetQuery("field", terms); @@ -225,7 +225,7 @@ public void testRegexpQuery() { new RegexpQuery(new Term("field", "foo.*")), new RegexpQuery(new Term("field", "foo.*"), 0, 0, RegexpQuery.DEFAULT_PROVIDER, 10, MultiTermQuery.DOC_VALUES_REWRITE) ), - ft.regexpQuery("foo.*", 0, 0, 10, MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC) + ft.regexpQuery("foo.*", 0, 0, 10, MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC_ENABLE_INDEX_DOC_VALUES) ); Query indexExpected = new RegexpQuery(new Term("field", "foo.*")); @@ -267,7 +267,7 @@ public void testFuzzyQuery() { new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true), new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true, MultiTermQuery.DOC_VALUES_REWRITE) ), - ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, null, MOCK_QSC) + ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, null, MOCK_QSC_ENABLE_INDEX_DOC_VALUES) ); Query indexExpected = new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true); @@ -308,7 +308,7 @@ public void testWildCardQuery() { MultiTermQuery.DOC_VALUES_REWRITE ) ); - assertEquals(expected, ft.wildcardQuery("foo*", MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC)); + assertEquals(expected, ft.wildcardQuery("foo*", MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC_ENABLE_INDEX_DOC_VALUES)); Query 
indexExpected = new WildcardQuery(new Term("field", new BytesRef("foo*"))); MappedFieldType onlyIndexed = new KeywordFieldType("field", true, false, Collections.emptyMap()); diff --git a/server/src/test/java/org/opensearch/index/mapper/ObjectMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/ObjectMapperTests.java index 504bc622ec12e..cb06bf23d9cbe 100644 --- a/server/src/test/java/org/opensearch/index/mapper/ObjectMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/ObjectMapperTests.java @@ -37,7 +37,11 @@ import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.core.common.bytes.BytesArray; +import org.opensearch.core.common.unit.ByteSizeUnit; +import org.opensearch.core.common.unit.ByteSizeValue; import org.opensearch.core.xcontent.MediaTypeRegistry; +import org.opensearch.index.IndexSettings; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeIndexSettings; import org.opensearch.index.mapper.MapperService.MergeReason; import org.opensearch.index.mapper.ObjectMapper.Dynamic; import org.opensearch.plugins.Plugin; @@ -500,7 +504,7 @@ public void testCompositeFields() throws Exception { .startObject("config") .startArray("ordered_dimensions") .startObject() - .field("name", "@timestamp") + .field("name", "node") .endObject() .startObject() .field("name", "status") @@ -518,8 +522,8 @@ public void testCompositeFields() throws Exception { .endObject() .endObject() .startObject("properties") - .startObject("@timestamp") - .field("type", "date") + .startObject("node") + .field("type", "integer") .endObject() .startObject("status") .field("type", "integer") @@ -544,7 +548,11 @@ public void testCompositeFields() throws Exception { final Settings starTreeEnabledSettings = Settings.builder().put(STAR_TREE_INDEX, "true").build(); FeatureFlags.initializeFeatureFlags(starTreeEnabledSettings); - DocumentMapper documentMapper = createIndex("test").mapperService() + Settings settings = Settings.builder() + .put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), true) + .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue(512, ByteSizeUnit.MB)) + .build(); + DocumentMapper documentMapper = createIndex("test", settings).mapperService() .documentMapperParser() .parse("tweet", new CompressedXContent(mapping)); @@ -553,9 +561,9 @@ public void testCompositeFields() throws Exception { StarTreeMapper starTreeMapper = (StarTreeMapper) mapper; assertEquals("star_tree", starTreeMapper.fieldType().typeName()); // Check that field in properties was parsed correctly as well - mapper = documentMapper.root().getMapper("@timestamp"); + mapper = documentMapper.root().getMapper("node"); assertNotNull(mapper); - assertEquals("date", mapper.typeName()); + assertEquals("integer", mapper.typeName()); FeatureFlags.initializeFeatureFlags(Settings.EMPTY); } diff --git a/server/src/test/java/org/opensearch/index/mapper/RangeFieldQueryStringQueryBuilderTests.java b/server/src/test/java/org/opensearch/index/mapper/RangeFieldQueryStringQueryBuilderTests.java index 9dea7e13ac45e..7a8ac829bdd97 100644 --- a/server/src/test/java/org/opensearch/index/mapper/RangeFieldQueryStringQueryBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/RangeFieldQueryStringQueryBuilderTests.java @@ -47,15 +47,21 @@ import org.opensearch.common.compress.CompressedXContent; import org.opensearch.common.network.InetAddresses; import 
org.opensearch.common.time.DateMathParser; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.index.query.QueryShardContext; import org.opensearch.index.query.QueryStringQueryBuilder; +import org.opensearch.search.approximate.ApproximateIndexOrDocValuesQuery; +import org.opensearch.search.approximate.ApproximatePointRangeQuery; import org.opensearch.test.AbstractQueryTestCase; import java.io.IOException; import java.net.InetAddress; +import static org.hamcrest.CoreMatchers.is; import static org.hamcrest.Matchers.either; import static org.hamcrest.core.IsInstanceOf.instanceOf; +import static org.apache.lucene.document.LongPoint.pack; +import static org.junit.Assume.assumeThat; public class RangeFieldQueryStringQueryBuilderTests extends AbstractQueryTestCase { @@ -173,18 +179,39 @@ public void testDateRangeQuery() throws Exception { DateFieldMapper.DateFieldType dateType = (DateFieldMapper.DateFieldType) context.fieldMapper(DATE_FIELD_NAME); parser = dateType.dateMathParser; Query queryOnDateField = new QueryStringQueryBuilder(DATE_FIELD_NAME + ":[2010-01-01 TO 2018-01-01]").toQuery(createShardContext()); - Query controlQuery = LongPoint.newRangeQuery( - DATE_FIELD_NAME, - new long[] { parser.parse(lowerBoundExact, () -> 0).toEpochMilli() }, - new long[] { parser.parse(upperBoundExact, () -> 0).toEpochMilli() } - ); Query controlDv = SortedNumericDocValuesField.newSlowRangeQuery( DATE_FIELD_NAME, parser.parse(lowerBoundExact, () -> 0).toEpochMilli(), parser.parse(upperBoundExact, () -> 0).toEpochMilli() ); - assertEquals(new IndexOrDocValuesQuery(controlQuery, controlDv), queryOnDateField); + assumeThat( + "Using Approximate Range Query as default", + FeatureFlags.isEnabled(FeatureFlags.APPROXIMATE_POINT_RANGE_QUERY), + is(true) + ); + assertEquals( + new ApproximateIndexOrDocValuesQuery( + LongPoint.newRangeQuery( + DATE_FIELD_NAME, + parser.parse(lowerBoundExact, () -> 0).toEpochMilli(), + parser.parse(upperBoundExact, () -> 0).toEpochMilli() + ), + new ApproximatePointRangeQuery( + DATE_FIELD_NAME, + pack(new long[] { parser.parse(lowerBoundExact, () -> 0).toEpochMilli() }).bytes, + pack(new long[] { parser.parse(upperBoundExact, () -> 0).toEpochMilli() }).bytes, + new long[] { parser.parse(lowerBoundExact, () -> 0).toEpochMilli() }.length + ) { + @Override + protected String toString(int dimension, byte[] value) { + return Long.toString(LongPoint.decodeDimension(value, 0)); + } + }, + controlDv + ), + queryOnDateField + ); } public void testIPRangeQuery() throws Exception { diff --git a/server/src/test/java/org/opensearch/index/mapper/RangeFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/RangeFieldTypeTests.java index 49bf227e5073c..b157c43e45451 100644 --- a/server/src/test/java/org/opensearch/index/mapper/RangeFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/RangeFieldTypeTests.java @@ -57,6 +57,7 @@ import org.opensearch.index.mapper.RangeFieldMapper.RangeFieldType; import org.opensearch.index.query.QueryShardContext; import org.opensearch.index.query.QueryShardException; +import org.opensearch.search.approximate.ApproximateIndexOrDocValuesQuery; import org.opensearch.test.IndexSettingsModule; import org.joda.time.DateTime; import org.junit.Before; @@ -285,7 +286,15 @@ public void testDateRangeQueryUsingMappingFormatLegacy() { // compare lower and upper bounds with what we would get on a `date` field DateFieldType dateFieldType = new DateFieldType("field", DateFieldMapper.Resolution.MILLISECONDS, formatter); final 
Query queryOnDateField = dateFieldType.rangeQuery(from, to, true, true, relation, null, fieldType.dateMathParser(), context); - assertEquals("field:[1465975790000 TO 1466062190999]", ((IndexOrDocValuesQuery) queryOnDateField).getIndexQuery().toString()); + assumeThat( + "Using Approximate Range Query as default", + FeatureFlags.isEnabled(FeatureFlags.APPROXIMATE_POINT_RANGE_QUERY), + is(true) + ); + assertEquals( + "field:[1465975790000 TO 1466062190999]", + ((IndexOrDocValuesQuery) ((ApproximateIndexOrDocValuesQuery) queryOnDateField).getOriginalQuery()).getIndexQuery().toString() + ); } public void testDateRangeQueryUsingMappingFormat() { diff --git a/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java index 81454b210d6be..daa9fda7a5a3b 100644 --- a/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java @@ -13,6 +13,8 @@ import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.common.util.FeatureFlags; +import org.opensearch.core.common.unit.ByteSizeUnit; +import org.opensearch.core.common.unit.ByteSizeValue; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.index.compositeindex.CompositeIndexSettings; import org.opensearch.index.compositeindex.CompositeIndexValidator; @@ -24,6 +26,7 @@ import org.opensearch.index.compositeindex.datacube.ReadDimension; import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeIndexSettings; import org.junit.After; import org.junit.Before; @@ -35,6 +38,9 @@ import java.util.List; import java.util.Set; +import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; +import static org.opensearch.index.IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING; +import static org.opensearch.index.compositeindex.CompositeIndexSettings.COMPOSITE_INDEX_MAX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING; import static org.hamcrest.Matchers.containsString; /** @@ -52,20 +58,23 @@ public void teardown() { FeatureFlags.initializeFeatureFlags(Settings.EMPTY); } + @Override + protected Settings getIndexSettings() { + return Settings.builder() + .put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), true) + .put(INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue(512, ByteSizeUnit.MB)) + .put(SETTINGS) + .build(); + } + public void testValidStarTree() throws IOException { + MapperService mapperService = createMapperService(getExpandedMappingWithJustAvg("status", "size")); Set compositeFieldTypes = mapperService.getCompositeFieldTypes(); for (CompositeMappedFieldType type : compositeFieldTypes) { StarTreeMapper.StarTreeFieldType starTreeFieldType = (StarTreeMapper.StarTreeFieldType) type; assertEquals(2, starTreeFieldType.getDimensions().size()); - assertEquals("@timestamp", starTreeFieldType.getDimensions().get(0).getField()); - assertTrue(starTreeFieldType.getDimensions().get(0) instanceof DateDimension); - DateDimension dateDim = (DateDimension) starTreeFieldType.getDimensions().get(0); - List expectedTimeUnits = Arrays.asList( - Rounding.DateTimeUnit.DAY_OF_MONTH, - Rounding.DateTimeUnit.MONTH_OF_YEAR - ); - assertEquals(expectedTimeUnits, dateDim.getIntervals()); + 
assertEquals("node", starTreeFieldType.getDimensions().get(0).getField()); assertEquals("status", starTreeFieldType.getDimensions().get(1).getField()); assertEquals(2, starTreeFieldType.getMetrics().size()); assertEquals("size", starTreeFieldType.getMetrics().get(0).getField()); @@ -76,25 +85,52 @@ public void testValidStarTree() throws IOException { assertEquals(100, starTreeFieldType.getStarTreeConfig().maxLeafDocs()); assertEquals(StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP, starTreeFieldType.getStarTreeConfig().getBuildMode()); assertEquals( - new HashSet<>(Arrays.asList("@timestamp", "status")), + new HashSet<>(Arrays.asList("node", "status")), starTreeFieldType.getStarTreeConfig().getSkipStarNodeCreationInDims() ); } } + public void testCompositeIndexWithArraysInCompositeField() throws IOException { + DocumentMapper mapper = createDocumentMapper(getExpandedMappingWithJustAvg("status", "status")); + MapperParsingException ex = expectThrows( + MapperParsingException.class, + () -> mapper.parse(source(b -> b.startArray("status").value(0).value(1).endArray())) + ); + assertEquals( + "object mapping for [_doc] with array for [status] cannot be accepted as field is also part of composite index mapping which does not accept arrays", + ex.getMessage() + ); + ParsedDocument doc = mapper.parse(source(b -> b.startArray("size").value(0).value(1).endArray())); + // 1 intPoint , 1 SNDV field for each value , so 4 in total + assertEquals(4, doc.rootDoc().getFields("size").length); + } + + public void testValidValueForFlushTresholdSizeWithoutCompositeIndex() { + Settings settings = Settings.builder() + .put(INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), "256mb") + .put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), false) + .build(); + + assertEquals(new ByteSizeValue(256, ByteSizeUnit.MB), INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.get(settings)); + } + + public void testValidValueForCompositeIndex() { + Settings settings = Settings.builder() + .put(INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), "256mb") + .put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), true) + .put(COMPOSITE_INDEX_MAX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), "512mb") + .build(); + + assertEquals(new ByteSizeValue(256, ByteSizeUnit.MB), INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.get(settings)); + } + public void testMetricsWithJustSum() throws IOException { MapperService mapperService = createMapperService(getExpandedMappingWithJustSum("status", "size")); Set compositeFieldTypes = mapperService.getCompositeFieldTypes(); for (CompositeMappedFieldType type : compositeFieldTypes) { StarTreeMapper.StarTreeFieldType starTreeFieldType = (StarTreeMapper.StarTreeFieldType) type; - assertEquals("@timestamp", starTreeFieldType.getDimensions().get(0).getField()); - assertTrue(starTreeFieldType.getDimensions().get(0) instanceof DateDimension); - DateDimension dateDim = (DateDimension) starTreeFieldType.getDimensions().get(0); - List expectedTimeUnits = Arrays.asList( - Rounding.DateTimeUnit.DAY_OF_MONTH, - Rounding.DateTimeUnit.MONTH_OF_YEAR - ); - assertEquals(expectedTimeUnits, dateDim.getIntervals()); + assertEquals("node", starTreeFieldType.getDimensions().get(0).getField()); assertEquals("status", starTreeFieldType.getDimensions().get(1).getField()); assertEquals("size", starTreeFieldType.getMetrics().get(0).getField()); @@ -104,7 +140,7 @@ public void testMetricsWithJustSum() throws IOException { assertEquals(100, starTreeFieldType.getStarTreeConfig().maxLeafDocs()); 
assertEquals(StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP, starTreeFieldType.getStarTreeConfig().getBuildMode()); assertEquals( - new HashSet<>(Arrays.asList("@timestamp", "status")), + new HashSet<>(Arrays.asList("node", "status")), starTreeFieldType.getStarTreeConfig().getSkipStarNodeCreationInDims() ); } @@ -115,14 +151,7 @@ public void testMetricsWithCountAndSum() throws IOException { Set compositeFieldTypes = mapperService.getCompositeFieldTypes(); for (CompositeMappedFieldType type : compositeFieldTypes) { StarTreeMapper.StarTreeFieldType starTreeFieldType = (StarTreeMapper.StarTreeFieldType) type; - assertEquals("@timestamp", starTreeFieldType.getDimensions().get(0).getField()); - assertTrue(starTreeFieldType.getDimensions().get(0) instanceof DateDimension); - DateDimension dateDim = (DateDimension) starTreeFieldType.getDimensions().get(0); - List expectedTimeUnits = Arrays.asList( - Rounding.DateTimeUnit.DAY_OF_MONTH, - Rounding.DateTimeUnit.MONTH_OF_YEAR - ); - assertEquals(expectedTimeUnits, dateDim.getIntervals()); + assertEquals("node", starTreeFieldType.getDimensions().get(0).getField()); assertEquals("status", starTreeFieldType.getDimensions().get(1).getField()); assertEquals("size", starTreeFieldType.getMetrics().get(0).getField()); @@ -137,7 +166,7 @@ public void testMetricsWithCountAndSum() throws IOException { assertEquals(100, starTreeFieldType.getStarTreeConfig().maxLeafDocs()); assertEquals(StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP, starTreeFieldType.getStarTreeConfig().getBuildMode()); assertEquals( - new HashSet<>(Arrays.asList("@timestamp", "status")), + new HashSet<>(Arrays.asList("node", "status")), starTreeFieldType.getStarTreeConfig().getSkipStarNodeCreationInDims() ); } @@ -148,14 +177,7 @@ public void testValidStarTreeDefaults() throws IOException { Set compositeFieldTypes = mapperService.getCompositeFieldTypes(); for (CompositeMappedFieldType type : compositeFieldTypes) { StarTreeMapper.StarTreeFieldType starTreeFieldType = (StarTreeMapper.StarTreeFieldType) type; - assertEquals("@timestamp", starTreeFieldType.getDimensions().get(0).getField()); - assertTrue(starTreeFieldType.getDimensions().get(0) instanceof DateDimension); - DateDimension dateDim = (DateDimension) starTreeFieldType.getDimensions().get(0); - List expectedTimeUnits = Arrays.asList( - Rounding.DateTimeUnit.MINUTES_OF_HOUR, - Rounding.DateTimeUnit.HOUR_OF_DAY - ); - assertEquals(expectedTimeUnits, dateDim.getIntervals()); + assertEquals("node", starTreeFieldType.getDimensions().get(0).getField()); assertEquals("status", starTreeFieldType.getDimensions().get(1).getField()); assertEquals(3, starTreeFieldType.getMetrics().size()); assertEquals("status", starTreeFieldType.getMetrics().get(0).getField()); @@ -228,7 +250,7 @@ public void testMissingDims() { MapperParsingException.class, () -> createMapperService(getMinMapping(false, false, true, false)) ); - assertThat(ex.getMessage(), containsString("Failed to parse mapping [_doc]: unknown dimension field [@timestamp]")); + assertThat(ex.getMessage(), containsString("Failed to parse mapping [_doc]: unknown dimension field [node]")); } public void testMissingMetrics() { @@ -264,7 +286,7 @@ public void testInvalidDimType() { () -> createMapperService(getInvalidMapping(false, false, true, false)) ); assertEquals( - "Failed to parse mapping [_doc]: unsupported field type associated with dimension [@timestamp] as part of star tree field [startree]", + "Failed to parse mapping [_doc]: unsupported field type associated with dimension 
[node] as part of star tree field [startree]", ex.getMessage() ); } @@ -291,6 +313,24 @@ public void testInvalidSingleDim() { ); } + public void testDuplicateDimensions() { + XContentBuilder finalMapping = getMappingWithDuplicateFields(true, false); + MapperParsingException ex = expectThrows(MapperParsingException.class, () -> createMapperService(finalMapping)); + assertEquals( + "Failed to parse mapping [_doc]: Duplicate dimension [numeric_dv] present as part star tree index field [startree-1]", + ex.getMessage() + ); + } + + public void testDuplicateMetrics() { + XContentBuilder finalMapping = getMappingWithDuplicateFields(false, true); + MapperParsingException ex = expectThrows(MapperParsingException.class, () -> createMapperService(finalMapping)); + assertEquals( + "Failed to parse mapping [_doc]: Duplicate metrics [numeric_dv] present as part star tree index field [startree-1]", + ex.getMessage() + ); + } + public void testMetric() { List m1 = new ArrayList<>(); m1.add(MetricStat.MAX); @@ -443,7 +483,7 @@ public void testValidations() throws IOException { ) ); assertEquals( - "Aggregations not supported for the dimension field [@timestamp] with field type [date] as part of star tree field", + "Aggregations not supported for the dimension field [node] with field type [integer] as part of star tree field", ex.getMessage() ); @@ -466,17 +506,13 @@ private XContentBuilder getExpandedMappingWithJustAvg(String dim, String metric) b.field("max_leaf_docs", 100); b.startArray("skip_star_node_creation_for_dimensions"); { - b.value("@timestamp"); + b.value("node"); b.value("status"); } b.endArray(); b.startArray("ordered_dimensions"); b.startObject(); - b.field("name", "@timestamp"); - b.startArray("calendar_intervals"); - b.value("day"); - b.value("month"); - b.endArray(); + b.field("name", "node"); b.endObject(); b.startObject(); b.field("name", dim); @@ -494,8 +530,8 @@ private XContentBuilder getExpandedMappingWithJustAvg(String dim, String metric) b.endObject(); b.endObject(); b.startObject("properties"); - b.startObject("@timestamp"); - b.field("type", "date"); + b.startObject("node"); + b.field("type", "integer"); b.endObject(); b.startObject("status"); b.field("type", "integer"); @@ -507,6 +543,56 @@ private XContentBuilder getExpandedMappingWithJustAvg(String dim, String metric) }); } + private XContentBuilder getMappingWithDuplicateFields(boolean isDuplicateDim, boolean isDuplicateMetric) { + XContentBuilder mapping = null; + try { + mapping = jsonBuilder().startObject() + .startObject("composite") + .startObject("startree-1") + .field("type", "star_tree") + .startObject("config") + .startArray("ordered_dimensions") + .startObject() + .field("name", "node") + .endObject() + .startObject() + .field("name", "numeric_dv") + .endObject() + .startObject() + .field("name", isDuplicateDim ? "numeric_dv" : "numeric_dv1") // Duplicate dimension + .endObject() + .endArray() + .startArray("metrics") + .startObject() + .field("name", "numeric_dv") + .endObject() + .startObject() + .field("name", isDuplicateMetric ? 
"numeric_dv" : "numeric_dv1") // Duplicate metric + .endObject() + .endArray() + .endObject() + .endObject() + .endObject() + .startObject("properties") + .startObject("node") + .field("type", "integer") + .endObject() + .startObject("numeric_dv") + .field("type", "integer") + .field("doc_values", true) + .endObject() + .startObject("numeric_dv1") + .field("type", "integer") + .field("doc_values", true) + .endObject() + .endObject() + .endObject(); + } catch (IOException e) { + fail("Failed to create mapping: " + e.getMessage()); + } + return mapping; + } + private XContentBuilder getExpandedMappingWithJustSum(String dim, String metric) throws IOException { return topMapping(b -> { b.startObject("composite"); @@ -516,17 +602,13 @@ private XContentBuilder getExpandedMappingWithJustSum(String dim, String metric) b.field("max_leaf_docs", 100); b.startArray("skip_star_node_creation_for_dimensions"); { - b.value("@timestamp"); + b.value("node"); b.value("status"); } b.endArray(); b.startArray("ordered_dimensions"); b.startObject(); - b.field("name", "@timestamp"); - b.startArray("calendar_intervals"); - b.value("day"); - b.value("month"); - b.endArray(); + b.field("name", "node"); b.endObject(); b.startObject(); b.field("name", dim); @@ -544,8 +626,8 @@ private XContentBuilder getExpandedMappingWithJustSum(String dim, String metric) b.endObject(); b.endObject(); b.startObject("properties"); - b.startObject("@timestamp"); - b.field("type", "date"); + b.startObject("node"); + b.field("type", "integer"); b.endObject(); b.startObject("status"); b.field("type", "integer"); @@ -566,17 +648,13 @@ private XContentBuilder getExpandedMappingWithSumAndCount(String dim, String met b.field("max_leaf_docs", 100); b.startArray("skip_star_node_creation_for_dimensions"); { - b.value("@timestamp"); + b.value("node"); b.value("status"); } b.endArray(); b.startArray("ordered_dimensions"); b.startObject(); - b.field("name", "@timestamp"); - b.startArray("calendar_intervals"); - b.value("day"); - b.value("month"); - b.endArray(); + b.field("name", "node"); b.endObject(); b.startObject(); b.field("name", dim); @@ -595,8 +673,8 @@ private XContentBuilder getExpandedMappingWithSumAndCount(String dim, String met b.endObject(); b.endObject(); b.startObject("properties"); - b.startObject("@timestamp"); - b.field("type", "date"); + b.startObject("node"); + b.field("type", "integer"); b.endObject(); b.startObject("status"); b.field("type", "integer"); @@ -622,7 +700,7 @@ private XContentBuilder getMinMapping(boolean isEmptyDims, boolean isEmptyMetric if (!isEmptyDims) { b.startArray("ordered_dimensions"); b.startObject(); - b.field("name", "@timestamp"); + b.field("name", "node"); b.endObject(); b.startObject(); b.field("name", "status"); @@ -644,8 +722,8 @@ private XContentBuilder getMinMapping(boolean isEmptyDims, boolean isEmptyMetric b.endObject(); b.startObject("properties"); if (!missingDim) { - b.startObject("@timestamp"); - b.field("type", "date"); + b.startObject("node"); + b.field("type", "integer"); b.endObject(); } b.startObject("status"); @@ -758,7 +836,7 @@ private XContentBuilder getInvalidMapping( b.startArray("ordered_dimensions"); if (!singleDim) { b.startObject(); - b.field("name", "@timestamp"); + b.field("name", "node"); b.endObject(); } b.startObject(); @@ -783,9 +861,9 @@ private XContentBuilder getInvalidMapping( b.endObject(); b.endObject(); b.startObject("properties"); - b.startObject("@timestamp"); + b.startObject("node"); if (!invalidDimType) { - b.field("type", "date"); + b.field("type", 
"integer"); } else { b.field("type", "keyword"); } @@ -828,7 +906,7 @@ private XContentBuilder getInvalidMappingWithDv( b.startArray("ordered_dimensions"); if (!singleDim) { b.startObject(); - b.field("name", "@timestamp"); + b.field("name", "node"); b.endObject(); } b.startObject(); @@ -847,12 +925,12 @@ private XContentBuilder getInvalidMappingWithDv( b.endObject(); b.endObject(); b.startObject("properties"); - b.startObject("@timestamp"); + b.startObject("node"); if (!invalidDimType) { - b.field("type", "date"); + b.field("type", "integer"); b.field("doc_values", "true"); } else { - b.field("type", "date"); + b.field("type", "integer"); b.field("doc_values", "false"); } b.endObject(); diff --git a/server/src/test/java/org/opensearch/index/mapper/WildcardFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/WildcardFieldTypeTests.java index cd2a23cf94c37..1a813495e9033 100644 --- a/server/src/test/java/org/opensearch/index/mapper/WildcardFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/WildcardFieldTypeTests.java @@ -88,6 +88,32 @@ public void testWildcardQuery() { ); } + public void testEscapedWildcardQuery() { + MappedFieldType ft = new WildcardFieldMapper.WildcardFieldType("field"); + Set expectedTerms = new HashSet<>(); + expectedTerms.add(prefixAnchored("*")); + expectedTerms.add(suffixAnchored("*")); + + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + for (String term : expectedTerms) { + builder.add(new TermQuery(new Term("field", term)), BooleanClause.Occur.FILTER); + } + + assertEquals( + new WildcardFieldMapper.WildcardMatchingQuery("field", builder.build(), "\\**\\*"), + ft.wildcardQuery("\\**\\*", null, null) + ); + + assertEquals(new WildcardFieldMapper.WildcardMatchingQuery("field", builder.build(), "\\*"), ft.wildcardQuery("\\*", null, null)); + + expectedTerms.remove(suffixAnchored("*")); + builder = new BooleanQuery.Builder(); + for (String term : expectedTerms) { + builder.add(new TermQuery(new Term("field", term)), BooleanClause.Occur.FILTER); + } + assertEquals(new WildcardFieldMapper.WildcardMatchingQuery("field", builder.build(), "\\**"), ft.wildcardQuery("\\**", null, null)); + } + public void testMultipleWildcardsInQuery() { final String pattern = "a?cd*efg?h"; MappedFieldType ft = new WildcardFieldMapper.WildcardFieldType("field"); diff --git a/server/src/test/java/org/opensearch/index/query/MatchPhraseQueryBuilderTests.java b/server/src/test/java/org/opensearch/index/query/MatchPhraseQueryBuilderTests.java index f8d5d2ce3d062..ddf58073a5206 100644 --- a/server/src/test/java/org/opensearch/index/query/MatchPhraseQueryBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/query/MatchPhraseQueryBuilderTests.java @@ -42,6 +42,7 @@ import org.apache.lucene.search.TermQuery; import org.opensearch.core.common.ParsingException; import org.opensearch.index.search.MatchQuery.ZeroTermsQuery; +import org.opensearch.search.approximate.ApproximateIndexOrDocValuesQuery; import org.opensearch.test.AbstractQueryTestCase; import java.io.IOException; @@ -130,6 +131,7 @@ protected void doAssertLuceneQuery(MatchPhraseQueryBuilder queryBuilder, Query q .or(instanceOf(PointRangeQuery.class)) .or(instanceOf(IndexOrDocValuesQuery.class)) .or(instanceOf(MatchNoDocsQuery.class)) + .or(instanceOf(ApproximateIndexOrDocValuesQuery.class)) ); } diff --git a/server/src/test/java/org/opensearch/index/query/QueryStringQueryBuilderTests.java b/server/src/test/java/org/opensearch/index/query/QueryStringQueryBuilderTests.java index 
af4a34aa98116..5b030df20e889 100644 --- a/server/src/test/java/org/opensearch/index/query/QueryStringQueryBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/query/QueryStringQueryBuilderTests.java @@ -47,7 +47,6 @@ import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.FuzzyQuery; -import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.MultiTermQuery; @@ -71,11 +70,14 @@ import org.opensearch.common.compress.CompressedXContent; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.Fuzziness; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.xcontent.json.JsonXContent; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.index.mapper.FieldNamesFieldMapper; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.search.QueryStringQueryParser; +import org.opensearch.search.approximate.ApproximateIndexOrDocValuesQuery; +import org.opensearch.search.approximate.ApproximatePointRangeQuery; import org.opensearch.test.AbstractQueryTestCase; import org.hamcrest.CoreMatchers; import org.hamcrest.Matchers; @@ -98,6 +100,9 @@ import static org.hamcrest.CoreMatchers.hasItems; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.is; +import static org.apache.lucene.document.LongPoint.pack; +import static org.junit.Assume.assumeThat; public class QueryStringQueryBuilderTests extends AbstractQueryTestCase { @@ -853,7 +858,12 @@ public void testToQueryDateWithTimeZone() throws Exception { QueryStringQueryBuilder qsq = queryStringQuery(DATE_FIELD_NAME + ":1970-01-01"); QueryShardContext context = createShardContext(); Query query = qsq.toQuery(context); - assertThat(query, instanceOf(IndexOrDocValuesQuery.class)); + assumeThat( + "Using Approximate Range Query as default", + FeatureFlags.isEnabled(FeatureFlags.APPROXIMATE_POINT_RANGE_QUERY), + is(true) + ); + assertThat(query, instanceOf(ApproximateIndexOrDocValuesQuery.class)); long lower = 0; // 1970-01-01T00:00:00.999 UTC long upper = 86399999; // 1970-01-01T23:59:59.999 UTC assertEquals(calculateExpectedDateQuery(lower, upper), query); @@ -862,10 +872,22 @@ public void testToQueryDateWithTimeZone() throws Exception { assertEquals(calculateExpectedDateQuery(lower + msPerHour, upper + msPerHour), qsq.timeZone("-01:00").toQuery(context)); } - private IndexOrDocValuesQuery calculateExpectedDateQuery(long lower, long upper) { - Query query = LongPoint.newRangeQuery(DATE_FIELD_NAME, lower, upper); - Query dv = SortedNumericDocValuesField.newSlowRangeQuery(DATE_FIELD_NAME, lower, upper); - return new IndexOrDocValuesQuery(query, dv); + private ApproximateIndexOrDocValuesQuery calculateExpectedDateQuery(long lower, long upper) { + return new ApproximateIndexOrDocValuesQuery( + LongPoint.newRangeQuery(DATE_FIELD_NAME, lower, upper), + new ApproximatePointRangeQuery( + DATE_FIELD_NAME, + pack(new long[] { lower }).bytes, + pack(new long[] { upper }).bytes, + new long[] { lower }.length + ) { + @Override + protected String toString(int dimension, byte[] value) { + return Long.toString(LongPoint.decodeDimension(value, 0)); + } + }, + SortedNumericDocValuesField.newSlowRangeQuery(DATE_FIELD_NAME, lower, upper) + ); } public void testFuzzyNumeric() throws 
Exception { diff --git a/server/src/test/java/org/opensearch/index/query/RangeQueryBuilderTests.java b/server/src/test/java/org/opensearch/index/query/RangeQueryBuilderTests.java index e72be29b85b63..799d7c7b63462 100644 --- a/server/src/test/java/org/opensearch/index/query/RangeQueryBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/query/RangeQueryBuilderTests.java @@ -34,6 +34,7 @@ import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.LongPoint; +import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.index.Term; import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.DocValuesFieldExistsQuery; @@ -47,12 +48,16 @@ import org.opensearch.OpenSearchParseException; import org.opensearch.common.geo.ShapeRelation; import org.opensearch.common.lucene.BytesRefs; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.common.ParsingException; import org.opensearch.index.mapper.DateFieldMapper; import org.opensearch.index.mapper.FieldNamesFieldMapper; import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.MappedFieldType.Relation; import org.opensearch.index.mapper.MapperService; +import org.opensearch.search.approximate.ApproximateIndexOrDocValuesQuery; +import org.opensearch.search.approximate.ApproximatePointRangeQuery; +import org.opensearch.search.approximate.ApproximateQuery; import org.opensearch.test.AbstractQueryTestCase; import org.joda.time.DateTime; import org.joda.time.chrono.ISOChronology; @@ -65,9 +70,12 @@ import java.util.Map; import static org.opensearch.index.query.QueryBuilders.rangeQuery; +import static org.hamcrest.CoreMatchers.is; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.sameInstance; +import static org.apache.lucene.document.LongPoint.pack; +import static org.junit.Assume.assumeThat; public class RangeQueryBuilderTests extends AbstractQueryTestCase { @Override @@ -183,9 +191,16 @@ protected void doAssertLuceneQuery(RangeQueryBuilder queryBuilder, Query query, assertThat(termRangeQuery.includesLower(), equalTo(queryBuilder.includeLower())); assertThat(termRangeQuery.includesUpper(), equalTo(queryBuilder.includeUpper())); } else if (expectedFieldName.equals(DATE_FIELD_NAME)) { - assertThat(query, instanceOf(IndexOrDocValuesQuery.class)); - query = ((IndexOrDocValuesQuery) query).getIndexQuery(); - assertThat(query, instanceOf(PointRangeQuery.class)); + assumeThat( + "Using Approximate Range Query as default", + FeatureFlags.isEnabled(FeatureFlags.APPROXIMATE_POINT_RANGE_QUERY), + is(true) + ); + assertThat(query, instanceOf(ApproximateIndexOrDocValuesQuery.class)); + Query approximationQuery = ((ApproximateIndexOrDocValuesQuery) query).getApproximationQuery(); + assertThat(approximationQuery, instanceOf(ApproximateQuery.class)); + Query originalQuery = ((ApproximateIndexOrDocValuesQuery) query).getOriginalQuery(); + assertThat(originalQuery, instanceOf(IndexOrDocValuesQuery.class)); MapperService mapperService = context.getMapperService(); MappedFieldType mappedFieldType = mapperService.fieldType(expectedFieldName); final Long fromInMillis; @@ -234,7 +249,24 @@ protected void doAssertLuceneQuery(RangeQueryBuilder queryBuilder, Query query, maxLong--; } } - assertEquals(LongPoint.newRangeQuery(DATE_FIELD_NAME, minLong, maxLong), query); + assertEquals( + new ApproximateIndexOrDocValuesQuery( + LongPoint.newRangeQuery(DATE_FIELD_NAME, 
minLong, maxLong), + new ApproximatePointRangeQuery( + DATE_FIELD_NAME, + pack(new long[] { minLong }).bytes, + pack(new long[] { maxLong }).bytes, + new long[] { minLong }.length + ) { + @Override + protected String toString(int dimension, byte[] value) { + return Long.toString(LongPoint.decodeDimension(value, 0)); + } + }, + SortedNumericDocValuesField.newSlowRangeQuery(DATE_FIELD_NAME, minLong, maxLong) + ), + query + ); } else if (expectedFieldName.equals(INT_FIELD_NAME)) { assertThat(query, instanceOf(IndexOrDocValuesQuery.class)); query = ((IndexOrDocValuesQuery) query).getIndexQuery(); @@ -299,15 +331,33 @@ public void testDateRangeQueryFormat() throws IOException { + " }\n" + "}"; Query parsedQuery = parseQuery(query).toQuery(createShardContext()); - assertThat(parsedQuery, instanceOf(IndexOrDocValuesQuery.class)); - parsedQuery = ((IndexOrDocValuesQuery) parsedQuery).getIndexQuery(); - assertThat(parsedQuery, instanceOf(PointRangeQuery.class)); - + assumeThat( + "Using Approximate Range Query as default", + FeatureFlags.isEnabled(FeatureFlags.APPROXIMATE_POINT_RANGE_QUERY), + is(true) + ); + assertThat(parsedQuery, instanceOf(ApproximateIndexOrDocValuesQuery.class)); + Query approximationQuery = ((ApproximateIndexOrDocValuesQuery) parsedQuery).getApproximationQuery(); + assertThat(approximationQuery, instanceOf(ApproximateQuery.class)); + Query originalQuery = ((ApproximateIndexOrDocValuesQuery) parsedQuery).getOriginalQuery(); + assertThat(originalQuery, instanceOf(IndexOrDocValuesQuery.class)); + long lower = DateTime.parse("2012-01-01T00:00:00.000+00").getMillis(); + long upper = DateTime.parse("2030-01-01T00:00:00.000+00").getMillis() - 1; assertEquals( - LongPoint.newRangeQuery( - DATE_FIELD_NAME, - DateTime.parse("2012-01-01T00:00:00.000+00").getMillis(), - DateTime.parse("2030-01-01T00:00:00.000+00").getMillis() - 1 + new ApproximateIndexOrDocValuesQuery( + LongPoint.newRangeQuery(DATE_FIELD_NAME, lower, upper), + new ApproximatePointRangeQuery( + DATE_FIELD_NAME, + pack(new long[] { lower }).bytes, + pack(new long[] { upper }).bytes, + new long[] { lower }.length + ) { + @Override + protected String toString(int dimension, byte[] value) { + return Long.toString(LongPoint.decodeDimension(value, 0)); + } + }, + SortedNumericDocValuesField.newSlowRangeQuery(DATE_FIELD_NAME, lower, upper) ), parsedQuery ); @@ -339,15 +389,33 @@ public void testDateRangeBoundaries() throws IOException { + " }\n" + "}\n"; Query parsedQuery = parseQuery(query).toQuery(createShardContext()); - assertThat(parsedQuery, instanceOf(IndexOrDocValuesQuery.class)); - parsedQuery = ((IndexOrDocValuesQuery) parsedQuery).getIndexQuery(); - assertThat(parsedQuery, instanceOf(PointRangeQuery.class)); + assumeThat( + "Using Approximate Range Query as default", + FeatureFlags.isEnabled(FeatureFlags.APPROXIMATE_POINT_RANGE_QUERY), + is(true) + ); + assertThat(parsedQuery, instanceOf(ApproximateIndexOrDocValuesQuery.class)); + + long lower = DateTime.parse("2014-11-01T00:00:00.000+00").getMillis(); + long upper = DateTime.parse("2014-12-08T23:59:59.999+00").getMillis(); assertEquals( - LongPoint.newRangeQuery( - DATE_FIELD_NAME, - DateTime.parse("2014-11-01T00:00:00.000+00").getMillis(), - DateTime.parse("2014-12-08T23:59:59.999+00").getMillis() - ), + new ApproximateIndexOrDocValuesQuery( + LongPoint.newRangeQuery(DATE_FIELD_NAME, lower, upper), + new ApproximatePointRangeQuery( + DATE_FIELD_NAME, + pack(new long[] { lower }).bytes, + pack(new long[] { upper }).bytes, + new long[] { lower }.length + ) { + 
@Override + protected String toString(int dimension, byte[] value) { + return Long.toString(LongPoint.decodeDimension(value, 0)); + } + }, + SortedNumericDocValuesField.newSlowRangeQuery(DATE_FIELD_NAME, lower, upper) + ) + + , parsedQuery ); @@ -362,15 +430,27 @@ public void testDateRangeBoundaries() throws IOException { + " }\n" + "}"; parsedQuery = parseQuery(query).toQuery(createShardContext()); - assertThat(parsedQuery, instanceOf(IndexOrDocValuesQuery.class)); - parsedQuery = ((IndexOrDocValuesQuery) parsedQuery).getIndexQuery(); - assertThat(parsedQuery, instanceOf(PointRangeQuery.class)); + assertThat(parsedQuery, instanceOf(ApproximateIndexOrDocValuesQuery.class)); + lower = DateTime.parse("2014-11-30T23:59:59.999+00").getMillis() + 1; + upper = DateTime.parse("2014-12-08T00:00:00.000+00").getMillis() - 1; assertEquals( - LongPoint.newRangeQuery( - DATE_FIELD_NAME, - DateTime.parse("2014-11-30T23:59:59.999+00").getMillis() + 1, - DateTime.parse("2014-12-08T00:00:00.000+00").getMillis() - 1 - ), + new ApproximateIndexOrDocValuesQuery( + LongPoint.newRangeQuery(DATE_FIELD_NAME, lower, upper), + new ApproximatePointRangeQuery( + DATE_FIELD_NAME, + pack(new long[] { lower }).bytes, + pack(new long[] { upper }).bytes, + new long[] { lower }.length + ) { + @Override + protected String toString(int dimension, byte[] value) { + return Long.toString(LongPoint.decodeDimension(value, 0)); + } + }, + SortedNumericDocValuesField.newSlowRangeQuery(DATE_FIELD_NAME, lower, upper) + ) + + , parsedQuery ); } @@ -391,9 +471,14 @@ public void testDateRangeQueryTimezone() throws IOException { Query parsedQuery = parseQuery(query).toQuery(context); assertThat(parsedQuery, instanceOf(DateRangeIncludingNowQuery.class)); parsedQuery = ((DateRangeIncludingNowQuery) parsedQuery).getQuery(); - assertThat(parsedQuery, instanceOf(IndexOrDocValuesQuery.class)); - parsedQuery = ((IndexOrDocValuesQuery) parsedQuery).getIndexQuery(); - assertThat(parsedQuery, instanceOf(PointRangeQuery.class)); + assumeThat( + "Using Approximate Range Query as default", + FeatureFlags.isEnabled(FeatureFlags.APPROXIMATE_POINT_RANGE_QUERY), + is(true) + ); + assertThat(parsedQuery, instanceOf(ApproximateIndexOrDocValuesQuery.class)); + parsedQuery = ((ApproximateIndexOrDocValuesQuery) parsedQuery).getApproximationQuery(); + assertThat(parsedQuery, instanceOf(ApproximateQuery.class)); // TODO what else can we assert query = "{\n" diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteIndexPathTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteIndexPathTests.java index 8ddbd383756e7..2351d165b369f 100644 --- a/server/src/test/java/org/opensearch/index/remote/RemoteIndexPathTests.java +++ b/server/src/test/java/org/opensearch/index/remote/RemoteIndexPathTests.java @@ -9,12 +9,16 @@ package org.opensearch.index.remote; import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; import org.opensearch.core.xcontent.MediaTypeRegistry; import org.opensearch.core.xcontent.ToXContent; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.index.remote.RemoteStoreEnums.PathHashAlgorithm; import org.opensearch.index.remote.RemoteStoreEnums.PathType; +import org.opensearch.indices.DefaultRemoteStoreSettings; +import org.opensearch.indices.RemoteStoreSettings; import org.opensearch.test.OpenSearchTestCase; import java.io.IOException; @@ -40,7 +44,8 @@ 
public void testToXContentWithSegmentRepo() throws IOException { new BlobPath().add("djsd878ndjh").add("hcs87cj8"), PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A_BASE64, - RemoteIndexPath.SEGMENT_PATH + RemoteIndexPath.SEGMENT_PATH, + DefaultRemoteStoreSettings.INSTANCE ); XContentBuilder xContentBuilder = MediaTypeRegistry.contentBuilder(MediaTypeRegistry.JSON); xContentBuilder.startObject(); @@ -49,6 +54,28 @@ public void testToXContentWithSegmentRepo() throws IOException { String expected = "{\"version\":\"1\",\"index_uuid\":\"djjsid73he8yd7usduh\",\"shard_count\":2,\"path_type\":\"HASHED_PREFIX\",\"path_hash_algorithm\":\"FNV_1A_BASE64\",\"path_creation_map\":{\"segments\":[\"data\",\"metadata\",\"lock_files\"]},\"paths\":[\"9BmBinD5HYs/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/0/segments/data/\",\"ExCNOD8_5ew/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/1/segments/data/\",\"z8wtf0yr2l4/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/0/segments/metadata/\",\"VheHVwFlExE/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/1/segments/metadata/\",\"IgFKbsDeUpQ/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/0/segments/lock_files/\",\"pA3gy_GZtns/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/1/segments/lock_files/\"]}"; assertEquals(expected, xContentBuilder.toString()); + + // Fixed prefix + Settings settings = Settings.builder().put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_SEGMENTS_PATH_PREFIX.getKey(), ".").build(); + ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + RemoteStoreSettings remoteStoreSettings = new RemoteStoreSettings(settings, clusterSettings); + indexPath = new RemoteIndexPath( + "djjsid73he8yd7usduh", + 2, + new BlobPath().add("djsd878ndjh").add("hcs87cj8"), + PathType.HASHED_PREFIX, + PathHashAlgorithm.FNV_1A_BASE64, + RemoteIndexPath.SEGMENT_PATH, + remoteStoreSettings + ); + xContentBuilder = MediaTypeRegistry.contentBuilder(MediaTypeRegistry.JSON); + xContentBuilder.startObject(); + xContentBuilder = indexPath.toXContent(xContentBuilder, ToXContent.EMPTY_PARAMS); + xContentBuilder.endObject(); + expected = + "{\"version\":\"1\",\"index_uuid\":\"djjsid73he8yd7usduh\",\"shard_count\":2,\"path_type\":\"HASHED_PREFIX\",\"path_hash_algorithm\":\"FNV_1A_BASE64\",\"path_creation_map\":{\"segments\":[\"data\",\"metadata\",\"lock_files\"]},\"paths\":[\".9BmBinD5HYs/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/0/segments/data/\",\".ExCNOD8_5ew/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/1/segments/data/\",\".z8wtf0yr2l4/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/0/segments/metadata/\",\".VheHVwFlExE/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/1/segments/metadata/\",\".IgFKbsDeUpQ/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/0/segments/lock_files/\",\".pA3gy_GZtns/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/1/segments/lock_files/\"]}"; + assertEquals(expected, xContentBuilder.toString()); + } /** @@ -61,7 +88,8 @@ public void testToXContentForTranslogRepoOnly() throws IOException { new BlobPath().add("djsd878ndjh").add("hcs87cj8"), PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A_BASE64, - RemoteIndexPath.TRANSLOG_PATH + RemoteIndexPath.TRANSLOG_PATH, + DefaultRemoteStoreSettings.INSTANCE ); XContentBuilder xContentBuilder = MediaTypeRegistry.contentBuilder(MediaTypeRegistry.JSON); xContentBuilder.startObject(); @@ -70,6 +98,27 @@ public void testToXContentForTranslogRepoOnly() throws IOException { String expected = 
"{\"version\":\"1\",\"index_uuid\":\"djjsid73he8yd7usduh\",\"shard_count\":2,\"path_type\":\"HASHED_PREFIX\",\"path_hash_algorithm\":\"FNV_1A_BASE64\",\"path_creation_map\":{\"translog\":[\"data\",\"metadata\"]},\"paths\":[\"2EaVODaKBck/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/0/translog/data/\",\"dTS2VqEOUNo/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/1/translog/data/\",\"PVNKNGonmZw/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/0/translog/metadata/\",\"NXmt0Y6NjA8/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/1/translog/metadata/\"]}"; assertEquals(expected, xContentBuilder.toString()); + + // Fixed prefix + Settings settings = Settings.builder().put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_TRANSLOG_PATH_PREFIX.getKey(), ".").build(); + ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + RemoteStoreSettings remoteStoreSettings = new RemoteStoreSettings(settings, clusterSettings); + indexPath = new RemoteIndexPath( + "djjsid73he8yd7usduh", + 2, + new BlobPath().add("djsd878ndjh").add("hcs87cj8"), + PathType.HASHED_PREFIX, + PathHashAlgorithm.FNV_1A_BASE64, + RemoteIndexPath.TRANSLOG_PATH, + remoteStoreSettings + ); + xContentBuilder = MediaTypeRegistry.contentBuilder(MediaTypeRegistry.JSON); + xContentBuilder.startObject(); + xContentBuilder = indexPath.toXContent(xContentBuilder, ToXContent.EMPTY_PARAMS); + xContentBuilder.endObject(); + expected = + "{\"version\":\"1\",\"index_uuid\":\"djjsid73he8yd7usduh\",\"shard_count\":2,\"path_type\":\"HASHED_PREFIX\",\"path_hash_algorithm\":\"FNV_1A_BASE64\",\"path_creation_map\":{\"translog\":[\"data\",\"metadata\"]},\"paths\":[\".2EaVODaKBck/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/0/translog/data/\",\".dTS2VqEOUNo/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/1/translog/data/\",\".PVNKNGonmZw/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/0/translog/metadata/\",\".NXmt0Y6NjA8/djsd878ndjh/hcs87cj8/djjsid73he8yd7usduh/1/translog/metadata/\"]}"; + assertEquals(expected, xContentBuilder.toString()); } /** @@ -85,7 +134,8 @@ public void testToXContentForBothRepos() throws IOException { new BlobPath().add("nxf9yv0").add("c3ejoi"), PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A_BASE64, - pathCreationMap + pathCreationMap, + DefaultRemoteStoreSettings.INSTANCE ); XContentBuilder xContentBuilder = MediaTypeRegistry.contentBuilder(MediaTypeRegistry.JSON); xContentBuilder.startObject(); @@ -94,9 +144,42 @@ public void testToXContentForBothRepos() throws IOException { String expected = 
"{\"version\":\"1\",\"index_uuid\":\"csbdqiu8a7sdnjdks\",\"shard_count\":3,\"path_type\":\"HASHED_PREFIX\",\"path_hash_algorithm\":\"FNV_1A_BASE64\",\"path_creation_map\":{\"translog\":[\"data\",\"metadata\"],\"segments\":[\"data\",\"metadata\",\"lock_files\"]},\"paths\":[\"Cjo0F6kNjYk/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/0/segments/data/\",\"kpayyhxct1I/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/1/segments/data/\",\"p2RlgnHeIgc/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/2/segments/data/\",\"gkPIurBtB1w/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/0/segments/metadata/\",\"Y4YhlbxAB1c/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/1/segments/metadata/\",\"HYc8fyVPouI/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/2/segments/metadata/\",\"igzyZCz1ysI/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/0/segments/lock_files/\",\"uEluEiYmptk/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/1/segments/lock_files/\",\"TfAD8f06_7A/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/2/segments/lock_files/\",\"QqKEpasbEGs/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/0/translog/data/\",\"sNyoimoe1Bw/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/1/translog/data/\",\"d4YQtONfq50/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/2/translog/data/\",\"zLr4UXjK8T4/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/0/translog/metadata/\",\"_s8i7ZmlXGE/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/1/translog/metadata/\",\"tvtD3-k5ISg/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/2/translog/metadata/\"]}"; assertEquals(expected, xContentBuilder.toString()); + + // Fixed prefix + Settings settings = Settings.builder() + .put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_TRANSLOG_PATH_PREFIX.getKey(), ".") + .put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_SEGMENTS_PATH_PREFIX.getKey(), ".") + .build(); + ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + RemoteStoreSettings remoteStoreSettings = new RemoteStoreSettings(settings, clusterSettings); + indexPath = new RemoteIndexPath( + "csbdqiu8a7sdnjdks", + 3, + new BlobPath().add("nxf9yv0").add("c3ejoi"), + PathType.HASHED_PREFIX, + PathHashAlgorithm.FNV_1A_BASE64, + pathCreationMap, + remoteStoreSettings + ); + xContentBuilder = MediaTypeRegistry.contentBuilder(MediaTypeRegistry.JSON); + xContentBuilder.startObject(); + xContentBuilder = indexPath.toXContent(xContentBuilder, ToXContent.EMPTY_PARAMS); + xContentBuilder.endObject(); + expected = + 
"{\"version\":\"1\",\"index_uuid\":\"csbdqiu8a7sdnjdks\",\"shard_count\":3,\"path_type\":\"HASHED_PREFIX\",\"path_hash_algorithm\":\"FNV_1A_BASE64\",\"path_creation_map\":{\"translog\":[\"data\",\"metadata\"],\"segments\":[\"data\",\"metadata\",\"lock_files\"]},\"paths\":[\".Cjo0F6kNjYk/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/0/segments/data/\",\".kpayyhxct1I/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/1/segments/data/\",\".p2RlgnHeIgc/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/2/segments/data/\",\".gkPIurBtB1w/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/0/segments/metadata/\",\".Y4YhlbxAB1c/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/1/segments/metadata/\",\".HYc8fyVPouI/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/2/segments/metadata/\",\".igzyZCz1ysI/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/0/segments/lock_files/\",\".uEluEiYmptk/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/1/segments/lock_files/\",\".TfAD8f06_7A/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/2/segments/lock_files/\",\".QqKEpasbEGs/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/0/translog/data/\",\".sNyoimoe1Bw/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/1/translog/data/\",\".d4YQtONfq50/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/2/translog/data/\",\".zLr4UXjK8T4/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/0/translog/metadata/\",\"._s8i7ZmlXGE/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/1/translog/metadata/\",\".tvtD3-k5ISg/nxf9yv0/c3ejoi/csbdqiu8a7sdnjdks/2/translog/metadata/\"]}"; + assertEquals(expected, xContentBuilder.toString()); } - public void testRemoteIndexPathWithInvalidPathCreationMap() throws IOException { + public void testRemoteIndexPathWithInvalidPathCreationMap() { + Settings.Builder builder = Settings.builder(); + if (randomBoolean()) { + builder.put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_TRANSLOG_PATH_PREFIX.getKey(), "."); + } + if (randomBoolean()) { + builder.put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_SEGMENTS_PATH_PREFIX.getKey(), "."); + } + ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + RemoteStoreSettings remoteStoreSettings = new RemoteStoreSettings(builder.build(), clusterSettings); IllegalArgumentException ex = assertThrows( IllegalArgumentException.class, () -> new RemoteIndexPath( @@ -105,7 +188,8 @@ public void testRemoteIndexPathWithInvalidPathCreationMap() throws IOException { new BlobPath().add("djsd878ndjh").add("hcs87cj8"), PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A_BASE64, - new HashMap<>() + new HashMap<>(), + remoteStoreSettings ) ); assertEquals( @@ -124,6 +208,15 @@ public void testFromXContent() { } public void testInvalidPathCreationMap() { + Settings.Builder builder = Settings.builder(); + if (randomBoolean()) { + builder.put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_TRANSLOG_PATH_PREFIX.getKey(), "."); + } + if (randomBoolean()) { + builder.put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_SEGMENTS_PATH_PREFIX.getKey(), "."); + } + ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + RemoteStoreSettings remoteStoreSettings = new RemoteStoreSettings(builder.build(), clusterSettings); IllegalArgumentException ex = assertThrows( IllegalArgumentException.class, () -> new RemoteIndexPath( @@ -132,7 +225,8 @@ public void testInvalidPathCreationMap() { new BlobPath().add("djsd878ndjh").add("hcs87cj8"), PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A_BASE64, - Map.of(TRANSLOG, List.of(LOCK_FILES)) + Map.of(TRANSLOG, List.of(LOCK_FILES)), + remoteStoreSettings ) ); assertEquals("pathCreationMap={TRANSLOG=[LOCK_FILES]} is having illegal combination of category and type", ex.getMessage()); diff --git 
a/server/src/test/java/org/opensearch/index/remote/RemoteIndexPathUploaderTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteIndexPathUploaderTests.java index e0a75f7296705..d6519d9db8ee6 100644 --- a/server/src/test/java/org/opensearch/index/remote/RemoteIndexPathUploaderTests.java +++ b/server/src/test/java/org/opensearch/index/remote/RemoteIndexPathUploaderTests.java @@ -25,6 +25,7 @@ import org.opensearch.gateway.remote.RemoteStateTransferException; import org.opensearch.index.remote.RemoteStoreEnums.PathHashAlgorithm; import org.opensearch.index.remote.RemoteStoreEnums.PathType; +import org.opensearch.indices.DefaultRemoteStoreSettings; import org.opensearch.node.Node; import org.opensearch.node.remotestore.RemoteStoreNodeAttribute; import org.opensearch.repositories.RepositoriesService; @@ -131,7 +132,8 @@ public void testInterceptWithNoRemoteDataAttributes() { threadPool, settings, () -> repositoriesService, - clusterSettings + clusterSettings, + DefaultRemoteStoreSettings.INSTANCE ); List indexMetadataList = Mockito.mock(List.class); ActionListener actionListener = ActionListener.wrap( @@ -149,7 +151,8 @@ public void testInterceptWithEmptyIndexMetadataList() { threadPool, settings, () -> repositoriesService, - clusterSettings + clusterSettings, + DefaultRemoteStoreSettings.INSTANCE ); remoteIndexPathUploader.start(); ActionListener actionListener = ActionListener.wrap( @@ -166,7 +169,8 @@ public void testInterceptWithEmptyEligibleIndexMetadataList() { threadPool, settings, () -> repositoriesService, - clusterSettings + clusterSettings, + DefaultRemoteStoreSettings.INSTANCE ); remoteIndexPathUploader.start(); ActionListener actionListener = ActionListener.wrap( @@ -228,7 +232,8 @@ public void testInterceptWithSameRepo() throws IOException { threadPool, settings, () -> repositoriesService, - clusterSettings + clusterSettings, + DefaultRemoteStoreSettings.INSTANCE ); remoteIndexPathUploader.start(); ActionListener actionListener = ActionListener.wrap( @@ -251,7 +256,8 @@ public void testInterceptWithDifferentRepo() throws IOException { threadPool, settings, () -> repositoriesService, - clusterSettings + clusterSettings, + DefaultRemoteStoreSettings.INSTANCE ); remoteIndexPathUploader.start(); ActionListener actionListener = ActionListener.wrap( @@ -271,7 +277,8 @@ public void testInterceptWithLatchAwaitTimeout() throws IOException { threadPool, settings, () -> repositoriesService, - clusterSettings + clusterSettings, + DefaultRemoteStoreSettings.INSTANCE ); remoteIndexPathUploader.start(); @@ -302,7 +309,8 @@ public void testInterceptWithInterruptedExceptionDuringLatchAwait() throws Excep threadPool, settings, () -> repositoriesService, - clusterSettings + clusterSettings, + DefaultRemoteStoreSettings.INSTANCE ); remoteIndexPathUploader.start(); Settings settings = Settings.builder() diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteStoreCustomMetadataResolverTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteStoreCustomMetadataResolverTests.java index abd115732c4db..331ebd0e3fcd4 100644 --- a/server/src/test/java/org/opensearch/index/remote/RemoteStoreCustomMetadataResolverTests.java +++ b/server/src/test/java/org/opensearch/index/remote/RemoteStoreCustomMetadataResolverTests.java @@ -220,4 +220,50 @@ public void testTranslogMetadataAllowedMinVersionOlder() { assertFalse(resolver.isTranslogMetadataEnabled()); } + public void testTranslogPathFixedPathSetting() { + + // Default settings + Settings settings = 
Settings.builder().build(); + ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + RemoteStoreSettings remoteStoreSettings = new RemoteStoreSettings(settings, clusterSettings); + assertEquals("", remoteStoreSettings.getTranslogPathFixedPrefix()); + + // Any other random value + String randomPrefix = randomAlphaOfLengthBetween(2, 5); + settings = Settings.builder().put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_TRANSLOG_PATH_PREFIX.getKey(), randomPrefix).build(); + remoteStoreSettings = new RemoteStoreSettings(settings, clusterSettings); + assertEquals(randomPrefix, remoteStoreSettings.getTranslogPathFixedPrefix()); + + // Set any other random value, the setting still points to the old value + clusterSettings.applySettings( + Settings.builder() + .put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_TRANSLOG_PATH_PREFIX.getKey(), randomAlphaOfLengthBetween(2, 5)) + .build() + ); + assertEquals(randomPrefix, remoteStoreSettings.getTranslogPathFixedPrefix()); + } + + public void testSegmentsPathFixedPathSetting() { + + // Default settings + Settings settings = Settings.builder().build(); + ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + RemoteStoreSettings remoteStoreSettings = new RemoteStoreSettings(settings, clusterSettings); + assertEquals("", remoteStoreSettings.getSegmentsPathFixedPrefix()); + + // Any other random value + String randomPrefix = randomAlphaOfLengthBetween(2, 5); + settings = Settings.builder().put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_SEGMENTS_PATH_PREFIX.getKey(), randomPrefix).build(); + remoteStoreSettings = new RemoteStoreSettings(settings, clusterSettings); + assertEquals(randomPrefix, remoteStoreSettings.getSegmentsPathFixedPrefix()); + + // Set any other random value, the setting still points to the old value + clusterSettings.applySettings( + Settings.builder() + .put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_SEGMENTS_PATH_PREFIX.getKey(), randomAlphaOfLengthBetween(2, 5)) + .build() + ); + assertEquals(randomPrefix, remoteStoreSettings.getSegmentsPathFixedPrefix()); + + } } diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteStoreEnumsTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteStoreEnumsTests.java index 481a0568ff0a7..b2203d6eccd27 100644 --- a/server/src/test/java/org/opensearch/index/remote/RemoteStoreEnumsTests.java +++ b/server/src/test/java/org/opensearch/index/remote/RemoteStoreEnumsTests.java @@ -14,6 +14,7 @@ import org.opensearch.index.remote.RemoteStoreEnums.DataType; import org.opensearch.index.remote.RemoteStoreEnums.PathType; import org.opensearch.index.remote.RemoteStorePathStrategy.ShardDataPathInput; +import org.opensearch.index.remote.RemoteStorePathStrategy.SnapshotShardPathInput; import org.opensearch.test.OpenSearchTestCase; import java.util.ArrayList; @@ -141,6 +142,7 @@ public void testGeneratePathForHashedPrefixType() { String shardId = String.valueOf(randomInt(100)); DataCategory dataCategory = TRANSLOG; DataType dataType = DATA; + String fixedPrefix = "."; String basePath = getPath(pathList) + indexUUID + SEPARATOR + shardId; // Translog Data @@ -150,11 +152,20 @@ public void testGeneratePathForHashedPrefixType() { .shardId(shardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); BlobPath result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64); assertTrue( result.buildAsString() - .startsWith(String.join(SEPARATOR, FNV_1A_BASE64.hash(pathInput), 
basePath, dataCategory.getName(), dataType.getName())) + .startsWith( + String.join( + SEPARATOR, + fixedPrefix + FNV_1A_BASE64.hash(pathInput), + basePath, + dataCategory.getName(), + dataType.getName() + ) + ) ); // assert with exact value for known base path @@ -167,9 +178,10 @@ public void testGeneratePathForHashedPrefixType() { .shardId(fixedShardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64); - assertEquals("DgSI70IciXs/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/translog/data/", result.buildAsString()); + assertEquals(".DgSI70IciXs/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/translog/data/", result.buildAsString()); // Translog Metadata dataType = METADATA; @@ -179,11 +191,20 @@ public void testGeneratePathForHashedPrefixType() { .shardId(shardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64); assertTrue( result.buildAsString() - .startsWith(String.join(SEPARATOR, FNV_1A_BASE64.hash(pathInput), basePath, dataCategory.getName(), dataType.getName())) + .startsWith( + String.join( + SEPARATOR, + fixedPrefix + FNV_1A_BASE64.hash(pathInput), + basePath, + dataCategory.getName(), + dataType.getName() + ) + ) ); // assert with exact value for known base path @@ -193,9 +214,10 @@ public void testGeneratePathForHashedPrefixType() { .shardId(fixedShardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64); - assertEquals("oKU5SjILiy4/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/translog/metadata/", result.buildAsString()); + assertEquals(".oKU5SjILiy4/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/translog/metadata/", result.buildAsString()); // Segment Data dataCategory = SEGMENTS; @@ -206,11 +228,20 @@ public void testGeneratePathForHashedPrefixType() { .shardId(shardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64); assertTrue( result.buildAsString() - .startsWith(String.join(SEPARATOR, FNV_1A_BASE64.hash(pathInput), basePath, dataCategory.getName(), dataType.getName())) + .startsWith( + String.join( + SEPARATOR, + fixedPrefix + FNV_1A_BASE64.hash(pathInput), + basePath, + dataCategory.getName(), + dataType.getName() + ) + ) ); // assert with exact value for known base path @@ -220,9 +251,10 @@ public void testGeneratePathForHashedPrefixType() { .shardId(fixedShardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64); - assertEquals("AUBRfCIuWdk/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/segments/data/", result.buildAsString()); + assertEquals(".AUBRfCIuWdk/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/segments/data/", result.buildAsString()); // Segment Metadata dataType = METADATA; @@ -232,11 +264,20 @@ public void testGeneratePathForHashedPrefixType() { .shardId(shardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64); assertTrue( result.buildAsString() - .startsWith(String.join(SEPARATOR, FNV_1A_BASE64.hash(pathInput), basePath, dataCategory.getName(), dataType.getName())) + .startsWith( + String.join( + SEPARATOR, + fixedPrefix + FNV_1A_BASE64.hash(pathInput), + 
basePath, + dataCategory.getName(), + dataType.getName() + ) + ) ); // assert with exact value for known base path @@ -246,9 +287,10 @@ public void testGeneratePathForHashedPrefixType() { .shardId(fixedShardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64); - assertEquals("erwR-G735Uw/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/segments/metadata/", result.buildAsString()); + assertEquals(".erwR-G735Uw/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/segments/metadata/", result.buildAsString()); // Segment Lockfiles dataType = LOCK_FILES; @@ -258,11 +300,20 @@ public void testGeneratePathForHashedPrefixType() { .shardId(shardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64); assertTrue( result.buildAsString() - .startsWith(String.join(SEPARATOR, FNV_1A_BASE64.hash(pathInput), basePath, dataCategory.getName(), dataType.getName())) + .startsWith( + String.join( + SEPARATOR, + fixedPrefix + FNV_1A_BASE64.hash(pathInput), + basePath, + dataCategory.getName(), + dataType.getName() + ) + ) ); // assert with exact value for known base path @@ -272,12 +323,14 @@ public void testGeneratePathForHashedPrefixType() { .shardId(fixedShardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64); - assertEquals("KeYDIk0mJXI/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/segments/lock_files/", result.buildAsString()); + assertEquals(".KeYDIk0mJXI/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/segments/lock_files/", result.buildAsString()); } public void testGeneratePathForHashedPrefixTypeAndFNVCompositeHashAlgorithm() { + String fixedPrefix = "."; BlobPath blobPath = new BlobPath(); List pathList = getPathList(); for (String path : pathList) { @@ -297,12 +350,19 @@ public void testGeneratePathForHashedPrefixTypeAndFNVCompositeHashAlgorithm() { .shardId(shardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); BlobPath result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE_1); assertTrue( result.buildAsString() .startsWith( - String.join(SEPARATOR, FNV_1A_COMPOSITE_1.hash(pathInput), basePath, dataCategory.getName(), dataType.getName()) + String.join( + SEPARATOR, + fixedPrefix + FNV_1A_COMPOSITE_1.hash(pathInput), + basePath, + dataCategory.getName(), + dataType.getName() + ) ) ); @@ -316,9 +376,10 @@ public void testGeneratePathForHashedPrefixTypeAndFNVCompositeHashAlgorithm() { .shardId(fixedShardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE_1); - assertEquals("D10000001001000/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/translog/data/", result.buildAsString()); + assertEquals(".D10000001001000/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/translog/data/", result.buildAsString()); // Translog Metadata dataType = METADATA; @@ -328,12 +389,19 @@ public void testGeneratePathForHashedPrefixTypeAndFNVCompositeHashAlgorithm() { .shardId(shardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE_1); assertTrue( result.buildAsString() .startsWith( - String.join(SEPARATOR, FNV_1A_COMPOSITE_1.hash(pathInput), basePath, dataCategory.getName(), 
dataType.getName()) + String.join( + SEPARATOR, + fixedPrefix + FNV_1A_COMPOSITE_1.hash(pathInput), + basePath, + dataCategory.getName(), + dataType.getName() + ) ) ); @@ -344,10 +412,11 @@ public void testGeneratePathForHashedPrefixTypeAndFNVCompositeHashAlgorithm() { .shardId(fixedShardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE_1); assertEquals( - "o00101001010011/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/translog/metadata/", + ".o00101001010011/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/translog/metadata/", result.buildAsString() ); @@ -360,12 +429,19 @@ public void testGeneratePathForHashedPrefixTypeAndFNVCompositeHashAlgorithm() { .shardId(shardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE_1); assertTrue( result.buildAsString() .startsWith( - String.join(SEPARATOR, FNV_1A_COMPOSITE_1.hash(pathInput), basePath, dataCategory.getName(), dataType.getName()) + String.join( + SEPARATOR, + fixedPrefix + FNV_1A_COMPOSITE_1.hash(pathInput), + basePath, + dataCategory.getName(), + dataType.getName() + ) ) ); @@ -376,9 +452,10 @@ public void testGeneratePathForHashedPrefixTypeAndFNVCompositeHashAlgorithm() { .shardId(fixedShardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE_1); - assertEquals("A01010000000101/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/segments/data/", result.buildAsString()); + assertEquals(".A01010000000101/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/segments/data/", result.buildAsString()); // Segment Metadata dataType = METADATA; @@ -388,12 +465,19 @@ public void testGeneratePathForHashedPrefixTypeAndFNVCompositeHashAlgorithm() { .shardId(shardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE_1); assertTrue( result.buildAsString() .startsWith( - String.join(SEPARATOR, FNV_1A_COMPOSITE_1.hash(pathInput), basePath, dataCategory.getName(), dataType.getName()) + String.join( + SEPARATOR, + fixedPrefix + FNV_1A_COMPOSITE_1.hash(pathInput), + basePath, + dataCategory.getName(), + dataType.getName() + ) ) ); @@ -404,10 +488,11 @@ public void testGeneratePathForHashedPrefixTypeAndFNVCompositeHashAlgorithm() { .shardId(fixedShardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE_1); assertEquals( - "e10101111000001/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/segments/metadata/", + ".e10101111000001/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/segments/metadata/", result.buildAsString() ); @@ -419,12 +504,19 @@ public void testGeneratePathForHashedPrefixTypeAndFNVCompositeHashAlgorithm() { .shardId(shardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE_1); assertTrue( result.buildAsString() .startsWith( - String.join(SEPARATOR, FNV_1A_COMPOSITE_1.hash(pathInput), basePath, dataCategory.getName(), dataType.getName()) + String.join( + SEPARATOR, + fixedPrefix + FNV_1A_COMPOSITE_1.hash(pathInput), + basePath, + dataCategory.getName(), + dataType.getName() + ) ) ); @@ -435,10 +527,11 @@ public void 
testGeneratePathForHashedPrefixTypeAndFNVCompositeHashAlgorithm() { .shardId(fixedShardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE_1); assertEquals( - "K01111001100000/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/segments/lock_files/", + ".K01111001100000/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/segments/lock_files/", result.buildAsString() ); } @@ -454,6 +547,7 @@ public void testGeneratePathForHashedInfixType() { String shardId = String.valueOf(randomInt(100)); DataCategory dataCategory = TRANSLOG; DataType dataType = DATA; + String fixedPrefix = "."; String basePath = getPath(pathList); basePath = basePath.isEmpty() ? basePath : basePath.substring(0, basePath.length() - 1); @@ -464,9 +558,10 @@ public void testGeneratePathForHashedInfixType() { .shardId(shardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); BlobPath result = HASHED_INFIX.path(pathInput, FNV_1A_BASE64); - String expected = derivePath(basePath, pathInput); + String expected = derivePath(basePath, pathInput, fixedPrefix); String actual = result.buildAsString(); assertTrue(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual.startsWith(expected)); @@ -480,9 +575,10 @@ public void testGeneratePathForHashedInfixType() { .shardId(fixedShardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); result = HASHED_INFIX.path(pathInput, FNV_1A_BASE64); - expected = "xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/DgSI70IciXs/k2ijhe877d7yuhx7/10/translog/data/"; + expected = "xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/.DgSI70IciXs/k2ijhe877d7yuhx7/10/translog/data/"; actual = result.buildAsString(); assertTrue(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual.startsWith(expected)); @@ -494,10 +590,11 @@ public void testGeneratePathForHashedInfixType() { .shardId(shardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); result = HASHED_INFIX.path(pathInput, FNV_1A_BASE64); - expected = derivePath(basePath, pathInput); + expected = derivePath(basePath, pathInput, fixedPrefix); actual = result.buildAsString(); assertTrue(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual.startsWith(expected)); @@ -508,9 +605,10 @@ public void testGeneratePathForHashedInfixType() { .shardId(fixedShardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); result = HASHED_INFIX.path(pathInput, FNV_1A_BASE64); - expected = "xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/oKU5SjILiy4/k2ijhe877d7yuhx7/10/translog/metadata/"; + expected = "xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/.oKU5SjILiy4/k2ijhe877d7yuhx7/10/translog/metadata/"; actual = result.buildAsString(); assertTrue(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual.startsWith(expected)); @@ -523,9 +621,10 @@ public void testGeneratePathForHashedInfixType() { .shardId(shardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); result = HASHED_INFIX.path(pathInput, FNV_1A_BASE64); - expected = derivePath(basePath, pathInput); + expected = derivePath(basePath, pathInput, fixedPrefix); actual = result.buildAsString(); assertTrue(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), 
actual.startsWith(expected)); @@ -536,9 +635,10 @@ public void testGeneratePathForHashedInfixType() { .shardId(fixedShardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); result = HASHED_INFIX.path(pathInput, FNV_1A_BASE64); - expected = "xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/AUBRfCIuWdk/k2ijhe877d7yuhx7/10/segments/data/"; + expected = "xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/.AUBRfCIuWdk/k2ijhe877d7yuhx7/10/segments/data/"; actual = result.buildAsString(); assertTrue(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual.startsWith(expected)); @@ -550,9 +650,10 @@ public void testGeneratePathForHashedInfixType() { .shardId(shardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); result = HASHED_INFIX.path(pathInput, FNV_1A_BASE64); - expected = derivePath(basePath, pathInput); + expected = derivePath(basePath, pathInput, fixedPrefix); actual = result.buildAsString(); assertTrue(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual.startsWith(expected)); @@ -563,9 +664,10 @@ public void testGeneratePathForHashedInfixType() { .shardId(fixedShardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); result = HASHED_INFIX.path(pathInput, FNV_1A_BASE64); - expected = "xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/erwR-G735Uw/k2ijhe877d7yuhx7/10/segments/metadata/"; + expected = "xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/.erwR-G735Uw/k2ijhe877d7yuhx7/10/segments/metadata/"; actual = result.buildAsString(); assertTrue(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual.startsWith(expected)); @@ -577,9 +679,10 @@ public void testGeneratePathForHashedInfixType() { .shardId(shardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); result = HASHED_INFIX.path(pathInput, FNV_1A_BASE64); - expected = derivePath(basePath, pathInput); + expected = derivePath(basePath, pathInput, fixedPrefix); actual = result.buildAsString(); assertTrue(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual.startsWith(expected)); @@ -590,18 +693,62 @@ public void testGeneratePathForHashedInfixType() { .shardId(fixedShardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); result = HASHED_INFIX.path(pathInput, FNV_1A_BASE64); - expected = "xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/KeYDIk0mJXI/k2ijhe877d7yuhx7/10/segments/lock_files/"; + expected = "xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/.KeYDIk0mJXI/k2ijhe877d7yuhx7/10/segments/lock_files/"; actual = result.buildAsString(); assertTrue(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual.startsWith(expected)); } - private String derivePath(String basePath, ShardDataPathInput pathInput) { + public void testGeneratePathForSnapshotShardPathInput() { + String fixedPrefix = "snap"; + BlobPath blobPath = BlobPath.cleanPath().add("xjsdhj").add("ddjsha").add("yudy7sd").add("32hdhua7").add("89jdij"); + String indexUUID = "dsdkjsu8832njn"; + String shardId = "10"; + SnapshotShardPathInput pathInput = SnapshotShardPathInput.builder() + .basePath(blobPath) + .indexUUID(indexUUID) + .shardId(shardId) + .fixedPrefix(fixedPrefix) + .build(); + + // FIXED PATH + BlobPath result = FIXED.path(pathInput, null); + String expected = "xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/indices/dsdkjsu8832njn/10/"; + String 
actual = result.buildAsString(); + assertEquals(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual, expected); + + // HASHED_PREFIX - FNV_1A_COMPOSITE_1 + result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE_1); + expected = "snap_11001000010110/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/indices/dsdkjsu8832njn/10/"; + actual = result.buildAsString(); + assertEquals(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual, expected); + + // HASHED_PREFIX - FNV_1A_BASE64 + result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64); + expected = "snap_yFiSl_VGGM/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/indices/dsdkjsu8832njn/10/"; + actual = result.buildAsString(); + assertEquals(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual, expected); + + // HASHED_INFIX - FNV_1A_COMPOSITE_1 + result = HASHED_INFIX.path(pathInput, FNV_1A_COMPOSITE_1); + expected = "xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/snap_11001000010110/indices/dsdkjsu8832njn/10/"; + actual = result.buildAsString(); + assertEquals(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual, expected); + + // HASHED_INFIX - FNV_1A_BASE64 + result = HASHED_INFIX.path(pathInput, FNV_1A_BASE64); + expected = "xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/snap_yFiSl_VGGM/indices/dsdkjsu8832njn/10/"; + actual = result.buildAsString(); + assertEquals(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual, expected); + } + + private String derivePath(String basePath, ShardDataPathInput pathInput, String fixedPrefix) { return "".equals(basePath) ? String.join( SEPARATOR, - FNV_1A_BASE64.hash(pathInput), + fixedPrefix + FNV_1A_BASE64.hash(pathInput), pathInput.indexUUID(), pathInput.shardId(), pathInput.dataCategory().getName(), @@ -610,7 +757,7 @@ private String derivePath(String basePath, ShardDataPathInput pathInput) { : String.join( SEPARATOR, basePath, - FNV_1A_BASE64.hash(pathInput), + fixedPrefix + FNV_1A_BASE64.hash(pathInput), pathInput.indexUUID(), pathInput.shardId(), pathInput.dataCategory().getName(), diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteStorePathStrategyTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteStorePathStrategyTests.java index cf5876cb5caf1..c27895e4e96b5 100644 --- a/server/src/test/java/org/opensearch/index/remote/RemoteStorePathStrategyTests.java +++ b/server/src/test/java/org/opensearch/index/remote/RemoteStorePathStrategyTests.java @@ -82,6 +82,46 @@ public void testFixedSubPath() { .dataType(DATA) .build(); assertEquals(BlobPath.cleanPath().add(INDEX_UUID).add(SHARD_ID).add(TRANSLOG.getName()).add(DATA.getName()), input2.fixedSubPath()); + } + + public void testSnapshotShardPathInput() { + assertThrows(NullPointerException.class, () -> RemoteStorePathStrategy.SnapshotShardPathInput.builder().build()); + assertThrows( + NullPointerException.class, + () -> RemoteStorePathStrategy.SnapshotShardPathInput.builder().basePath(BASE_PATH).build() + ); + assertThrows( + NullPointerException.class, + () -> RemoteStorePathStrategy.SnapshotShardPathInput.builder().indexUUID(INDEX_UUID).build() + ); + assertThrows(NullPointerException.class, () -> RemoteStorePathStrategy.SnapshotShardPathInput.builder().shardId(SHARD_ID).build()); + + RemoteStorePathStrategy.SnapshotShardPathInput input = RemoteStorePathStrategy.SnapshotShardPathInput.builder() + .basePath(BASE_PATH) + 
.indexUUID(INDEX_UUID) + .shardId(SHARD_ID) + .build(); + assertEquals(BASE_PATH, input.basePath()); + assertEquals(INDEX_UUID, input.indexUUID()); + assertEquals(SHARD_ID, input.shardId()); + } + + public void testSnapshotShardPathInputFixedSubPath() { + RemoteStorePathStrategy.SnapshotShardPathInput input = RemoteStorePathStrategy.SnapshotShardPathInput.builder() + .basePath(BASE_PATH) + .indexUUID(INDEX_UUID) + .shardId(SHARD_ID) + .build(); + assertEquals(BlobPath.cleanPath().add("indices").add(INDEX_UUID).add(SHARD_ID), input.fixedSubPath()); + } + public void testSnapshotShardPathInputHashPath() { + RemoteStorePathStrategy.SnapshotShardPathInput input = RemoteStorePathStrategy.SnapshotShardPathInput.builder() + .basePath(BASE_PATH) + .indexUUID(INDEX_UUID) + .shardId(SHARD_ID) + .build(); + assertEquals(BlobPath.cleanPath().add(SHARD_ID).add(INDEX_UUID), input.hashPath()); } + } diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java index a6db37285fe6f..be30de97ee830 100644 --- a/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java +++ b/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java @@ -67,6 +67,7 @@ import static org.opensearch.index.store.RemoteSegmentStoreDirectory.MetadataFilenameUtils.SEPARATOR; import static org.opensearch.index.translog.transfer.TranslogTransferMetadata.METADATA_SEPARATOR; import static org.opensearch.indices.RemoteStoreSettings.CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; @@ -540,6 +541,7 @@ private RoutingTable createRoutingTableAllShardsStarted( private Map getRemoteStoreNodeAttributes() { Map remoteStoreNodeAttributes = new HashMap<>(); + remoteStoreNodeAttributes.put(REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-cluster-repo-1"); remoteStoreNodeAttributes.put(REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-segment-repo-1"); remoteStoreNodeAttributes.put(REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-translog-repo-1"); return remoteStoreNodeAttributes; @@ -635,7 +637,7 @@ private Tuple, Set> testGetPinnedTimestampLockedFilesW String metadataPrefix = "metadata__1__2__3__4__5__"; Map metadataFiles = new HashMap<>(); for (Long metadataFileTimestamp : metadataFileTimestamps) { - metadataFiles.put(metadataFileTimestamp, metadataPrefix + RemoteStoreUtils.invertLong(metadataFileTimestamp)); + metadataFiles.put(metadataFileTimestamp, metadataPrefix + RemoteStoreUtils.invertLong(metadataFileTimestamp) + "__1"); } return new Tuple<>( metadataFiles, @@ -660,7 +662,7 @@ private Tuple, Set> testGetPinnedTimestampLockedFilesW String primaryTerm = RemoteStoreUtils.invertLong(metadataFileTimestampPrimaryTerm.getValue()); String metadataPrefix = "metadata__" + primaryTerm + "__2__3__4__5__"; long metadataFileTimestamp = metadataFileTimestampPrimaryTerm.getKey(); - metadataFiles.put(metadataFileTimestamp, metadataPrefix + RemoteStoreUtils.invertLong(metadataFileTimestamp)); + 
metadataFiles.put(metadataFileTimestamp, metadataPrefix + RemoteStoreUtils.invertLong(metadataFileTimestamp) + "__1"); } return new Tuple<>( metadataFiles, diff --git a/server/src/test/java/org/opensearch/index/search/NestedHelperTests.java b/server/src/test/java/org/opensearch/index/search/NestedHelperTests.java index 7ffcc0fb7437a..f7f921e824490 100644 --- a/server/src/test/java/org/opensearch/index/search/NestedHelperTests.java +++ b/server/src/test/java/org/opensearch/index/search/NestedHelperTests.java @@ -57,6 +57,8 @@ import java.io.IOException; import java.util.Collections; +import static org.opensearch.index.mapper.FieldTypeTestCase.MOCK_QSC_ENABLE_INDEX_DOC_VALUES; + public class NestedHelperTests extends OpenSearchSingleNodeTestCase { IndexService indexService; @@ -132,28 +134,28 @@ public void testMatchNo() { } public void testTermsQuery() { - Query termsQuery = mapperService.fieldType("foo").termsQuery(Collections.singletonList("bar"), null); + Query termsQuery = mapperService.fieldType("foo").termsQuery(Collections.singletonList("bar"), MOCK_QSC_ENABLE_INDEX_DOC_VALUES); assertFalse(new NestedHelper(mapperService).mightMatchNestedDocs(termsQuery)); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested1")); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested2")); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested3")); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested_missing")); - termsQuery = mapperService.fieldType("nested1.foo").termsQuery(Collections.singletonList("bar"), null); + termsQuery = mapperService.fieldType("nested1.foo").termsQuery(Collections.singletonList("bar"), MOCK_QSC_ENABLE_INDEX_DOC_VALUES); assertTrue(new NestedHelper(mapperService).mightMatchNestedDocs(termsQuery)); assertFalse(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested1")); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested2")); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested3")); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested_missing")); - termsQuery = mapperService.fieldType("nested2.foo").termsQuery(Collections.singletonList("bar"), null); + termsQuery = mapperService.fieldType("nested2.foo").termsQuery(Collections.singletonList("bar"), MOCK_QSC_ENABLE_INDEX_DOC_VALUES); assertTrue(new NestedHelper(mapperService).mightMatchNestedDocs(termsQuery)); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested1")); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested2")); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested3")); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested_missing")); - termsQuery = mapperService.fieldType("nested3.foo").termsQuery(Collections.singletonList("bar"), null); + termsQuery = mapperService.fieldType("nested3.foo").termsQuery(Collections.singletonList("bar"), MOCK_QSC_ENABLE_INDEX_DOC_VALUES); assertTrue(new NestedHelper(mapperService).mightMatchNestedDocs(termsQuery)); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested1")); assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested2")); diff --git a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java 
b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java index 377e4e99e9964..96794a83ef762 100644 --- a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java @@ -2899,7 +2899,7 @@ public void testSyncSegmentsFromGivenRemoteSegmentStore() throws IOException { target = reinitShard(target, routing); DiscoveryNode localNode = new DiscoveryNode("foo", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT); target.markAsRecovering("from snapshot", new RecoveryState(routing, localNode, null)); - target.syncSegmentsFromGivenRemoteSegmentStore(false, tempRemoteSegmentDirectory, primaryTerm, commitGeneration); + target.syncSegmentsFromGivenRemoteSegmentStore(false, tempRemoteSegmentDirectory, null, false); RemoteSegmentStoreDirectory remoteStoreDirectory = ((RemoteSegmentStoreDirectory) ((FilterDirectory) ((FilterDirectory) target .remoteStore() .directory()).getDelegate()).getDelegate()); diff --git a/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryFactoryTests.java b/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryFactoryTests.java index cad5e47531cc6..95b5d4456baf6 100644 --- a/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryFactoryTests.java +++ b/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryFactoryTests.java @@ -57,7 +57,7 @@ public void setup() { repositoriesService = mock(RepositoriesService.class); threadPool = mock(ThreadPool.class); when(repositoriesServiceSupplier.get()).thenReturn(repositoriesService); - remoteSegmentStoreDirectoryFactory = new RemoteSegmentStoreDirectoryFactory(repositoriesServiceSupplier, threadPool); + remoteSegmentStoreDirectoryFactory = new RemoteSegmentStoreDirectoryFactory(repositoriesServiceSupplier, threadPool, ""); } public void testNewDirectory() throws IOException { diff --git a/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java b/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java index 336d4bafd4b66..ecd6620dbea15 100644 --- a/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java +++ b/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java @@ -1170,9 +1170,9 @@ public void testInitializeToSpecificTimestampNoMdMatchingTimestamp() throws IOEx public void testInitializeToSpecificTimestampMatchingMdFile() throws IOException { String metadataPrefix = "metadata__1__2__3__4__5__"; List metadataFiles = new ArrayList<>(); - metadataFiles.add(metadataPrefix + RemoteStoreUtils.invertLong(1000)); - metadataFiles.add(metadataPrefix + RemoteStoreUtils.invertLong(2000)); - metadataFiles.add(metadataPrefix + RemoteStoreUtils.invertLong(3000)); + metadataFiles.add(metadataPrefix + RemoteStoreUtils.invertLong(1000) + "__1"); + metadataFiles.add(metadataPrefix + RemoteStoreUtils.invertLong(2000) + "__1"); + metadataFiles.add(metadataPrefix + RemoteStoreUtils.invertLong(3000) + "__1"); Map metadata = new HashMap<>(); metadata.put("_0.cfe", "_0.cfe::_0.cfe__" + UUIDs.base64UUID() + "::1234::512::" + Version.LATEST.major); @@ -1184,7 +1184,7 @@ public void testInitializeToSpecificTimestampMatchingMdFile() throws IOException Integer.MAX_VALUE ) ).thenReturn(metadataFiles); - when(remoteMetadataDirectory.getBlobStream(metadataPrefix + RemoteStoreUtils.invertLong(1000))).thenReturn( + 
when(remoteMetadataDirectory.getBlobStream(metadataPrefix + RemoteStoreUtils.invertLong(1000) + "__1")).thenReturn( createMetadataFileBytes(metadata, indexShard.getLatestReplicationCheckpoint(), segmentInfos) ); diff --git a/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryWithPinnedTimestampTests.java b/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryWithPinnedTimestampTests.java index 107d59aa97549..e71023125d4cd 100644 --- a/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryWithPinnedTimestampTests.java +++ b/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryWithPinnedTimestampTests.java @@ -8,15 +8,12 @@ package org.opensearch.index.store; -import org.opensearch.common.blobstore.BlobMetadata; +import org.opensearch.common.blobstore.BlobContainer; import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.blobstore.BlobStore; import org.opensearch.common.blobstore.support.PlainBlobMetadata; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; -import org.opensearch.core.action.ActionListener; -import org.opensearch.gateway.remote.model.RemotePinnedTimestamps; -import org.opensearch.gateway.remote.model.RemoteStorePinnedTimestampsBlobStore; -import org.opensearch.index.translog.transfer.BlobStoreTransferService; import org.opensearch.indices.RemoteStoreSettings; import org.opensearch.node.Node; import org.opensearch.node.remotestore.RemoteStoreNodeAttribute; @@ -26,7 +23,7 @@ import org.junit.Before; import java.io.IOException; -import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.function.Supplier; @@ -38,7 +35,6 @@ import static org.hamcrest.CoreMatchers.is; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.anyInt; -import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; @@ -48,8 +44,7 @@ public class RemoteSegmentStoreDirectoryWithPinnedTimestampTests extends RemoteSegmentStoreDirectoryTests { Runnable updatePinnedTimstampTask; - BlobStoreTransferService blobStoreTransferService; - RemoteStorePinnedTimestampsBlobStore remoteStorePinnedTimestampsBlobStore; + BlobContainer blobContainer; RemoteStorePinnedTimestampService remoteStorePinnedTimestampServiceSpy; @Before @@ -82,16 +77,13 @@ public void setupPinnedTimestamp() throws IOException { ); remoteStorePinnedTimestampServiceSpy = Mockito.spy(remoteStorePinnedTimestampService); - remoteStorePinnedTimestampsBlobStore = mock(RemoteStorePinnedTimestampsBlobStore.class); - blobStoreTransferService = mock(BlobStoreTransferService.class); - when(remoteStorePinnedTimestampServiceSpy.pinnedTimestampsBlobStore()).thenReturn(remoteStorePinnedTimestampsBlobStore); - when(remoteStorePinnedTimestampServiceSpy.blobStoreTransferService()).thenReturn(blobStoreTransferService); + BlobStore blobStore = mock(BlobStore.class); + when(blobStoreRepository.blobStore()).thenReturn(blobStore); + when(blobStoreRepository.basePath()).thenReturn(new BlobPath()); + blobContainer = mock(BlobContainer.class); + when(blobStore.blobContainer(any())).thenReturn(blobContainer); - doAnswer(invocationOnMock -> { - ActionListener> actionListener = invocationOnMock.getArgument(3); - actionListener.onResponse(new ArrayList<>()); - return null; - }).when(blobStoreTransferService).listAllInSortedOrder(any(), 
any(), eq(1), any()); + when(blobContainer.listBlobs()).thenReturn(new HashMap<>()); remoteStorePinnedTimestampServiceSpy.start(); @@ -195,15 +187,9 @@ public void testDeleteStaleCommitsPinnedTimestampMdFile() throws Exception { ) ).thenReturn(List.of(metadataFilename, metadataFilename2, metadataFilename3)); - doAnswer(invocationOnMock -> { - ActionListener> actionListener = invocationOnMock.getArgument(3); - actionListener.onResponse(List.of(new PlainBlobMetadata("pinned_timestamp_123", 1000))); - return null; - }).when(blobStoreTransferService).listAllInSortedOrder(any(), any(), eq(1), any()); - long pinnedTimestampMatchingMetadataFilename2 = RemoteSegmentStoreDirectory.MetadataFilenameUtils.getTimestamp(metadataFilename2) + 10; - when(remoteStorePinnedTimestampsBlobStore.read(any())).thenReturn(new RemotePinnedTimestamps.PinnedTimestamps(Map.of(pinnedTimestampMatchingMetadataFilename2, List.of("xyz")))); - when(remoteStorePinnedTimestampsBlobStore.getBlobPathForUpload(any())).thenReturn(new BlobPath()); + String blobName = "snapshot1__" + pinnedTimestampMatchingMetadataFilename2; + when(blobContainer.listBlobs()).thenReturn(Map.of(blobName, new PlainBlobMetadata(blobName, 100))); final Map> metadataFilenameContentMapping = populateMetadata(); final List filesToBeDeleted = metadataFilenameContentMapping.get(metadataFilename3) diff --git a/server/src/test/java/org/opensearch/index/store/lockmanager/RemoteStoreLockManagerFactoryTests.java b/server/src/test/java/org/opensearch/index/store/lockmanager/RemoteStoreLockManagerFactoryTests.java index de3dfbdaa4778..37ae0484d5ee1 100644 --- a/server/src/test/java/org/opensearch/index/store/lockmanager/RemoteStoreLockManagerFactoryTests.java +++ b/server/src/test/java/org/opensearch/index/store/lockmanager/RemoteStoreLockManagerFactoryTests.java @@ -42,7 +42,7 @@ public void setup() throws IOException { repositoriesServiceSupplier = mock(Supplier.class); repositoriesService = mock(RepositoriesService.class); when(repositoriesServiceSupplier.get()).thenReturn(repositoriesService); - remoteStoreLockManagerFactory = new RemoteStoreLockManagerFactory(repositoriesServiceSupplier); + remoteStoreLockManagerFactory = new RemoteStoreLockManagerFactory(repositoriesServiceSupplier, ""); } public void testNewLockManager() throws IOException { diff --git a/server/src/test/java/org/opensearch/index/store/remote/utils/TransferManagerTestCase.java b/server/src/test/java/org/opensearch/index/store/remote/utils/TransferManagerTestCase.java index 810a4c336fdf7..1eae5119ab462 100644 --- a/server/src/test/java/org/opensearch/index/store/remote/utils/TransferManagerTestCase.java +++ b/server/src/test/java/org/opensearch/index/store/remote/utils/TransferManagerTestCase.java @@ -99,7 +99,7 @@ public void testConcurrentAccess() throws Exception { } } - public void testFetchBlobWithConcurrentCacheEvictions() throws Exception { + public void testFetchBlobWithConcurrentCacheEvictions() { // Submit 256 tasks to an executor with 16 threads that will each randomly // request one of eight blobs. Given that the cache can only hold two // blobs this will lead to a huge amount of contention and thrashing. 
@@ -114,41 +114,34 @@ public void testFetchBlobWithConcurrentCacheEvictions() throws Exception { try (IndexInput indexInput = fetchBlobWithName(blobname)) { assertIndexInputIsFunctional(indexInput); } + } catch (IOException ignored) { // fetchBlobWithName may fail due to fixed capacity } catch (Exception e) { throw new AssertionError(e); } })); } // Wait for all threads to complete - for (Future future : futures) { - future.get(10, TimeUnit.SECONDS); + try { + for (Future future : futures) { + future.get(10, TimeUnit.SECONDS); + } + } catch (java.util.concurrent.ExecutionException ignored) { // Index input may be null + } catch (Exception e) { + throw new AssertionError(e); } + } finally { assertTrue(terminate(testRunner)); } MatcherAssert.assertThat("Expected many evictions to happen", fileCache.stats().evictionCount(), greaterThan(0L)); } - public void testUsageExceedsCapacity() throws Exception { - // Fetch resources that exceed the configured capacity of the cache and assert that the - // returned IndexInputs are still functional. - try (IndexInput i1 = fetchBlobWithName("1"); IndexInput i2 = fetchBlobWithName("2"); IndexInput i3 = fetchBlobWithName("3")) { - assertIndexInputIsFunctional(i1); - assertIndexInputIsFunctional(i2); - assertIndexInputIsFunctional(i3); - MatcherAssert.assertThat(fileCache.usage().activeUsage(), equalTo((long) EIGHT_MB * 3)); - MatcherAssert.assertThat(fileCache.usage().usage(), equalTo((long) EIGHT_MB * 3)); - } - MatcherAssert.assertThat(fileCache.usage().activeUsage(), equalTo(0L)); - MatcherAssert.assertThat(fileCache.usage().usage(), equalTo((long) EIGHT_MB * 3)); - // Fetch another resource which will trigger an eviction - try (IndexInput i1 = fetchBlobWithName("1")) { - assertIndexInputIsFunctional(i1); - MatcherAssert.assertThat(fileCache.usage().activeUsage(), equalTo((long) EIGHT_MB)); - MatcherAssert.assertThat(fileCache.usage().usage(), equalTo((long) EIGHT_MB)); - } - MatcherAssert.assertThat(fileCache.usage().activeUsage(), equalTo(0L)); - MatcherAssert.assertThat(fileCache.usage().usage(), equalTo((long) EIGHT_MB)); + public void testOverflowDisabled() throws Exception { + initializeTransferManager(); + IndexInput i1 = fetchBlobWithName("1"); + IndexInput i2 = fetchBlobWithName("2"); + + assertThrows(IOException.class, () -> { IndexInput i3 = fetchBlobWithName("3"); }); } public void testDownloadFails() throws Exception { diff --git a/server/src/test/java/org/opensearch/index/translog/RemoteFsTimestampAwareTranslogTests.java b/server/src/test/java/org/opensearch/index/translog/RemoteFsTimestampAwareTranslogTests.java new file mode 100644 index 0000000000000..4ec68f7fb75b4 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/translog/RemoteFsTimestampAwareTranslogTests.java @@ -0,0 +1,987 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.translog; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.opensearch.action.LatchedActionListener; +import org.opensearch.cluster.metadata.RepositoryMetadata; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.blobstore.BlobContainer; +import org.opensearch.common.blobstore.BlobMetadata; +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.blobstore.BlobStore; +import org.opensearch.common.blobstore.support.PlainBlobMetadata; +import org.opensearch.common.collect.Tuple; +import org.opensearch.common.lease.Releasable; +import org.opensearch.common.lease.Releasables; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.action.ActionListener; +import org.opensearch.index.remote.RemoteStoreUtils; +import org.opensearch.index.remote.RemoteTranslogTransferTracker; +import org.opensearch.index.seqno.SequenceNumbers; +import org.opensearch.index.translog.transfer.TranslogTransferManager; +import org.opensearch.index.translog.transfer.TranslogTransferMetadata; +import org.opensearch.index.translog.transfer.TranslogUploadFailedException; +import org.opensearch.indices.DefaultRemoteStoreSettings; +import org.opensearch.indices.RemoteStoreSettings; +import org.opensearch.node.Node; +import org.opensearch.node.remotestore.RemoteStoreNodeAttribute; +import org.opensearch.node.remotestore.RemoteStorePinnedTimestampService; +import org.opensearch.repositories.RepositoriesService; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.repositories.blobstore.BlobStoreTestUtil; +import org.opensearch.repositories.fs.FsRepository; +import org.opensearch.threadpool.ThreadPool; +import org.junit.Before; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.TreeSet; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import java.util.stream.LongStream; + +import org.mockito.Mockito; + +import static org.opensearch.index.translog.TranslogDeletionPolicies.createTranslogDeletionPolicy; +import static org.opensearch.index.translog.transfer.TranslogTransferMetadata.METADATA_SEPARATOR; +import static org.opensearch.indices.RemoteStoreSettings.CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +@LuceneTestCase.SuppressFileSystems("ExtrasFS") +public class RemoteFsTimestampAwareTranslogTests extends RemoteFsTranslogTests { + + Runnable updatePinnedTimstampTask; + BlobContainer blobContainer; + RemoteStorePinnedTimestampService remoteStorePinnedTimestampServiceSpy; + + @Before + public void setUp() throws Exception { + super.setUp(); + + RemoteStoreSettings remoteStoreSettings = new RemoteStoreSettings( + 
Settings.builder().put(CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED.getKey(), true).build(), + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) + ); + + Supplier repositoriesServiceSupplier = mock(Supplier.class); + Settings settings = Settings.builder() + .put(Node.NODE_ATTRIBUTES.getKey() + RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, "remote-repo") + .build(); + RepositoriesService repositoriesService = mock(RepositoriesService.class); + when(repositoriesServiceSupplier.get()).thenReturn(repositoriesService); + BlobStoreRepository blobStoreRepository = mock(BlobStoreRepository.class); + when(repositoriesService.repository("remote-repo")).thenReturn(blobStoreRepository); + + ThreadPool threadPool = mock(ThreadPool.class); + when(threadPool.schedule(any(), any(), any())).then(invocationOnMock -> { + Runnable updateTask = invocationOnMock.getArgument(0); + updatePinnedTimstampTask = () -> { + long currentTime = System.currentTimeMillis(); + while (RemoteStorePinnedTimestampService.getPinnedTimestamps().v1() < currentTime) { + updateTask.run(); + } + }; + return null; + }).then(subsequentInvocationsOnMock -> null); + + RepositoryMetadata repositoryMetadata = new RepositoryMetadata(randomAlphaOfLength(10), FsRepository.TYPE, settings); + final ClusterService clusterService = BlobStoreTestUtil.mockClusterService(repositoryMetadata); + + RemoteStorePinnedTimestampService remoteStorePinnedTimestampService = new RemoteStorePinnedTimestampService( + repositoriesServiceSupplier, + settings, + threadPool, + clusterService + ); + remoteStorePinnedTimestampServiceSpy = Mockito.spy(remoteStorePinnedTimestampService); + + BlobStore blobStore = mock(BlobStore.class); + when(blobStoreRepository.blobStore()).thenReturn(blobStore); + when(blobStoreRepository.basePath()).thenReturn(new BlobPath()); + blobContainer = mock(BlobContainer.class); + when(blobStore.blobContainer(any())).thenReturn(blobContainer); + + when(blobContainer.listBlobs()).thenReturn(new HashMap<>()); + + remoteStorePinnedTimestampServiceSpy.start(); + } + + @Override + protected RemoteFsTranslog createTranslogInstance( + TranslogConfig translogConfig, + String translogUUID, + TranslogDeletionPolicy deletionPolicy + ) throws IOException { + return new RemoteFsTimestampAwareTranslog( + translogConfig, + translogUUID, + deletionPolicy, + () -> globalCheckpoint.get(), + primaryTerm::get, + getPersistedSeqNoConsumer(), + repository, + threadPool, + primaryMode::get, + new RemoteTranslogTransferTracker(shardId, 10), + DefaultRemoteStoreSettings.INSTANCE + ); + } + + @Override + public void testSyncUpAlwaysFailure() throws IOException { + int translogOperations = randomIntBetween(1, 20); + int count = 0; + fail.failAlways(); + for (int op = 0; op < translogOperations; op++) { + translog.add( + new Translog.Index(String.valueOf(op), count, primaryTerm.get(), Integer.toString(count).getBytes(StandardCharsets.UTF_8)) + ); + try { + translog.sync(); + fail("io exception expected"); + } catch (TranslogUploadFailedException e) { + assertTrue("at least one operation pending", translog.syncNeeded()); + } + } + assertTrue(translog.isOpen()); + fail.failNever(); + translog.sync(); + } + + public void testGetMinMaxTranslogGenerationFromFilename() throws Exception { + RemoteStoreSettings.setPinnedTimestampsLookbackInterval(TimeValue.ZERO); + ArrayList ops = new ArrayList<>(); + + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("0", 0, primaryTerm.get(), new byte[] { 1 })); + 
addToTranslogAndListAndUpload(translog, ops, new Translog.Index("1", 1, primaryTerm.get(), new byte[] { 1 })); + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("2", 2, primaryTerm.get(), new byte[] { 1 })); + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("3", 3, primaryTerm.get(), new byte[] { 1 })); + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("4", 4, primaryTerm.get(), new byte[] { 1 })); + + CountDownLatch latch = new CountDownLatch(1); + blobStoreTransferService.listAllInSortedOrder( + getTranslogDirectory().add(METADATA_DIR), + "metadata", + Integer.MAX_VALUE, + new LatchedActionListener<>(new ActionListener>() { + @Override + public void onResponse(List blobMetadataList) { + Long minGen = 1L; + Long maxGen = 6L; + for (BlobMetadata blobMetadata : blobMetadataList) { + Tuple minMaxGen = TranslogTransferMetadata.getMinMaxTranslogGenerationFromFilename(blobMetadata.name()); + assertEquals(minGen, minMaxGen.v1()); + assertEquals(maxGen, minMaxGen.v2()); + maxGen -= 1; + } + } + + @Override + public void onFailure(Exception e) { + // This means test failure + fail(); + } + }, latch) + ); + latch.await(); + + // Old format metadata file + String oldFormatMdFilename = "metadata__9223372036438563903__9223372036854774799__9223370311919910393__31__1"; + assertNull(TranslogTransferMetadata.getMinMaxTranslogGenerationFromFilename(oldFormatMdFilename)); + + // Node id containing separator + String nodeIdWithSeparator = + "metadata__9223372036438563903__9223372036854774799__9223370311919910393__node__1__9223372036438563958__2__1"; + Tuple minMaxGen = TranslogTransferMetadata.getMinMaxTranslogGenerationFromFilename(nodeIdWithSeparator); + Long minGen = Long.MAX_VALUE - 9223372036438563958L; + assertEquals(minGen, minMaxGen.v1()); + + // Malformed md filename + String malformedMdFileName = "metadata__9223372036438563903__9223372036854774799__9223370311919910393__node1__xyz__3__1"; + assertNull(TranslogTransferMetadata.getMinMaxTranslogGenerationFromFilename(malformedMdFileName)); + } + + public void testGetMinMaxPrimaryTermFromFilename() throws Exception { + // New format metadata file + String newFormatMetadataFile = + "metadata__9223372036854775800__9223372036854774799__9223370311919910393__node1__9223372036438563958__2__1"; + Tuple minMaxPrimaryterm = TranslogTransferMetadata.getMinMaxPrimaryTermFromFilename(newFormatMetadataFile); + Long minPrimaryTerm = 2L; + Long maxPrimaryTerm = 7L; + assertEquals(minPrimaryTerm, minMaxPrimaryterm.v1()); + assertEquals(maxPrimaryTerm, minMaxPrimaryterm.v2()); + + // Old format metadata file + String oldFormatMdFilename = "metadata__9223372036438563903__9223372036854774799__9223370311919910393__31__1"; + assertNull(TranslogTransferMetadata.getMinMaxPrimaryTermFromFilename(oldFormatMdFilename)); + + // Node id containing separator + String nodeIdWithSeparator = + "metadata__9223372036854775800__9223372036854774799__9223370311919910393__node__1__9223372036438563958__2__1"; + minMaxPrimaryterm = TranslogTransferMetadata.getMinMaxPrimaryTermFromFilename(nodeIdWithSeparator); + minPrimaryTerm = 2L; + maxPrimaryTerm = 7L; + assertEquals(minPrimaryTerm, minMaxPrimaryterm.v1()); + assertEquals(maxPrimaryTerm, minMaxPrimaryterm.v2()); + + // Malformed md filename + String malformedMdFileName = "metadata__9223372036854775800__9223372036854774799__9223370311919910393__node1__xyz__3qwe__1"; + assertNull(TranslogTransferMetadata.getMinMaxPrimaryTermFromFilename(malformedMdFileName)); + } + + public void 
testIndexDeletionWithNoPinnedTimestampNoRecentMdFiles() throws Exception { + RemoteStoreSettings.setPinnedTimestampsLookbackInterval(TimeValue.ZERO); + ArrayList ops = new ArrayList<>(); + + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("0", 0, primaryTerm.get(), new byte[] { 1 })); + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("1", 1, primaryTerm.get(), new byte[] { 1 })); + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("2", 2, primaryTerm.get(), new byte[] { 1 })); + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("3", 3, primaryTerm.get(), new byte[] { 1 })); + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("4", 4, primaryTerm.get(), new byte[] { 1 })); + + assertBusy(() -> { + assertEquals(5, blobStoreTransferService.listAll(getTranslogDirectory().add(METADATA_DIR)).size()); + assertEquals( + 12, + blobStoreTransferService.listAll(getTranslogDirectory().add(DATA_DIR).add(String.valueOf(primaryTerm.get()))).size() + ); + }); + + assertBusy(() -> assertTrue(translog.isRemoteGenerationDeletionPermitsAvailable())); + updatePinnedTimstampTask.run(); + ((RemoteFsTimestampAwareTranslog) translog).trimUnreferencedReaders(true, false); + + assertBusy(() -> assertTrue(translog.isRemoteGenerationDeletionPermitsAvailable())); + + assertBusy(() -> { + assertEquals(0, blobStoreTransferService.listAll(getTranslogDirectory().add(METADATA_DIR)).size()); + assertEquals( + 0, + blobStoreTransferService.listAll(getTranslogDirectory().add(DATA_DIR).add(String.valueOf(primaryTerm.get()))).size() + ); + }); + } + + public void testIndexDeletionWithNoPinnedTimestampButRecentFiles() throws Exception { + ArrayList ops = new ArrayList<>(); + + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("0", 0, primaryTerm.get(), new byte[] { 1 })); + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("1", 1, primaryTerm.get(), new byte[] { 1 })); + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("2", 2, primaryTerm.get(), new byte[] { 1 })); + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("3", 3, primaryTerm.get(), new byte[] { 1 })); + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("4", 4, primaryTerm.get(), new byte[] { 1 })); + + updatePinnedTimstampTask.run(); + ((RemoteFsTimestampAwareTranslog) translog).trimUnreferencedReaders(true, false); + + assertBusy(() -> assertTrue(translog.isRemoteGenerationDeletionPermitsAvailable())); + assertBusy(() -> { + assertEquals(5, blobStoreTransferService.listAll(getTranslogDirectory().add(METADATA_DIR)).size()); + assertEquals( + 12, + blobStoreTransferService.listAll(getTranslogDirectory().add(DATA_DIR).add(String.valueOf(primaryTerm.get()))).size() + ); + }); + } + + @Override + public void testSimpleOperationsUpload() throws Exception { + ArrayList ops = new ArrayList<>(); + + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("0", 0, primaryTerm.get(), new byte[] { 1 })); + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("1", 1, primaryTerm.get(), new byte[] { 1 })); + + // First reader is created at the init of translog + assertEquals(3, translog.readers.size()); + assertEquals(2, blobStoreTransferService.listAll(getTranslogDirectory().add(METADATA_DIR)).size()); + assertBusy(() -> { + assertEquals(6, translog.allUploaded().size()); + assertEquals( + 6, + blobStoreTransferService.listAll(getTranslogDirectory().add(DATA_DIR).add(String.valueOf(primaryTerm.get()))).size() + ); + }); + 
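+ // Add more operations so that additional generations and readers accumulate before the trim below.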
+ addToTranslogAndListAndUpload(translog, ops, new Translog.Index("2", 2, primaryTerm.get(), new byte[] { 1 })); + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("3", 3, primaryTerm.get(), new byte[] { 1 })); + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("4", 4, primaryTerm.get(), new byte[] { 1 })); + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("5", 5, primaryTerm.get(), new byte[] { 1 })); + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("6", 6, primaryTerm.get(), new byte[] { 1 })); + + assertBusy(() -> { + assertEquals( + 16, + blobStoreTransferService.listAll(getTranslogDirectory().add(DATA_DIR).add(String.valueOf(primaryTerm.get()))).size() + ); + }); + + assertBusy(() -> assertTrue(translog.isRemoteGenerationDeletionPermitsAvailable())); + + RemoteStoreSettings.setPinnedTimestampsLookbackInterval(TimeValue.ZERO); + // Fetch pinned timestamps so that it won't be stale + updatePinnedTimstampTask.run(); + + translog.setMinSeqNoToKeep(4); + translog.trimUnreferencedReaders(); + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("7", 7, primaryTerm.get(), new byte[] { 1 })); + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("8", 8, primaryTerm.get(), new byte[] { 1 })); + assertBusy(() -> assertTrue(translog.isRemoteGenerationDeletionPermitsAvailable())); + + // Fetch pinned timestamps so that it won't be stale + updatePinnedTimstampTask.run(); + translog.trimUnreferencedReaders(); + + assertBusy(() -> assertTrue(translog.isRemoteGenerationDeletionPermitsAvailable())); + assertEquals(5, translog.readers.size()); + assertBusy(() -> { + assertEquals(1, blobStoreTransferService.listAll(getTranslogDirectory().add(METADATA_DIR)).size()); + assertEquals(10, translog.allUploaded().size()); + assertEquals( + 10, + blobStoreTransferService.listAll(getTranslogDirectory().add(DATA_DIR).add(String.valueOf(primaryTerm.get()))).size() + ); + }, 60, TimeUnit.SECONDS); + } + + @Override + public void testMetadataFileDeletion() throws Exception { + RemoteStoreSettings.setPinnedTimestampsLookbackInterval(TimeValue.ZERO); + ArrayList ops = new ArrayList<>(); + // Test deletion of metadata files + int numDocs = randomIntBetween(6, 10); + for (int i = 0; i < numDocs; i++) { + addToTranslogAndListAndUpload(translog, ops, new Translog.Index(String.valueOf(i), i, primaryTerm.get(), new byte[] { 1 })); + translog.setMinSeqNoToKeep(i); + // Fetch pinned timestamps so that it won't be stale + updatePinnedTimstampTask.run(); + translog.trimUnreferencedReaders(); + assertBusy(() -> assertTrue(translog.isRemoteGenerationDeletionPermitsAvailable())); + assertEquals(1, translog.readers.size()); + } + assertBusy(() -> assertEquals(2, translog.allUploaded().size())); + addToTranslogAndListAndUpload( + translog, + ops, + new Translog.Index(String.valueOf(numDocs), numDocs, primaryTerm.get(), new byte[] { 1 }) + ); + addToTranslogAndListAndUpload( + translog, + ops, + new Translog.Index(String.valueOf(numDocs + 1), numDocs + 1, primaryTerm.get(), new byte[] { 1 }) + ); + updatePinnedTimstampTask.run(); + translog.trimUnreferencedReaders(); + assertBusy(() -> { assertEquals(1, blobStoreTransferService.listAll(getTranslogDirectory().add(METADATA_DIR)).size()); }); + } + + public void testMetadataFileDeletionWithPinnedTimestamps() throws Exception { + ArrayList ops = new ArrayList<>(); + // Test deletion of metadata files + int numDocs = randomIntBetween(16, 20); + for (int i = 0; i < numDocs; i++) { + 
addToTranslogAndListAndUpload(translog, ops, new Translog.Index(String.valueOf(i), i, primaryTerm.get(), new byte[] { 1 }));
+ translog.setMinSeqNoToKeep(i);
+ assertBusy(() -> assertTrue(translog.isRemoteGenerationDeletionPermitsAvailable()));
+ translog.trimUnreferencedReaders();
+ // This is just to make sure that each metadata file is at least 1ms apart
+ Thread.sleep(1);
+ }
+
+ CountDownLatch latch = new CountDownLatch(1);
+ blobStoreTransferService.listAllInSortedOrder(
+ getTranslogDirectory().add(METADATA_DIR),
+ "metadata",
+ Integer.MAX_VALUE,
+ new LatchedActionListener<>(new ActionListener<>() {
+ @Override
+ public void onResponse(List blobMetadataList) {
+ List pinnedTimestampMatchingMetadataFiles = new ArrayList<>();
+ List pinnedTimestamps = new ArrayList<>();
+ for (BlobMetadata blobMetadata : blobMetadataList) {
+ String metadataFilename = blobMetadata.name();
+ if (randomBoolean()) {
+ long timestamp = RemoteStoreUtils.invertLong(metadataFilename.split(METADATA_SEPARATOR)[3]);
+ pinnedTimestamps.add(timestamp);
+ pinnedTimestampMatchingMetadataFiles.add(metadataFilename);
+ }
+ }
+
+ Map pinnedTimestampsMap = new HashMap<>();
+ pinnedTimestamps.forEach(ts -> pinnedTimestampsMap.put(randomInt(1000) + "__" + ts, new PlainBlobMetadata("x", 100)));
+
+ try {
+
+ when(blobContainer.listBlobs()).thenReturn(pinnedTimestampsMap);
+
+ Set dataFilesBeforeTrim = blobStoreTransferService.listAll(
+ getTranslogDirectory().add(DATA_DIR).add(String.valueOf(primaryTerm.get()))
+ );
+
+ assertBusy(() -> assertTrue(translog.isRemoteGenerationDeletionPermitsAvailable()));
+ updatePinnedTimstampTask.run();
+ RemoteStoreSettings.setPinnedTimestampsLookbackInterval(TimeValue.ZERO);
+ translog.trimUnreferencedReaders();
+ assertBusy(() -> assertTrue(translog.isRemoteGenerationDeletionPermitsAvailable()));
+
+ Set metadataFilesAfterTrim = blobStoreTransferService.listAll(getTranslogDirectory().add(METADATA_DIR));
+ Set dataFilesAfterTrim = blobStoreTransferService.listAll(
+ getTranslogDirectory().add(DATA_DIR).add(String.valueOf(primaryTerm.get()))
+ );
+
+ // If non-pinned generations are within minRemoteGenReferenced - 1 - indexSettings().getRemoteTranslogExtraKeep(),
+ // we will not delete them
+ if (dataFilesAfterTrim.equals(dataFilesBeforeTrim) == false) {
+ // We check for the number of pinned timestamps, or +1 to account for the latest metadata file.
+ assertTrue(
+ metadataFilesAfterTrim.size() == pinnedTimestamps.size()
+ || metadataFilesAfterTrim.size() == pinnedTimestamps.size() + 1
+ );
+ }
+
+ for (String md : pinnedTimestampMatchingMetadataFiles) {
+ assertTrue(metadataFilesAfterTrim.contains(md));
+ Tuple minMaXGen = TranslogTransferMetadata.getMinMaxTranslogGenerationFromFilename(md);
+ for (long i = minMaXGen.v1(); i <= minMaXGen.v2(); i++) {
+ assertTrue(dataFilesAfterTrim.contains(Translog.getFilename(i)));
+ }
+ }
+ } catch (Exception e) {
+ fail();
+ }
+ }
+
+ @Override
+ public void onFailure(Exception e) {
+ fail();
+ }
+ }, latch)
+ );
+
+ latch.await();
+ }
+
+ @Override
+ public void testDrainSync() throws Exception {
+ RemoteStoreSettings.setPinnedTimestampsLookbackInterval(TimeValue.ZERO);
+
+ // This test checks the following scenarios:
+ // 1. During ongoing uploads, the available permits are 0.
+ // 2. If drainSync is called during an ongoing upload, it waits until it can acquire the permits, and the available permits remain 0.
+ // 3. After drainSync, if trimUnreferencedReaders is attempted, we do not delete from the remote store.
+ // 4. After drainSync, if an upload is attempted, we do not upload to the remote store.
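+ // drainSync returns a Releasable; closing it (see Releasables.close(releasable) further down) hands the permits
+ // back so that uploads and remote deletions can resume.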
+ ArrayList ops = new ArrayList<>();
+ assertEquals(0, translog.allUploaded().size());
+ assertEquals(1, translog.readers.size());
+
+ addToTranslogAndListAndUpload(translog, ops, new Translog.Index(String.valueOf(0), 0, primaryTerm.get(), new byte[] { 1 }));
+ assertEquals(4, translog.allUploaded().size());
+ assertEquals(2, translog.readers.size());
+ assertBusy(() -> assertEquals(1, blobStoreTransferService.listAll(getTranslogDirectory().add(METADATA_DIR)).size()));
+
+ translog.setMinSeqNoToKeep(0);
+ // Fetch pinned timestamps so that it won't be stale
+ updatePinnedTimstampTask.run();
+ translog.trimUnreferencedReaders();
+ assertBusy(() -> assertTrue(translog.isRemoteGenerationDeletionPermitsAvailable()));
+ assertEquals(1, translog.readers.size());
+
+ // Case 1 - During ongoing uploads, the available permits are 0.
+ slowDown.setSleepSeconds(2);
+ CountDownLatch latch = new CountDownLatch(1);
+ Thread thread1 = new Thread(() -> {
+ try {
+ addToTranslogAndListAndUpload(translog, ops, new Translog.Index(String.valueOf(1), 1, primaryTerm.get(), new byte[] { 1 }));
+ assertEquals(2, blobStoreTransferService.listAll(getTranslogDirectory().add(METADATA_DIR)).size());
+ latch.countDown();
+ } catch (IOException e) {
+ throw new AssertionError(e);
+ }
+ });
+ thread1.start();
+ assertBusy(() -> assertEquals(0, translog.availablePermits()));
+ // Case 2 - If drainSync is called during an ongoing upload, it waits until it can acquire the permits, and the available permits remain 0.
+ Releasable releasable = translog.drainSync();
+ assertBusy(() -> assertEquals(0, latch.getCount()));
+ assertEquals(0, translog.availablePermits());
+ slowDown.setSleepSeconds(0);
+ assertEquals(4, translog.allUploaded().size());
+ assertEquals(2, translog.readers.size());
+ Set mdFiles = blobStoreTransferService.listAll(getTranslogDirectory().add(METADATA_DIR));
+
+ // Case 3 - After drainSync, if trimUnreferencedReaders is attempted, we do not delete from the remote store.
+ translog.setMinSeqNoToKeep(1);
+ // Fetch pinned timestamps so that it won't be stale
+ updatePinnedTimstampTask.run();
+ translog.trimUnreferencedReaders();
+ assertBusy(() -> assertTrue(translog.isRemoteGenerationDeletionPermitsAvailable()));
+ assertEquals(1, translog.readers.size());
+ assertEquals(2, translog.allUploaded().size());
+ assertEquals(mdFiles, blobStoreTransferService.listAll(getTranslogDirectory().add(METADATA_DIR)));
+
+ // Case 4 - After drainSync, if an upload is attempted, we do not upload to the remote store.
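+ // The add below still returns a local Translog.Location, but nothing new reaches the remote store while the permits
+ // are drained, which the unchanged allUploaded() count and mdFiles set verify.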
+ Translog.Location loc = addToTranslogAndListAndUpload( + translog, + ops, + new Translog.Index(String.valueOf(2), 2, primaryTerm.get(), new byte[] { 1 }) + ); + assertEquals(1, translog.readers.size()); + assertEquals(2, translog.allUploaded().size()); + assertEquals(mdFiles, blobStoreTransferService.listAll(getTranslogDirectory().add(METADATA_DIR))); + + // Refill the permits back + Releasables.close(releasable); + addToTranslogAndListAndUpload(translog, ops, new Translog.Index(String.valueOf(3), 3, primaryTerm.get(), new byte[] { 1 })); + assertEquals(2, translog.readers.size()); + assertEquals(4, translog.allUploaded().size()); + assertEquals(3, blobStoreTransferService.listAll(getTranslogDirectory().add(METADATA_DIR)).size()); + + translog.setMinSeqNoToKeep(3); + // Fetch pinned timestamps so that it won't be stale + updatePinnedTimstampTask.run(); + translog.trimUnreferencedReaders(); + assertBusy(() -> assertTrue(translog.isRemoteGenerationDeletionPermitsAvailable())); + assertEquals(1, translog.readers.size()); + assertBusy(() -> assertEquals(2, translog.allUploaded().size())); + assertBusy(() -> assertEquals(1, blobStoreTransferService.listAll(getTranslogDirectory().add(METADATA_DIR)).size())); + } + + @Override + public void testExtraGenToKeep() throws Exception { + RemoteStoreSettings.setPinnedTimestampsLookbackInterval(TimeValue.ZERO); + + TranslogConfig config = getConfig(1); + ChannelFactory channelFactory = getChannelFactory(); + final Set persistedSeqNos = new HashSet<>(); + String translogUUID = Translog.createEmptyTranslog( + config.getTranslogPath(), + SequenceNumbers.NO_OPS_PERFORMED, + shardId, + channelFactory, + primaryTerm.get() + ); + TranslogDeletionPolicy deletionPolicy = createTranslogDeletionPolicy(config.getIndexSettings()); + ArrayList ops = new ArrayList<>(); + try ( + RemoteFsTranslog translog = new RemoteFsTranslog( + config, + translogUUID, + deletionPolicy, + () -> SequenceNumbers.NO_OPS_PERFORMED, + primaryTerm::get, + persistedSeqNos::add, + repository, + threadPool, + () -> Boolean.TRUE, + new RemoteTranslogTransferTracker(shardId, 10), + DefaultRemoteStoreSettings.INSTANCE + ) { + @Override + ChannelFactory getChannelFactory() { + return channelFactory; + } + } + ) { + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("1", 0, primaryTerm.get(), new byte[] { 1 })); + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("2", 1, primaryTerm.get(), new byte[] { 1 })); + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("3", 2, primaryTerm.get(), new byte[] { 1 })); + + // expose the new checkpoint (simulating a commit), before we trim the translog + translog.setMinSeqNoToKeep(2); + + // Trims from local + // Fetch pinned timestamps so that it won't be stale + updatePinnedTimstampTask.run(); + translog.trimUnreferencedReaders(); + assertBusy(() -> assertTrue(translog.isRemoteGenerationDeletionPermitsAvailable())); + + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("4", 3, primaryTerm.get(), new byte[] { 1 })); + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("5", 4, primaryTerm.get(), new byte[] { 1 })); + // Trims from remote now + // Fetch pinned timestamps so that it won't be stale + updatePinnedTimstampTask.run(); + translog.trimUnreferencedReaders(); + assertBusy(() -> assertTrue(translog.isRemoteGenerationDeletionPermitsAvailable())); + assertEquals( + 8, + blobStoreTransferService.listAll(getTranslogDirectory().add(DATA_DIR).add(String.valueOf(primaryTerm.get()))).size() + ); 
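+ // 8 remote data files means 4 generations (one .tlog/.ckp pair each) remain after the remote trim, i.e. more than
+ // strictly required by setMinSeqNoToKeep; presumably the extra-generations-to-keep behaviour this test is named
+ // after (configured through getConfig(1)) accounts for the difference.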
+ } + } + + public void testGetGenerationsToBeDeletedEmptyMetadataFilesNotToBeDeleted() throws IOException { + List metadataFilesNotToBeDeleted = new ArrayList<>(); + List metadataFilesToBeDeleted = List.of( + // 4 to 7 + "metadata__9223372036854775806__9223372036854775800__9223370311919910398__31__9223372036854775803__1__1", + // 17 to 37 + "metadata__9223372036854775806__9223372036854775770__9223370311919910398__31__9223372036854775790__1__1", + // 27 to 42 + "metadata__9223372036854775806__9223372036854775765__9223370311919910403__31__9223372036854775780__1__1" + ); + Set generations = ((RemoteFsTimestampAwareTranslog) translog).getGenerationsToBeDeleted( + metadataFilesNotToBeDeleted, + metadataFilesToBeDeleted, + true + ); + Set md1Generations = LongStream.rangeClosed(4, 7).boxed().collect(Collectors.toSet()); + Set md2Generations = LongStream.rangeClosed(17, 37).boxed().collect(Collectors.toSet()); + Set md3Generations = LongStream.rangeClosed(27, 42).boxed().collect(Collectors.toSet()); + + assertTrue(generations.containsAll(md1Generations)); + assertTrue(generations.containsAll(md2Generations)); + assertTrue(generations.containsAll(md3Generations)); + + generations.removeAll(md1Generations); + generations.removeAll(md2Generations); + generations.removeAll(md3Generations); + assertTrue(generations.isEmpty()); + } + + public void testGetGenerationsToBeDeleted() throws IOException { + List metadataFilesNotToBeDeleted = List.of( + // 1 to 4 + "metadata__9223372036854775806__9223372036854775803__9223370311919910398__31__9223372036854775806__1__1", + // 26 to 30 + "metadata__9223372036854775806__9223372036854775777__9223370311919910398__31__9223372036854775781__1__1", + // 42 to 100 + "metadata__9223372036854775806__9223372036854775707__9223370311919910403__31__9223372036854775765__1__1" + ); + List metadataFilesToBeDeleted = List.of( + // 4 to 7 + "metadata__9223372036854775806__9223372036854775800__9223370311919910398__31__9223372036854775803__1__1", + // 17 to 37 + "metadata__9223372036854775806__9223372036854775770__9223370311919910398__31__9223372036854775790__1__1", + // 27 to 42 + "metadata__9223372036854775806__9223372036854775765__9223370311919910403__31__9223372036854775780__1__1" + ); + Set generations = ((RemoteFsTimestampAwareTranslog) translog).getGenerationsToBeDeleted( + metadataFilesNotToBeDeleted, + metadataFilesToBeDeleted, + true + ); + Set md1Generations = LongStream.rangeClosed(5, 7).boxed().collect(Collectors.toSet()); + Set md2Generations = LongStream.rangeClosed(17, 25).boxed().collect(Collectors.toSet()); + Set md3Generations = LongStream.rangeClosed(31, 41).boxed().collect(Collectors.toSet()); + + assertTrue(generations.containsAll(md1Generations)); + assertTrue(generations.containsAll(md2Generations)); + assertTrue(generations.containsAll(md3Generations)); + + generations.removeAll(md1Generations); + generations.removeAll(md2Generations); + generations.removeAll(md3Generations); + assertTrue(generations.isEmpty()); + } + + public void testGetMetadataFilesToBeDeletedNoExclusion() { + updatePinnedTimstampTask.run(); + + List metadataFiles = List.of( + "metadata__9223372036438563903__9223372036854774799__9223370311919910393__31__9223372036854775106__1", + "metadata__9223372036438563903__9223372036854775800__9223370311919910398__31__9223372036854775803__1", + "metadata__9223372036438563903__9223372036854775701__9223370311919910403__31__9223372036854775701__1" + ); + + assertEquals(metadataFiles, ((RemoteFsTimestampAwareTranslog) 
translog).getMetadataFilesToBeDeleted(metadataFiles)); + } + + public void testGetMetadataFilesToBeDeletedExclusionBasedOnAgeOnly() { + updatePinnedTimstampTask.run(); + long currentTimeInMillis = System.currentTimeMillis(); + String md1Timestamp = RemoteStoreUtils.invertLong(currentTimeInMillis - 200000); + String md2Timestamp = RemoteStoreUtils.invertLong(currentTimeInMillis + 30000); + String md3Timestamp = RemoteStoreUtils.invertLong(currentTimeInMillis + 60000); + + List metadataFiles = List.of( + "metadata__9223372036438563903__9223372036854774799__" + md1Timestamp + "__31__9223372036854775106__1", + "metadata__9223372036438563903__9223372036854775800__" + md2Timestamp + "__31__9223372036854775803__1", + "metadata__9223372036438563903__9223372036854775701__" + md3Timestamp + "__31__9223372036854775701__1" + ); + + List metadataFilesToBeDeleted = ((RemoteFsTimestampAwareTranslog) translog).getMetadataFilesToBeDeleted(metadataFiles); + assertEquals(1, metadataFilesToBeDeleted.size()); + assertEquals(metadataFiles.get(0), metadataFilesToBeDeleted.get(0)); + } + + public void testGetMetadataFilesToBeDeletedExclusionBasedOnPinningOnly() throws IOException { + long currentTimeInMillis = System.currentTimeMillis(); + String md1Timestamp = RemoteStoreUtils.invertLong(currentTimeInMillis - 200000); + String md2Timestamp = RemoteStoreUtils.invertLong(currentTimeInMillis - 300000); + String md3Timestamp = RemoteStoreUtils.invertLong(currentTimeInMillis - 600000); + + long pinnedTimestamp = RemoteStoreUtils.invertLong(md2Timestamp) + 10000; + when(blobContainer.listBlobs()).thenReturn(Map.of(randomInt(100) + "__" + pinnedTimestamp, new PlainBlobMetadata("xyz", 100))); + + updatePinnedTimstampTask.run(); + + List metadataFiles = List.of( + "metadata__9223372036438563903__9223372036854774799__" + md1Timestamp + "__31__9223372036854775106__1", + "metadata__9223372036438563903__9223372036854775600__" + md2Timestamp + "__31__9223372036854775803__1", + "metadata__9223372036438563903__9223372036854775701__" + md3Timestamp + "__31__9223372036854775701__1" + ); + + List metadataFilesToBeDeleted = ((RemoteFsTimestampAwareTranslog) translog).getMetadataFilesToBeDeleted(metadataFiles); + assertEquals(2, metadataFilesToBeDeleted.size()); + assertEquals(metadataFiles.get(0), metadataFilesToBeDeleted.get(0)); + assertEquals(metadataFiles.get(2), metadataFilesToBeDeleted.get(1)); + } + + public void testGetMetadataFilesToBeDeletedExclusionBasedOnAgeAndPinning() throws IOException { + long currentTimeInMillis = System.currentTimeMillis(); + String md1Timestamp = RemoteStoreUtils.invertLong(currentTimeInMillis + 100000); + String md2Timestamp = RemoteStoreUtils.invertLong(currentTimeInMillis - 300000); + String md3Timestamp = RemoteStoreUtils.invertLong(currentTimeInMillis - 600000); + + long pinnedTimestamp = RemoteStoreUtils.invertLong(md2Timestamp) + 10000; + when(blobContainer.listBlobs()).thenReturn(Map.of(randomInt(100) + "__" + pinnedTimestamp, new PlainBlobMetadata("xyz", 100))); + + updatePinnedTimstampTask.run(); + + List metadataFiles = List.of( + "metadata__9223372036438563903__9223372036854774799__" + md1Timestamp + "__31__9223372036854775106__1", + "metadata__9223372036438563903__9223372036854775600__" + md2Timestamp + "__31__9223372036854775803__1", + "metadata__9223372036438563903__9223372036854775701__" + md3Timestamp + "__31__9223372036854775701__1" + ); + + List metadataFilesToBeDeleted = ((RemoteFsTimestampAwareTranslog) translog).getMetadataFilesToBeDeleted(metadataFiles); + assertEquals(1, 
metadataFilesToBeDeleted.size()); + assertEquals(metadataFiles.get(2), metadataFilesToBeDeleted.get(0)); + } + + public void testIsGenerationPinned() { + TreeSet> pinnedGenerations = new TreeSet<>(new TreeSet<>((o1, o2) -> { + if (Objects.equals(o1.v1(), o2.v1()) == false) { + return o1.v1().compareTo(o2.v1()); + } else { + return o1.v2().compareTo(o2.v2()); + } + })); + + pinnedGenerations.add(new Tuple<>(12L, 34L)); + pinnedGenerations.add(new Tuple<>(121L, 140L)); + pinnedGenerations.add(new Tuple<>(142L, 160L)); + pinnedGenerations.add(new Tuple<>(12L, 120L)); + pinnedGenerations.add(new Tuple<>(12L, 78L)); + pinnedGenerations.add(new Tuple<>(142L, 170L)); + pinnedGenerations.add(new Tuple<>(1L, 1L)); + pinnedGenerations.add(new Tuple<>(12L, 56L)); + pinnedGenerations.add(new Tuple<>(142L, 180L)); + pinnedGenerations.add(new Tuple<>(4L, 9L)); + + RemoteFsTimestampAwareTranslog translog = (RemoteFsTimestampAwareTranslog) this.translog; + + assertFalse(translog.isGenerationPinned(3, pinnedGenerations)); + assertFalse(translog.isGenerationPinned(10, pinnedGenerations)); + assertFalse(translog.isGenerationPinned(141, pinnedGenerations)); + assertFalse(translog.isGenerationPinned(181, pinnedGenerations)); + assertFalse(translog.isGenerationPinned(5000, pinnedGenerations)); + assertFalse(translog.isGenerationPinned(0, pinnedGenerations)); + + assertTrue(translog.isGenerationPinned(1, pinnedGenerations)); + assertTrue(translog.isGenerationPinned(120, pinnedGenerations)); + assertTrue(translog.isGenerationPinned(121, pinnedGenerations)); + assertTrue(translog.isGenerationPinned(156, pinnedGenerations)); + assertTrue(translog.isGenerationPinned(12, pinnedGenerations)); + } + + public void testGetMinMaxTranslogGenerationFromMetadataFile() throws IOException { + TranslogTransferManager translogTransferManager = mock(TranslogTransferManager.class); + + RemoteFsTimestampAwareTranslog translog = (RemoteFsTimestampAwareTranslog) this.translog; + + // Fetch generations directly from the filename + assertEquals( + new Tuple<>(701L, 1008L), + translog.getMinMaxTranslogGenerationFromMetadataFile( + "metadata__9223372036438563903__9223372036854774799__9223370311919910393__31__9223372036854775106__1__1", + translogTransferManager + ) + ); + assertEquals( + new Tuple<>(4L, 7L), + translog.getMinMaxTranslogGenerationFromMetadataFile( + "metadata__9223372036438563903__9223372036854775800__9223370311919910398__31__9223372036854775803__2__1", + translogTransferManager + ) + ); + assertEquals( + new Tuple<>(106L, 106L), + translog.getMinMaxTranslogGenerationFromMetadataFile( + "metadata__9223372036438563903__9223372036854775701__9223370311919910403__31__9223372036854775701__3__1", + translogTransferManager + ) + ); + assertEquals( + new Tuple<>(4573L, 99964L), + translog.getMinMaxTranslogGenerationFromMetadataFile( + "metadata__9223372036438563903__9223372036854675843__9223370311919910408__31__9223372036854771234__4__1", + translogTransferManager + ) + ); + assertEquals( + new Tuple<>(1L, 4L), + translog.getMinMaxTranslogGenerationFromMetadataFile( + "metadata__9223372036438563903__9223372036854775803__9223370311919910413__31__9223372036854775806__5__1", + translogTransferManager + ) + ); + assertEquals( + new Tuple<>(2474L, 3462L), + translog.getMinMaxTranslogGenerationFromMetadataFile( + "metadata__9223372036438563903__9223372036854772345__9223370311919910429__31__9223372036854773333__6__1", + translogTransferManager + ) + ); + assertEquals( + new Tuple<>(5807L, 7917L), + 
translog.getMinMaxTranslogGenerationFromMetadataFile( + "metadata__9223372036438563903__9223372036854767890__9223370311919910434__31__9223372036854770000__7__1", + translogTransferManager + ) + ); + + // For older md filenames, it needs to read the content + TranslogTransferMetadata md1 = mock(TranslogTransferMetadata.class); + when(md1.getMinTranslogGeneration()).thenReturn(701L); + when(md1.getGeneration()).thenReturn(1008L); + when(translogTransferManager.readMetadata("metadata__9223372036438563903__9223372036854774799__9223370311919910393__31__1")) + .thenReturn(md1); + assertEquals( + new Tuple<>(701L, 1008L), + translog.getMinMaxTranslogGenerationFromMetadataFile( + "metadata__9223372036438563903__9223372036854774799__9223370311919910393__31__1", + translogTransferManager + ) + ); + TranslogTransferMetadata md2 = mock(TranslogTransferMetadata.class); + when(md2.getMinTranslogGeneration()).thenReturn(4L); + when(md2.getGeneration()).thenReturn(7L); + when(translogTransferManager.readMetadata("metadata__9223372036438563903__9223372036854775800__9223370311919910398__31__1")) + .thenReturn(md2); + assertEquals( + new Tuple<>(4L, 7L), + translog.getMinMaxTranslogGenerationFromMetadataFile( + "metadata__9223372036438563903__9223372036854775800__9223370311919910398__31__1", + translogTransferManager + ) + ); + + verify(translogTransferManager).readMetadata("metadata__9223372036438563903__9223372036854774799__9223370311919910393__31__1"); + verify(translogTransferManager).readMetadata("metadata__9223372036438563903__9223372036854775800__9223370311919910398__31__1"); + } + + public void testDeleteStaleRemotePrimaryTerms() throws IOException { + TranslogTransferManager translogTransferManager = mock(TranslogTransferManager.class); + + List metadataFiles = List.of( + // PT 4 to 9 + "metadata__9223372036854775798__9223372036854774799__9223370311919910393__node1__9223372036438563958__4__1", + // PT 2 to 7 + "metadata__9223372036854775800__9223372036854774799__9223370311919910393__node1__9223372036438563958__2__1", + // PT 2 to 6 + "metadata__9223372036854775801__9223372036854774799__9223370311919910393__node1__9223372036438563958__2__1" + ); + + Logger staticLogger = LogManager.getLogger(RemoteFsTimestampAwareTranslogTests.class); + when(translogTransferManager.listPrimaryTermsInRemote()).thenReturn(Set.of(1L, 2L, 3L, 4L)); + AtomicLong minPrimaryTermInRemote = new AtomicLong(Long.MAX_VALUE); + RemoteFsTimestampAwareTranslog.deleteStaleRemotePrimaryTerms( + metadataFiles, + translogTransferManager, + new HashMap<>(), + minPrimaryTermInRemote, + staticLogger + ); + verify(translogTransferManager).deletePrimaryTermsAsync(2L); + assertEquals(2, minPrimaryTermInRemote.get()); + + RemoteFsTimestampAwareTranslog.deleteStaleRemotePrimaryTerms( + metadataFiles, + translogTransferManager, + new HashMap<>(), + minPrimaryTermInRemote, + staticLogger + ); + // This means there are no new invocations of deletePrimaryTermAsync + verify(translogTransferManager, times(1)).deletePrimaryTermsAsync(anyLong()); + } + + public void testDeleteStaleRemotePrimaryTermsNoPrimaryTermInRemote() throws IOException { + TranslogTransferManager translogTransferManager = mock(TranslogTransferManager.class); + + List metadataFiles = List.of( + // PT 4 to 9 + "metadata__9223372036854775798__9223372036854774799__9223370311919910393__node1__9223372036438563958__4__1", + // PT 2 to 7 + "metadata__9223372036854775800__9223372036854774799__9223370311919910393__node1__9223372036438563958__2__1", + // PT 2 to 6 + 
"metadata__9223372036854775801__9223372036854774799__9223370311919910393__node1__9223372036438563958__2__1" + ); + + Logger staticLogger = LogManager.getLogger(RemoteFsTimestampAwareTranslogTests.class); + when(translogTransferManager.listPrimaryTermsInRemote()).thenReturn(Set.of()); + AtomicLong minPrimaryTermInRemote = new AtomicLong(Long.MAX_VALUE); + RemoteFsTimestampAwareTranslog.deleteStaleRemotePrimaryTerms( + metadataFiles, + translogTransferManager, + new HashMap<>(), + minPrimaryTermInRemote, + staticLogger + ); + verify(translogTransferManager, times(0)).deletePrimaryTermsAsync(anyLong()); + assertEquals(Long.MAX_VALUE, minPrimaryTermInRemote.get()); + } + + public void testDeleteStaleRemotePrimaryTermsPrimaryTermInRemoteIsBigger() throws IOException { + TranslogTransferManager translogTransferManager = mock(TranslogTransferManager.class); + + List metadataFiles = List.of( + // PT 4 to 9 + "metadata__9223372036854775798__9223372036854774799__9223370311919910393__node1__9223372036438563958__4__1", + // PT 2 to 7 + "metadata__9223372036854775800__9223372036854774799__9223370311919910393__node1__9223372036438563958__2__1", + // PT 2 to 6 + "metadata__9223372036854775801__9223372036854774799__9223370311919910393__node1__9223372036438563958__2__1" + ); + + Logger staticLogger = LogManager.getLogger(RemoteFsTimestampAwareTranslogTests.class); + when(translogTransferManager.listPrimaryTermsInRemote()).thenReturn(Set.of(2L, 3L, 4L)); + AtomicLong minPrimaryTermInRemote = new AtomicLong(Long.MAX_VALUE); + RemoteFsTimestampAwareTranslog.deleteStaleRemotePrimaryTerms( + metadataFiles, + translogTransferManager, + new HashMap<>(), + minPrimaryTermInRemote, + staticLogger + ); + verify(translogTransferManager, times(0)).deletePrimaryTermsAsync(anyLong()); + assertEquals(2, minPrimaryTermInRemote.get()); + } + +} diff --git a/server/src/test/java/org/opensearch/index/translog/RemoteFsTranslogTests.java b/server/src/test/java/org/opensearch/index/translog/RemoteFsTranslogTests.java index 6bf35cc1eac9b..339d876274557 100644 --- a/server/src/test/java/org/opensearch/index/translog/RemoteFsTranslogTests.java +++ b/server/src/test/java/org/opensearch/index/translog/RemoteFsTranslogTests.java @@ -118,15 +118,15 @@ public class RemoteFsTranslogTests extends OpenSearchTestCase { protected final ShardId shardId = new ShardId("index", "_na_", 1); protected RemoteFsTranslog translog; - private AtomicLong globalCheckpoint; + protected AtomicLong globalCheckpoint; protected Path translogDir; // A default primary term is used by translog instances created in this test. 
- private final AtomicLong primaryTerm = new AtomicLong(); - private final AtomicBoolean primaryMode = new AtomicBoolean(true); + protected final AtomicLong primaryTerm = new AtomicLong(); + protected final AtomicBoolean primaryMode = new AtomicBoolean(true); private final AtomicReference persistedSeqNoConsumer = new AtomicReference<>(); - private ThreadPool threadPool; - private final static String METADATA_DIR = "metadata"; - private final static String DATA_DIR = "data"; + protected ThreadPool threadPool; + protected final static String METADATA_DIR = "metadata"; + protected final static String DATA_DIR = "data"; AtomicInteger writeCalls = new AtomicInteger(); BlobStoreRepository repository; @@ -136,7 +136,7 @@ public class RemoteFsTranslogTests extends OpenSearchTestCase { TestTranslog.SlowDownWriteSwitch slowDown; - private LongConsumer getPersistedSeqNoConsumer() { + protected LongConsumer getPersistedSeqNoConsumer() { return seqNo -> { final LongConsumer consumer = persistedSeqNoConsumer.get(); if (consumer != null) { @@ -167,7 +167,7 @@ public void tearDown() throws Exception { } } - private RemoteFsTranslog create(Path path) throws IOException { + protected RemoteFsTranslog create(Path path) throws IOException { final String translogUUID = Translog.createEmptyTranslog(path, SequenceNumbers.NO_OPS_PERFORMED, shardId, primaryTerm.get()); return create(path, createRepository(), translogUUID, 0); } @@ -179,6 +179,14 @@ private RemoteFsTranslog create(Path path, BlobStoreRepository repository, Strin final TranslogDeletionPolicy deletionPolicy = createTranslogDeletionPolicy(translogConfig.getIndexSettings()); threadPool = new TestThreadPool(getClass().getName()); blobStoreTransferService = new BlobStoreTransferService(repository.blobStore(), threadPool); + return createTranslogInstance(translogConfig, translogUUID, deletionPolicy); + } + + protected RemoteFsTranslog createTranslogInstance( + TranslogConfig translogConfig, + String translogUUID, + TranslogDeletionPolicy deletionPolicy + ) throws IOException { return new RemoteFsTranslog( translogConfig, translogUUID, @@ -277,7 +285,7 @@ private Translog.Location addToTranslogAndList(Translog translog, List list, Translog.Operation op) + protected Translog.Location addToTranslogAndListAndUpload(Translog translog, List list, Translog.Operation op) throws IOException { Translog.Location loc = translog.add(op); translog.ensureSynced(loc); @@ -390,7 +398,7 @@ public void testSimpleOperations() throws IOException { } - private TranslogConfig getConfig(int gensToKeep) { + protected TranslogConfig getConfig(int gensToKeep) { Path tempDir = createTempDir(); final TranslogConfig temp = getTranslogConfig(tempDir, gensToKeep); final TranslogConfig config = new TranslogConfig( @@ -405,7 +413,7 @@ private TranslogConfig getConfig(int gensToKeep) { return config; } - private ChannelFactory getChannelFactory() { + protected ChannelFactory getChannelFactory() { writeCalls = new AtomicInteger(); final ChannelFactory channelFactory = (file, openOption) -> { FileChannel delegate = FileChannel.open(file, openOption); @@ -493,8 +501,8 @@ ChannelFactory getChannelFactory() { 6, blobStoreTransferService.listAll(getTranslogDirectory().add(DATA_DIR).add(String.valueOf(primaryTerm.get()))).size() ); - } + } public void testReadLocation() throws IOException { @@ -761,7 +769,6 @@ public void testSimpleOperationsUpload() throws Exception { blobStoreTransferService.listAll(getTranslogDirectory().add(DATA_DIR).add(String.valueOf(primaryTerm.get()))).size() ); }); - } 
public void testMetadataFileDeletion() throws Exception { @@ -910,7 +917,7 @@ public void testDrainSync() throws Exception { assertBusy(() -> assertEquals(1, blobStoreTransferService.listAll(getTranslogDirectory().add(METADATA_DIR)).size())); } - private BlobPath getTranslogDirectory() { + protected BlobPath getTranslogDirectory() { return repository.basePath().add(shardId.getIndex().getUUID()).add(String.valueOf(shardId.id())).add(TRANSLOG.getName()); } @@ -1688,13 +1695,13 @@ public void testDownloadWithRetries() throws IOException { TranslogTransferManager mockTransfer = mock(TranslogTransferManager.class); RemoteTranslogTransferTracker remoteTranslogTransferTracker = mock(RemoteTranslogTransferTracker.class); - when(mockTransfer.readMetadata()).thenReturn(translogTransferMetadata); + when(mockTransfer.readMetadata(0)).thenReturn(translogTransferMetadata); when(mockTransfer.getRemoteTranslogTransferTracker()).thenReturn(remoteTranslogTransferTracker); // Always File not found when(mockTransfer.downloadTranslog(any(), any(), any())).thenThrow(new NoSuchFileException("File not found")); TranslogTransferManager finalMockTransfer = mockTransfer; - assertThrows(NoSuchFileException.class, () -> RemoteFsTranslog.download(finalMockTransfer, location, logger, false)); + assertThrows(NoSuchFileException.class, () -> RemoteFsTranslog.download(finalMockTransfer, location, logger, false, 0)); // File not found in first attempt . File found in second attempt. mockTransfer = mock(TranslogTransferManager.class); @@ -1715,7 +1722,7 @@ public void testDownloadWithRetries() throws IOException { }).when(mockTransfer).downloadTranslog(any(), any(), any()); // no exception thrown - RemoteFsTranslog.download(mockTransfer, location, logger, false); + RemoteFsTranslog.download(mockTransfer, location, logger, false, 0); } // No translog data in local as well as remote, we skip creating empty translog @@ -1728,7 +1735,7 @@ public void testDownloadWithNoTranslogInLocalAndRemote() throws IOException { when(mockTransfer.getRemoteTranslogTransferTracker()).thenReturn(remoteTranslogTransferTracker); Path[] filesBeforeDownload = FileSystemUtils.files(location); - RemoteFsTranslog.download(mockTransfer, location, logger, false); + RemoteFsTranslog.download(mockTransfer, location, logger, false, 0); assertEquals(filesBeforeDownload, FileSystemUtils.files(location)); } @@ -1748,7 +1755,7 @@ public void testDownloadWithTranslogOnlyInLocal() throws IOException { Checkpoint existingCheckpoint = Translog.readCheckpoint(location); TranslogTransferManager finalMockTransfer = mockTransfer; - RemoteFsTranslog.download(finalMockTransfer, location, logger, false); + RemoteFsTranslog.download(finalMockTransfer, location, logger, false, 0); Path[] filesPostDownload = FileSystemUtils.files(location); assertEquals(2, filesPostDownload.length); @@ -1784,11 +1791,11 @@ public void testDownloadWithEmptyTranslogOnlyInLocal() throws IOException { TranslogTransferManager finalMockTransfer = mockTransfer; // download first time will ensure creating empty translog - RemoteFsTranslog.download(finalMockTransfer, location, logger, false); + RemoteFsTranslog.download(finalMockTransfer, location, logger, false, 0); Path[] filesPostFirstDownload = FileSystemUtils.files(location); // download on empty translog should be a no-op - RemoteFsTranslog.download(finalMockTransfer, location, logger, false); + RemoteFsTranslog.download(finalMockTransfer, location, logger, false, 0); Path[] filesPostSecondDownload = FileSystemUtils.files(location); 
assertArrayEquals(filesPostFirstDownload, filesPostSecondDownload); diff --git a/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java b/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java index ffa6a0f456f36..ed0d6b7d50706 100644 --- a/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java +++ b/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java @@ -23,8 +23,10 @@ import org.opensearch.core.action.ActionListener; import org.opensearch.core.index.Index; import org.opensearch.core.index.shard.ShardId; +import org.opensearch.index.remote.RemoteStoreUtils; import org.opensearch.index.remote.RemoteTranslogTransferTracker; import org.opensearch.index.translog.Translog; +import org.opensearch.index.translog.TranslogReader; import org.opensearch.index.translog.transfer.FileSnapshot.CheckpointFileSnapshot; import org.opensearch.index.translog.transfer.FileSnapshot.TransferFileSnapshot; import org.opensearch.index.translog.transfer.FileSnapshot.TranslogFileSnapshot; @@ -51,7 +53,10 @@ import java.util.Objects; import java.util.Set; import java.util.UUID; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; import org.mockito.Mockito; @@ -623,7 +628,7 @@ public void testMetadataConflict() throws InterruptedException { String mdFilename = tm.getFileName(); long count = mdFilename.chars().filter(ch -> ch == METADATA_SEPARATOR.charAt(0)).count(); // There should not be any `_` in mdFile name as it is used a separator . - assertEquals(12, count); + assertEquals(14, count); Thread.sleep(1); TranslogTransferMetadata tm2 = new TranslogTransferMetadata(1, 1, 1, 2, "node--2"); String mdFilename2 = tm2.getFileName(); @@ -753,4 +758,170 @@ private void assertTlogCkpDownloadStatsWithMetadata() { // Expect delay for both tlog and ckp file assertTrue(remoteTranslogTransferTracker.getTotalDownloadTimeInMillis() >= delayForBlobDownload); } + + public void testlistTranslogMetadataFilesAsync() throws Exception { + String tm1 = new TranslogTransferMetadata(1, 1, 1, 2).getFileName(); + String tm2 = new TranslogTransferMetadata(1, 2, 1, 2).getFileName(); + String tm3 = new TranslogTransferMetadata(2, 3, 1, 2).getFileName(); + doAnswer(invocation -> { + ActionListener> actionListener = invocation.getArgument(4); + List bmList = new LinkedList<>(); + bmList.add(new PlainBlobMetadata(tm1, 1)); + bmList.add(new PlainBlobMetadata(tm2, 1)); + bmList.add(new PlainBlobMetadata(tm3, 1)); + actionListener.onResponse(bmList); + return null; + }).when(transferService) + .listAllInSortedOrderAsync( + eq(ThreadPool.Names.REMOTE_PURGE), + any(BlobPath.class), + eq(TranslogTransferMetadata.METADATA_PREFIX), + anyInt(), + any(ActionListener.class) + ); + AtomicBoolean fetchCompleted = new AtomicBoolean(false); + translogTransferManager.listTranslogMetadataFilesAsync(new ActionListener<>() { + @Override + public void onResponse(List blobMetadata) { + assertEquals(3, blobMetadata.size()); + assertEquals(blobMetadata.stream().map(BlobMetadata::name).collect(Collectors.toList()), List.of(tm1, tm2, tm3)); + fetchCompleted.set(true); + } + + @Override + public void onFailure(Exception e) { + fetchCompleted.set(true); + throw new RuntimeException(e); + } + }); + assertBusy(() -> assertTrue(fetchCompleted.get())); + } + + public void 
testReadMetadataForGivenTimestampNoFile() throws IOException { + doAnswer(invocation -> { + LatchedActionListener> latchedActionListener = invocation.getArgument(3); + List bmList = new LinkedList<>(); + latchedActionListener.onResponse(bmList); + return null; + }).when(transferService) + .listAllInSortedOrder(any(BlobPath.class), eq(TranslogTransferMetadata.METADATA_PREFIX), anyInt(), any(ActionListener.class)); + + assertNull(translogTransferManager.readMetadata(1234L)); + assertNoDownloadStats(false); + } + + public void testReadMetadataForGivenTimestampNoMatchingFile() throws IOException { + doAnswer(invocation -> { + LatchedActionListener> latchedActionListener = invocation.getArgument(3); + String timestamp1 = RemoteStoreUtils.invertLong(2345L); + BlobMetadata bm1 = new PlainBlobMetadata("metadata__1__12__" + timestamp1 + "__node1__1", 1); + String timestamp2 = RemoteStoreUtils.invertLong(3456L); + BlobMetadata bm2 = new PlainBlobMetadata("metadata__1__12__" + timestamp2 + "__node1__1", 1); + List bmList = List.of(bm1, bm2); + latchedActionListener.onResponse(bmList); + return null; + }).when(transferService) + .listAllInSortedOrder(any(BlobPath.class), eq(TranslogTransferMetadata.METADATA_PREFIX), anyInt(), any(ActionListener.class)); + + assertNull(translogTransferManager.readMetadata(1234L)); + assertNoDownloadStats(false); + } + + public void testReadMetadataForGivenTimestampFile() throws IOException { + AtomicReference mdFilename1 = new AtomicReference<>(); + String timestamp1 = RemoteStoreUtils.invertLong(2345L); + mdFilename1.set("metadata__1__12__" + timestamp1 + "__node1__1"); + doAnswer(invocation -> { + LatchedActionListener> latchedActionListener = invocation.getArgument(3); + BlobMetadata bm1 = new PlainBlobMetadata(mdFilename1.get(), 1); + String timestamp2 = RemoteStoreUtils.invertLong(3456L); + BlobMetadata bm2 = new PlainBlobMetadata("metadata__1__12__" + timestamp2 + "__node1__1", 1); + List bmList = List.of(bm1, bm2); + latchedActionListener.onResponse(bmList); + return null; + }).when(transferService) + .listAllInSortedOrder(any(BlobPath.class), eq(TranslogTransferMetadata.METADATA_PREFIX), anyInt(), any(ActionListener.class)); + + TranslogTransferMetadata metadata = createTransferSnapshot().getTranslogTransferMetadata(); + long delayForMdDownload = 1; + when(transferService.downloadBlob(any(BlobPath.class), eq(mdFilename1.get()))).thenAnswer(invocation -> { + Thread.sleep(delayForMdDownload); + return new ByteArrayInputStream(translogTransferManager.getMetadataBytes(metadata)); + }); + + assertEquals(metadata, translogTransferManager.readMetadata(3000L)); + + assertEquals(translogTransferManager.getMetadataBytes(metadata).length, remoteTranslogTransferTracker.getDownloadBytesSucceeded()); + assertTrue(remoteTranslogTransferTracker.getTotalDownloadTimeInMillis() >= delayForMdDownload); + } + + public void testReadMetadataForGivenTimestampException() throws IOException { + AtomicReference mdFilename1 = new AtomicReference<>(); + String timestamp1 = RemoteStoreUtils.invertLong(2345L); + mdFilename1.set("metadata__1__12__" + timestamp1 + "__node1__1"); + doAnswer(invocation -> { + LatchedActionListener> latchedActionListener = invocation.getArgument(3); + BlobMetadata bm1 = new PlainBlobMetadata(mdFilename1.get(), 1); + String timestamp2 = RemoteStoreUtils.invertLong(3456L); + BlobMetadata bm2 = new PlainBlobMetadata("metadata__1__12__" + timestamp2 + "__node1__1", 1); + List bmList = List.of(bm1, bm2); + latchedActionListener.onResponse(bmList); + return null; + 
}).when(transferService) + .listAllInSortedOrder(any(BlobPath.class), eq(TranslogTransferMetadata.METADATA_PREFIX), anyInt(), any(ActionListener.class)); + + when(transferService.downloadBlob(any(BlobPath.class), eq(mdFilename1.get()))).thenThrow(new IOException("Something went wrong")); + + assertThrows(IOException.class, () -> translogTransferManager.readMetadata(3000L)); + assertNoDownloadStats(true); + } + + public void testPopulateFileTrackerWithLocalStateNoReaders() { + translogTransferManager.populateFileTrackerWithLocalState(null); + assertTrue(translogTransferManager.getFileTransferTracker().allUploaded().isEmpty()); + + translogTransferManager.populateFileTrackerWithLocalState(List.of()); + assertTrue(translogTransferManager.getFileTransferTracker().allUploaded().isEmpty()); + } + + public void testPopulateFileTrackerWithLocalState() { + TranslogReader reader1 = mock(TranslogReader.class); + when(reader1.getGeneration()).thenReturn(12L); + TranslogReader reader2 = mock(TranslogReader.class); + when(reader2.getGeneration()).thenReturn(23L); + TranslogReader reader3 = mock(TranslogReader.class); + when(reader3.getGeneration()).thenReturn(34L); + TranslogReader reader4 = mock(TranslogReader.class); + when(reader4.getGeneration()).thenReturn(45L); + + translogTransferManager.populateFileTrackerWithLocalState(List.of(reader1, reader2, reader3, reader4)); + assertEquals( + Set.of("translog-12.tlog", "translog-23.tlog", "translog-34.tlog", "translog-45.tlog"), + translogTransferManager.getFileTransferTracker().allUploaded() + ); + } + + public void testPopulateFileTrackerWithLocalStateNoCkpAsMetadata() { + TranslogTransferManager translogTransferManager = new TranslogTransferManager( + shardId, + transferService, + remoteBaseTransferPath.add(TRANSLOG.getName()), + remoteBaseTransferPath.add(METADATA.getName()), + tracker, + remoteTranslogTransferTracker, + DefaultRemoteStoreSettings.INSTANCE, + true + ); + + TranslogReader reader1 = mock(TranslogReader.class); + when(reader1.getGeneration()).thenReturn(12L); + TranslogReader reader2 = mock(TranslogReader.class); + when(reader2.getGeneration()).thenReturn(23L); + + translogTransferManager.populateFileTrackerWithLocalState(List.of(reader1, reader2)); + assertEquals( + Set.of("translog-12.tlog", "translog-12.ckp", "translog-23.tlog", "translog-23.ckp"), + translogTransferManager.getFileTransferTracker().allUploaded() + ); + } } diff --git a/server/src/test/java/org/opensearch/indices/cluster/ClusterStateChanges.java b/server/src/test/java/org/opensearch/indices/cluster/ClusterStateChanges.java index 8f1d58cf201e9..b11a583de4eee 100644 --- a/server/src/test/java/org/opensearch/indices/cluster/ClusterStateChanges.java +++ b/server/src/test/java/org/opensearch/indices/cluster/ClusterStateChanges.java @@ -94,8 +94,8 @@ import org.opensearch.common.SetOnce; import org.opensearch.common.UUIDs; import org.opensearch.common.settings.ClusterSettings; -import org.opensearch.common.settings.IndexScopedSettings; import org.opensearch.common.settings.Settings; +import org.opensearch.common.settings.SettingsModule; import org.opensearch.common.util.concurrent.ThreadContext; import org.opensearch.core.action.ActionResponse; import org.opensearch.core.index.Index; @@ -291,10 +291,13 @@ public IndexMetadata upgradeIndexMetadata(IndexMetadata indexMetadata, Version m final AwarenessReplicaBalance awarenessReplicaBalance = new AwarenessReplicaBalance(SETTINGS, clusterService.getClusterSettings()); + // build IndexScopedSettings from a settingsModule so that 
all settings gated by enabled featureFlags are registered. + SettingsModule settingsModule = new SettingsModule(Settings.EMPTY); + MetadataUpdateSettingsService metadataUpdateSettingsService = new MetadataUpdateSettingsService( clusterService, allocationService, - IndexScopedSettings.DEFAULT_SCOPED_SETTINGS, + settingsModule.getIndexScopedSettings(), indicesService, shardLimitValidator, threadPool, @@ -308,7 +311,7 @@ public IndexMetadata upgradeIndexMetadata(IndexMetadata indexMetadata, Version m new AliasValidator(), shardLimitValidator, environment, - IndexScopedSettings.DEFAULT_SCOPED_SETTINGS, + settingsModule.getIndexScopedSettings(), threadPool, xContentRegistry, systemIndices, diff --git a/server/src/test/java/org/opensearch/indices/recovery/LocalStorePeerRecoverySourceHandlerTests.java b/server/src/test/java/org/opensearch/indices/recovery/LocalStorePeerRecoverySourceHandlerTests.java index 4685a7196b85a..a4598c4294a33 100644 --- a/server/src/test/java/org/opensearch/indices/recovery/LocalStorePeerRecoverySourceHandlerTests.java +++ b/server/src/test/java/org/opensearch/indices/recovery/LocalStorePeerRecoverySourceHandlerTests.java @@ -52,6 +52,8 @@ import org.opensearch.action.support.PlainActionFuture; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.routing.IndexShardRoutingTable; +import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.common.Numbers; import org.opensearch.common.Randomness; import org.opensearch.common.SetOnce; @@ -141,6 +143,8 @@ import static org.mockito.Mockito.anyString; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; /** @@ -746,6 +750,206 @@ void phase2( assertFalse(phase2Called.get()); } + /* + If the replica allocation id is not reflected in source nodes routing table even after retries, + recoveries should fail + */ + public void testThrowExceptionOnNoTargetInRouting() throws IOException { + final RecoverySettings recoverySettings = new RecoverySettings(Settings.EMPTY, service); + final StartRecoveryRequest request = getStartRecoveryRequest(); + final IndexShard shard = mock(IndexShard.class); + when(shard.seqNoStats()).thenReturn(mock(SeqNoStats.class)); + when(shard.segmentStats(anyBoolean(), anyBoolean())).thenReturn(mock(SegmentsStats.class)); + when(shard.isRelocatedPrimary()).thenReturn(false); + final org.opensearch.index.shard.ReplicationGroup replicationGroup = mock(org.opensearch.index.shard.ReplicationGroup.class); + final IndexShardRoutingTable routingTable = mock(IndexShardRoutingTable.class); + when(routingTable.getByAllocationId(anyString())).thenReturn(null); + when(shard.getReplicationGroup()).thenReturn(replicationGroup); + when(replicationGroup.getRoutingTable()).thenReturn(routingTable); + when(shard.acquireSafeIndexCommit()).thenReturn(mock(GatedCloseable.class)); + doAnswer(invocation -> { + ((ActionListener) invocation.getArguments()[0]).onResponse(() -> {}); + return null; + }).when(shard).acquirePrimaryOperationPermit(any(), anyString(), any()); + + final IndexMetadata.Builder indexMetadata = IndexMetadata.builder("test") + .settings( + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, between(0, 5)) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, between(1, 5)) + .put(IndexMetadata.SETTING_VERSION_CREATED, VersionUtils.randomVersion(random())) + 
.put(IndexMetadata.SETTING_INDEX_UUID, UUIDs.randomBase64UUID(random())) + ); + if (randomBoolean()) { + indexMetadata.state(IndexMetadata.State.CLOSE); + } + when(shard.indexSettings()).thenReturn(new IndexSettings(indexMetadata.build(), Settings.EMPTY)); + + final AtomicBoolean phase1Called = new AtomicBoolean(); + final AtomicBoolean prepareTargetForTranslogCalled = new AtomicBoolean(); + final AtomicBoolean phase2Called = new AtomicBoolean(); + final RecoverySourceHandler handler = new LocalStorePeerRecoverySourceHandler( + shard, + mock(RecoveryTargetHandler.class), + threadPool, + request, + Math.toIntExact(recoverySettings.getChunkSize().getBytes()), + between(1, 8), + between(1, 8) + ) { + + @Override + void phase1( + IndexCommit snapshot, + long startingSeqNo, + IntSupplier translogOps, + ActionListener listener, + boolean skipCreateRetentionLeaseStep + ) { + phase1Called.set(true); + super.phase1(snapshot, startingSeqNo, translogOps, listener, skipCreateRetentionLeaseStep); + } + + @Override + void prepareTargetForTranslog(int totalTranslogOps, ActionListener listener) { + prepareTargetForTranslogCalled.set(true); + super.prepareTargetForTranslog(totalTranslogOps, listener); + } + + @Override + void phase2( + long startingSeqNo, + long endingSeqNo, + Translog.Snapshot snapshot, + long maxSeenAutoIdTimestamp, + long maxSeqNoOfUpdatesOrDeletes, + RetentionLeases retentionLeases, + long mappingVersion, + ActionListener listener + ) throws IOException { + phase2Called.set(true); + super.phase2( + startingSeqNo, + endingSeqNo, + snapshot, + maxSeenAutoIdTimestamp, + maxSeqNoOfUpdatesOrDeletes, + retentionLeases, + mappingVersion, + listener + ); + } + + }; + PlainActionFuture future = new PlainActionFuture<>(); + expectThrows(DelayRecoveryException.class, () -> { + handler.recoverToTarget(future); + future.actionGet(); + }); + verify(routingTable, times(5)).getByAllocationId(null); + assertFalse(phase1Called.get()); + assertFalse(prepareTargetForTranslogCalled.get()); + assertFalse(phase2Called.get()); + } + + /* + Tests when the replica allocation id is reflected in source nodes routing table even after 1 retry + */ + public void testTargetInRoutingInSecondAttempt() throws IOException { + final RecoverySettings recoverySettings = new RecoverySettings(Settings.EMPTY, service); + final StartRecoveryRequest request = getStartRecoveryRequest(); + final IndexShard shard = mock(IndexShard.class); + when(shard.seqNoStats()).thenReturn(mock(SeqNoStats.class)); + when(shard.segmentStats(anyBoolean(), anyBoolean())).thenReturn(mock(SegmentsStats.class)); + when(shard.isRelocatedPrimary()).thenReturn(false); + when(shard.getRetentionLeases()).thenReturn(mock(RetentionLeases.class)); + final org.opensearch.index.shard.ReplicationGroup replicationGroup = mock(org.opensearch.index.shard.ReplicationGroup.class); + final IndexShardRoutingTable routingTable = mock(IndexShardRoutingTable.class); + final ShardRouting shardRouting = mock(ShardRouting.class); + when(shardRouting.initializing()).thenReturn(true); + when(shardRouting.currentNodeId()).thenReturn("node"); + when(routingTable.getByAllocationId(any())).thenReturn(null, shardRouting); + when(shard.getReplicationGroup()).thenReturn(replicationGroup); + when(replicationGroup.getRoutingTable()).thenReturn(routingTable); + when(shard.acquireSafeIndexCommit()).thenReturn(mock(GatedCloseable.class)); + doAnswer(invocation -> { + ((ActionListener) invocation.getArguments()[0]).onResponse(() -> {}); + return null; + 
}).when(shard).acquirePrimaryOperationPermit(any(), anyString(), any()); + + final IndexMetadata.Builder indexMetadata = IndexMetadata.builder("test") + .settings( + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, between(0, 5)) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, between(1, 5)) + .put(IndexMetadata.SETTING_VERSION_CREATED, VersionUtils.randomVersion(random())) + .put(IndexMetadata.SETTING_INDEX_UUID, UUIDs.randomBase64UUID(random())) + ); + if (randomBoolean()) { + indexMetadata.state(IndexMetadata.State.CLOSE); + } + when(shard.indexSettings()).thenReturn(new IndexSettings(indexMetadata.build(), Settings.EMPTY)); + + final AtomicBoolean phase1Called = new AtomicBoolean(); + final AtomicBoolean prepareTargetForTranslogCalled = new AtomicBoolean(); + final AtomicBoolean phase2Called = new AtomicBoolean(); + final RecoverySourceHandler handler = new LocalStorePeerRecoverySourceHandler( + shard, + mock(RecoveryTargetHandler.class), + threadPool, + request, + Math.toIntExact(recoverySettings.getChunkSize().getBytes()), + between(1, 8), + between(1, 8) + ) { + + @Override + void phase1( + IndexCommit snapshot, + long startingSeqNo, + IntSupplier translogOps, + ActionListener listener, + boolean skipCreateRetentionLeaseStep + ) { + phase1Called.set(true); + super.phase1(snapshot, startingSeqNo, translogOps, listener, skipCreateRetentionLeaseStep); + } + + @Override + void prepareTargetForTranslog(int totalTranslogOps, ActionListener listener) { + prepareTargetForTranslogCalled.set(true); + super.prepareTargetForTranslog(totalTranslogOps, listener); + } + + @Override + void phase2( + long startingSeqNo, + long endingSeqNo, + Translog.Snapshot snapshot, + long maxSeenAutoIdTimestamp, + long maxSeqNoOfUpdatesOrDeletes, + RetentionLeases retentionLeases, + long mappingVersion, + ActionListener listener + ) throws IOException { + phase2Called.set(true); + super.phase2( + startingSeqNo, + endingSeqNo, + snapshot, + maxSeenAutoIdTimestamp, + maxSeqNoOfUpdatesOrDeletes, + retentionLeases, + mappingVersion, + listener + ); + } + + }; + handler.waitForAssignmentPropagate(new SetOnce<>()); + verify(routingTable, times(2)).getByAllocationId(null); + } + public void testCancellationsDoesNotLeakPrimaryPermits() throws Exception { final CancellableThreads cancellableThreads = new CancellableThreads(); final IndexShard shard = mock(IndexShard.class); diff --git a/server/src/test/java/org/opensearch/indices/recovery/RemoteStorePeerRecoverySourceHandlerTests.java b/server/src/test/java/org/opensearch/indices/recovery/RemoteStorePeerRecoverySourceHandlerTests.java index 131514eb019b3..7f913f3c8596a 100644 --- a/server/src/test/java/org/opensearch/indices/recovery/RemoteStorePeerRecoverySourceHandlerTests.java +++ b/server/src/test/java/org/opensearch/indices/recovery/RemoteStorePeerRecoverySourceHandlerTests.java @@ -8,20 +8,64 @@ package org.opensearch.indices.recovery; +import org.apache.lucene.index.IndexCommit; +import org.opensearch.Version; +import org.opensearch.action.support.PlainActionFuture; import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.routing.IndexShardRoutingTable; +import org.opensearch.common.UUIDs; +import org.opensearch.common.concurrent.GatedCloseable; +import org.opensearch.common.lease.Releasable; +import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import 
org.opensearch.core.action.ActionListener; +import org.opensearch.core.index.shard.ShardId; import org.opensearch.index.IndexSettings; +import org.opensearch.index.engine.Engine; import org.opensearch.index.engine.NRTReplicationEngineFactory; +import org.opensearch.index.engine.SegmentsStats; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.replication.OpenSearchIndexLevelReplicationTestCase; import org.opensearch.index.seqno.ReplicationTracker; +import org.opensearch.index.seqno.RetentionLeases; +import org.opensearch.index.seqno.SeqNoStats; +import org.opensearch.index.seqno.SequenceNumbers; import org.opensearch.index.shard.IndexShard; +import org.opensearch.index.store.Store; +import org.opensearch.index.translog.Translog; import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.test.IndexSettingsModule; +import org.opensearch.test.VersionUtils; +import java.io.IOException; import java.nio.file.Path; +import java.util.Collections; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.IntSupplier; + +import static java.util.Collections.emptyMap; +import static java.util.Collections.emptySet; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; public class RemoteStorePeerRecoverySourceHandlerTests extends OpenSearchIndexLevelReplicationTestCase { + private static final IndexSettings INDEX_SETTINGS = IndexSettingsModule.newIndexSettings( + "index", + Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, org.opensearch.Version.CURRENT).build() + ); + private final ShardId shardId = new ShardId(INDEX_SETTINGS.getIndex(), 1); + + private final ClusterSettings service = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + private static final Settings settings = Settings.builder() .put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) .put(IndexMetadata.SETTING_REMOTE_STORE_ENABLED, "true") @@ -73,4 +117,122 @@ public void testReplicaShardRecoveryUptoLastFlushedCommit() throws Exception { assertFalse(primary.getRetentionLeases().contains(ReplicationTracker.getPeerRecoveryRetentionLeaseId(replica2.routingEntry()))); } } + + public void testThrowExceptionOnNoTargetInRouting() throws IOException { + final RecoverySettings recoverySettings = new RecoverySettings(Settings.EMPTY, service); + final StartRecoveryRequest request = getStartRecoveryRequest(); + final IndexShard shard = mock(IndexShard.class); + when(shard.seqNoStats()).thenReturn(mock(SeqNoStats.class)); + when(shard.segmentStats(anyBoolean(), anyBoolean())).thenReturn(mock(SegmentsStats.class)); + when(shard.isRelocatedPrimary()).thenReturn(false); + final org.opensearch.index.shard.ReplicationGroup replicationGroup = mock(org.opensearch.index.shard.ReplicationGroup.class); + final IndexShardRoutingTable routingTable = mock(IndexShardRoutingTable.class); + when(routingTable.getByAllocationId(anyString())).thenReturn(null); + when(shard.getReplicationGroup()).thenReturn(replicationGroup); + when(replicationGroup.getRoutingTable()).thenReturn(routingTable); + when(shard.acquireSafeIndexCommit()).thenReturn(mock(GatedCloseable.class)); + doAnswer(invocation -> { + ((ActionListener) 
invocation.getArguments()[0]).onResponse(() -> {}); + return null; + }).when(shard).acquirePrimaryOperationPermit(any(), anyString(), any()); + + final IndexMetadata.Builder indexMetadata = IndexMetadata.builder("test") + .settings( + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, between(0, 5)) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, between(1, 5)) + .put(IndexMetadata.SETTING_VERSION_CREATED, VersionUtils.randomVersion(random())) + .put(IndexMetadata.SETTING_INDEX_UUID, UUIDs.randomBase64UUID(random())) + ); + if (randomBoolean()) { + indexMetadata.state(IndexMetadata.State.CLOSE); + } + when(shard.indexSettings()).thenReturn(new IndexSettings(indexMetadata.build(), Settings.EMPTY)); + + final AtomicBoolean phase1Called = new AtomicBoolean(); + final AtomicBoolean prepareTargetForTranslogCalled = new AtomicBoolean(); + final AtomicBoolean phase2Called = new AtomicBoolean(); + final RecoverySourceHandler handler = new RemoteStorePeerRecoverySourceHandler( + shard, + mock(RecoveryTargetHandler.class), + threadPool, + request, + Math.toIntExact(recoverySettings.getChunkSize().getBytes()), + between(1, 8), + between(1, 8) + ) { + + @Override + void phase1( + IndexCommit snapshot, + long startingSeqNo, + IntSupplier translogOps, + ActionListener listener, + boolean skipCreateRetentionLeaseStep + ) { + phase1Called.set(true); + super.phase1(snapshot, startingSeqNo, translogOps, listener, skipCreateRetentionLeaseStep); + } + + @Override + void prepareTargetForTranslog(int totalTranslogOps, ActionListener listener) { + prepareTargetForTranslogCalled.set(true); + super.prepareTargetForTranslog(totalTranslogOps, listener); + } + + @Override + void phase2( + long startingSeqNo, + long endingSeqNo, + Translog.Snapshot snapshot, + long maxSeenAutoIdTimestamp, + long maxSeqNoOfUpdatesOrDeletes, + RetentionLeases retentionLeases, + long mappingVersion, + ActionListener listener + ) throws IOException { + phase2Called.set(true); + super.phase2( + startingSeqNo, + endingSeqNo, + snapshot, + maxSeenAutoIdTimestamp, + maxSeqNoOfUpdatesOrDeletes, + retentionLeases, + mappingVersion, + listener + ); + } + + }; + PlainActionFuture future = new PlainActionFuture<>(); + expectThrows(DelayRecoveryException.class, () -> { + handler.recoverToTarget(future); + future.actionGet(); + }); + verify(routingTable, times(5)).getByAllocationId(null); + assertFalse(phase1Called.get()); + assertFalse(prepareTargetForTranslogCalled.get()); + assertFalse(phase2Called.get()); + } + + public StartRecoveryRequest getStartRecoveryRequest() throws IOException { + Store.MetadataSnapshot metadataSnapshot = randomBoolean() + ? Store.MetadataSnapshot.EMPTY + : new Store.MetadataSnapshot( + Collections.emptyMap(), + Collections.singletonMap(Engine.HISTORY_UUID_KEY, UUIDs.randomBase64UUID()), + randomIntBetween(0, 100) + ); + return new StartRecoveryRequest( + shardId, + null, + new DiscoveryNode("b", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT), + new DiscoveryNode("b", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT), + metadataSnapshot, + randomBoolean(), + randomNonNegativeLong(), + randomBoolean() || metadataSnapshot.getHistoryUUID() == null ? 
SequenceNumbers.UNASSIGNED_SEQ_NO : randomNonNegativeLong() + ); + } } diff --git a/server/src/test/java/org/opensearch/indices/replication/SegmentReplicatorTests.java b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicatorTests.java new file mode 100644 index 0000000000000..7acee449a1b46 --- /dev/null +++ b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicatorTests.java @@ -0,0 +1,163 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.indices.replication; + +import org.apache.lucene.store.IOContext; +import org.opensearch.OpenSearchCorruptionException; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.lucene.Lucene; +import org.opensearch.common.settings.Settings; +import org.opensearch.core.action.ActionListener; +import org.opensearch.index.engine.NRTReplicationEngineFactory; +import org.opensearch.index.replication.TestReplicationSource; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.index.shard.IndexShardTestCase; +import org.opensearch.index.store.StoreFileMetadata; +import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; +import org.opensearch.indices.replication.common.CopyState; +import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.threadpool.ThreadPool; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.BiConsumer; + +import org.mockito.Mockito; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; + +public class SegmentReplicatorTests extends IndexShardTestCase { + + private static final Settings settings = Settings.builder() + .put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) + .build(); + + public void testStartReplicationWithoutSourceFactory() { + ThreadPool threadpool = mock(ThreadPool.class); + ExecutorService mock = mock(ExecutorService.class); + when(threadpool.generic()).thenReturn(mock); + SegmentReplicator segmentReplicator = new SegmentReplicator(threadpool); + + IndexShard shard = mock(IndexShard.class); + segmentReplicator.startReplication(shard); + Mockito.verifyNoInteractions(mock); + } + + public void testStartReplicationRunsSuccessfully() throws Exception { + final IndexShard replica = newStartedShard(false, settings, new NRTReplicationEngineFactory()); + final IndexShard primary = newStartedShard(true, settings, new NRTReplicationEngineFactory()); + + // index and copy segments to replica. 
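+        // The stubbed source factory below serves the primary's checkpoint metadata and copies the primary's
+        // segment files straight into the replica's store, so the final assertBusy can verify the replica doc count.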
+ int numDocs = randomIntBetween(10, 20); + for (int i = 0; i < numDocs; i++) { + indexDoc(primary, "_doc", Integer.toString(i)); + } + primary.refresh("test"); + + SegmentReplicator segmentReplicator = spy(new SegmentReplicator(threadPool)); + SegmentReplicationSourceFactory factory = mock(SegmentReplicationSourceFactory.class); + when(factory.get(replica)).thenReturn(new TestReplicationSource() { + @Override + public void getCheckpointMetadata( + long replicationId, + ReplicationCheckpoint checkpoint, + ActionListener listener + ) { + resolveCheckpointListener(listener, primary); + } + + @Override + public void getSegmentFiles( + long replicationId, + ReplicationCheckpoint checkpoint, + List filesToFetch, + IndexShard indexShard, + BiConsumer fileProgressTracker, + ActionListener listener + ) { + try { + Lucene.cleanLuceneIndex(indexShard.store().directory()); + Map segmentMetadataMap = primary.getSegmentMetadataMap(); + for (String file : segmentMetadataMap.keySet()) { + indexShard.store().directory().copyFrom(primary.store().directory(), file, file, IOContext.DEFAULT); + } + listener.onResponse(new GetSegmentFilesResponse(new ArrayList<>(segmentMetadataMap.values()))); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + }); + segmentReplicator.setSourceFactory(factory); + segmentReplicator.startReplication(replica); + assertBusy(() -> assertDocCount(replica, numDocs)); + closeShards(primary, replica); + } + + public void testReplicationFails() throws Exception { + allowShardFailures(); + final IndexShard replica = newStartedShard(false, settings, new NRTReplicationEngineFactory()); + final IndexShard primary = newStartedShard(true, settings, new NRTReplicationEngineFactory()); + + SegmentReplicator segmentReplicator = spy(new SegmentReplicator(threadPool)); + SegmentReplicationSourceFactory factory = mock(SegmentReplicationSourceFactory.class); + when(factory.get(replica)).thenReturn(new TestReplicationSource() { + @Override + public void getCheckpointMetadata( + long replicationId, + ReplicationCheckpoint checkpoint, + ActionListener listener + ) { + resolveCheckpointListener(listener, primary); + } + + @Override + public void getSegmentFiles( + long replicationId, + ReplicationCheckpoint checkpoint, + List filesToFetch, + IndexShard indexShard, + BiConsumer fileProgressTracker, + ActionListener listener + ) { + OpenSearchCorruptionException corruptIndexException = new OpenSearchCorruptionException("test"); + try { + indexShard.store().markStoreCorrupted(corruptIndexException); + } catch (IOException e) { + throw new RuntimeException(e); + } + listener.onFailure(corruptIndexException); + } + }); + // assert shard failure on corruption + AtomicBoolean failureCallbackTriggered = new AtomicBoolean(false); + replica.addShardFailureCallback((ig) -> failureCallbackTriggered.set(true)); + segmentReplicator.setSourceFactory(factory); + segmentReplicator.startReplication(replica); + assertBusy(() -> assertTrue(failureCallbackTriggered.get())); + closeShards(primary, replica); + } + + protected void resolveCheckpointListener(ActionListener listener, IndexShard primary) { + try (final CopyState copyState = new CopyState(primary)) { + listener.onResponse( + new CheckpointInfoResponse(copyState.getCheckpoint(), copyState.getMetadataMap(), copyState.getInfosBytes()) + ); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + +} diff --git a/server/src/test/java/org/opensearch/ingest/IngestServiceTests.java 
b/server/src/test/java/org/opensearch/ingest/IngestServiceTests.java index 1f4b1d635d438..e1e1ea561284b 100644 --- a/server/src/test/java/org/opensearch/ingest/IngestServiceTests.java +++ b/server/src/test/java/org/opensearch/ingest/IngestServiceTests.java @@ -58,6 +58,7 @@ import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.SetOnce; import org.opensearch.common.metrics.OperationStats; +import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.common.util.concurrent.OpenSearchExecutors; import org.opensearch.common.xcontent.XContentType; @@ -151,8 +152,12 @@ public void setup() { public void testIngestPlugin() { Client client = mock(Client.class); + ClusterService clusterService = mock(ClusterService.class); + when(clusterService.getClusterSettings()).thenReturn( + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) + ); IngestService ingestService = new IngestService( - mock(ClusterService.class), + clusterService, threadPool, null, null, @@ -186,8 +191,12 @@ public void testIngestPluginDuplicate() { public void testExecuteIndexPipelineDoesNotExist() { Client client = mock(Client.class); + ClusterService clusterService = mock(ClusterService.class); + when(clusterService.getClusterSettings()).thenReturn( + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) + ); IngestService ingestService = new IngestService( - mock(ClusterService.class), + clusterService, threadPool, null, null, @@ -720,6 +729,124 @@ public void testValidate() throws Exception { ingestService.validatePipeline(ingestInfos, putRequest); } + public void testValidateProcessorCountForIngestPipelineThrowsException() { + IngestService ingestService = createWithProcessors(); + PutPipelineRequest putRequest = new PutPipelineRequest( + "_id", + new BytesArray( + "{\"processors\": [{\"set\" : {\"field\": \"_field\", \"value\": \"_value\", \"tag\": \"tag1\"}}," + + "{\"remove\" : {\"field\": \"_field\", \"tag\": \"tag2\"}}]}" + ), + MediaTypeRegistry.JSON + ); + + DiscoveryNode node1 = new DiscoveryNode("_node_id1", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT); + DiscoveryNode node2 = new DiscoveryNode("_node_id2", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT); + Map ingestInfos = new HashMap<>(); + ingestInfos.put(node1, new IngestInfo(Arrays.asList(new ProcessorInfo("set"), new ProcessorInfo("remove")))); + ingestInfos.put(node2, new IngestInfo(Arrays.asList(new ProcessorInfo("set")))); + + Settings newSettings = Settings.builder().put("cluster.ingest.max_number_processors", 1).build(); + ingestService.getClusterService().getClusterSettings().applySettings(newSettings); + + expectThrows(IllegalStateException.class, () -> ingestService.validatePipeline(ingestInfos, putRequest)); + } + + public void testValidateProcessorCountForWithNestedOnFailureProcessorThrowsException() { + IngestService ingestService = createWithProcessors(); + PutPipelineRequest putRequest = new PutPipelineRequest( + "_id", + new BytesArray( + "{\n" + + " \"processors\": [\n" + + " {\n" + + " \"set\": {\n" + + " \"field\": \"timestamp_field_1\",\n" + + " \"value\": \"value\",\n" + + " \"on_failure\": [\n" + + " {\n" + + " \"set\": {\n" + + " \"field\": \"ingest_error1\",\n" + + " \"value\": \"failed\",\n" + + " \"tag\": \"tagggg\",\n" + + " \"on_failure\": [\n" + + " {\n" + + " \"set\": {\n" + + " \"field\": \"ingest_error1\",\n" + + " \"value\": 
\"failed\",\n" + + " \"tag\": \"tagggg\",\n" + + " \"on_failure\": [\n" + + " {\n" + + " \"set\": {\n" + + " \"field\": \"ingest_error1\",\n" + + " \"value\": \"failed\",\n" + + " \"tag\": \"tagggg\",\n" + + " \"on_failure\": [\n" + + " {\n" + + " \"set\": {\n" + + " \"field\": \"ingest_error1\",\n" + + " \"value\": \"failed\",\n" + + " \"tag\": \"tagggg\"\n" + + " }\n" + + " },\n" + + " {\n" + + " \"set\": {\n" + + " \"field\": \"ingest_error2\",\n" + + " \"value\": \"failed\",\n" + + " \"tag\": \"tagggg\"\n" + + " }\n" + + " }\n" + + " ]\n" + + " }\n" + + " },\n" + + " {\n" + + " \"set\": {\n" + + " \"field\": \"ingest_error2\",\n" + + " \"value\": \"failed\",\n" + + " \"tag\": \"tagggg\"\n" + + " }\n" + + " }\n" + + " ]\n" + + " }\n" + + " },\n" + + " {\n" + + " \"set\": {\n" + + " \"field\": \"ingest_error2\",\n" + + " \"value\": \"failed\",\n" + + " \"tag\": \"tagggg\"\n" + + " }\n" + + " }\n" + + " ]\n" + + " }\n" + + " },\n" + + " {\n" + + " \"set\": {\n" + + " \"field\": \"ingest_error2\",\n" + + " \"value\": \"failed\",\n" + + " \"tag\": \"tagggg\"\n" + + " }\n" + + " }\n" + + " ]\n" + + " }\n" + + " }\n" + + " ]\n" + + "}" + ), + MediaTypeRegistry.JSON + ); + + DiscoveryNode node1 = new DiscoveryNode("_node_id1", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT); + DiscoveryNode node2 = new DiscoveryNode("_node_id2", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT); + Map ingestInfos = new HashMap<>(); + ingestInfos.put(node1, new IngestInfo(Arrays.asList(new ProcessorInfo("set"), new ProcessorInfo("remove")))); + ingestInfos.put(node2, new IngestInfo(Arrays.asList(new ProcessorInfo("set")))); + + Settings newSettings = Settings.builder().put("cluster.ingest.max_number_processors", 7).build(); + ingestService.getClusterService().getClusterSettings().applySettings(newSettings); + + expectThrows(IllegalStateException.class, () -> ingestService.validatePipeline(ingestInfos, putRequest)); + } + public void testExecuteIndexPipelineExistsButFailedParsing() { IngestService ingestService = createWithProcessors( Collections.singletonMap("mock", (factories, tag, description, config) -> new AbstractProcessor("mock", "description") { @@ -1506,8 +1633,12 @@ public Map getProcessors(Processor.Parameters paramet // Create ingest service: Client client = mock(Client.class); + ClusterService clusterService = mock(ClusterService.class); + when(clusterService.getClusterSettings()).thenReturn( + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) + ); IngestService ingestService = new IngestService( - mock(ClusterService.class), + clusterService, threadPool, null, null, @@ -2058,6 +2189,18 @@ public void testPrepareBatches_different_index_pipeline() { assertEquals(4, batches.size()); } + public void testUpdateMaxIngestProcessorCountSetting() { + ClusterSettings clusterSettings = new ClusterSettings(Settings.builder().build(), ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + + // verify defaults + assertEquals(Integer.MAX_VALUE, clusterSettings.get(IngestService.MAX_NUMBER_OF_INGEST_PROCESSORS).intValue()); + + // verify update max processor + Settings newSettings = Settings.builder().put("cluster.ingest.max_number_processors", 3).build(); + clusterSettings.applySettings(newSettings); + assertEquals(3, clusterSettings.get(IngestService.MAX_NUMBER_OF_INGEST_PROCESSORS).intValue()); + } + private IngestService.IndexRequestWrapper createIndexRequestWrapper(String index, List pipelines) { IndexRequest indexRequest = new 
IndexRequest(index); return new IngestService.IndexRequestWrapper(0, indexRequest, pipelines, true); @@ -2093,7 +2236,11 @@ private static IngestService createWithProcessors(Map ExecutorService executorService = OpenSearchExecutors.newDirectExecutorService(); when(threadPool.generic()).thenReturn(executorService); when(threadPool.executor(anyString())).thenReturn(executorService); - return new IngestService(mock(ClusterService.class), threadPool, null, null, null, Collections.singletonList(new IngestPlugin() { + ClusterService clusterService = mock(ClusterService.class); + when(clusterService.getClusterSettings()).thenReturn( + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) + ); + return new IngestService(clusterService, threadPool, null, null, null, Collections.singletonList(new IngestPlugin() { @Override public Map getProcessors(final Processor.Parameters parameters) { return processors; diff --git a/server/src/test/java/org/opensearch/repositories/IndexIdTests.java b/server/src/test/java/org/opensearch/repositories/IndexIdTests.java index 2b927b3b40115..bcd75f9a47ad8 100644 --- a/server/src/test/java/org/opensearch/repositories/IndexIdTests.java +++ b/server/src/test/java/org/opensearch/repositories/IndexIdTests.java @@ -32,6 +32,7 @@ package org.opensearch.repositories; +import org.opensearch.Version; import org.opensearch.common.UUIDs; import org.opensearch.common.io.stream.BytesStreamOutput; import org.opensearch.common.xcontent.json.JsonXContent; @@ -39,6 +40,7 @@ import org.opensearch.core.xcontent.ToXContent; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.index.remote.RemoteStoreEnums; import org.opensearch.test.OpenSearchTestCase; import java.io.IOException; @@ -52,51 +54,84 @@ public void testEqualsAndHashCode() { // assert equals and hashcode String name = randomAlphaOfLength(8); String id = UUIDs.randomBase64UUID(); - IndexId indexId1 = new IndexId(name, id); - IndexId indexId2 = new IndexId(name, id); + int shardPathType = randomIntBetween(0, 2); + IndexId indexId1 = new IndexId(name, id, shardPathType); + IndexId indexId2 = new IndexId(name, id, shardPathType); assertEquals(indexId1, indexId2); assertEquals(indexId1.hashCode(), indexId2.hashCode()); // assert equals when using index name for id id = name; - indexId1 = new IndexId(name, id); - indexId2 = new IndexId(name, id); + indexId1 = new IndexId(name, id, shardPathType); + indexId2 = new IndexId(name, id, shardPathType); assertEquals(indexId1, indexId2); assertEquals(indexId1.hashCode(), indexId2.hashCode()); - // assert not equals when name or id differ - indexId2 = new IndexId(randomAlphaOfLength(8), id); + // assert not equals when name, id, or shardPathType differ + indexId2 = new IndexId(randomAlphaOfLength(8), id, shardPathType); assertNotEquals(indexId1, indexId2); assertNotEquals(indexId1.hashCode(), indexId2.hashCode()); - indexId2 = new IndexId(name, UUIDs.randomBase64UUID()); + indexId2 = new IndexId(name, UUIDs.randomBase64UUID(), shardPathType); assertNotEquals(indexId1, indexId2); assertNotEquals(indexId1.hashCode(), indexId2.hashCode()); + int newShardPathType = randomIntBetween(0, 2); + indexId2 = new IndexId(name, id, newShardPathType); + if (shardPathType == newShardPathType) { + assertEquals(indexId1, indexId2); + assertEquals(indexId1.hashCode(), indexId2.hashCode()); + } else { + assertNotEquals(indexId1, indexId2); + assertNotEquals(indexId1.hashCode(), indexId2.hashCode()); + } } public 
void testSerialization() throws IOException { - IndexId indexId = new IndexId(randomAlphaOfLength(8), UUIDs.randomBase64UUID()); + IndexId indexId = new IndexId(randomAlphaOfLength(8), UUIDs.randomBase64UUID(), randomIntBetween(0, 2)); BytesStreamOutput out = new BytesStreamOutput(); + out.setVersion(Version.V_2_17_0); indexId.writeTo(out); assertEquals(indexId, new IndexId(out.bytes().streamInput())); } public void testXContent() throws IOException { - IndexId indexId = new IndexId(randomAlphaOfLength(8), UUIDs.randomBase64UUID()); + String name = randomAlphaOfLength(8); + String id = UUIDs.randomBase64UUID(); + int shardPathType = randomIntBetween(0, 2); + IndexId indexId = new IndexId(name, id, shardPathType); XContentBuilder builder = JsonXContent.contentBuilder(); indexId.toXContent(builder, ToXContent.EMPTY_PARAMS); XContentParser parser = createParser(JsonXContent.jsonXContent, BytesReference.bytes(builder)); assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken()); - String name = null; - String id = null; + String parsedName = null; + String parsedId = null; + int parsedShardPathType = -1; while (parser.nextToken() != XContentParser.Token.END_OBJECT) { final String currentFieldName = parser.currentName(); parser.nextToken(); - if (currentFieldName.equals(IndexId.NAME)) { - name = parser.text(); - } else if (currentFieldName.equals(IndexId.ID)) { - id = parser.text(); + switch (currentFieldName) { + case IndexId.NAME: + parsedName = parser.text(); + break; + case IndexId.ID: + parsedId = parser.text(); + break; + case IndexId.SHARD_PATH_TYPE: + parsedShardPathType = parser.intValue(); + break; } } - assertNotNull(name); - assertNotNull(id); - assertEquals(indexId, new IndexId(name, id)); + parser.close(); + assertNotNull(parsedName); + assertNotNull(parsedId); + assertNotEquals(-1, parsedShardPathType); + assertEquals(name, parsedName); + assertEquals(id, parsedId); + assertEquals(shardPathType, parsedShardPathType); + } + + public void testDefaultShardPathType() { + String name = randomAlphaOfLength(8); + String id = UUIDs.randomBase64UUID(); + IndexId indexId = new IndexId(name, id); + assertEquals(IndexId.DEFAULT_SHARD_PATH_TYPE, indexId.getShardPathType()); + assertEquals(RemoteStoreEnums.PathType.FIXED.getCode(), IndexId.DEFAULT_SHARD_PATH_TYPE); } } diff --git a/server/src/test/java/org/opensearch/repositories/RepositoriesServiceTests.java b/server/src/test/java/org/opensearch/repositories/RepositoriesServiceTests.java index cb0a36c870d07..4cd822c7d583b 100644 --- a/server/src/test/java/org/opensearch/repositories/RepositoriesServiceTests.java +++ b/server/src/test/java/org/opensearch/repositories/RepositoriesServiceTests.java @@ -62,6 +62,7 @@ import org.opensearch.common.io.InputStreamContainer; import org.opensearch.common.lifecycle.Lifecycle; import org.opensearch.common.lifecycle.LifecycleListener; +import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.Strings; @@ -148,6 +149,10 @@ private RepositoriesService createRepositoriesServiceWithMockedClusterService(Cl when(currentClusterState.getNodes()).thenReturn(nodes); when(clusterService.state()).thenReturn(currentClusterState); + when(clusterService.getSettings()).thenReturn(Settings.EMPTY); + ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + when(clusterService.getClusterSettings()).thenReturn(clusterSettings); + 
RepositoriesService repositoriesService = new RepositoriesService( Settings.EMPTY, clusterService, @@ -676,6 +681,19 @@ public void finalizeSnapshot( listener.onResponse(null); } + @Override + public void finalizeSnapshot( + ShardGenerations shardGenerations, + long repositoryStateId, + Metadata clusterMetadata, + SnapshotInfo snapshotInfo, + Version repositoryMetaVersion, + Function stateTransformer, + ActionListener listener + ) { + listener.onResponse(null); + } + @Override public void deleteSnapshots( Collection snapshotIds, diff --git a/server/src/test/java/org/opensearch/repositories/RepositoryDataTests.java b/server/src/test/java/org/opensearch/repositories/RepositoryDataTests.java index 46293e6a0db7a..285a1131326ee 100644 --- a/server/src/test/java/org/opensearch/repositories/RepositoryDataTests.java +++ b/server/src/test/java/org/opensearch/repositories/RepositoryDataTests.java @@ -42,12 +42,14 @@ import org.opensearch.core.xcontent.XContent; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.index.remote.RemoteStoreEnums.PathType; import org.opensearch.snapshots.SnapshotId; import org.opensearch.snapshots.SnapshotState; import org.opensearch.test.OpenSearchTestCase; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; @@ -59,6 +61,8 @@ import java.util.function.Function; import java.util.stream.Collectors; +import static org.opensearch.index.remote.RemoteStoreEnums.PathType.FIXED; +import static org.opensearch.index.remote.RemoteStoreEnums.PathType.HASHED_PREFIX; import static org.opensearch.repositories.RepositoryData.EMPTY_REPO_GEN; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.equalTo; @@ -70,7 +74,7 @@ public class RepositoryDataTests extends OpenSearchTestCase { public void testEqualsAndHashCode() { - RepositoryData repositoryData1 = generateRandomRepoData(); + RepositoryData repositoryData1 = generateRandomRepoData(FIXED.getCode()); RepositoryData repositoryData2 = repositoryData1.copy(); assertEquals(repositoryData1, repositoryData2); assertEquals(repositoryData1.hashCode(), repositoryData2.hashCode()); @@ -332,7 +336,7 @@ public void testIndexThatReferenceANullSnapshot() throws IOException { ); assertThat( e.getMessage(), - equalTo("Detected a corrupted repository, " + "index [docs/_id] references an unknown snapshot uuid [null]") + equalTo("Detected a corrupted repository, " + "index [docs/_id/0] references an unknown snapshot uuid [null]") ); } } @@ -403,11 +407,95 @@ public void testIndexMetaDataToRemoveAfterRemovingSnapshotWithSharing() { assertEquals(newRepoData.indexMetaDataToRemoveAfterRemovingSnapshots(Collections.singleton(otherSnapshotId)), removeFromOther); } + public void testResolveNewIndices() { + // Test case 1: All indices are new + List indicesToResolve = Arrays.asList("index1", "index2", "index3"); + Map inFlightIds = Collections.emptyMap(); + int pathType = randomIntBetween(0, 2); + List resolvedIndices = RepositoryData.EMPTY.resolveNewIndices(indicesToResolve, inFlightIds, pathType); + assertEquals(indicesToResolve.size(), resolvedIndices.size()); + for (IndexId indexId : resolvedIndices) { + assertTrue(indicesToResolve.contains(indexId.getName())); + assertNotNull(indexId.getId()); + assertEquals(pathType, indexId.getShardPathType()); + } + + // Test case 2: Some indices are existing, some are new + RepositoryData 
repositoryData = generateRandomRepoData(); + Map existingIndices = repositoryData.getIndices(); + List existingIndexNames = new ArrayList<>(existingIndices.keySet()); + List newIndexNames = Arrays.asList("newIndex1", "newIndex2"); + indicesToResolve = new ArrayList<>(existingIndexNames); + indicesToResolve.addAll(newIndexNames); + pathType = randomIntBetween(0, 2); + resolvedIndices = repositoryData.resolveNewIndices(indicesToResolve, Collections.emptyMap(), pathType); + assertEquals(indicesToResolve.size(), resolvedIndices.size()); + for (IndexId indexId : resolvedIndices) { + if (existingIndexNames.contains(indexId.getName())) { + assertEquals(existingIndices.get(indexId.getName()), indexId); + } else { + assertTrue(newIndexNames.contains(indexId.getName())); + assertNotNull(indexId.getId()); + assertEquals(pathType, indexId.getShardPathType()); + } + } + + // Test case 3: Some indices are in-flight + Map inFlightIndexIds = new HashMap<>(); + for (String indexName : newIndexNames) { + inFlightIndexIds.put(indexName, new IndexId(indexName, UUIDs.randomBase64UUID(), pathType)); + } + resolvedIndices = repositoryData.resolveNewIndices(indicesToResolve, inFlightIndexIds, pathType); + assertEquals(indicesToResolve.size(), resolvedIndices.size()); + for (IndexId indexId : resolvedIndices) { + if (existingIndexNames.contains(indexId.getName())) { + assertEquals(existingIndices.get(indexId.getName()), indexId); + } else if (newIndexNames.contains(indexId.getName())) { + assertEquals(inFlightIndexIds.get(indexId.getName()), indexId); + } else { + fail("Unexpected index: " + indexId.getName()); + } + } + } + + public void testResolveNewIndicesWithDifferentPathType() { + // Generate repository data with a fixed path type + int existingPathType = PathType.FIXED.getCode(); + RepositoryData repositoryData = generateRandomRepoData(existingPathType); + Map existingIndices = repositoryData.getIndices(); + + // Create a list of existing and new index names + List existingIndexNames = new ArrayList<>(existingIndices.keySet()); + List newIndexNames = Arrays.asList("newIndex1", "newIndex2"); + List indicesToResolve = new ArrayList<>(existingIndexNames); + indicesToResolve.addAll(newIndexNames); + + // Use a different path type for new indices + int newPathType = HASHED_PREFIX.getCode(); + + List resolvedIndices = repositoryData.resolveNewIndices(indicesToResolve, Collections.emptyMap(), newPathType); + assertEquals(indicesToResolve.size(), resolvedIndices.size()); + for (IndexId indexId : resolvedIndices) { + if (existingIndexNames.contains(indexId.getName())) { + assertEquals(existingIndices.get(indexId.getName()), indexId); + assertEquals(existingPathType, indexId.getShardPathType()); + } else { + assertTrue(newIndexNames.contains(indexId.getName())); + assertNotNull(indexId.getId()); + assertEquals(newPathType, indexId.getShardPathType()); + } + } + } + public static RepositoryData generateRandomRepoData() { + return generateRandomRepoData(randomFrom(PathType.values()).getCode()); + } + + public static RepositoryData generateRandomRepoData(int pathType) { final int numIndices = randomIntBetween(1, 30); final List indices = new ArrayList<>(numIndices); for (int i = 0; i < numIndices; i++) { - indices.add(new IndexId(randomAlphaOfLength(8), UUIDs.randomBase64UUID())); + indices.add(new IndexId(randomAlphaOfLength(8), UUIDs.randomBase64UUID(), pathType)); } final int numSnapshots = randomIntBetween(1, 30); RepositoryData repositoryData = RepositoryData.EMPTY; diff --git 
a/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryHelperTests.java b/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryHelperTests.java index 29ffb94ce8bf4..970e375055bc7 100644 --- a/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryHelperTests.java +++ b/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryHelperTests.java @@ -9,7 +9,6 @@ package org.opensearch.repositories.blobstore; import org.opensearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; -import org.opensearch.action.support.master.AcknowledgedResponse; import org.opensearch.client.Client; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.service.ClusterService; @@ -24,6 +23,7 @@ import org.opensearch.index.IndexSettings; import org.opensearch.index.store.RemoteBufferedOutputDirectory; import org.opensearch.indices.IndicesService; +import org.opensearch.indices.RemoteStoreSettings; import org.opensearch.indices.recovery.RecoverySettings; import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.plugins.Plugin; @@ -60,13 +60,16 @@ protected Collection> getPlugins() { protected String[] getLockFilesInRemoteStore(String remoteStoreIndex, String remoteStoreRepository) throws IOException { final RepositoriesService repositoriesService = getInstanceFromNode(RepositoriesService.class); final BlobStoreRepository remoteStorerepository = (BlobStoreRepository) repositoriesService.repository(remoteStoreRepository); + ClusterService clusterService = getInstanceFromNode(ClusterService.class); + String segmentsPathFixedPrefix = RemoteStoreSettings.CLUSTER_REMOTE_STORE_SEGMENTS_PATH_PREFIX.get(clusterService.getSettings()); BlobPath shardLevelBlobPath = getShardLevelBlobPath( client(), remoteStoreIndex, remoteStorerepository.basePath(), "0", SEGMENTS, - LOCK_FILES + LOCK_FILES, + segmentsPathFixedPrefix ); BlobContainer blobContainer = remoteStorerepository.blobStore().blobContainer(shardLevelBlobPath); try (RemoteBufferedOutputDirectory lockDirectory = new RemoteBufferedOutputDirectory(blobContainer)) { @@ -99,25 +102,15 @@ protected void assertSnapshotOrGenericThread() { } protected void createRepository(Client client, String repoName) { - AcknowledgedResponse putRepositoryResponse = client.admin() - .cluster() - .preparePutRepository(repoName) - .setType(REPO_TYPE) - .setSettings( - Settings.builder().put(node().settings()).put("location", OpenSearchIntegTestCase.randomRepoPath(node().settings())) - ) - .get(); - assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true)); + Settings.Builder settings = Settings.builder() + .put(node().settings()) + .put("location", OpenSearchIntegTestCase.randomRepoPath(node().settings())); + OpenSearchIntegTestCase.putRepository(client.admin().cluster(), repoName, REPO_TYPE, settings); } protected void createRepository(Client client, String repoName, Settings repoSettings) { - AcknowledgedResponse putRepositoryResponse = client.admin() - .cluster() - .preparePutRepository(repoName) - .setType(REPO_TYPE) - .setSettings(repoSettings) - .get(); - assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true)); + Settings.Builder settingsBuilder = Settings.builder().put(repoSettings); + OpenSearchIntegTestCase.putRepository(client.admin().cluster(), repoName, REPO_TYPE, settingsBuilder); } protected void updateRepository(Client client, String repoName, Settings repoSettings) { diff --git 
a/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryRemoteIndexTests.java b/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryRemoteIndexTests.java index 9cca495cced72..e280ab8c7a73c 100644 --- a/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryRemoteIndexTests.java +++ b/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryRemoteIndexTests.java @@ -33,6 +33,8 @@ package org.opensearch.repositories.blobstore; import org.opensearch.action.admin.cluster.repositories.get.GetRepositoriesResponse; +import org.opensearch.action.admin.cluster.repositories.verify.VerifyRepositoryResponse; +import org.opensearch.action.support.master.AcknowledgedResponse; import org.opensearch.client.Client; import org.opensearch.cluster.metadata.RepositoryMetadata; import org.opensearch.common.settings.Settings; @@ -41,13 +43,16 @@ import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.index.IndexSettings; import org.opensearch.index.snapshots.blobstore.RemoteStoreShardShallowCopySnapshot; +import org.opensearch.indices.RemoteStoreSettings; import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.repositories.IndexId; import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.RepositoryData; +import org.opensearch.repositories.RepositoryException; import org.opensearch.repositories.fs.FsRepository; import org.opensearch.snapshots.SnapshotId; import org.opensearch.snapshots.SnapshotInfo; +import org.opensearch.snapshots.SnapshotsService; import org.opensearch.test.OpenSearchIntegTestCase; import java.io.IOException; @@ -64,6 +69,9 @@ import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY; +import static org.opensearch.repositories.blobstore.BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY; +import static org.opensearch.repositories.blobstore.BlobStoreRepository.SHALLOW_SNAPSHOT_V2; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; import static org.hamcrest.Matchers.equalTo; /** @@ -81,6 +89,7 @@ protected Settings nodeSettings() { .put(Environment.PATH_HOME_SETTING.getKey(), tempDir) .put(Environment.PATH_REPO_SETTING.getKey(), tempDir.resolve("repo")) .put(Environment.PATH_SHARED_DATA_SETTING.getKey(), tempDir.getParent()) + .put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED.getKey(), true) .build(); } @@ -373,4 +382,119 @@ public void testRetrieveShallowCopySnapshotCase2() throws IOException { assertThat(snapshotIds, equalTo(originalSnapshots)); } + public void testRepositoryCreationShallowV2() throws Exception { + Client client = client(); + + Settings snapshotRepoSettings1 = Settings.builder() + .put(node().settings()) + .put("location", OpenSearchIntegTestCase.randomRepoPath(node().settings())) + .put(REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(SHALLOW_SNAPSHOT_V2.getKey(), true) + .build(); + + String invalidRepoName = "test" + SnapshotsService.SNAPSHOT_PINNED_TIMESTAMP_DELIMITER + "repo-1"; + try { + createRepository(client, invalidRepoName, snapshotRepoSettings1); + } catch (RepositoryException e) { + assertEquals( + "[" + + 
invalidRepoName + + "] setting shallow_snapshot_v2 cannot be enabled for repository with __ in the name as this delimiter is used to create pinning entity", + e.getMessage() + ); + } + + // Create repo with shallow snapshot V2 enabled + createRepository(client, "test-repo-1", snapshotRepoSettings1); + + logger.info("--> verify the repository"); + VerifyRepositoryResponse verifyRepositoryResponse = client.admin().cluster().prepareVerifyRepository("test-repo-1").get(); + assertNotNull(verifyRepositoryResponse.getNodes()); + + GetRepositoriesResponse getRepositoriesResponse = client.admin().cluster().prepareGetRepositories("test-repo-1").get(); + assertTrue(SHALLOW_SNAPSHOT_V2.get(getRepositoriesResponse.repositories().get(0).settings())); + + Settings snapshotRepoSettings2 = Settings.builder() + .put(node().settings()) + .put("location", OpenSearchIntegTestCase.randomRepoPath(node().settings())) + .put(REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(SHALLOW_SNAPSHOT_V2.getKey(), true) + .build(); + + // Create another repo with shallow snapshot V2 enabled, this should fail. + try { + createRepository(client, "test-repo-2", snapshotRepoSettings2); + } catch (RepositoryException e) { + assertEquals( + "[test-repo-2] setting shallow_snapshot_v2 cannot be enabled as this setting can be enabled only on one repository and one or more repositories in the cluster have the setting as enabled", + e.getMessage() + ); + } + + // Disable shallow snapshot V2 setting on test-repo-1 + updateRepository( + client, + "test-repo-1", + Settings.builder().put(snapshotRepoSettings1).put(SHALLOW_SNAPSHOT_V2.getKey(), false).build() + ); + getRepositoriesResponse = client.admin().cluster().prepareGetRepositories("test-repo-1").get(); + assertFalse(SHALLOW_SNAPSHOT_V2.get(getRepositoriesResponse.repositories().get(0).settings())); + + // Create test-repo-2 with shallow snapshot V2 enabled, this should pass now. 
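+        // This succeeds because shallow_snapshot_v2 was disabled on test-repo-1 just above, satisfying the
+        // constraint that only one repository in the cluster may have the setting enabled at a time.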
+ createRepository(client, "test-repo-2", snapshotRepoSettings2); + getRepositoriesResponse = client.admin().cluster().prepareGetRepositories("test-repo-2").get(); + assertTrue(SHALLOW_SNAPSHOT_V2.get(getRepositoriesResponse.repositories().get(0).settings())); + + final String indexName = "test-idx"; + createIndex(indexName); + ensureGreen(); + indexDocuments(client, indexName); + + // Create pinned timestamp snapshot in test-repo-2 + SnapshotInfo snapshotInfo = createSnapshot("test-repo-2", "test-snap-2", new ArrayList<>()); + assertNotNull(snapshotInfo.snapshotId()); + + // As snapshot is present, even after disabling shallow snapshot setting in test-repo-2, we will not be able to + // enable shallow snapshot v2 setting in test-repo-1 + updateRepository( + client, + "test-repo-2", + Settings.builder().put(snapshotRepoSettings2).put(SHALLOW_SNAPSHOT_V2.getKey(), false).build() + ); + getRepositoriesResponse = client.admin().cluster().prepareGetRepositories("test-repo-2").get(); + assertFalse(SHALLOW_SNAPSHOT_V2.get(getRepositoriesResponse.repositories().get(0).settings())); + + try { + updateRepository(client, "test-repo-1", snapshotRepoSettings1); + } catch (RepositoryException e) { + assertEquals( + "[test-repo-1] setting shallow_snapshot_v2 cannot be enabled if there are existing snapshots created with shallow V2 setting using different repository.", + e.getMessage() + ); + } + + // After deleting the snapshot, we will be able to enable shallow snapshot v2 setting in test-repo-1 + AcknowledgedResponse deleteSnapshotResponse = client().admin().cluster().prepareDeleteSnapshot("test-repo-2", "test-snap-2").get(); + + assertAcked(deleteSnapshotResponse); + + updateRepository(client, "test-repo-1", snapshotRepoSettings1); + getRepositoriesResponse = client.admin().cluster().prepareGetRepositories("test-repo-1").get(); + assertTrue(SHALLOW_SNAPSHOT_V2.get(getRepositoriesResponse.repositories().get(0).settings())); + + // Having a snapshot in the same repo should allow disabling and re-enabling shallow snapshot v2 setting + snapshotInfo = createSnapshot("test-repo-1", "test-snap-1", new ArrayList<>()); + assertNotNull(snapshotInfo.snapshotId()); + updateRepository( + client, + "test-repo-1", + Settings.builder().put(snapshotRepoSettings1).put(SHALLOW_SNAPSHOT_V2.getKey(), false).build() + ); + getRepositoriesResponse = client.admin().cluster().prepareGetRepositories("test-repo-1").get(); + assertFalse(SHALLOW_SNAPSHOT_V2.get(getRepositoriesResponse.repositories().get(0).settings())); + + updateRepository(client, "test-repo-1", snapshotRepoSettings1); + getRepositoriesResponse = client.admin().cluster().prepareGetRepositories("test-repo-1").get(); + assertTrue(SHALLOW_SNAPSHOT_V2.get(getRepositoriesResponse.repositories().get(0).settings())); + } } diff --git a/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryTests.java b/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryTests.java index eabac37bf3434..d9622aae4c378 100644 --- a/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryTests.java +++ b/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryTests.java @@ -34,19 +34,29 @@ import org.opensearch.Version; import org.opensearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; +import org.opensearch.action.support.GroupedActionListener; import org.opensearch.action.support.PlainActionFuture; -import org.opensearch.action.support.master.AcknowledgedResponse; import 
org.opensearch.client.Client; import org.opensearch.cluster.metadata.RepositoryMetadata; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.Priority; import org.opensearch.common.UUIDs; +import org.opensearch.common.blobstore.BlobContainer; +import org.opensearch.common.blobstore.BlobMetadata; +import org.opensearch.common.blobstore.DeleteResult; +import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; +import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.unit.ByteSizeUnit; +import org.opensearch.core.compress.Compressor; import org.opensearch.core.index.Index; import org.opensearch.core.index.shard.ShardId; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.env.Environment; +import org.opensearch.index.remote.RemoteStoreEnums; +import org.opensearch.index.store.RemoteSegmentStoreDirectoryFactory; +import org.opensearch.index.store.lockmanager.RemoteStoreLockManager; +import org.opensearch.index.store.lockmanager.RemoteStoreLockManagerFactory; import org.opensearch.indices.recovery.RecoverySettings; import org.opensearch.plugins.Plugin; import org.opensearch.plugins.RepositoryPlugin; @@ -55,25 +65,44 @@ import org.opensearch.repositories.Repository; import org.opensearch.repositories.RepositoryData; import org.opensearch.repositories.RepositoryException; +import org.opensearch.repositories.RepositoryStats; import org.opensearch.repositories.ShardGenerations; import org.opensearch.repositories.fs.FsRepository; import org.opensearch.snapshots.SnapshotId; +import org.opensearch.snapshots.SnapshotShardPaths; +import org.opensearch.snapshots.SnapshotShardPaths.ShardInfo; import org.opensearch.snapshots.SnapshotState; import org.opensearch.test.OpenSearchIntegTestCase; import java.nio.file.Path; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; import java.util.stream.Collectors; import static org.opensearch.repositories.RepositoryDataTests.generateRandomRepoData; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.nullValue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyMap; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; /** * Tests for the {@link BlobStoreRepository} and its subclasses. 
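+ * The cases added in this change cover snapshot shard path parsing and persistence, stale index cleanup,
+ * and the repository's basic accessors (stats, throttle times, read-only and system-repository flags).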
@@ -114,13 +143,8 @@ public void testRetrieveSnapshots() throws Exception { final String repositoryName = "test-repo"; logger.info("--> creating repository"); - AcknowledgedResponse putRepositoryResponse = client.admin() - .cluster() - .preparePutRepository(repositoryName) - .setType(REPO_TYPE) - .setSettings(Settings.builder().put(node().settings()).put("location", location)) - .get(); - assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true)); + Settings.Builder settings = Settings.builder().put(node().settings()).put("location", location); + OpenSearchIntegTestCase.putRepository(client.admin().cluster(), repositoryName, REPO_TYPE, settings); logger.info("--> creating an index and indexing documents"); final String indexName = "test-idx"; @@ -239,20 +263,13 @@ public void testBadChunksize() throws Exception { final Client client = client(); final Path location = OpenSearchIntegTestCase.randomRepoPath(node().settings()); final String repositoryName = "test-repo"; - + Settings.Builder settings = Settings.builder() + .put(node().settings()) + .put("location", location) + .put("chunk_size", randomLongBetween(-10, 0), ByteSizeUnit.BYTES); expectThrows( RepositoryException.class, - () -> client.admin() - .cluster() - .preparePutRepository(repositoryName) - .setType(REPO_TYPE) - .setSettings( - Settings.builder() - .put(node().settings()) - .put("location", location) - .put("chunk_size", randomLongBetween(-10, 0), ByteSizeUnit.BYTES) - ) - .get() + () -> OpenSearchIntegTestCase.putRepository(client.admin().cluster(), repositoryName, REPO_TYPE, settings) ); } @@ -260,18 +277,11 @@ public void testPrefixModeVerification() throws Exception { final Client client = client(); final Path location = OpenSearchIntegTestCase.randomRepoPath(node().settings()); final String repositoryName = "test-repo"; - AcknowledgedResponse putRepositoryResponse = client.admin() - .cluster() - .preparePutRepository(repositoryName) - .setType(REPO_TYPE) - .setSettings( - Settings.builder() - .put(node().settings()) - .put("location", location) - .put(BlobStoreRepository.PREFIX_MODE_VERIFICATION_SETTING.getKey(), true) - ) - .get(); - assertTrue(putRepositoryResponse.isAcknowledged()); + Settings.Builder settings = Settings.builder() + .put(node().settings()) + .put("location", location) + .put(BlobStoreRepository.PREFIX_MODE_VERIFICATION_SETTING.getKey(), true); + OpenSearchIntegTestCase.putRepository(client.admin().cluster(), repositoryName, REPO_TYPE, settings); final RepositoriesService repositoriesService = getInstanceFromNode(RepositoriesService.class); final BlobStoreRepository repository = (BlobStoreRepository) repositoriesService.repository(repositoryName); @@ -305,13 +315,8 @@ private BlobStoreRepository setupRepo() { final Path location = OpenSearchIntegTestCase.randomRepoPath(node().settings()); final String repositoryName = "test-repo"; - AcknowledgedResponse putRepositoryResponse = client.admin() - .cluster() - .preparePutRepository(repositoryName) - .setType(REPO_TYPE) - .setSettings(Settings.builder().put(node().settings()).put("location", location)) - .get(); - assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true)); + Settings.Builder settings = Settings.builder().put(node().settings()).put("location", location); + OpenSearchIntegTestCase.putRepository(client.admin().cluster(), repositoryName, REPO_TYPE, settings); final RepositoriesService repositoriesService = getInstanceFromNode(RepositoriesService.class); final BlobStoreRepository repository = (BlobStoreRepository) 
repositoriesService.repository(repositoryName); @@ -371,4 +376,268 @@ public void testRemoteStoreShardCleanupTask() { remoteStoreShardCleanupTask.run(); assertFalse(executed1.get()); } + + public void testParseShardPath() { + RepositoryData repoData = generateRandomRepoData(); + IndexId indexId = repoData.getIndices().values().iterator().next(); + int shardCount = repoData.shardGenerations().getGens(indexId).size(); + + String shardPath = String.join( + SnapshotShardPaths.DELIMITER, + indexId.getId(), + indexId.getName(), + String.valueOf(shardCount), + String.valueOf(indexId.getShardPathType()), + "1" + ); + ShardInfo shardInfo = SnapshotShardPaths.parseShardPath(shardPath); + + assertEquals(shardInfo.getIndexId(), indexId); + assertEquals(shardInfo.getShardCount(), shardCount); + } + + public void testWriteAndReadShardPaths() throws Exception { + BlobStoreRepository repository = setupRepo(); + RepositoryData repoData = generateRandomRepoData(); + SnapshotId snapshotId = repoData.getSnapshotIds().iterator().next(); + + Set writtenShardPaths = new HashSet<>(); + for (IndexId indexId : repoData.getIndices().values()) { + if (indexId.getShardPathType() != IndexId.DEFAULT_SHARD_PATH_TYPE) { + String shardPathBlobName = repository.writeIndexShardPaths(indexId, snapshotId, indexId.getShardPathType()); + writtenShardPaths.add(shardPathBlobName); + } + } + + // Read shard paths and verify + Map shardPathBlobs = repository.snapshotShardPathBlobContainer().listBlobs(); + + // Create sets for comparison + Set expectedPaths = new HashSet<>(writtenShardPaths); + Set actualPaths = new HashSet<>(shardPathBlobs.keySet()); + + // Remove known extra files - "extra0" file is added by the ExtrasFS, which is part of Lucene's test framework + actualPaths.remove("extra0"); + + // Check if all expected paths are present in the actual paths + assertTrue("All expected paths should be present", actualPaths.containsAll(expectedPaths)); + + // Check if there are any unexpected additional paths + Set unexpectedPaths = new HashSet<>(actualPaths); + unexpectedPaths.removeAll(expectedPaths); + if (!unexpectedPaths.isEmpty()) { + logger.warn("Unexpected additional paths found: " + unexpectedPaths); + } + + assertEquals("Expected and actual paths should match after removing known extra files", expectedPaths, actualPaths); + + for (String shardPathBlobName : expectedPaths) { + SnapshotShardPaths.ShardInfo shardInfo = SnapshotShardPaths.parseShardPath(shardPathBlobName); + IndexId indexId = repoData.getIndices().get(shardInfo.getIndexId().getName()); + assertNotNull("IndexId should not be null", indexId); + assertEquals("Index ID should match", shardInfo.getIndexId().getId(), indexId.getId()); + assertEquals("Shard path type should match", shardInfo.getIndexId().getShardPathType(), indexId.getShardPathType()); + String[] parts = shardPathBlobName.split("\\" + SnapshotShardPaths.DELIMITER); + assertEquals( + "Path hash algorithm should be FNV_1A_COMPOSITE_1", + RemoteStoreEnums.PathHashAlgorithm.FNV_1A_COMPOSITE_1, + RemoteStoreEnums.PathHashAlgorithm.fromCode(Integer.parseInt(parts[4])) + ); + } + } + + public void testCleanupStaleIndices() throws Exception { + // Mock the BlobStoreRepository + BlobStoreRepository repository = mock(BlobStoreRepository.class); + + // Mock BlobContainer for stale index + BlobContainer staleIndexContainer = mock(BlobContainer.class); + when(staleIndexContainer.delete()).thenReturn(new DeleteResult(1, 100L)); + + // Mock BlobContainer for current index + BlobContainer currentIndexContainer = 
mock(BlobContainer.class); + + Map foundIndices = new HashMap<>(); + foundIndices.put("stale-index", staleIndexContainer); + foundIndices.put("current-index", currentIndexContainer); + + List snapshotIds = new ArrayList<>(); + snapshotIds.add(new SnapshotId("snap1", UUIDs.randomBase64UUID())); + snapshotIds.add(new SnapshotId("snap2", UUIDs.randomBase64UUID())); + + Set survivingIndexIds = new HashSet<>(); + survivingIndexIds.add("current-index"); + + RepositoryData repositoryData = generateRandomRepoData(); + + // Create a mock RemoteStoreLockManagerFactory + RemoteStoreLockManagerFactory mockRemoteStoreLockManagerFactory = mock(RemoteStoreLockManagerFactory.class); + RemoteSegmentStoreDirectoryFactory mockRemoteSegmentStoreDirectoryFactory = mock(RemoteSegmentStoreDirectoryFactory.class); + RemoteStoreLockManager mockLockManager = mock(RemoteStoreLockManager.class); + when(mockRemoteStoreLockManagerFactory.newLockManager(anyString(), anyString(), anyString(), any())).thenReturn(mockLockManager); + + // Create mock snapshot shard paths + Map mockSnapshotShardPaths = new HashMap<>(); + String validShardPath = "stale-index-id#stale-index#1#0#1"; + mockSnapshotShardPaths.put(validShardPath, mock(BlobMetadata.class)); + + // Mock snapshotShardPathBlobContainer + BlobContainer mockSnapshotShardPathBlobContainer = mock(BlobContainer.class); + when(mockSnapshotShardPathBlobContainer.delete()).thenReturn(new DeleteResult(1, 50L)); + when(repository.snapshotShardPathBlobContainer()).thenReturn(mockSnapshotShardPathBlobContainer); + + // Mock the cleanupStaleIndices method to call our test implementation + doAnswer(invocation -> { + Map indices = invocation.getArgument(1); + Set surviving = invocation.getArgument(2); + GroupedActionListener listener = invocation.getArgument(6); + + // Simulate the cleanup process + DeleteResult result = DeleteResult.ZERO; + for (Map.Entry entry : indices.entrySet()) { + if (!surviving.contains(entry.getKey())) { + result = result.add(entry.getValue().delete()); + } + } + result = result.add(mockSnapshotShardPathBlobContainer.delete()); + + listener.onResponse(result); + return null; + }).when(repository).cleanupStaleIndices(any(), any(), any(), any(), any(), any(), any(), any(), anyMap()); + + AtomicReference> resultReference = new AtomicReference<>(); + CountDownLatch latch = new CountDownLatch(1); + + GroupedActionListener listener = new GroupedActionListener<>(ActionListener.wrap(deleteResults -> { + resultReference.set(deleteResults); + latch.countDown(); + }, e -> { + logger.error("Error in cleanupStaleIndices", e); + latch.countDown(); + }), 1); + + // Call the method we're testing + repository.cleanupStaleIndices( + snapshotIds, + foundIndices, + survivingIndexIds, + mockRemoteStoreLockManagerFactory, + null, + repositoryData, + listener, + mockSnapshotShardPaths, + Collections.emptyMap() + ); + + assertTrue("Cleanup did not complete within the expected time", latch.await(30, TimeUnit.SECONDS)); + + Collection results = resultReference.get(); + assertNotNull("DeleteResult collection should not be null", results); + assertFalse("DeleteResult collection should not be empty", results.isEmpty()); + + DeleteResult combinedResult = results.stream().reduce(DeleteResult.ZERO, DeleteResult::add); + + assertTrue("Bytes deleted should be greater than 0", combinedResult.bytesDeleted() > 0); + assertTrue("Blobs deleted should be greater than 0", combinedResult.blobsDeleted() > 0); + + // Verify that the stale index was processed for deletion + verify(staleIndexContainer, 
times(1)).delete(); + + // Verify that the current index was not processed for deletion + verify(currentIndexContainer, never()).delete(); + + // Verify that snapshot shard paths were considered in the cleanup process + verify(mockSnapshotShardPathBlobContainer, times(1)).delete(); + + // Verify the total number of bytes and blobs deleted + assertEquals("Total bytes deleted should be 150", 150L, combinedResult.bytesDeleted()); + assertEquals("Total blobs deleted should be 2", 2, combinedResult.blobsDeleted()); + } + + public void testGetMetadata() { + BlobStoreRepository repository = setupRepo(); + RepositoryMetadata metadata = repository.getMetadata(); + assertNotNull(metadata); + assertEquals(metadata.name(), "test-repo"); + assertEquals(metadata.type(), REPO_TYPE); + repository.close(); + } + + public void testGetNamedXContentRegistry() { + BlobStoreRepository repository = setupRepo(); + NamedXContentRegistry registry = repository.getNamedXContentRegistry(); + assertNotNull(registry); + repository.close(); + } + + public void testGetCompressor() { + BlobStoreRepository repository = setupRepo(); + Compressor compressor = repository.getCompressor(); + assertNotNull(compressor); + repository.close(); + } + + public void testGetStats() { + BlobStoreRepository repository = setupRepo(); + RepositoryStats stats = repository.stats(); + assertNotNull(stats); + repository.close(); + } + + public void testGetSnapshotThrottleTimeInNanos() { + BlobStoreRepository repository = setupRepo(); + long throttleTime = repository.getSnapshotThrottleTimeInNanos(); + assertTrue(throttleTime >= 0); + repository.close(); + } + + public void testGetRestoreThrottleTimeInNanos() { + BlobStoreRepository repository = setupRepo(); + long throttleTime = repository.getRestoreThrottleTimeInNanos(); + assertTrue(throttleTime >= 0); + repository.close(); + } + + public void testGetRemoteUploadThrottleTimeInNanos() { + BlobStoreRepository repository = setupRepo(); + long throttleTime = repository.getRemoteUploadThrottleTimeInNanos(); + assertTrue(throttleTime >= 0); + repository.close(); + } + + public void testGetLowPriorityRemoteUploadThrottleTimeInNanos() { + BlobStoreRepository repository = setupRepo(); + long throttleTime = repository.getLowPriorityRemoteUploadThrottleTimeInNanos(); + assertTrue(throttleTime >= 0); + repository.close(); + } + + public void testGetRemoteDownloadThrottleTimeInNanos() { + BlobStoreRepository repository = setupRepo(); + long throttleTime = repository.getRemoteDownloadThrottleTimeInNanos(); + assertTrue(throttleTime >= 0); + repository.close(); + } + + public void testIsReadOnly() { + BlobStoreRepository repository = setupRepo(); + assertFalse(repository.isReadOnly()); + repository.close(); + } + + public void testIsSystemRepository() { + BlobStoreRepository repository = setupRepo(); + assertFalse(repository.isSystemRepository()); + repository.close(); + } + + public void testGetRestrictedSystemRepositorySettings() { + BlobStoreRepository repository = setupRepo(); + List> settings = repository.getRestrictedSystemRepositorySettings(); + assertNotNull(settings); + assertTrue(settings.contains(BlobStoreRepository.SYSTEM_REPOSITORY_SETTING)); + assertTrue(settings.contains(BlobStoreRepository.READONLY_SETTING)); + assertTrue(settings.contains(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY)); + repository.close(); + } } diff --git a/server/src/test/java/org/opensearch/repositories/blobstore/ChecksumWritableBlobStoreFormatTests.java 
b/server/src/test/java/org/opensearch/repositories/blobstore/ChecksumWritableBlobStoreFormatTests.java index 536df880b2597..c4e53c1eea138 100644 --- a/server/src/test/java/org/opensearch/repositories/blobstore/ChecksumWritableBlobStoreFormatTests.java +++ b/server/src/test/java/org/opensearch/repositories/blobstore/ChecksumWritableBlobStoreFormatTests.java @@ -35,7 +35,12 @@ public class ChecksumWritableBlobStoreFormatTests extends OpenSearchTestCase { public void testSerDe() throws IOException { IndexMetadata indexMetadata = getIndexMetadata(); - BytesReference bytesReference = clusterBlocksFormat.serialize(indexMetadata, TEST_BLOB_FILE_NAME, CompressorRegistry.none()); + BytesReference bytesReference = clusterBlocksFormat.serialize( + (out, metadata) -> metadata.writeTo(out), + indexMetadata, + TEST_BLOB_FILE_NAME, + CompressorRegistry.none() + ); IndexMetadata readIndexMetadata = clusterBlocksFormat.deserialize(TEST_BLOB_FILE_NAME, bytesReference); assertThat(readIndexMetadata, is(indexMetadata)); } @@ -43,6 +48,7 @@ public void testSerDe() throws IOException { public void testSerDeForCompressed() throws IOException { IndexMetadata indexMetadata = getIndexMetadata(); BytesReference bytesReference = clusterBlocksFormat.serialize( + (out, metadata) -> metadata.writeTo(out), indexMetadata, TEST_BLOB_FILE_NAME, CompressorRegistry.getCompressor(DeflateCompressor.NAME) diff --git a/server/src/test/java/org/opensearch/search/DefaultSearchContextTests.java b/server/src/test/java/org/opensearch/search/DefaultSearchContextTests.java index a1a808c9faa9b..55b30d5068daa 100644 --- a/server/src/test/java/org/opensearch/search/DefaultSearchContextTests.java +++ b/server/src/test/java/org/opensearch/search/DefaultSearchContextTests.java @@ -51,6 +51,7 @@ import org.opensearch.common.SetOnce; import org.opensearch.common.UUIDs; import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.BigArrays; @@ -67,9 +68,16 @@ import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.query.AbstractQueryBuilder; +import org.opensearch.index.query.BoolQueryBuilder; import org.opensearch.index.query.ParsedQuery; import org.opensearch.index.query.QueryShardContext; import org.opensearch.index.shard.IndexShard; +import org.opensearch.search.aggregations.AggregatorFactories; +import org.opensearch.search.aggregations.MultiBucketConsumerService; +import org.opensearch.search.aggregations.SearchContextAggregations; +import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.search.deciders.ConcurrentSearchDecision; +import org.opensearch.search.deciders.ConcurrentSearchRequestDecider; import org.opensearch.search.internal.AliasFilter; import org.opensearch.search.internal.LegacyReaderContext; import org.opensearch.search.internal.PitReaderContext; @@ -84,8 +92,12 @@ import org.opensearch.threadpool.ThreadPool; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Optional; import java.util.UUID; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -158,9 +170,8 @@ public void testPreProcess() throws Exception { when(indexCache.query()).thenReturn(queryCache); 
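The ChecksumWritableBlobStoreFormatTests hunks above change serialize() to take the writer as an explicit first argument instead of relying on the stored object's own writeTo being applied implicitly. A minimal sketch of that round trip, reusing only names that appear in testSerDe above and annotating each argument; this is the call pattern from the test, not a standalone program:

    // Caller now supplies the writer lambda explicitly; the remaining arguments are unchanged.
    BytesReference bytes = clusterBlocksFormat.serialize(
        (out, metadata) -> metadata.writeTo(out),   // explicit writer for the value being stored
        indexMetadata,                              // the value itself
        TEST_BLOB_FILE_NAME,                        // blob file name, passed again on read
        CompressorRegistry.none()                   // or CompressorRegistry.getCompressor(DeflateCompressor.NAME) for the compressed variant
    );
    IndexMetadata roundTripped = clusterBlocksFormat.deserialize(TEST_BLOB_FILE_NAME, bytes);
    assertThat(roundTripped, is(indexMetadata));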
when(indexService.cache()).thenReturn(indexCache); QueryShardContext queryShardContext = mock(QueryShardContext.class); - when(indexService.newQueryShardContext(eq(shardId.id()), any(), any(), nullable(String.class), anyBoolean())).thenReturn( - queryShardContext - ); + when(indexService.newQueryShardContext(eq(shardId.id()), any(), any(), nullable(String.class), anyBoolean(), anyBoolean())) + .thenReturn(queryShardContext); MapperService mapperService = mock(MapperService.class); when(mapperService.hasNested()).thenReturn(randomBoolean()); when(indexService.mapperService()).thenReturn(mapperService); @@ -220,7 +231,8 @@ protected Engine.Searcher acquireSearcherInternal(String source) { Version.CURRENT, false, executor, - null + null, + Collections.emptyList() ); contextWithoutScroll.from(300); contextWithoutScroll.close(); @@ -263,7 +275,8 @@ protected Engine.Searcher acquireSearcherInternal(String source) { Version.CURRENT, false, executor, - null + null, + Collections.emptyList() ); context1.from(300); exception = expectThrows(IllegalArgumentException.class, () -> context1.preProcess(false)); @@ -334,7 +347,8 @@ protected Engine.Searcher acquireSearcherInternal(String source) { Version.CURRENT, false, executor, - null + null, + Collections.emptyList() ); SliceBuilder sliceBuilder = mock(SliceBuilder.class); @@ -374,7 +388,8 @@ protected Engine.Searcher acquireSearcherInternal(String source) { Version.CURRENT, false, executor, - null + null, + Collections.emptyList() ); ParsedQuery parsedQuery = ParsedQuery.parsedMatchAllQuery(); context3.sliceBuilder(null).parsedQuery(parsedQuery).preProcess(false); @@ -410,7 +425,8 @@ protected Engine.Searcher acquireSearcherInternal(String source) { Version.CURRENT, false, executor, - null + null, + Collections.emptyList() ); context4.sliceBuilder(new SliceBuilder(1, 2)).parsedQuery(parsedQuery).preProcess(false); Query query1 = context4.query(); @@ -441,7 +457,8 @@ protected Engine.Searcher acquireSearcherInternal(String source) { Version.CURRENT, false, executor, - null + null, + Collections.emptyList() ); int numSlicesForPit = maxSlicesPerPit + randomIntBetween(1, 100); when(sliceBuilder.getMax()).thenReturn(numSlicesForPit); @@ -485,9 +502,8 @@ public void testClearQueryCancellationsOnClose() throws IOException { IndexService indexService = mock(IndexService.class); QueryShardContext queryShardContext = mock(QueryShardContext.class); - when(indexService.newQueryShardContext(eq(shardId.id()), any(), any(), nullable(String.class), anyBoolean())).thenReturn( - queryShardContext - ); + when(indexService.newQueryShardContext(eq(shardId.id()), any(), any(), nullable(String.class), anyBoolean(), anyBoolean())) + .thenReturn(queryShardContext); Settings settings = Settings.builder() .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) @@ -547,7 +563,8 @@ protected Engine.Searcher acquireSearcherInternal(String source) { Version.CURRENT, false, executor, - null + null, + Collections.emptyList() ); assertThat(context.searcher().hasCancellations(), is(false)); context.searcher().addQueryCancellation(() -> {}); @@ -581,9 +598,8 @@ public void testSearchPathEvaluation() throws Exception { IndexService indexService = mock(IndexService.class); QueryShardContext queryShardContext = mock(QueryShardContext.class); - when(indexService.newQueryShardContext(eq(shardId.id()), any(), any(), nullable(String.class), anyBoolean())).thenReturn( - queryShardContext - ); + 
when(indexService.newQueryShardContext(eq(shardId.id()), any(), any(), nullable(String.class), anyBoolean(), anyBoolean())) + .thenReturn(queryShardContext); IndexMetadata indexMetadata = IndexMetadata.builder("index").settings(settings).build(); IndexSettings indexSettings = new IndexSettings(indexMetadata, Settings.EMPTY); @@ -643,6 +659,7 @@ protected Engine.Searcher acquireSearcherInternal(String source) { final ClusterService clusterService = mock(ClusterService.class); final ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); clusterSettings.registerSetting(SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING); + // clusterSettings.registerSetting(SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE); clusterSettings.applySettings( Settings.builder().put(SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), true).build() ); @@ -651,7 +668,7 @@ protected Engine.Searcher acquireSearcherInternal(String source) { readerContext, shardSearchRequest, target, - null, + clusterService, bigArrays, null, null, @@ -660,7 +677,8 @@ protected Engine.Searcher acquireSearcherInternal(String source) { Version.CURRENT, false, executor, - null + null, + Collections.emptyList() ); // Case1: if sort is on timestamp field, non-concurrent path is used @@ -685,7 +703,8 @@ protected Engine.Searcher acquireSearcherInternal(String source) { Version.CURRENT, false, executor, - null + null, + Collections.emptyList() ); context.sort( new SortAndFormats(new Sort(new SortField("test2", SortField.Type.INT)), new DocValueFormat[] { DocValueFormat.RAW }) @@ -712,7 +731,8 @@ protected Engine.Searcher acquireSearcherInternal(String source) { Version.CURRENT, false, executor, - null + null, + Collections.emptyList() ); context.evaluateRequestShouldUseConcurrentSearch(); if (executor == null) { @@ -744,7 +764,8 @@ protected Engine.Searcher acquireSearcherInternal(String source) { Version.CURRENT, false, executor, - null + null, + Collections.emptyList() ); context.evaluateRequestShouldUseConcurrentSearch(); assertFalse(context.shouldUseConcurrentSearch()); @@ -772,12 +793,378 @@ protected Engine.Searcher acquireSearcherInternal(String source) { Version.CURRENT, false, executor, - null + null, + Collections.emptyList() ); context.evaluateRequestShouldUseConcurrentSearch(); assertFalse(context.shouldUseConcurrentSearch()); assertThrows(SetOnce.AlreadySetException.class, context::evaluateRequestShouldUseConcurrentSearch); + if (clusterService.getClusterSettings().get(SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING)) { + assertSettingDeprecationsAndWarnings(new Setting[] { SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING }); + } + + // shutdown the threadpool + threadPool.shutdown(); + } + } + + public void testSearchPathEvaluationWithConcurrentSearchModeAsAuto() throws Exception { + ShardSearchRequest shardSearchRequest = mock(ShardSearchRequest.class); + when(shardSearchRequest.searchType()).thenReturn(SearchType.DEFAULT); + ShardId shardId = new ShardId("index", UUID.randomUUID().toString(), 1); + when(shardSearchRequest.shardId()).thenReturn(shardId); + + ThreadPool threadPool = new TestThreadPool(this.getClass().getName()); + IndexShard indexShard = mock(IndexShard.class); + QueryCachingPolicy queryCachingPolicy = mock(QueryCachingPolicy.class); + when(indexShard.getQueryCachingPolicy()).thenReturn(queryCachingPolicy); + when(indexShard.getThreadPool()).thenReturn(threadPool); + + Settings settings = Settings.builder() + 
.put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 2) + .build(); + + IndexService indexService = mock(IndexService.class); + QueryShardContext queryShardContext = mock(QueryShardContext.class); + when(indexService.newQueryShardContext(eq(shardId.id()), any(), any(), nullable(String.class), anyBoolean(), anyBoolean())) + .thenReturn(queryShardContext); + + IndexMetadata indexMetadata = IndexMetadata.builder("index").settings(settings).build(); + IndexSettings indexSettings = new IndexSettings(indexMetadata, Settings.EMPTY); + when(indexService.getIndexSettings()).thenReturn(indexSettings); + when(indexShard.indexSettings()).thenReturn(indexSettings); + + BigArrays bigArrays = new MockBigArrays(new MockPageCacheRecycler(Settings.EMPTY), new NoneCircuitBreakerService()); + + IndexShard systemIndexShard = mock(IndexShard.class); + when(systemIndexShard.getQueryCachingPolicy()).thenReturn(queryCachingPolicy); + when(systemIndexShard.getThreadPool()).thenReturn(threadPool); + when(systemIndexShard.isSystem()).thenReturn(true); + + IndexShard throttledIndexShard = mock(IndexShard.class); + when(throttledIndexShard.getQueryCachingPolicy()).thenReturn(queryCachingPolicy); + when(throttledIndexShard.getThreadPool()).thenReturn(threadPool); + IndexSettings throttledIndexSettings = new IndexSettings( + indexMetadata, + Settings.builder().put(INDEX_SEARCH_THROTTLED.getKey(), true).build() + ); + when(throttledIndexShard.indexSettings()).thenReturn(throttledIndexSettings); + + try (Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random(), dir)) { + + final Supplier searcherSupplier = () -> new Engine.SearcherSupplier(Function.identity()) { + @Override + protected void doClose() {} + + @Override + protected Engine.Searcher acquireSearcherInternal(String source) { + try { + IndexReader reader = w.getReader(); + return new Engine.Searcher( + "test", + reader, + IndexSearcher.getDefaultSimilarity(), + IndexSearcher.getDefaultQueryCache(), + IndexSearcher.getDefaultQueryCachingPolicy(), + reader + ); + } catch (IOException exc) { + throw new AssertionError(exc); + } + } + }; + + SearchShardTarget target = new SearchShardTarget("node", shardId, null, OriginalIndices.NONE); + ReaderContext readerContext = new ReaderContext( + newContextId(), + indexService, + indexShard, + searcherSupplier.get(), + randomNonNegativeLong(), + false + ); + + final ClusterService clusterService = mock(ClusterService.class); + final ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + clusterSettings.registerSetting(SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING); + clusterSettings.registerSetting(SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE); + clusterSettings.applySettings( + Settings.builder().put(SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), "auto").build() + ); + when(clusterService.getClusterSettings()).thenReturn(clusterSettings); + when(clusterService.getSettings()).thenReturn(settings); + + DefaultSearchContext context = new DefaultSearchContext( + readerContext, + shardSearchRequest, + target, + clusterService, + bigArrays, + null, + null, + null, + false, + Version.CURRENT, + false, + executor, + null, + Collections.emptyList() + ); + + // Case1: if there is no agg in the query, non-concurrent path is used + context.evaluateRequestShouldUseConcurrentSearch(); + 
assertFalse(context.shouldUseConcurrentSearch()); + assertThrows(SetOnce.AlreadySetException.class, context::evaluateRequestShouldUseConcurrentSearch); + + // Case2: if un supported agg present, non-concurrent path is used + SearchContextAggregations mockAggregations = mock(SearchContextAggregations.class); + when(mockAggregations.factories()).thenReturn(mock(AggregatorFactories.class)); + when(mockAggregations.factories().allFactoriesSupportConcurrentSearch()).thenReturn(false); + when(mockAggregations.multiBucketConsumer()).thenReturn(mock(MultiBucketConsumerService.MultiBucketConsumer.class)); + + context = new DefaultSearchContext( + readerContext, + shardSearchRequest, + target, + clusterService, + bigArrays, + null, + null, + null, + false, + Version.CURRENT, + false, + executor, + null, + Collections.emptyList() + ); + + // add un-supported agg operation + context.aggregations(mockAggregations); + context.evaluateRequestShouldUseConcurrentSearch(); + if (executor == null) { + assertFalse(context.shouldUseConcurrentSearch()); + } else { + assertFalse(context.shouldUseConcurrentSearch()); + } + assertThrows(SetOnce.AlreadySetException.class, context::evaluateRequestShouldUseConcurrentSearch); + + // Case3: if supported agg present, concurrent path is used + + // set agg operation to be supported + when(mockAggregations.factories().allFactoriesSupportConcurrentSearch()).thenReturn(true); + + context = new DefaultSearchContext( + readerContext, + shardSearchRequest, + target, + clusterService, + bigArrays, + null, + null, + null, + false, + Version.CURRENT, + false, + executor, + null, + Collections.emptyList() + ); + // create a supported agg operation + context.aggregations(mockAggregations); + context.evaluateRequestShouldUseConcurrentSearch(); + if (executor == null) { + assertFalse(context.shouldUseConcurrentSearch()); + } else { + assertTrue(context.shouldUseConcurrentSearch()); + } + assertThrows(SetOnce.AlreadySetException.class, context::evaluateRequestShouldUseConcurrentSearch); + + // Case4: multiple deciders are registered and all of them opt out of decision-making + // with supported agg query so concurrent path is used + + ConcurrentSearchRequestDecider decider1 = mock(ConcurrentSearchRequestDecider.class); + + ConcurrentSearchRequestDecider decider2 = mock(ConcurrentSearchRequestDecider.class); + + ConcurrentSearchRequestDecider.Factory factory1 = new ConcurrentSearchRequestDecider.Factory() { + @Override + public Optional create(IndexSettings indexSettings) { + return Optional.ofNullable(decider1); + } + }; + + ConcurrentSearchRequestDecider.Factory factory2 = new ConcurrentSearchRequestDecider.Factory() { + @Override + public Optional create(IndexSettings indexSettings) { + return Optional.ofNullable(decider2); + } + }; + ConcurrentSearchRequestDecider.Factory factory3 = new ConcurrentSearchRequestDecider.Factory() { + @Override + public Optional create(IndexSettings indexSettings) { + return Optional.empty(); + } + }; + + List concurrentSearchRequestDeciders = new ArrayList<>(); + concurrentSearchRequestDeciders.add(factory1); + concurrentSearchRequestDeciders.add(factory2); + concurrentSearchRequestDeciders.add(factory3); + + context = new DefaultSearchContext( + readerContext, + shardSearchRequest, + target, + clusterService, + bigArrays, + null, + null, + null, + false, + Version.CURRENT, + false, + executor, + null, + concurrentSearchRequestDeciders + ); + // create a supported agg operation + context.aggregations(mockAggregations); + 
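The decider cases around this point (Cases 4 through 7) pin down how plugin decisions combine with aggregation support when the cluster mode is auto. A rough, self-contained summary of the outcomes those assertions expect, assuming a concurrent-search executor is available (the executor == null branches always expect false); it is a sketch of the asserted behaviour, not the actual DefaultSearchContext logic, and a lone YES decision is not covered by these cases, so it is left out:

    import java.util.Collection;

    import org.opensearch.search.deciders.ConcurrentSearchDecision;

    final class ConcurrentSearchOutcomeSketch {
        // Any explicit NO from a decider forces the non-concurrent path (Cases 5 and 6).
        // If every decider is NO_OP or offers no decision, the outcome falls back to whether
        // all aggregation factories support concurrent search (Cases 4 and 7).
        static boolean expectedOutcome(Collection<ConcurrentSearchDecision.DecisionStatus> deciderStatuses, boolean aggsSupportConcurrentSearch) {
            if (deciderStatuses.contains(ConcurrentSearchDecision.DecisionStatus.NO)) {
                return false;
            }
            return aggsSupportConcurrentSearch;
        }
    }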
context.evaluateRequestShouldUseConcurrentSearch(); + if (executor == null) { + assertFalse(context.shouldUseConcurrentSearch()); + } else { + assertTrue(context.shouldUseConcurrentSearch()); + } + assertThrows(SetOnce.AlreadySetException.class, context::evaluateRequestShouldUseConcurrentSearch); + + // Case5: multiple deciders are registered and one of them returns ConcurrentSearchDecision.DecisionStatus.NO + // use non-concurrent path even if query contains supported agg + when(decider1.getConcurrentSearchDecision()).thenReturn( + new ConcurrentSearchDecision(ConcurrentSearchDecision.DecisionStatus.NO, "disable concurrent search") + ); + + // create a source so that query tree is parsed by visitor + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + BoolQueryBuilder queryBuilder = new BoolQueryBuilder(); + sourceBuilder.query(queryBuilder); + when(shardSearchRequest.source()).thenReturn(sourceBuilder); + + context = new DefaultSearchContext( + readerContext, + shardSearchRequest, + target, + clusterService, + bigArrays, + null, + null, + null, + false, + Version.CURRENT, + false, + executor, + null, + concurrentSearchRequestDeciders + ); + + // create a supported agg operation + context.aggregations(mockAggregations); + context.evaluateRequestShouldUseConcurrentSearch(); + if (executor == null) { + assertFalse(context.shouldUseConcurrentSearch()); + } else { + assertFalse(context.shouldUseConcurrentSearch()); + } + assertThrows(SetOnce.AlreadySetException.class, context::evaluateRequestShouldUseConcurrentSearch); + + // Case6: multiple deciders are registered and first decider returns ConcurrentSearchDecision.DecisionStatus.YES + // while second decider returns ConcurrentSearchDecision.DecisionStatus.NO + // use non-concurrent path even if query contains supported agg + + when(decider1.getConcurrentSearchDecision()).thenReturn( + new ConcurrentSearchDecision(ConcurrentSearchDecision.DecisionStatus.YES, "enable concurrent search") + ); + + when(decider2.getConcurrentSearchDecision()).thenReturn( + new ConcurrentSearchDecision(ConcurrentSearchDecision.DecisionStatus.NO, "disable concurrent search") + ); + + // create a source so that query tree is parsed by visitor + when(shardSearchRequest.source()).thenReturn(sourceBuilder); + + context = new DefaultSearchContext( + readerContext, + shardSearchRequest, + target, + clusterService, + bigArrays, + null, + null, + null, + false, + Version.CURRENT, + false, + executor, + null, + concurrentSearchRequestDeciders + ); + + // create a supported agg operation + context.aggregations(mockAggregations); + context.evaluateRequestShouldUseConcurrentSearch(); + if (executor == null) { + assertFalse(context.shouldUseConcurrentSearch()); + } else { + assertFalse(context.shouldUseConcurrentSearch()); + } + assertThrows(SetOnce.AlreadySetException.class, context::evaluateRequestShouldUseConcurrentSearch); + + // Case7: multiple deciders are registered and all return ConcurrentSearchDecision.DecisionStatus.NO_OP + // but un-supported agg query is present, use non-concurrent path + + when(decider1.getConcurrentSearchDecision()).thenReturn( + new ConcurrentSearchDecision(ConcurrentSearchDecision.DecisionStatus.NO_OP, "noop") + ); + + when(decider2.getConcurrentSearchDecision()).thenReturn( + new ConcurrentSearchDecision(ConcurrentSearchDecision.DecisionStatus.NO_OP, "noop") + ); + + when(mockAggregations.factories().allFactoriesSupportConcurrentSearch()).thenReturn(false); + + // create a source so that query tree is parsed by visitor + 
when(shardSearchRequest.source()).thenReturn(sourceBuilder); + + context = new DefaultSearchContext( + readerContext, + shardSearchRequest, + target, + clusterService, + bigArrays, + null, + null, + null, + false, + Version.CURRENT, + false, + executor, + null, + concurrentSearchRequestDeciders + ); + + // create a supported agg operation + context.aggregations(mockAggregations); + context.evaluateRequestShouldUseConcurrentSearch(); + if (executor == null) { + assertFalse(context.shouldUseConcurrentSearch()); + } else { + assertFalse(context.shouldUseConcurrentSearch()); + } + assertThrows(SetOnce.AlreadySetException.class, context::evaluateRequestShouldUseConcurrentSearch); + // shutdown the threadpool threadPool.shutdown(); } diff --git a/server/src/test/java/org/opensearch/search/SearchModuleTests.java b/server/src/test/java/org/opensearch/search/SearchModuleTests.java index 01b8d6d8cdd72..9e8e7afe332f1 100644 --- a/server/src/test/java/org/opensearch/search/SearchModuleTests.java +++ b/server/src/test/java/org/opensearch/search/SearchModuleTests.java @@ -41,6 +41,7 @@ import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.index.IndexSettings; import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryRewriteContext; import org.opensearch.index.query.QueryShardContext; @@ -69,6 +70,7 @@ import org.opensearch.search.aggregations.support.ValuesSourceConfig; import org.opensearch.search.aggregations.support.ValuesSourceRegistry; import org.opensearch.search.aggregations.support.ValuesSourceType; +import org.opensearch.search.deciders.ConcurrentSearchRequestDecider; import org.opensearch.search.fetch.FetchSubPhase; import org.opensearch.search.fetch.subphase.ExplainPhase; import org.opensearch.search.fetch.subphase.highlight.CustomHighlighter; @@ -500,6 +502,67 @@ public Optional getIndexSearcherExecutorProvider() { expectThrows(IllegalStateException.class, () -> new SearchModule(Settings.EMPTY, searchPlugins)); } + public void testRegisterConcurrentSearchRequestDecidersNoExternalPlugins() { + SearchModule searchModule = new SearchModule(Settings.EMPTY, Collections.emptyList()); + assertEquals(searchModule.getConcurrentSearchRequestDeciderFactories().size(), 0); + } + + public void testRegisterConcurrentSearchRequestDecidersExternalPluginsWithNoDeciders() { + SearchPlugin plugin1 = new SearchPlugin() { + @Override + public Optional getIndexSearcherExecutorProvider() { + return Optional.of(mock(ExecutorServiceProvider.class)); + } + }; + SearchPlugin plugin2 = new SearchPlugin() { + }; + + List searchPlugins = new ArrayList<>(); + searchPlugins.add(plugin1); + searchPlugins.add(plugin2); + SearchModule searchModule = new SearchModule(Settings.EMPTY, searchPlugins); + + assertEquals(searchModule.getConcurrentSearchRequestDeciderFactories().size(), 0); + } + + public void testRegisterConcurrentSearchRequestDecidersExternalPluginsWithDeciders() { + SearchPlugin pluginDecider1 = new SearchPlugin() { + @Override + public Optional getIndexSearcherExecutorProvider() { + return Optional.of(mock(ExecutorServiceProvider.class)); + } + + @Override + public Optional getConcurrentSearchRequestDeciderFactory() { + return Optional.of(new ConcurrentSearchRequestDecider.Factory() { + @Override + public Optional create(IndexSettings indexSettings) { + return Optional.of(mock(ConcurrentSearchRequestDecider.class)); + } + }); + } + }; + + SearchPlugin 
pluginDecider2 = new SearchPlugin() { + @Override + public Optional getConcurrentSearchRequestDeciderFactory() { + return Optional.of(new ConcurrentSearchRequestDecider.Factory() { + @Override + public Optional create(IndexSettings indexSettings) { + return Optional.of(mock(ConcurrentSearchRequestDecider.class)); + } + }); + } + }; + + List searchPlugins = new ArrayList<>(); + searchPlugins.add(pluginDecider1); + searchPlugins.add(pluginDecider2); + + SearchModule searchModule = new SearchModule(Settings.EMPTY, searchPlugins); + assertEquals(searchModule.getConcurrentSearchRequestDeciderFactories().size(), 2); + } + private static final String[] NON_DEPRECATED_QUERIES = new String[] { "bool", "boosting", diff --git a/server/src/test/java/org/opensearch/search/SearchServiceTests.java b/server/src/test/java/org/opensearch/search/SearchServiceTests.java index e8a0f70ee3563..514e99a126267 100644 --- a/server/src/test/java/org/opensearch/search/SearchServiceTests.java +++ b/server/src/test/java/org/opensearch/search/SearchServiceTests.java @@ -56,6 +56,7 @@ import org.opensearch.action.support.WriteRequest; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.UUIDs; +import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; import org.opensearch.common.settings.SettingsException; import org.opensearch.common.unit.TimeValue; @@ -93,8 +94,10 @@ import org.opensearch.script.Script; import org.opensearch.script.ScriptType; import org.opensearch.search.aggregations.AggregationBuilders; +import org.opensearch.search.aggregations.AggregatorFactories; import org.opensearch.search.aggregations.InternalAggregation; import org.opensearch.search.aggregations.MultiBucketConsumerService; +import org.opensearch.search.aggregations.SearchContextAggregations; import org.opensearch.search.aggregations.bucket.global.GlobalAggregationBuilder; import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import org.opensearch.search.aggregations.support.ValueType; @@ -141,6 +144,8 @@ import static org.hamcrest.CoreMatchers.is; import static org.hamcrest.CoreMatchers.notNullValue; import static org.hamcrest.CoreMatchers.startsWith; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; public class SearchServiceTests extends OpenSearchSingleNodeTestCase { @@ -1309,17 +1314,40 @@ public void testCreateSearchContext() throws IOException { * index and cluster settings. */ public void testConcurrentSegmentSearchSearchContext() throws IOException { - Boolean[][] scenarios = { - // cluster setting, index setting, concurrent search enabled? - { null, null, false }, - { null, false, false }, - { null, true, true }, - { true, null, true }, - { true, false, false }, - { true, true, true }, - { false, null, false }, - { false, false, false }, - { false, true, true } }; + Object[][] scenarios = { + // cluster setting, index setting, cluster mode setting, concurrent search enabled?, concurrent search executor null? 
+ { null, null, null, false, true }, + { null, false, null, false, true }, + { null, true, null, true, false }, + { true, null, null, true, false }, + { true, false, null, false, true }, + { true, true, null, true, false }, + { false, null, null, false, true }, + { false, false, null, false, true }, + { false, true, null, true, false }, + + // Adding cases with mode set to "none" + { null, null, "none", false, true }, + { true, true, "none", false, true }, + { false, false, "none", false, true }, + { true, false, "none", false, true }, + { false, true, "none", false, true }, + + // Adding cases with mode set to "all" + { null, null, "all", true, false }, + { true, true, "all", true, false }, + { false, false, "all", true, false }, + { true, false, "all", true, false }, + { false, true, "all", true, false }, + + // Adding cases with mode set to "auto" + // auto mode concurrent search is false since request has no aggregation + // however concurrentSearchExecutor will not be null + { null, null, "auto", false, false }, + { true, true, "auto", false, false }, + { false, false, "auto", false, false }, + { true, false, "auto", false, false }, + { false, true, "auto", false, false } }; String index = randomAlphaOfLengthBetween(5, 10).toLowerCase(Locale.ROOT); IndexService indexService = createIndex(index); @@ -1341,10 +1369,12 @@ public void testConcurrentSegmentSearchSearchContext() throws IOException { Strings.EMPTY_ARRAY ); - for (Boolean[] scenario : scenarios) { - Boolean clusterSetting = scenario[0]; - Boolean indexSetting = scenario[1]; - Boolean concurrentSearchEnabled = scenario[2]; + for (Object[] scenario : scenarios) { + Boolean clusterSetting = (Boolean) scenario[0]; + Boolean indexSetting = (Boolean) scenario[1]; + String mode = (String) scenario[2]; + Boolean concurrentSearchEnabled = (Boolean) scenario[3]; + Boolean concurrentSearchExecutorNull = (Boolean) scenario[4]; if (clusterSetting == null) { client().admin() @@ -1376,6 +1406,21 @@ public void testConcurrentSegmentSearchSearchContext() throws IOException { .get(); } + // update mode + if (mode == null) { + client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().putNull(SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey())) + .get(); + } else { + client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put(SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), mode)) + .get(); + } + try (DefaultSearchContext searchContext = service.createSearchContext(request, new TimeValue(System.currentTimeMillis()))) { assertEquals( clusterSetting, @@ -1396,10 +1441,22 @@ public void testConcurrentSegmentSearchSearchContext() throws IOException { .get() .getSetting(index, IndexSettings.INDEX_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey()) ); + + assertEquals( + mode, + client().admin() + .cluster() + .prepareState() + .get() + .getState() + .getMetadata() + .transientSettings() + .get(SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), null) + ); searchContext.evaluateRequestShouldUseConcurrentSearch(); assertEquals(concurrentSearchEnabled, searchContext.shouldUseConcurrentSearch()); - // verify executor nullability with concurrent search enabled/disabled - if (concurrentSearchEnabled) { + // verify executor nullability + if (!concurrentSearchExecutorNull) { assertNotNull(searchContext.searcher().getExecutor()); } else { assertNull(searchContext.searcher().getExecutor()); @@ -1410,8 +1467,133 @@ public void 
testConcurrentSegmentSearchSearchContext() throws IOException { client().admin() .cluster() .prepareUpdateSettings() - .setTransientSettings(Settings.builder().putNull(SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey())) + .setTransientSettings( + Settings.builder() + .putNull(SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey()) + .putNull(SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey()) + ) .get(); + assertSettingDeprecationsAndWarnings(new Setting[] { IndexSettings.INDEX_CONCURRENT_SEGMENT_SEARCH_SETTING }); + } + + public void testConcurrentSegmentSearchWithRandomizedModeSettings() throws IOException { + + String index = randomAlphaOfLengthBetween(5, 10).toLowerCase(Locale.ROOT); + IndexService indexService = createIndex(index); + final SearchService service = getInstanceFromNode(SearchService.class); + ShardId shardId = new ShardId(indexService.index(), 0); + long nowInMillis = System.currentTimeMillis(); + String clusterAlias = randomBoolean() ? null : randomAlphaOfLengthBetween(3, 10); + SearchRequest searchRequest = new SearchRequest(); + searchRequest.allowPartialSearchResults(randomBoolean()); + ShardSearchRequest request = new ShardSearchRequest( + OriginalIndices.NONE, + searchRequest, + shardId, + indexService.numberOfShards(), + AliasFilter.EMPTY, + 1f, + nowInMillis, + clusterAlias, + Strings.EMPTY_ARRAY + ); + + String[] modeSettings = { "all", "auto", "none", null }; + + // Randomize both index and cluster settings + String clusterMode = randomFrom(modeSettings); + String indexMode = randomFrom(modeSettings); + + // default to false in case mode setting is not set + boolean concurrentSearchEnabled = false; + boolean nullExecutor = true; + + boolean aggregationSupportsConcurrent = randomBoolean(); + + if (indexMode != null) { + concurrentSearchEnabled = !indexMode.equals("none") && aggregationSupportsConcurrent; + nullExecutor = indexMode.equals("none"); + } else if (clusterMode != null) { + concurrentSearchEnabled = !clusterMode.equals("none") && aggregationSupportsConcurrent; + nullExecutor = clusterMode.equals("none"); + } + + // Set the cluster setting for mode + if (clusterMode == null) { + client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().putNull(SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey())) + .get(); + } else { + client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put(SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), clusterMode)) + .get(); + } + + // Set the index setting for mode + if (indexMode == null) { + client().admin() + .indices() + .prepareUpdateSettings(index) + .setSettings(Settings.builder().putNull(IndexSettings.INDEX_CONCURRENT_SEGMENT_SEARCH_MODE.getKey())) + .get(); + } else { + client().admin() + .indices() + .prepareUpdateSettings(index) + .setSettings(Settings.builder().put(IndexSettings.INDEX_CONCURRENT_SEGMENT_SEARCH_MODE.getKey(), indexMode)) + .get(); + } + + try (DefaultSearchContext searchContext = service.createSearchContext(request, new TimeValue(System.currentTimeMillis()))) { + assertEquals( + clusterMode, + client().admin() + .cluster() + .prepareState() + .get() + .getState() + .getMetadata() + .transientSettings() + .get(SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey()) + ); + assertEquals( + indexMode, + client().admin() + .indices() + .prepareGetSettings(index) + .get() + .getSetting(index, IndexSettings.INDEX_CONCURRENT_SEGMENT_SEARCH_MODE.getKey()) + 
); + SearchContextAggregations mockAggregations = mock(SearchContextAggregations.class); + when(mockAggregations.factories()).thenReturn(mock(AggregatorFactories.class)); + when(mockAggregations.factories().allFactoriesSupportConcurrentSearch()).thenReturn(aggregationSupportsConcurrent); + + // set the aggregations for context + searchContext.aggregations(mockAggregations); + + searchContext.evaluateRequestShouldUseConcurrentSearch(); + // check concurrentSearchenabled based on mode and supportedAggregation is computed correctly + assertEquals(concurrentSearchEnabled, searchContext.shouldUseConcurrentSearch()); + + // Verify executor nullability based on mode + if (!nullExecutor) { + assertNotNull(searchContext.searcher().getExecutor()); + } else { + assertNull(searchContext.searcher().getExecutor()); + } + } + + // Cleanup + client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().putNull(SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.getKey())) + .get(); + } /** diff --git a/server/src/test/java/org/opensearch/search/approximate/ApproximateIndexOrDocValuesQueryTests.java b/server/src/test/java/org/opensearch/search/approximate/ApproximateIndexOrDocValuesQueryTests.java new file mode 100644 index 0000000000000..47f87c6abf629 --- /dev/null +++ b/server/src/test/java/org/opensearch/search/approximate/ApproximateIndexOrDocValuesQueryTests.java @@ -0,0 +1,113 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.approximate; + +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.search.ConstantScoreWeight; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Weight; +import org.apache.lucene.store.Directory; +import org.opensearch.search.internal.SearchContext; +import org.opensearch.test.OpenSearchTestCase; +import org.junit.After; +import org.junit.Before; + +import java.io.IOException; + +import static org.apache.lucene.document.LongPoint.pack; + +public class ApproximateIndexOrDocValuesQueryTests extends OpenSearchTestCase { + private Directory dir; + private IndexWriter w; + private DirectoryReader reader; + private IndexSearcher searcher; + + @Before + public void initSearcher() throws IOException { + dir = newDirectory(); + w = new IndexWriter(dir, newIndexWriterConfig()); + } + + @After + public void closeAllTheReaders() throws IOException { + reader.close(); + w.close(); + dir.close(); + } + + public void testApproximateIndexOrDocValuesQueryWeight() throws IOException { + + long l = Long.MIN_VALUE; + long u = Long.MAX_VALUE; + Query indexQuery = LongPoint.newRangeQuery("test-index", l, u); + + ApproximateQuery approximateIndexQuery = new ApproximatePointRangeQuery( + "test-index", + pack(new long[] { l }).bytes, + pack(new long[] { u }).bytes, + new long[] { l }.length + ) { + protected String toString(int dimension, byte[] value) { + return Long.toString(LongPoint.decodeDimension(value, 0)); + } + }; + + Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery("test-index", l, u); + + ApproximateIndexOrDocValuesQuery approximateIndexOrDocValuesQuery = new 
ApproximateIndexOrDocValuesQuery( + indexQuery, + approximateIndexQuery, + dvQuery + ); + + reader = DirectoryReader.open(w); + searcher = newSearcher(reader); + + approximateIndexOrDocValuesQuery.resolvedQuery = indexQuery; + + Weight weight = approximateIndexOrDocValuesQuery.rewrite(searcher).createWeight(searcher, ScoreMode.COMPLETE, 1f); + + assertTrue(weight instanceof ConstantScoreWeight); + + ApproximateQuery approximateIndexQueryCanApproximate = new ApproximatePointRangeQuery( + "test-index", + pack(new long[] { l }).bytes, + pack(new long[] { u }).bytes, + new long[] { l }.length + ) { + protected String toString(int dimension, byte[] value) { + return Long.toString(LongPoint.decodeDimension(value, 0)); + } + + public boolean canApproximate(SearchContext context) { + return true; + } + }; + + ApproximateIndexOrDocValuesQuery approximateIndexOrDocValuesQueryCanApproximate = new ApproximateIndexOrDocValuesQuery( + indexQuery, + approximateIndexQueryCanApproximate, + dvQuery + ); + + approximateIndexOrDocValuesQueryCanApproximate.resolvedQuery = approximateIndexQueryCanApproximate; + + Weight approximateIndexOrDocValuesQueryCanApproximateWeight = approximateIndexOrDocValuesQueryCanApproximate.rewrite(searcher) + .createWeight(searcher, ScoreMode.COMPLETE, 1f); + + // we get ConstantScoreWeight since we're expecting to call ApproximatePointRangeQuery + assertTrue(approximateIndexOrDocValuesQueryCanApproximateWeight instanceof ConstantScoreWeight); + + } +} diff --git a/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java b/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java new file mode 100644 index 0000000000000..9c022aade5dc6 --- /dev/null +++ b/server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java @@ -0,0 +1,353 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.search.approximate; + +import com.carrotsearch.randomizedtesting.generators.RandomNumbers; + +import org.apache.lucene.analysis.core.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TotalHits; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.opensearch.search.internal.SearchContext; +import org.opensearch.search.sort.SortOrder; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; + +import static java.util.Arrays.asList; +import static org.apache.lucene.document.LongPoint.pack; +import static org.mockito.Mockito.mock; + +public class ApproximatePointRangeQueryTests extends OpenSearchTestCase { + + protected static final String DATE_FIELD_NAME = "mapped_date"; + + public void testApproximateRangeEqualsActualRange() throws IOException { + try (Directory directory = newDirectory()) { + try (RandomIndexWriter iw = new RandomIndexWriter(random(), directory, new WhitespaceAnalyzer())) { + int dims = 1; + + long[] scratch = new long[dims]; + for (int i = 0; i < 100; i++) { + int numPoints = RandomNumbers.randomIntBetween(random(), 1, 10); + Document doc = new Document(); + for (int j = 0; j < numPoints; j++) { + for (int v = 0; v < dims; v++) { + scratch[v] = RandomNumbers.randomLongBetween(random(), 0, 100); + } + doc.add(new LongPoint("point", scratch)); + } + iw.addDocument(doc); + } + iw.flush(); + try (IndexReader reader = iw.getReader()) { + try { + long lower = RandomNumbers.randomLongBetween(random(), -100, 200); + long upper = lower + RandomNumbers.randomLongBetween(random(), 0, 100); + Query approximateQuery = new ApproximatePointRangeQuery("point", pack(lower).bytes, pack(upper).bytes, dims) { + protected String toString(int dimension, byte[] value) { + return Long.toString(LongPoint.decodeDimension(value, 0)); + } + }; + Query query = LongPoint.newRangeQuery("point", lower, upper); + IndexSearcher searcher = new IndexSearcher(reader); + TopDocs topDocs = searcher.search(approximateQuery, 10); + TopDocs topDocs1 = searcher.search(query, 10); + assertEquals(topDocs.totalHits, topDocs1.totalHits); + } catch (IOException e) { + throw new RuntimeException(e); + } + + } + } + } + } + + public void testApproximateRangeWithDefaultSize() throws IOException { + try (Directory directory = newDirectory()) { + try (RandomIndexWriter iw = new RandomIndexWriter(random(), directory, new WhitespaceAnalyzer())) { + int dims = 1; + + long[] scratch = new long[dims]; + int numPoints = 1000; + for (int i = 0; i < numPoints; i++) { + Document doc = new Document(); + for (int v = 0; v < dims; v++) { + scratch[v] = i; + } + doc.add(new LongPoint("point", scratch)); + iw.addDocument(doc); + if (i % 15 == 0) iw.flush(); + } + iw.flush(); + try (IndexReader reader = iw.getReader()) { + try { + long lower = 0; + long upper = 1000; + Query approximateQuery = new ApproximatePointRangeQuery("point", pack(lower).bytes, pack(upper).bytes, dims) { + protected String toString(int dimension, byte[] value) { + return Long.toString(LongPoint.decodeDimension(value, 0)); + } + }; + IndexSearcher 
searcher = new IndexSearcher(reader); + TopDocs topDocs = searcher.search(approximateQuery, 10); + assertEquals(topDocs.totalHits, new TotalHits(1000, TotalHits.Relation.EQUAL_TO)); + } catch (IOException e) { + throw new RuntimeException(e); + } + + } + } + } + } + + public void testApproximateRangeWithSizeUnderDefault() throws IOException { + try (Directory directory = newDirectory()) { + try (RandomIndexWriter iw = new RandomIndexWriter(random(), directory, new WhitespaceAnalyzer())) { + int dims = 1; + + long[] scratch = new long[dims]; + int numPoints = 1000; + for (int i = 0; i < numPoints; i++) { + Document doc = new Document(); + for (int v = 0; v < dims; v++) { + scratch[v] = i; + } + doc.add(new LongPoint("point", scratch)); + iw.addDocument(doc); + } + iw.flush(); + iw.forceMerge(1); + try (IndexReader reader = iw.getReader()) { + try { + long lower = 0; + long upper = 45; + Query approximateQuery = new ApproximatePointRangeQuery("point", pack(lower).bytes, pack(upper).bytes, dims, 10) { + protected String toString(int dimension, byte[] value) { + return Long.toString(LongPoint.decodeDimension(value, 0)); + } + }; + IndexSearcher searcher = new IndexSearcher(reader); + TopDocs topDocs = searcher.search(approximateQuery, 10); + assertEquals(topDocs.totalHits, new TotalHits(10, TotalHits.Relation.EQUAL_TO)); + } catch (IOException e) { + throw new RuntimeException(e); + } + + } + } + } + } + + public void testApproximateRangeWithSizeOverDefault() throws IOException { + try (Directory directory = newDirectory()) { + try (RandomIndexWriter iw = new RandomIndexWriter(random(), directory, new WhitespaceAnalyzer())) { + int dims = 1; + + long[] scratch = new long[dims]; + int numPoints = 15000; + for (int i = 0; i < numPoints; i++) { + Document doc = new Document(); + for (int v = 0; v < dims; v++) { + scratch[v] = i; + } + doc.add(new LongPoint("point", scratch)); + iw.addDocument(doc); + } + iw.flush(); + iw.forceMerge(1); + try (IndexReader reader = iw.getReader()) { + try { + long lower = 0; + long upper = 12000; + Query approximateQuery = new ApproximatePointRangeQuery( + "point", + pack(lower).bytes, + pack(upper).bytes, + dims, + 11_000 + ) { + protected String toString(int dimension, byte[] value) { + return Long.toString(LongPoint.decodeDimension(value, 0)); + } + }; + IndexSearcher searcher = new IndexSearcher(reader); + TopDocs topDocs = searcher.search(approximateQuery, 11000); + assertEquals(topDocs.totalHits, new TotalHits(11000, TotalHits.Relation.EQUAL_TO)); + } catch (IOException e) { + throw new RuntimeException(e); + } + + } + } + } + } + + public void testApproximateRangeShortCircuit() throws IOException { + try (Directory directory = newDirectory()) { + try (RandomIndexWriter iw = new RandomIndexWriter(random(), directory, new WhitespaceAnalyzer())) { + int dims = 1; + + long[] scratch = new long[dims]; + int numPoints = 1000; + for (int i = 0; i < numPoints; i++) { + Document doc = new Document(); + for (int v = 0; v < dims; v++) { + scratch[v] = i; + } + doc.add(new LongPoint("point", scratch)); + iw.addDocument(doc); + if (i % 10 == 0) iw.flush(); + } + iw.flush(); + iw.forceMerge(1); + try (IndexReader reader = iw.getReader()) { + try { + long lower = 0; + long upper = 100; + Query approximateQuery = new ApproximatePointRangeQuery("point", pack(lower).bytes, pack(upper).bytes, dims, 10) { + protected String toString(int dimension, byte[] value) { + return Long.toString(LongPoint.decodeDimension(value, 0)); + } + }; + Query query = 
LongPoint.newRangeQuery("point", lower, upper); + ; + IndexSearcher searcher = new IndexSearcher(reader); + TopDocs topDocs = searcher.search(approximateQuery, 10); + TopDocs topDocs1 = searcher.search(query, 10); + + // since we short-circuit from the approx range at the end of size these will not be equal + assertNotEquals(topDocs.totalHits, topDocs1.totalHits); + assertEquals(topDocs.totalHits, new TotalHits(10, TotalHits.Relation.EQUAL_TO)); + assertEquals(topDocs1.totalHits, new TotalHits(101, TotalHits.Relation.EQUAL_TO)); + + } catch (IOException e) { + throw new RuntimeException(e); + } + + } + } + } + } + + public void testApproximateRangeShortCircuitAscSort() throws IOException { + try (Directory directory = newDirectory()) { + try (RandomIndexWriter iw = new RandomIndexWriter(random(), directory, new WhitespaceAnalyzer())) { + int dims = 1; + + long[] scratch = new long[dims]; + int numPoints = 1000; + for (int i = 0; i < numPoints; i++) { + Document doc = new Document(); + for (int v = 0; v < dims; v++) { + scratch[v] = i; + } + iw.addDocument(asList(new LongPoint("point", scratch[0]), new NumericDocValuesField("point", scratch[0]))); + } + iw.flush(); + iw.forceMerge(1); + try (IndexReader reader = iw.getReader()) { + try { + long lower = 0; + long upper = 20; + Query approximateQuery = new ApproximatePointRangeQuery( + "point", + pack(lower).bytes, + pack(upper).bytes, + dims, + 10, + SortOrder.ASC + ) { + protected String toString(int dimension, byte[] value) { + return Long.toString(LongPoint.decodeDimension(value, 0)); + } + }; + Query query = LongPoint.newRangeQuery("point", lower, upper); + ; + IndexSearcher searcher = new IndexSearcher(reader); + Sort sort = new Sort(new SortField("point", SortField.Type.LONG)); + TopDocs topDocs = searcher.search(approximateQuery, 10, sort); + TopDocs topDocs1 = searcher.search(query, 10, sort); + + // since we short-circuit from the approx range at the end of size these will not be equal + assertNotEquals(topDocs.totalHits, topDocs1.totalHits); + assertEquals(topDocs.totalHits, new TotalHits(10, TotalHits.Relation.EQUAL_TO)); + assertEquals(topDocs1.totalHits, new TotalHits(21, TotalHits.Relation.EQUAL_TO)); + assertEquals(topDocs.scoreDocs[0].doc, topDocs1.scoreDocs[0].doc); + assertEquals(topDocs.scoreDocs[1].doc, topDocs1.scoreDocs[1].doc); + assertEquals(topDocs.scoreDocs[2].doc, topDocs1.scoreDocs[2].doc); + assertEquals(topDocs.scoreDocs[3].doc, topDocs1.scoreDocs[3].doc); + assertEquals(topDocs.scoreDocs[4].doc, topDocs1.scoreDocs[4].doc); + assertEquals(topDocs.scoreDocs[5].doc, topDocs1.scoreDocs[5].doc); + + } catch (IOException e) { + throw new RuntimeException(e); + } + + } + } + } + } + + public void testSize() { + ApproximatePointRangeQuery query = new ApproximatePointRangeQuery("point", pack(0).bytes, pack(20).bytes, 1) { + protected String toString(int dimension, byte[] value) { + return Long.toString(LongPoint.decodeDimension(value, 0)); + } + }; + assertEquals(query.getSize(), 10_000); + + query.setSize(100); + assertEquals(query.getSize(), 100); + + } + + public void testSortOrder() { + ApproximatePointRangeQuery query = new ApproximatePointRangeQuery("point", pack(0).bytes, pack(20).bytes, 1) { + protected String toString(int dimension, byte[] value) { + return Long.toString(LongPoint.decodeDimension(value, 0)); + } + }; + assertNull(query.getSortOrder()); + + query.setSortOrder(SortOrder.ASC); + assertEquals(query.getSortOrder(), SortOrder.ASC); + } + + public void testCanApproximate() { + 
ApproximatePointRangeQuery query = new ApproximatePointRangeQuery("point", pack(0).bytes, pack(20).bytes, 1) { + protected String toString(int dimension, byte[] value) { + return Long.toString(LongPoint.decodeDimension(value, 0)); + } + }; + + assertFalse(query.canApproximate(null)); + + ApproximatePointRangeQuery queryCanApproximate = new ApproximatePointRangeQuery("point", pack(0).bytes, pack(20).bytes, 1) { + protected String toString(int dimension, byte[] value) { + return Long.toString(LongPoint.decodeDimension(value, 0)); + } + + public boolean canApproximate(SearchContext context) { + return true; + } + }; + SearchContext searchContext = mock(SearchContext.class); + assertTrue(queryCanApproximate.canApproximate(searchContext)); + } +} diff --git a/server/src/test/java/org/opensearch/search/approximate/ApproximateScoreQueryTests.java b/server/src/test/java/org/opensearch/search/approximate/ApproximateScoreQueryTests.java new file mode 100644 index 0000000000000..aa45ea6744227 --- /dev/null +++ b/server/src/test/java/org/opensearch/search/approximate/ApproximateScoreQueryTests.java @@ -0,0 +1,83 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.approximate; + +import org.apache.lucene.analysis.core.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.PointRangeQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Weight; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; + +import static org.apache.lucene.document.LongPoint.pack; + +public class ApproximateScoreQueryTests extends OpenSearchTestCase { + + public void testApproximationScoreSupplier() throws IOException { + long l = Long.MIN_VALUE; + long u = Long.MAX_VALUE; + Query originalQuery = new PointRangeQuery( + "test-index", + pack(new long[] { l }).bytes, + pack(new long[] { u }).bytes, + new long[] { l }.length + ) { + protected String toString(int dimension, byte[] value) { + return Long.toString(LongPoint.decodeDimension(value, 0)); + } + }; + + ApproximateQuery approximateQuery = new ApproximatePointRangeQuery( + "test-index", + pack(new long[] { l }).bytes, + pack(new long[] { u }).bytes, + new long[] { l }.length + ) { + protected String toString(int dimension, byte[] value) { + return Long.toString(LongPoint.decodeDimension(value, 0)); + } + }; + + ApproximateScoreQuery query = new ApproximateScoreQuery(originalQuery, approximateQuery); + query.resolvedQuery = approximateQuery; + + try (Directory directory = newDirectory()) { + try (RandomIndexWriter iw = new RandomIndexWriter(random(), directory, new WhitespaceAnalyzer())) { + Document document = new Document(); + document.add(new LongPoint("testPoint", Long.MIN_VALUE)); + iw.addDocument(document); + iw.flush(); + try (IndexReader reader = iw.getReader()) { + try { + IndexSearcher searcher = new IndexSearcher(reader); + searcher.search(query, 10); + Weight weight = query.rewrite(searcher).createWeight(searcher, ScoreMode.TOP_SCORES, 1.0F); + Scorer scorer = 
weight.scorer(reader.leaves().get(0)); + assertEquals( + scorer, + originalQuery.createWeight(searcher, ScoreMode.TOP_SCORES, 1.0F).scorer(searcher.getLeafContexts().get(0)) + ); + } catch (IOException e) { + throw new RuntimeException(e); + } + + } + } + } + } +} diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java index e27223cea0778..350c6f9ae8f6b 100644 --- a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java @@ -229,6 +229,7 @@ import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.telemetry.metrics.noop.NoopMetricsRegistry; import org.opensearch.telemetry.tracing.noop.NoopTracer; +import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.disruption.DisruptableMockTransport; import org.opensearch.threadpool.ThreadPool; @@ -1501,12 +1502,8 @@ private RepositoryData getRepositoryData(Repository repository) { private StepListener createRepoAndIndex(String repoName, String index, int shards) { final StepListener createRepositoryListener = new StepListener<>(); - client().admin() - .cluster() - .preparePutRepository(repoName) - .setType(FsRepository.TYPE) - .setSettings(Settings.builder().put("location", randomAlphaOfLength(10))) - .execute(createRepositoryListener); + Settings.Builder settings = Settings.builder().put("location", randomAlphaOfLength(10)); + OpenSearchIntegTestCase.putRepository(client().admin().cluster(), repoName, FsRepository.TYPE, settings, createRepositoryListener); final StepListener createIndexResponseStepListener = new StepListener<>(); @@ -2016,7 +2013,8 @@ public void onFailure(final Exception e) { repositoriesService, transportService, actionFilters, - null + null, + DefaultRemoteStoreSettings.INSTANCE ); nodeEnv = new NodeEnvironment(settings, environment); final NamedXContentRegistry namedXContentRegistry = new NamedXContentRegistry(Collections.emptyList()); @@ -2073,7 +2071,7 @@ public void onFailure(final Exception e) { emptyMap(), null, emptyMap(), - new RemoteSegmentStoreDirectoryFactory(() -> repositoriesService, threadPool), + new RemoteSegmentStoreDirectoryFactory(() -> repositoriesService, threadPool, ""), repositoriesServiceReference::get, null, new RemoteStoreStatsTrackerFactory(clusterService, settings), @@ -2287,7 +2285,8 @@ public void onFailure(final Exception e) { responseCollectorService, new NoneCircuitBreakerService(), null, - new TaskResourceTrackingService(settings, clusterSettings, threadPool) + new TaskResourceTrackingService(settings, clusterSettings, threadPool), + Collections.emptyList() ); SearchPhaseController searchPhaseController = new SearchPhaseController( writableRegistry(), @@ -2369,7 +2368,8 @@ public void onFailure(final Exception e) { snapshotsService, threadPool, actionFilters, - indexNameExpressionResolver + indexNameExpressionResolver, + DefaultRemoteStoreSettings.INSTANCE ) ); actions.put( @@ -2413,7 +2413,8 @@ public void onFailure(final Exception e) { clusterService, threadPool, actionFilters, - indexNameExpressionResolver + indexNameExpressionResolver, + null ) ); actions.put( @@ -2455,7 +2456,8 @@ public void onFailure(final Exception e) { clusterService, threadPool, actionFilters, - indexNameExpressionResolver + indexNameExpressionResolver, + null ) ); diff --git 
a/server/src/test/java/org/opensearch/snapshots/SnapshotShardPathsTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotShardPathsTests.java new file mode 100644 index 0000000000000..15eb70913eb88 --- /dev/null +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotShardPathsTests.java @@ -0,0 +1,144 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.snapshots; + +import org.opensearch.common.xcontent.XContentHelper; +import org.opensearch.common.xcontent.XContentType; +import org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.index.remote.RemoteStoreEnums; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +public class SnapshotShardPathsTests extends OpenSearchTestCase { + + public void testToXContent() throws IOException { + List paths = Arrays.asList("/path/to/shard/1", "/path/to/shard/2", "/path/to/shard/3"); + String indexId = "index-id"; + String indexName = "index-name"; + int numberOfShards = 5; + RemoteStoreEnums.PathType shardPathType = RemoteStoreEnums.PathType.HASHED_PREFIX; + RemoteStoreEnums.PathHashAlgorithm shardPathHashAlgorithm = RemoteStoreEnums.PathHashAlgorithm.FNV_1A_BASE64; + + SnapshotShardPaths snapshotShardPaths = new SnapshotShardPaths( + paths, + indexId, + indexName, + numberOfShards, + shardPathType, + shardPathHashAlgorithm + ); + + BytesReference bytes = XContentHelper.toXContent(snapshotShardPaths, XContentType.JSON, false); + String expectedJson = + "{\"indexId\":\"index-id\",\"indexName\":\"index-name\",\"number_of_shards\":5,\"shard_path_type\":1,\"shard_path_hash_algorithm\":0,\"paths\":[\"/path/to/shard/1\",\"/path/to/shard/2\",\"/path/to/shard/3\"]}"; + assertEquals(expectedJson, bytes.utf8ToString()); + } + + public void testMissingPaths() { + List paths = Collections.emptyList(); + String indexId = "index-id"; + String indexName = "index-name"; + int numberOfShards = 5; + RemoteStoreEnums.PathType shardPathType = RemoteStoreEnums.PathType.FIXED; + RemoteStoreEnums.PathHashAlgorithm shardPathHashAlgorithm = RemoteStoreEnums.PathHashAlgorithm.FNV_1A_COMPOSITE_1; + + AssertionError exception = expectThrows( + AssertionError.class, + () -> new SnapshotShardPaths(paths, indexId, indexName, numberOfShards, shardPathType, shardPathHashAlgorithm) + ); + assertTrue(exception.getMessage().contains("paths must not be empty")); + } + + public void testMissingIndexId() { + List paths = Arrays.asList("/path/to/shard/1", "/path/to/shard/2", "/path/to/shard/3"); + String indexId = ""; + String indexName = "index-name"; + int numberOfShards = 5; + RemoteStoreEnums.PathType shardPathType = RemoteStoreEnums.PathType.HASHED_PREFIX; + RemoteStoreEnums.PathHashAlgorithm shardPathHashAlgorithm = RemoteStoreEnums.PathHashAlgorithm.FNV_1A_BASE64; + + AssertionError exception = expectThrows( + AssertionError.class, + () -> new SnapshotShardPaths(paths, indexId, indexName, numberOfShards, shardPathType, shardPathHashAlgorithm) + ); + assertTrue(exception.getMessage().contains("indexId must not be empty")); + } + + public void testMissingIndexName() { + List paths = Arrays.asList("/path/to/shard/1", "/path/to/shard/2", "/path/to/shard/3"); + String indexId = "index-id"; + String indexName = ""; + int numberOfShards = 5; + 
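For orientation on the expected JSON string asserted in testToXContent above: per that string, the serialized form writes numeric codes for the enums (1 where HASHED_PREFIX is configured, 0 where FNV_1A_BASE64 is configured) and lists the paths last. Below is a hedged sketch of that field layout using a plain XContentBuilder; the class name and hard-coded values are illustrative only and this is not the production SnapshotShardPaths serialization.

```java
import java.io.IOException;
import java.util.List;

import org.opensearch.common.xcontent.XContentFactory;
import org.opensearch.core.xcontent.XContentBuilder;

// Illustrative reconstruction of the field layout asserted by the expected JSON above.
final class ShardPathsJsonSketch {
    static XContentBuilder buildExpectedLayout(List<String> paths) throws IOException {
        XContentBuilder builder = XContentFactory.jsonBuilder();
        builder.startObject();
        builder.field("indexId", "index-id");
        builder.field("indexName", "index-name");
        builder.field("number_of_shards", 5);
        builder.field("shard_path_type", 1);            // numeric code observed for HASHED_PREFIX in the expected string
        builder.field("shard_path_hash_algorithm", 0);  // numeric code observed for FNV_1A_BASE64 in the expected string
        builder.startArray("paths");
        for (String path : paths) {
            builder.value(path);
        }
        builder.endArray();
        builder.endObject();
        return builder;
    }
}
```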
RemoteStoreEnums.PathType shardPathType = RemoteStoreEnums.PathType.HASHED_PREFIX; + RemoteStoreEnums.PathHashAlgorithm shardPathHashAlgorithm = RemoteStoreEnums.PathHashAlgorithm.FNV_1A_BASE64; + + AssertionError exception = expectThrows( + AssertionError.class, + () -> new SnapshotShardPaths(paths, indexId, indexName, numberOfShards, shardPathType, shardPathHashAlgorithm) + ); + assertTrue(exception.getMessage().contains("indexName must not be empty")); + } + + public void testMissingNumberOfShards() { + List paths = Arrays.asList("/path/to/shard/1", "/path/to/shard/2", "/path/to/shard/3"); + String indexId = "index-id"; + String indexName = "index-name"; + int numberOfShards = 0; + RemoteStoreEnums.PathType shardPathType = RemoteStoreEnums.PathType.HASHED_PREFIX; + RemoteStoreEnums.PathHashAlgorithm shardPathHashAlgorithm = RemoteStoreEnums.PathHashAlgorithm.FNV_1A_BASE64; + + AssertionError exception = expectThrows( + AssertionError.class, + () -> new SnapshotShardPaths(paths, indexId, indexName, numberOfShards, shardPathType, shardPathHashAlgorithm) + ); + assertTrue(exception.getMessage().contains("numberOfShards must be > 0")); + } + + public void testMissingShardPathType() { + List paths = Arrays.asList("/path/to/shard/1", "/path/to/shard/2", "/path/to/shard/3"); + String indexId = "index-id"; + String indexName = "index-name"; + int numberOfShards = 5; + RemoteStoreEnums.PathType shardPathType = null; + RemoteStoreEnums.PathHashAlgorithm shardPathHashAlgorithm = RemoteStoreEnums.PathHashAlgorithm.FNV_1A_BASE64; + + AssertionError exception = expectThrows( + AssertionError.class, + () -> new SnapshotShardPaths(paths, indexId, indexName, numberOfShards, shardPathType, shardPathHashAlgorithm) + ); + assertTrue(exception.getMessage().contains("shardPathType must not be null")); + } + + public void testMissingShardPathHashAlgorithm() { + List paths = Arrays.asList("/path/to/shard/1", "/path/to/shard/2", "/path/to/shard/3"); + String indexId = "index-id"; + String indexName = "index-name"; + int numberOfShards = 5; + RemoteStoreEnums.PathType shardPathType = RemoteStoreEnums.PathType.HASHED_PREFIX; + RemoteStoreEnums.PathHashAlgorithm shardPathHashAlgorithm = null; + + AssertionError exception = expectThrows( + AssertionError.class, + () -> new SnapshotShardPaths(paths, indexId, indexName, numberOfShards, shardPathType, shardPathHashAlgorithm) + ); + assertEquals("shardPathHashAlgorithm must not be null", exception.getMessage()); + } + + public void testFromXContent() { + UnsupportedOperationException exception = expectThrows( + UnsupportedOperationException.class, + () -> SnapshotShardPaths.fromXContent(null) + ); + assertEquals("SnapshotShardPaths.fromXContent() is not supported", exception.getMessage()); + } +} diff --git a/server/src/test/java/org/opensearch/threadpool/ScalingThreadPoolTests.java b/server/src/test/java/org/opensearch/threadpool/ScalingThreadPoolTests.java index d8f04a11fe494..4f59f9688fb7e 100644 --- a/server/src/test/java/org/opensearch/threadpool/ScalingThreadPoolTests.java +++ b/server/src/test/java/org/opensearch/threadpool/ScalingThreadPoolTests.java @@ -148,6 +148,7 @@ private int expectedSize(final String threadPoolName, final int numberOfProcesso sizes.put(ThreadPool.Names.REFRESH, ThreadPool::halfAllocatedProcessorsMaxTen); sizes.put(ThreadPool.Names.WARMER, ThreadPool::halfAllocatedProcessorsMaxFive); sizes.put(ThreadPool.Names.SNAPSHOT, ThreadPool::halfAllocatedProcessorsMaxFive); + sizes.put(ThreadPool.Names.SNAPSHOT_DELETION, n -> ThreadPool.boundedBy(4 
* n, 64, 256)); sizes.put(ThreadPool.Names.FETCH_SHARD_STARTED, ThreadPool::twiceAllocatedProcessors); sizes.put(ThreadPool.Names.FETCH_SHARD_STORE, ThreadPool::twiceAllocatedProcessors); sizes.put(ThreadPool.Names.TRANSLOG_TRANSFER, ThreadPool::halfAllocatedProcessors); diff --git a/server/src/test/java/org/opensearch/wlm/MutableQueryGroupFragmentTests.java b/server/src/test/java/org/opensearch/wlm/MutableQueryGroupFragmentTests.java new file mode 100644 index 0000000000000..cfe53ddbd2c14 --- /dev/null +++ b/server/src/test/java/org/opensearch/wlm/MutableQueryGroupFragmentTests.java @@ -0,0 +1,66 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.wlm; + +import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +public class MutableQueryGroupFragmentTests extends OpenSearchTestCase { + + public void testSerializationDeserialization() throws IOException { + Map resourceLimits = new HashMap<>(); + resourceLimits.put(ResourceType.CPU, 0.5); + resourceLimits.put(ResourceType.MEMORY, 0.75); + MutableQueryGroupFragment mutableQueryGroupFragment = new MutableQueryGroupFragment( + MutableQueryGroupFragment.ResiliencyMode.SOFT, + resourceLimits + ); + BytesStreamOutput out = new BytesStreamOutput(); + mutableQueryGroupFragment.writeTo(out); + StreamInput in = out.bytes().streamInput(); + MutableQueryGroupFragment deserializedGroup = new MutableQueryGroupFragment(in); + assertEquals(mutableQueryGroupFragment, deserializedGroup); + } + + public void testSerializationDeserializationWithNull() throws IOException { + MutableQueryGroupFragment mutableQueryGroupFragment = new MutableQueryGroupFragment(); + BytesStreamOutput out = new BytesStreamOutput(); + mutableQueryGroupFragment.writeTo(out); + StreamInput in = out.bytes().streamInput(); + MutableQueryGroupFragment deserializedGroup = new MutableQueryGroupFragment(in); + assertEquals(0, deserializedGroup.getResourceLimits().size()); + assertEquals(mutableQueryGroupFragment.getResiliencyMode(), deserializedGroup.getResiliencyMode()); + } + + public void testValidateResourceLimits() { + Map invalidLimits = new HashMap<>(); + invalidLimits.put(ResourceType.CPU, 1.5); + Exception exception = assertThrows(IllegalArgumentException.class, () -> { + MutableQueryGroupFragment.validateResourceLimits(invalidLimits); + }); + String expectedMessage = "resource value should be greater than 0 and less or equal to 1.0"; + String actualMessage = exception.getMessage(); + assertTrue(actualMessage.contains(expectedMessage)); + } + + public void testSetMethodsWithNullAndEmptyValues() { + MutableQueryGroupFragment queryGroup = new MutableQueryGroupFragment(); + queryGroup.setResiliencyMode(null); + assertNull(queryGroup.getResiliencyMode()); + queryGroup.setResourceLimits(null); + assertNull(queryGroup.getResourceLimits()); + queryGroup.setResourceLimits(new HashMap<>()); + assertEquals(0, queryGroup.getResourceLimits().size()); + } +} diff --git a/server/src/test/java/org/opensearch/wlm/QueryGroupLevelResourceUsageViewTests.java b/server/src/test/java/org/opensearch/wlm/QueryGroupLevelResourceUsageViewTests.java index 532bf3de95bd6..0c7eb721806d5 100644 --- 
a/server/src/test/java/org/opensearch/wlm/QueryGroupLevelResourceUsageViewTests.java +++ b/server/src/test/java/org/opensearch/wlm/QueryGroupLevelResourceUsageViewTests.java @@ -8,23 +8,34 @@ package org.opensearch.wlm; -import org.opensearch.action.search.SearchAction; -import org.opensearch.core.tasks.TaskId; -import org.opensearch.tasks.Task; import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.wlm.tracker.ResourceUsageCalculatorTrackerServiceTests; -import java.util.Collections; import java.util.List; import java.util.Map; +import static org.opensearch.wlm.cancellation.QueryGroupTaskCancellationService.MIN_VALUE; +import static org.opensearch.wlm.tracker.CpuUsageCalculator.PROCESSOR_COUNT; +import static org.opensearch.wlm.tracker.MemoryUsageCalculator.HEAP_SIZE_BYTES; +import static org.opensearch.wlm.tracker.ResourceUsageCalculatorTests.createMockTaskWithResourceStats; +import static org.mockito.Mockito.mock; + public class QueryGroupLevelResourceUsageViewTests extends OpenSearchTestCase { - Map resourceUsage; - List activeTasks; + Map resourceUsage; + List activeTasks; + ResourceUsageCalculatorTrackerServiceTests.TestClock clock; + WorkloadManagementSettings settings; public void setUp() throws Exception { super.setUp(); - resourceUsage = Map.of(ResourceType.fromName("memory"), 34L, ResourceType.fromName("cpu"), 12L); - activeTasks = List.of(getRandomTask(4321)); + settings = mock(WorkloadManagementSettings.class); + clock = new ResourceUsageCalculatorTrackerServiceTests.TestClock(); + activeTasks = List.of(createMockTaskWithResourceStats(QueryGroupTask.class, 100, 200, 0, 1)); + clock.fastForwardBy(300); + double memoryUsage = 200.0 / HEAP_SIZE_BYTES; + double cpuUsage = 100.0 / (PROCESSOR_COUNT * 300.0); + + resourceUsage = Map.of(ResourceType.MEMORY, memoryUsage, ResourceType.CPU, cpuUsage); } public void testGetResourceUsageData() { @@ -32,7 +43,7 @@ public void testGetResourceUsageData() { resourceUsage, activeTasks ); - Map resourceUsageData = queryGroupLevelResourceUsageView.getResourceUsageData(); + Map resourceUsageData = queryGroupLevelResourceUsageView.getResourceUsageData(); assertTrue(assertResourceUsageData(resourceUsageData)); } @@ -41,23 +52,13 @@ public void testGetActiveTasks() { resourceUsage, activeTasks ); - List activeTasks = queryGroupLevelResourceUsageView.getActiveTasks(); + List activeTasks = queryGroupLevelResourceUsageView.getActiveTasks(); assertEquals(1, activeTasks.size()); - assertEquals(4321, activeTasks.get(0).getId()); + assertEquals(1, activeTasks.get(0).getId()); } - private boolean assertResourceUsageData(Map resourceUsageData) { - return resourceUsageData.get(ResourceType.fromName("memory")) == 34L && resourceUsageData.get(ResourceType.fromName("cpu")) == 12L; - } - - private Task getRandomTask(long id) { - return new Task( - id, - "transport", - SearchAction.NAME, - "test description", - new TaskId(randomLong() + ":" + randomLong()), - Collections.emptyMap() - ); + private boolean assertResourceUsageData(Map resourceUsageData) { + return (resourceUsageData.get(ResourceType.MEMORY) - 200.0 / HEAP_SIZE_BYTES) <= MIN_VALUE + && (resourceUsageData.get(ResourceType.CPU) - 100.0 / (300)) < MIN_VALUE; } } diff --git a/server/src/test/java/org/opensearch/wlm/ResourceTypeTests.java b/server/src/test/java/org/opensearch/wlm/ResourceTypeTests.java index 737cbb37b554c..16bd8b7e66266 100644 --- a/server/src/test/java/org/opensearch/wlm/ResourceTypeTests.java +++ b/server/src/test/java/org/opensearch/wlm/ResourceTypeTests.java @@ -8,14 +8,8 
@@ package org.opensearch.wlm; -import org.opensearch.action.search.SearchShardTask; -import org.opensearch.core.tasks.resourcetracker.ResourceStats; -import org.opensearch.tasks.CancellableTask; import org.opensearch.test.OpenSearchTestCase; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - public class ResourceTypeTests extends OpenSearchTestCase { public void testFromName() { @@ -35,17 +29,4 @@ public void testGetName() { assertEquals("cpu", ResourceType.CPU.getName()); assertEquals("memory", ResourceType.MEMORY.getName()); } - - public void testGetResourceUsage() { - SearchShardTask mockTask = createMockTask(SearchShardTask.class, 100, 200); - assertEquals(100, ResourceType.CPU.getResourceUsage(mockTask)); - assertEquals(200, ResourceType.MEMORY.getResourceUsage(mockTask)); - } - - private T createMockTask(Class type, long cpuUsage, long heapUsage) { - T task = mock(type); - when(task.getTotalResourceUtilization(ResourceStats.CPU)).thenReturn(cpuUsage); - when(task.getTotalResourceUtilization(ResourceStats.MEMORY)).thenReturn(heapUsage); - return task; - } } diff --git a/server/src/test/java/org/opensearch/wlm/cancellation/MaximumResourceTaskSelectionStrategyTests.java b/server/src/test/java/org/opensearch/wlm/cancellation/MaximumResourceTaskSelectionStrategyTests.java new file mode 100644 index 0000000000000..dc79822c59c49 --- /dev/null +++ b/server/src/test/java/org/opensearch/wlm/cancellation/MaximumResourceTaskSelectionStrategyTests.java @@ -0,0 +1,126 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.wlm.cancellation; + +import org.opensearch.action.search.SearchAction; +import org.opensearch.action.search.SearchTask; +import org.opensearch.core.tasks.TaskId; +import org.opensearch.core.tasks.resourcetracker.ResourceStats; +import org.opensearch.core.tasks.resourcetracker.ResourceStatsType; +import org.opensearch.core.tasks.resourcetracker.ResourceUsageMetric; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.wlm.QueryGroupTask; +import org.opensearch.wlm.ResourceType; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.stream.IntStream; + +import static org.opensearch.wlm.cancellation.QueryGroupTaskCancellationService.MIN_VALUE; +import static org.opensearch.wlm.tracker.MemoryUsageCalculator.HEAP_SIZE_BYTES; + +public class MaximumResourceTaskSelectionStrategyTests extends OpenSearchTestCase { + + public void testSelectTasksToCancelSelectsTasksMeetingThreshold_ifReduceByIsGreaterThanZero() { + MaximumResourceTaskSelectionStrategy testHighestResourceConsumingTaskFirstSelectionStrategy = + new MaximumResourceTaskSelectionStrategy(); + double reduceBy = 50000.0 / HEAP_SIZE_BYTES; + ResourceType resourceType = ResourceType.MEMORY; + List tasks = getListOfTasks(100); + List selectedTasks = testHighestResourceConsumingTaskFirstSelectionStrategy.selectTasksForCancellation( + tasks, + reduceBy, + resourceType + ); + assertFalse(selectedTasks.isEmpty()); + boolean sortedInDescendingResourceUsage = IntStream.range(0, selectedTasks.size() - 1) + .noneMatch( + index -> ResourceType.MEMORY.getResourceUsageCalculator() + .calculateTaskResourceUsage(selectedTasks.get(index)) < ResourceType.MEMORY.getResourceUsageCalculator() + .calculateTaskResourceUsage(selectedTasks.get(index + 1)) + ); + 
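For readers new to this strategy, the assertions in these selection tests pin down a simple contract: tasks are considered in descending resource usage, selection stops once the accumulated usage covers reduceBy, a zero reduceBy selects nothing, and a negative reduceBy is rejected with "limit has to be greater than zero". A hedged, generic sketch of that greedy loop follows; the names are hypothetical and this is not the production MaximumResourceTaskSelectionStrategy.

```java
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.function.ToDoubleFunction;

// Greedy selection sketch matching the contract the tests above describe.
final class GreedyTaskSelectionSketch {

    static <T> List<T> selectUntilCovered(List<T> tasks, double reduceBy, ToDoubleFunction<T> usageOf) {
        if (reduceBy < 0.0) {
            // mirrors the IllegalArgumentException asserted for a negative reduceBy
            throw new IllegalArgumentException("limit has to be greater than zero");
        }
        List<T> selected = new ArrayList<>();
        if (reduceBy == 0.0) {
            return selected; // nothing to reclaim, so nothing is selected
        }
        List<T> sorted = new ArrayList<>(tasks);
        sorted.sort(Comparator.comparingDouble(usageOf).reversed());
        double accumulated = 0.0;
        for (T task : sorted) {
            selected.add(task);
            accumulated += usageOf.applyAsDouble(task);
            if (accumulated >= reduceBy) {
                break; // accumulated usage now covers the requested reduction
            }
        }
        return selected;
    }

    public static void main(String[] args) {
        List<Double> usages = List.of(0.4, 0.1, 0.3, 0.2);
        // Reclaiming 0.5 selects the 0.4 and 0.3 tasks, in descending usage order.
        System.out.println(selectUntilCovered(usages, 0.5, d -> d));
    }
}
```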
assertTrue(sortedInDescendingResourceUsage); + assertTrue(tasksUsageMeetsThreshold(selectedTasks, reduceBy)); + } + + public void testSelectTasksToCancelSelectsTasksMeetingThreshold_ifReduceByIsLesserThanZero() { + MaximumResourceTaskSelectionStrategy testHighestResourceConsumingTaskFirstSelectionStrategy = + new MaximumResourceTaskSelectionStrategy(); + double reduceBy = -50.0 / HEAP_SIZE_BYTES; + ResourceType resourceType = ResourceType.MEMORY; + List tasks = getListOfTasks(3); + try { + testHighestResourceConsumingTaskFirstSelectionStrategy.selectTasksForCancellation(tasks, reduceBy, resourceType); + } catch (Exception e) { + assertTrue(e instanceof IllegalArgumentException); + assertEquals("limit has to be greater than zero", e.getMessage()); + } + } + + public void testSelectTasksToCancelSelectsTasksMeetingThreshold_ifReduceByIsEqualToZero() { + MaximumResourceTaskSelectionStrategy testHighestResourceConsumingTaskFirstSelectionStrategy = + new MaximumResourceTaskSelectionStrategy(); + double reduceBy = 0.0; + ResourceType resourceType = ResourceType.MEMORY; + List tasks = getListOfTasks(50); + List selectedTasks = testHighestResourceConsumingTaskFirstSelectionStrategy.selectTasksForCancellation( + tasks, + reduceBy, + resourceType + ); + assertTrue(selectedTasks.isEmpty()); + } + + private boolean tasksUsageMeetsThreshold(List selectedTasks, double threshold) { + double memory = 0; + for (QueryGroupTask task : selectedTasks) { + memory += ResourceType.MEMORY.getResourceUsageCalculator().calculateTaskResourceUsage(task); + if ((memory - threshold) > MIN_VALUE) { + return true; + } + } + return false; + } + + private List getListOfTasks(int numberOfTasks) { + List tasks = new ArrayList<>(); + + while (tasks.size() < numberOfTasks) { + long id = randomLong(); + final QueryGroupTask task = getRandomSearchTask(id); + long initial_memory = randomLongBetween(1, 100); + + ResourceUsageMetric[] initialTaskResourceMetrics = new ResourceUsageMetric[] { + new ResourceUsageMetric(ResourceStats.MEMORY, initial_memory) }; + task.startThreadResourceTracking(id, ResourceStatsType.WORKER_STATS, initialTaskResourceMetrics); + + long memory = initial_memory + randomLongBetween(1, 10000); + + ResourceUsageMetric[] taskResourceMetrics = new ResourceUsageMetric[] { + new ResourceUsageMetric(ResourceStats.MEMORY, memory), }; + task.updateThreadResourceStats(id, ResourceStatsType.WORKER_STATS, taskResourceMetrics); + task.stopThreadResourceTracking(id, ResourceStatsType.WORKER_STATS); + tasks.add(task); + } + + return tasks; + } + + private QueryGroupTask getRandomSearchTask(long id) { + return new SearchTask( + id, + "transport", + SearchAction.NAME, + () -> "test description", + new TaskId(randomLong() + ":" + randomLong()), + Collections.emptyMap() + ); + } +} diff --git a/server/src/test/java/org/opensearch/wlm/cancellation/QueryGroupTaskCancellationServiceTests.java b/server/src/test/java/org/opensearch/wlm/cancellation/QueryGroupTaskCancellationServiceTests.java new file mode 100644 index 0000000000000..f7a49235efc69 --- /dev/null +++ b/server/src/test/java/org/opensearch/wlm/cancellation/QueryGroupTaskCancellationServiceTests.java @@ -0,0 +1,541 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.wlm.cancellation; + +import org.opensearch.action.search.SearchAction; +import org.opensearch.cluster.metadata.QueryGroup; +import org.opensearch.core.tasks.TaskId; +import org.opensearch.tasks.TaskCancellation; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.wlm.MutableQueryGroupFragment; +import org.opensearch.wlm.MutableQueryGroupFragment.ResiliencyMode; +import org.opensearch.wlm.QueryGroupLevelResourceUsageView; +import org.opensearch.wlm.QueryGroupTask; +import org.opensearch.wlm.ResourceType; +import org.opensearch.wlm.WorkloadManagementSettings; +import org.opensearch.wlm.tracker.QueryGroupResourceUsageTrackerService; +import org.opensearch.wlm.tracker.ResourceUsageCalculatorTrackerServiceTests.TestClock; +import org.junit.Before; + +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class QueryGroupTaskCancellationServiceTests extends OpenSearchTestCase { + private static final String queryGroupId1 = "queryGroup1"; + private static final String queryGroupId2 = "queryGroup2"; + + private TestClock clock; + + private Map queryGroupLevelViews; + private Set activeQueryGroups; + private Set deletedQueryGroups; + private QueryGroupTaskCancellationService taskCancellation; + private WorkloadManagementSettings workloadManagementSettings; + private QueryGroupResourceUsageTrackerService resourceUsageTrackerService; + + @Before + public void setup() { + workloadManagementSettings = mock(WorkloadManagementSettings.class); + queryGroupLevelViews = new HashMap<>(); + activeQueryGroups = new HashSet<>(); + deletedQueryGroups = new HashSet<>(); + + clock = new TestClock(); + when(workloadManagementSettings.getNodeLevelCpuCancellationThreshold()).thenReturn(0.9); + when(workloadManagementSettings.getNodeLevelMemoryCancellationThreshold()).thenReturn(0.9); + resourceUsageTrackerService = mock(QueryGroupResourceUsageTrackerService.class); + taskCancellation = new QueryGroupTaskCancellationService( + workloadManagementSettings, + new MaximumResourceTaskSelectionStrategy(), + resourceUsageTrackerService, + activeQueryGroups, + deletedQueryGroups + ); + } + + public void testGetCancellableTasksFrom_setupAppropriateCancellationReasonAndScore() { + ResourceType resourceType = ResourceType.CPU; + double cpuUsage = 0.11; + double memoryUsage = 0.0; + Double threshold = 0.1; + + QueryGroup queryGroup1 = new QueryGroup( + "testQueryGroup", + queryGroupId1, + new MutableQueryGroupFragment(ResiliencyMode.ENFORCED, Map.of(resourceType, threshold)), + 1L + ); + clock.fastForwardBy(1000); + + QueryGroupLevelResourceUsageView mockView = createResourceUsageViewMock(); + when(mockView.getResourceUsageData()).thenReturn(Map.of(resourceType, cpuUsage, ResourceType.MEMORY, memoryUsage)); + queryGroupLevelViews.put(queryGroupId1, mockView); + taskCancellation.queryGroupLevelResourceUsageViews = queryGroupLevelViews; + + List cancellableTasksFrom = taskCancellation.getAllCancellableTasks(List.of(queryGroup1)); + assertEquals(2, cancellableTasksFrom.size()); + assertEquals(1234, cancellableTasksFrom.get(0).getTask().getId()); + assertEquals(4321, cancellableTasksFrom.get(1).getTask().getId()); + assertEquals(1, cancellableTasksFrom.get(0).getReasons().get(0).getCancellationScore()); + } + + public void 
testGetCancellableTasksFrom_returnsTasksWhenBreachingThreshold() { + ResourceType resourceType = ResourceType.CPU; + double cpuUsage = 0.11; + double memoryUsage = 0.0; + Double threshold = 0.1; + + QueryGroup queryGroup1 = new QueryGroup( + "testQueryGroup", + queryGroupId1, + new MutableQueryGroupFragment(ResiliencyMode.ENFORCED, Map.of(resourceType, threshold)), + 1L + ); + + QueryGroupLevelResourceUsageView mockView = createResourceUsageViewMock(); + when(mockView.getResourceUsageData()).thenReturn(Map.of(resourceType, cpuUsage, ResourceType.MEMORY, memoryUsage)); + queryGroupLevelViews.put(queryGroupId1, mockView); + taskCancellation.queryGroupLevelResourceUsageViews = queryGroupLevelViews; + + List cancellableTasksFrom = taskCancellation.getAllCancellableTasks(List.of(queryGroup1)); + assertEquals(2, cancellableTasksFrom.size()); + assertEquals(1234, cancellableTasksFrom.get(0).getTask().getId()); + assertEquals(4321, cancellableTasksFrom.get(1).getTask().getId()); + } + + public void testGetCancellableTasksFrom_returnsTasksWhenBreachingThresholdForMemory() { + ResourceType resourceType = ResourceType.MEMORY; + double cpuUsage = 0.0; + double memoryUsage = 0.11; + Double threshold = 0.1; + + QueryGroup queryGroup1 = new QueryGroup( + "testQueryGroup", + queryGroupId1, + new MutableQueryGroupFragment(ResiliencyMode.ENFORCED, Map.of(resourceType, threshold)), + 1L + ); + + QueryGroupLevelResourceUsageView mockView = createResourceUsageViewMock(); + when(mockView.getResourceUsageData()).thenReturn(Map.of(ResourceType.CPU, cpuUsage, resourceType, memoryUsage)); + + queryGroupLevelViews.put(queryGroupId1, mockView); + activeQueryGroups.add(queryGroup1); + taskCancellation.queryGroupLevelResourceUsageViews = queryGroupLevelViews; + + List cancellableTasksFrom = taskCancellation.getAllCancellableTasks(ResiliencyMode.ENFORCED); + assertEquals(2, cancellableTasksFrom.size()); + assertEquals(1234, cancellableTasksFrom.get(0).getTask().getId()); + assertEquals(4321, cancellableTasksFrom.get(1).getTask().getId()); + } + + public void testGetCancellableTasksFrom_returnsNoTasksWhenNotBreachingThreshold() { + ResourceType resourceType = ResourceType.CPU; + double cpuUsage = 0.81; + double memoryUsage = 0.0; + Double threshold = 0.9; + QueryGroup queryGroup1 = new QueryGroup( + "testQueryGroup", + queryGroupId1, + new MutableQueryGroupFragment(ResiliencyMode.ENFORCED, Map.of(resourceType, threshold)), + 1L + ); + + QueryGroupLevelResourceUsageView mockView = createResourceUsageViewMock(); + when(mockView.getResourceUsageData()).thenReturn(Map.of(ResourceType.CPU, cpuUsage, ResourceType.MEMORY, memoryUsage)); + queryGroupLevelViews.put(queryGroupId1, mockView); + activeQueryGroups.add(queryGroup1); + taskCancellation.queryGroupLevelResourceUsageViews = queryGroupLevelViews; + + List cancellableTasksFrom = taskCancellation.getAllCancellableTasks(List.of(queryGroup1)); + assertTrue(cancellableTasksFrom.isEmpty()); + } + + public void testGetCancellableTasksFrom_filtersQueryGroupCorrectly() { + ResourceType resourceType = ResourceType.CPU; + double usage = 0.02; + Double threshold = 0.01; + + QueryGroup queryGroup1 = new QueryGroup( + "testQueryGroup", + queryGroupId1, + new MutableQueryGroupFragment(ResiliencyMode.ENFORCED, Map.of(resourceType, threshold)), + 1L + ); + + QueryGroupLevelResourceUsageView mockView = createResourceUsageViewMock(); + queryGroupLevelViews.put(queryGroupId1, mockView); + activeQueryGroups.add(queryGroup1); + taskCancellation.queryGroupLevelResourceUsageViews = 
queryGroupLevelViews; + + QueryGroupTaskCancellationService taskCancellation = new QueryGroupTaskCancellationService( + workloadManagementSettings, + new MaximumResourceTaskSelectionStrategy(), + resourceUsageTrackerService, + activeQueryGroups, + deletedQueryGroups + ); + + List cancellableTasksFrom = taskCancellation.getAllCancellableTasks(ResiliencyMode.SOFT); + assertEquals(0, cancellableTasksFrom.size()); + } + + public void testCancelTasks_cancelsGivenTasks() { + ResourceType resourceType = ResourceType.CPU; + double cpuUsage = 0.011; + double memoryUsage = 0.011; + + Double threshold = 0.01; + + QueryGroup queryGroup1 = new QueryGroup( + "testQueryGroup", + queryGroupId1, + new MutableQueryGroupFragment(ResiliencyMode.ENFORCED, Map.of(resourceType, threshold, ResourceType.MEMORY, threshold)), + 1L + ); + + QueryGroupLevelResourceUsageView mockView = createResourceUsageViewMock(); + when(mockView.getResourceUsageData()).thenReturn(Map.of(ResourceType.CPU, cpuUsage, ResourceType.MEMORY, memoryUsage)); + + queryGroupLevelViews.put(queryGroupId1, mockView); + activeQueryGroups.add(queryGroup1); + + QueryGroupTaskCancellationService taskCancellation = new QueryGroupTaskCancellationService( + workloadManagementSettings, + new MaximumResourceTaskSelectionStrategy(), + resourceUsageTrackerService, + activeQueryGroups, + deletedQueryGroups + ); + + taskCancellation.queryGroupLevelResourceUsageViews = queryGroupLevelViews; + + List cancellableTasksFrom = taskCancellation.getAllCancellableTasks(ResiliencyMode.ENFORCED); + assertEquals(2, cancellableTasksFrom.size()); + assertEquals(1234, cancellableTasksFrom.get(0).getTask().getId()); + assertEquals(4321, cancellableTasksFrom.get(1).getTask().getId()); + + when(resourceUsageTrackerService.constructQueryGroupLevelUsageViews()).thenReturn(queryGroupLevelViews); + taskCancellation.cancelTasks(() -> false); + assertTrue(cancellableTasksFrom.get(0).getTask().isCancelled()); + assertTrue(cancellableTasksFrom.get(1).getTask().isCancelled()); + } + + public void testCancelTasks_cancelsTasksFromDeletedQueryGroups() { + ResourceType resourceType = ResourceType.CPU; + double activeQueryGroupCpuUsage = 0.01; + double activeQueryGroupMemoryUsage = 0.0; + double deletedQueryGroupCpuUsage = 0.01; + double deletedQueryGroupMemoryUsage = 0.0; + Double threshold = 0.01; + + QueryGroup activeQueryGroup = new QueryGroup( + "testQueryGroup", + queryGroupId1, + new MutableQueryGroupFragment(ResiliencyMode.ENFORCED, Map.of(resourceType, threshold)), + 1L + ); + + QueryGroup deletedQueryGroup = new QueryGroup( + "testQueryGroup", + queryGroupId2, + new MutableQueryGroupFragment(ResiliencyMode.ENFORCED, Map.of(resourceType, threshold)), + 1L + ); + + QueryGroupLevelResourceUsageView mockView1 = createResourceUsageViewMock(); + QueryGroupLevelResourceUsageView mockView2 = createResourceUsageViewMock( + resourceType, + deletedQueryGroupCpuUsage, + List.of(1000, 1001) + ); + + when(mockView1.getResourceUsageData()).thenReturn( + Map.of(ResourceType.CPU, activeQueryGroupCpuUsage, ResourceType.MEMORY, activeQueryGroupMemoryUsage) + ); + when(mockView2.getResourceUsageData()).thenReturn( + Map.of(ResourceType.CPU, deletedQueryGroupCpuUsage, ResourceType.MEMORY, deletedQueryGroupMemoryUsage) + ); + queryGroupLevelViews.put(queryGroupId1, mockView1); + queryGroupLevelViews.put(queryGroupId2, mockView2); + + activeQueryGroups.add(activeQueryGroup); + deletedQueryGroups.add(deletedQueryGroup); + + QueryGroupTaskCancellationService taskCancellation = new 
QueryGroupTaskCancellationService( + workloadManagementSettings, + new MaximumResourceTaskSelectionStrategy(), + resourceUsageTrackerService, + activeQueryGroups, + deletedQueryGroups + ); + + taskCancellation.queryGroupLevelResourceUsageViews = queryGroupLevelViews; + + List cancellableTasksFrom = taskCancellation.getAllCancellableTasks(ResiliencyMode.ENFORCED); + assertEquals(2, cancellableTasksFrom.size()); + assertEquals(1234, cancellableTasksFrom.get(0).getTask().getId()); + assertEquals(4321, cancellableTasksFrom.get(1).getTask().getId()); + + List cancellableTasksFromDeletedQueryGroups = taskCancellation.getAllCancellableTasks(List.of(deletedQueryGroup)); + assertEquals(2, cancellableTasksFromDeletedQueryGroups.size()); + assertEquals(1000, cancellableTasksFromDeletedQueryGroups.get(0).getTask().getId()); + assertEquals(1001, cancellableTasksFromDeletedQueryGroups.get(1).getTask().getId()); + + when(resourceUsageTrackerService.constructQueryGroupLevelUsageViews()).thenReturn(queryGroupLevelViews); + taskCancellation.cancelTasks(() -> true); + + assertTrue(cancellableTasksFrom.get(0).getTask().isCancelled()); + assertTrue(cancellableTasksFrom.get(1).getTask().isCancelled()); + assertTrue(cancellableTasksFromDeletedQueryGroups.get(0).getTask().isCancelled()); + assertTrue(cancellableTasksFromDeletedQueryGroups.get(1).getTask().isCancelled()); + } + + public void testCancelTasks_does_not_cancelTasksFromDeletedQueryGroups_whenNodeNotInDuress() { + ResourceType resourceType = ResourceType.CPU; + double activeQueryGroupCpuUsage = 0.11; + double activeQueryGroupMemoryUsage = 0.0; + double deletedQueryGroupCpuUsage = 0.11; + double deletedQueryGroupMemoryUsage = 0.0; + + Double threshold = 0.01; + + QueryGroup activeQueryGroup = new QueryGroup( + "testQueryGroup", + queryGroupId1, + new MutableQueryGroupFragment(ResiliencyMode.ENFORCED, Map.of(resourceType, threshold)), + 1L + ); + + QueryGroup deletedQueryGroup = new QueryGroup( + "testQueryGroup", + queryGroupId2, + new MutableQueryGroupFragment(ResiliencyMode.ENFORCED, Map.of(resourceType, threshold)), + 1L + ); + + QueryGroupLevelResourceUsageView mockView1 = createResourceUsageViewMock(); + QueryGroupLevelResourceUsageView mockView2 = createResourceUsageViewMock( + resourceType, + deletedQueryGroupCpuUsage, + List.of(1000, 1001) + ); + + when(mockView1.getResourceUsageData()).thenReturn( + Map.of(ResourceType.CPU, activeQueryGroupCpuUsage, ResourceType.MEMORY, activeQueryGroupMemoryUsage) + ); + when(mockView2.getResourceUsageData()).thenReturn( + Map.of(ResourceType.CPU, deletedQueryGroupCpuUsage, ResourceType.MEMORY, deletedQueryGroupMemoryUsage) + ); + + queryGroupLevelViews.put(queryGroupId1, mockView1); + queryGroupLevelViews.put(queryGroupId2, mockView2); + activeQueryGroups.add(activeQueryGroup); + deletedQueryGroups.add(deletedQueryGroup); + + QueryGroupTaskCancellationService taskCancellation = new QueryGroupTaskCancellationService( + workloadManagementSettings, + new MaximumResourceTaskSelectionStrategy(), + resourceUsageTrackerService, + activeQueryGroups, + deletedQueryGroups + ); + taskCancellation.queryGroupLevelResourceUsageViews = queryGroupLevelViews; + + List cancellableTasksFrom = taskCancellation.getAllCancellableTasks(ResiliencyMode.ENFORCED); + assertEquals(2, cancellableTasksFrom.size()); + assertEquals(1234, cancellableTasksFrom.get(0).getTask().getId()); + assertEquals(4321, cancellableTasksFrom.get(1).getTask().getId()); + + List cancellableTasksFromDeletedQueryGroups = 
taskCancellation.getAllCancellableTasks(List.of(deletedQueryGroup)); + assertEquals(2, cancellableTasksFromDeletedQueryGroups.size()); + assertEquals(1000, cancellableTasksFromDeletedQueryGroups.get(0).getTask().getId()); + assertEquals(1001, cancellableTasksFromDeletedQueryGroups.get(1).getTask().getId()); + + when(resourceUsageTrackerService.constructQueryGroupLevelUsageViews()).thenReturn(queryGroupLevelViews); + taskCancellation.cancelTasks(() -> false); + + assertTrue(cancellableTasksFrom.get(0).getTask().isCancelled()); + assertTrue(cancellableTasksFrom.get(1).getTask().isCancelled()); + assertFalse(cancellableTasksFromDeletedQueryGroups.get(0).getTask().isCancelled()); + assertFalse(cancellableTasksFromDeletedQueryGroups.get(1).getTask().isCancelled()); + } + + public void testCancelTasks_cancelsGivenTasks_WhenNodeInDuress() { + ResourceType resourceType = ResourceType.CPU; + double cpuUsage1 = 0.11; + double memoryUsage1 = 0.0; + double cpuUsage2 = 0.11; + double memoryUsage2 = 0.0; + Double threshold = 0.01; + + QueryGroup queryGroup1 = new QueryGroup( + "testQueryGroup", + queryGroupId1, + new MutableQueryGroupFragment(ResiliencyMode.ENFORCED, Map.of(resourceType, threshold)), + 1L + ); + + QueryGroup queryGroup2 = new QueryGroup( + "testQueryGroup", + queryGroupId2, + new MutableQueryGroupFragment(ResiliencyMode.SOFT, Map.of(resourceType, threshold)), + 1L + ); + + QueryGroupLevelResourceUsageView mockView1 = createResourceUsageViewMock(); + when(mockView1.getResourceUsageData()).thenReturn(Map.of(ResourceType.CPU, cpuUsage1, ResourceType.MEMORY, memoryUsage1)); + queryGroupLevelViews.put(queryGroupId1, mockView1); + QueryGroupLevelResourceUsageView mockView = createResourceUsageViewMock(); + when(mockView.getActiveTasks()).thenReturn(List.of(getRandomSearchTask(5678), getRandomSearchTask(8765))); + when(mockView.getResourceUsageData()).thenReturn(Map.of(ResourceType.CPU, cpuUsage2, ResourceType.MEMORY, memoryUsage2)); + queryGroupLevelViews.put(queryGroupId2, mockView); + Collections.addAll(activeQueryGroups, queryGroup1, queryGroup2); + + QueryGroupTaskCancellationService taskCancellation = new QueryGroupTaskCancellationService( + workloadManagementSettings, + new MaximumResourceTaskSelectionStrategy(), + resourceUsageTrackerService, + activeQueryGroups, + deletedQueryGroups + ); + + taskCancellation.queryGroupLevelResourceUsageViews = queryGroupLevelViews; + + List cancellableTasksFrom = taskCancellation.getAllCancellableTasks(ResiliencyMode.ENFORCED); + assertEquals(2, cancellableTasksFrom.size()); + assertEquals(1234, cancellableTasksFrom.get(0).getTask().getId()); + assertEquals(4321, cancellableTasksFrom.get(1).getTask().getId()); + + List cancellableTasksFrom1 = taskCancellation.getAllCancellableTasks(ResiliencyMode.SOFT); + assertEquals(2, cancellableTasksFrom1.size()); + assertEquals(5678, cancellableTasksFrom1.get(0).getTask().getId()); + assertEquals(8765, cancellableTasksFrom1.get(1).getTask().getId()); + + when(resourceUsageTrackerService.constructQueryGroupLevelUsageViews()).thenReturn(queryGroupLevelViews); + taskCancellation.cancelTasks(() -> true); + assertTrue(cancellableTasksFrom.get(0).getTask().isCancelled()); + assertTrue(cancellableTasksFrom.get(1).getTask().isCancelled()); + assertTrue(cancellableTasksFrom1.get(0).getTask().isCancelled()); + assertTrue(cancellableTasksFrom1.get(1).getTask().isCancelled()); + } + + public void testGetAllCancellableTasks_ReturnsNoTasksWhenNotBreachingThresholds() { + ResourceType resourceType = ResourceType.CPU; + double 
queryGroupCpuUsage = 0.09; + double queryGroupMemoryUsage = 0.0; + Double threshold = 0.1; + + QueryGroup queryGroup1 = new QueryGroup( + "testQueryGroup", + queryGroupId1, + new MutableQueryGroupFragment(ResiliencyMode.ENFORCED, Map.of(resourceType, threshold)), + 1L + ); + + QueryGroupLevelResourceUsageView mockView = createResourceUsageViewMock(); + when(mockView.getResourceUsageData()).thenReturn( + Map.of(ResourceType.CPU, queryGroupCpuUsage, ResourceType.MEMORY, queryGroupMemoryUsage) + ); + queryGroupLevelViews.put(queryGroupId1, mockView); + activeQueryGroups.add(queryGroup1); + taskCancellation.queryGroupLevelResourceUsageViews = queryGroupLevelViews; + + List allCancellableTasks = taskCancellation.getAllCancellableTasks(ResiliencyMode.ENFORCED); + assertTrue(allCancellableTasks.isEmpty()); + } + + public void testGetAllCancellableTasks_ReturnsTasksWhenBreachingThresholds() { + ResourceType resourceType = ResourceType.CPU; + double cpuUsage = 0.11; + double memoryUsage = 0.0; + Double threshold = 0.01; + + QueryGroup queryGroup1 = new QueryGroup( + "testQueryGroup", + queryGroupId1, + new MutableQueryGroupFragment(ResiliencyMode.ENFORCED, Map.of(resourceType, threshold)), + 1L + ); + + QueryGroupLevelResourceUsageView mockView = createResourceUsageViewMock(); + when(mockView.getResourceUsageData()).thenReturn(Map.of(ResourceType.CPU, cpuUsage, ResourceType.MEMORY, memoryUsage)); + queryGroupLevelViews.put(queryGroupId1, mockView); + activeQueryGroups.add(queryGroup1); + taskCancellation.queryGroupLevelResourceUsageViews = queryGroupLevelViews; + + List allCancellableTasks = taskCancellation.getAllCancellableTasks(ResiliencyMode.ENFORCED); + assertEquals(2, allCancellableTasks.size()); + assertEquals(1234, allCancellableTasks.get(0).getTask().getId()); + assertEquals(4321, allCancellableTasks.get(1).getTask().getId()); + } + + public void testGetCancellableTasksFrom_doesNotReturnTasksWhenQueryGroupIdNotFound() { + ResourceType resourceType = ResourceType.CPU; + double usage = 0.11; + Double threshold = 0.01; + + QueryGroup queryGroup1 = new QueryGroup( + "testQueryGroup1", + queryGroupId1, + new MutableQueryGroupFragment(ResiliencyMode.ENFORCED, Map.of(resourceType, threshold)), + 1L + ); + QueryGroup queryGroup2 = new QueryGroup( + "testQueryGroup2", + queryGroupId2, + new MutableQueryGroupFragment(ResiliencyMode.ENFORCED, Map.of(resourceType, threshold)), + 1L + ); + + QueryGroupLevelResourceUsageView mockView = createResourceUsageViewMock(); + queryGroupLevelViews.put(queryGroupId1, mockView); + activeQueryGroups.add(queryGroup1); + activeQueryGroups.add(queryGroup2); + taskCancellation.queryGroupLevelResourceUsageViews = queryGroupLevelViews; + + List cancellableTasksFrom = taskCancellation.getAllCancellableTasks(List.of(queryGroup2)); + assertEquals(0, cancellableTasksFrom.size()); + } + + private QueryGroupLevelResourceUsageView createResourceUsageViewMock() { + QueryGroupLevelResourceUsageView mockView = mock(QueryGroupLevelResourceUsageView.class); + when(mockView.getActiveTasks()).thenReturn(List.of(getRandomSearchTask(1234), getRandomSearchTask(4321))); + return mockView; + } + + private QueryGroupLevelResourceUsageView createResourceUsageViewMock(ResourceType resourceType, double usage, Collection ids) { + QueryGroupLevelResourceUsageView mockView = mock(QueryGroupLevelResourceUsageView.class); + when(mockView.getResourceUsageData()).thenReturn(Collections.singletonMap(resourceType, usage)); + 
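A brief aside on the thresholds used throughout these cancellation tests: a query group's tasks only become cancellation candidates when its measured usage exceeds the limit configured in its MutableQueryGroupFragment, which is why usage 0.11 against a 0.1 limit yields two cancellable tasks while 0.09 against the same limit yields none. A hedged one-method sketch of that comparison, with illustrative names rather than the production service:

```java
// Minimal sketch of the breach check the thresholds above exercise.
final class BreachCheckSketch {
    static boolean isBreaching(double measuredUsage, double configuredLimit) {
        return measuredUsage > configuredLimit;
    }

    public static void main(String[] args) {
        System.out.println(isBreaching(0.11, 0.10)); // true  -> tasks become cancellable
        System.out.println(isBreaching(0.09, 0.10)); // false -> no tasks are returned
    }
}
```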
when(mockView.getActiveTasks()).thenReturn(ids.stream().map(this::getRandomSearchTask).collect(Collectors.toList())); + return mockView; + } + + private QueryGroupTask getRandomSearchTask(long id) { + return new QueryGroupTask( + id, + "transport", + SearchAction.NAME, + "test description", + new TaskId(randomLong() + ":" + randomLong()), + Collections.emptyMap(), + null, + clock::getTime + ); + } +} diff --git a/server/src/test/java/org/opensearch/wlm/listeners/QueryGroupRequestOperationListenerTests.java b/server/src/test/java/org/opensearch/wlm/listeners/QueryGroupRequestOperationListenerTests.java new file mode 100644 index 0000000000000..0307ff623c408 --- /dev/null +++ b/server/src/test/java/org/opensearch/wlm/listeners/QueryGroupRequestOperationListenerTests.java @@ -0,0 +1,187 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.wlm.listeners; + +import org.opensearch.common.util.concurrent.ThreadContext; +import org.opensearch.core.concurrency.OpenSearchRejectedExecutionException; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.wlm.QueryGroupService; +import org.opensearch.wlm.QueryGroupTask; +import org.opensearch.wlm.ResourceType; +import org.opensearch.wlm.stats.QueryGroupState; +import org.opensearch.wlm.stats.QueryGroupStats; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.mockito.Mockito.doNothing; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; + +public class QueryGroupRequestOperationListenerTests extends OpenSearchTestCase { + public static final int ITERATIONS = 20; + ThreadPool testThreadPool; + QueryGroupService queryGroupService; + + Map queryGroupStateMap; + String testQueryGroupId; + QueryGroupRequestOperationListener sut; + + public void setUp() throws Exception { + super.setUp(); + queryGroupStateMap = new HashMap<>(); + testQueryGroupId = "safjgagnakg-3r3fads"; + testThreadPool = new TestThreadPool("RejectionTestThreadPool"); + queryGroupService = mock(QueryGroupService.class); + sut = new QueryGroupRequestOperationListener(queryGroupService, testThreadPool); + } + + public void tearDown() throws Exception { + super.tearDown(); + testThreadPool.shutdown(); + } + + public void testRejectionCase() { + final String testQueryGroupId = "asdgasgkajgkw3141_3rt4t"; + testThreadPool.getThreadContext().putHeader(QueryGroupTask.QUERY_GROUP_ID_HEADER, testQueryGroupId); + doThrow(OpenSearchRejectedExecutionException.class).when(queryGroupService).rejectIfNeeded(testQueryGroupId); + assertThrows(OpenSearchRejectedExecutionException.class, () -> sut.onRequestStart(null)); + } + + public void testNonRejectionCase() { + final String testQueryGroupId = "asdgasgkajgkw3141_3rt4t"; + testThreadPool.getThreadContext().putHeader(QueryGroupTask.QUERY_GROUP_ID_HEADER, testQueryGroupId); + doNothing().when(queryGroupService).rejectIfNeeded(testQueryGroupId); + + sut.onRequestStart(null); + } + + public void testValidQueryGroupRequestFailure() throws IOException { + + QueryGroupStats expectedStats = new QueryGroupStats( + Map.of( + testQueryGroupId, + new QueryGroupStats.QueryGroupStatsHolder( + 0, + 0, + 1, + 0, + Map.of( + ResourceType.CPU, + 
new QueryGroupStats.ResourceStats(0, 0, 0), + ResourceType.MEMORY, + new QueryGroupStats.ResourceStats(0, 0, 0) + ) + ) + ) + ); + + assertSuccess(testQueryGroupId, queryGroupStateMap, expectedStats, testQueryGroupId); + } + + public void testMultiThreadedValidQueryGroupRequestFailures() { + + queryGroupStateMap.put(testQueryGroupId, new QueryGroupState()); + + queryGroupService = new QueryGroupService(queryGroupStateMap); + + sut = new QueryGroupRequestOperationListener(queryGroupService, testThreadPool); + + List threads = new ArrayList<>(); + for (int i = 0; i < ITERATIONS; i++) { + threads.add(new Thread(() -> { + try (ThreadContext.StoredContext currentContext = testThreadPool.getThreadContext().stashContext()) { + testThreadPool.getThreadContext().putHeader(QueryGroupTask.QUERY_GROUP_ID_HEADER, testQueryGroupId); + sut.onRequestFailure(null, null); + } + })); + } + + threads.forEach(Thread::start); + threads.forEach(th -> { + try { + th.join(); + } catch (InterruptedException ignored) { + + } + }); + + QueryGroupStats actualStats = queryGroupService.nodeStats(); + + QueryGroupStats expectedStats = new QueryGroupStats( + Map.of( + testQueryGroupId, + new QueryGroupStats.QueryGroupStatsHolder( + 0, + 0, + ITERATIONS, + 0, + Map.of( + ResourceType.CPU, + new QueryGroupStats.ResourceStats(0, 0, 0), + ResourceType.MEMORY, + new QueryGroupStats.ResourceStats(0, 0, 0) + ) + ) + ) + ); + + assertEquals(expectedStats, actualStats); + } + + public void testInvalidQueryGroupFailure() throws IOException { + QueryGroupStats expectedStats = new QueryGroupStats( + Map.of( + testQueryGroupId, + new QueryGroupStats.QueryGroupStatsHolder( + 0, + 0, + 0, + 0, + Map.of( + ResourceType.CPU, + new QueryGroupStats.ResourceStats(0, 0, 0), + ResourceType.MEMORY, + new QueryGroupStats.ResourceStats(0, 0, 0) + ) + ) + ) + ); + + assertSuccess(testQueryGroupId, queryGroupStateMap, expectedStats, "dummy-invalid-qg-id"); + + } + + private void assertSuccess( + String testQueryGroupId, + Map queryGroupStateMap, + QueryGroupStats expectedStats, + String threadContextQG_Id + ) { + + try (ThreadContext.StoredContext currentContext = testThreadPool.getThreadContext().stashContext()) { + testThreadPool.getThreadContext().putHeader(QueryGroupTask.QUERY_GROUP_ID_HEADER, threadContextQG_Id); + queryGroupStateMap.put(testQueryGroupId, new QueryGroupState()); + + queryGroupService = new QueryGroupService(queryGroupStateMap); + + sut = new QueryGroupRequestOperationListener(queryGroupService, testThreadPool); + sut.onRequestFailure(null, null); + + QueryGroupStats actualStats = queryGroupService.nodeStats(); + assertEquals(expectedStats, actualStats); + } + + } +} diff --git a/server/src/test/java/org/opensearch/wlm/listeners/QueryGroupRequestRejectionOperationListenerTests.java b/server/src/test/java/org/opensearch/wlm/listeners/QueryGroupRequestRejectionOperationListenerTests.java deleted file mode 100644 index 19e82aca26153..0000000000000 --- a/server/src/test/java/org/opensearch/wlm/listeners/QueryGroupRequestRejectionOperationListenerTests.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ - -package org.opensearch.wlm.listeners; - -import org.opensearch.core.concurrency.OpenSearchRejectedExecutionException; -import org.opensearch.test.OpenSearchTestCase; -import org.opensearch.threadpool.TestThreadPool; -import org.opensearch.threadpool.ThreadPool; -import org.opensearch.wlm.QueryGroupService; -import org.opensearch.wlm.QueryGroupTask; - -import static org.mockito.Mockito.doNothing; -import static org.mockito.Mockito.doThrow; -import static org.mockito.Mockito.mock; - -public class QueryGroupRequestRejectionOperationListenerTests extends OpenSearchTestCase { - ThreadPool testThreadPool; - QueryGroupService queryGroupService; - QueryGroupRequestRejectionOperationListener sut; - - public void setUp() throws Exception { - super.setUp(); - testThreadPool = new TestThreadPool("RejectionTestThreadPool"); - queryGroupService = mock(QueryGroupService.class); - sut = new QueryGroupRequestRejectionOperationListener(queryGroupService, testThreadPool); - } - - public void tearDown() throws Exception { - super.tearDown(); - testThreadPool.shutdown(); - } - - public void testRejectionCase() { - final String testQueryGroupId = "asdgasgkajgkw3141_3rt4t"; - testThreadPool.getThreadContext().putHeader(QueryGroupTask.QUERY_GROUP_ID_HEADER, testQueryGroupId); - doThrow(OpenSearchRejectedExecutionException.class).when(queryGroupService).rejectIfNeeded(testQueryGroupId); - assertThrows(OpenSearchRejectedExecutionException.class, () -> sut.onRequestStart(null)); - } - - public void testNonRejectionCase() { - final String testQueryGroupId = "asdgasgkajgkw3141_3rt4t"; - testThreadPool.getThreadContext().putHeader(QueryGroupTask.QUERY_GROUP_ID_HEADER, testQueryGroupId); - doNothing().when(queryGroupService).rejectIfNeeded(testQueryGroupId); - - sut.onRequestStart(null); - } -} diff --git a/server/src/test/java/org/opensearch/wlm/tracker/ResourceUsageCalculatorTests.java b/server/src/test/java/org/opensearch/wlm/tracker/ResourceUsageCalculatorTests.java new file mode 100644 index 0000000000000..21d9717a1aaca --- /dev/null +++ b/server/src/test/java/org/opensearch/wlm/tracker/ResourceUsageCalculatorTests.java @@ -0,0 +1,70 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.wlm.tracker; + +import org.opensearch.core.tasks.resourcetracker.ResourceStats; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.wlm.QueryGroupTask; +import org.opensearch.wlm.ResourceType; +import org.opensearch.wlm.tracker.ResourceUsageCalculatorTrackerServiceTests.TestClock; + +import java.util.List; + +import static org.opensearch.wlm.cancellation.QueryGroupTaskCancellationService.MIN_VALUE; +import static org.opensearch.wlm.tracker.CpuUsageCalculator.PROCESSOR_COUNT; +import static org.opensearch.wlm.tracker.MemoryUsageCalculator.HEAP_SIZE_BYTES; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class ResourceUsageCalculatorTests extends OpenSearchTestCase { + + public void testQueryGroupCpuUsage() { + TestClock clock = new TestClock(); + long fastForwardTime = PROCESSOR_COUNT * 200L; + clock.fastForwardBy(fastForwardTime); + + double expectedQueryGroupCpuUsage = 1.0 / PROCESSOR_COUNT; + + QueryGroupTask mockTask = createMockTaskWithResourceStats(QueryGroupTask.class, fastForwardTime, 200, 0, 123); + when(mockTask.getElapsedTime()).thenReturn(fastForwardTime); + double actualUsage = ResourceType.CPU.getResourceUsageCalculator().calculateResourceUsage(List.of(mockTask)); + assertEquals(expectedQueryGroupCpuUsage, actualUsage, MIN_VALUE); + + double taskResourceUsage = ResourceType.CPU.getResourceUsageCalculator().calculateTaskResourceUsage(mockTask); + assertEquals(1.0, taskResourceUsage, MIN_VALUE); + } + + public void testQueryGroupMemoryUsage() { + QueryGroupTask mockTask = createMockTaskWithResourceStats(QueryGroupTask.class, 100, 200, 0, 123); + double actualMemoryUsage = ResourceType.MEMORY.getResourceUsageCalculator().calculateResourceUsage(List.of(mockTask)); + double expectedMemoryUsage = 200.0 / HEAP_SIZE_BYTES; + + assertEquals(expectedMemoryUsage, actualMemoryUsage, MIN_VALUE); + assertEquals( + 200.0 / HEAP_SIZE_BYTES, + ResourceType.MEMORY.getResourceUsageCalculator().calculateTaskResourceUsage(mockTask), + MIN_VALUE + ); + } + + public static T createMockTaskWithResourceStats( + Class type, + long cpuUsage, + long heapUsage, + long startTimeNanos, + long taskId + ) { + T task = mock(type); + when(task.getTotalResourceUtilization(ResourceStats.CPU)).thenReturn(cpuUsage); + when(task.getTotalResourceUtilization(ResourceStats.MEMORY)).thenReturn(heapUsage); + when(task.getStartTimeNanos()).thenReturn(startTimeNanos); + when(task.getId()).thenReturn(taskId); + return task; + } +} diff --git a/server/src/test/java/org/opensearch/wlm/tracker/QueryGroupResourceUsageTrackerServiceTests.java b/server/src/test/java/org/opensearch/wlm/tracker/ResourceUsageCalculatorTrackerServiceTests.java similarity index 69% rename from server/src/test/java/org/opensearch/wlm/tracker/QueryGroupResourceUsageTrackerServiceTests.java rename to server/src/test/java/org/opensearch/wlm/tracker/ResourceUsageCalculatorTrackerServiceTests.java index ca2891cb532f2..fe72bd6e710c8 100644 --- a/server/src/test/java/org/opensearch/wlm/tracker/QueryGroupResourceUsageTrackerServiceTests.java +++ b/server/src/test/java/org/opensearch/wlm/tracker/ResourceUsageCalculatorTrackerServiceTests.java @@ -9,10 +9,8 @@ package org.opensearch.wlm.tracker; import org.opensearch.action.search.SearchShardTask; -import org.opensearch.action.search.SearchTask; import org.opensearch.common.util.concurrent.ThreadContext; import org.opensearch.core.tasks.resourcetracker.ResourceStats; -import org.opensearch.tasks.CancellableTask; import 
org.opensearch.tasks.Task; import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.test.OpenSearchTestCase; @@ -21,6 +19,7 @@ import org.opensearch.wlm.QueryGroupLevelResourceUsageView; import org.opensearch.wlm.QueryGroupTask; import org.opensearch.wlm.ResourceType; +import org.opensearch.wlm.WorkloadManagementSettings; import org.junit.After; import org.junit.Before; @@ -31,18 +30,38 @@ import java.util.concurrent.atomic.AtomicBoolean; import static org.opensearch.wlm.QueryGroupTask.QUERY_GROUP_ID_HEADER; +import static org.opensearch.wlm.cancellation.QueryGroupTaskCancellationService.MIN_VALUE; +import static org.opensearch.wlm.tracker.CpuUsageCalculator.PROCESSOR_COUNT; +import static org.opensearch.wlm.tracker.MemoryUsageCalculator.HEAP_SIZE_BYTES; import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; -public class QueryGroupResourceUsageTrackerServiceTests extends OpenSearchTestCase { +public class ResourceUsageCalculatorTrackerServiceTests extends OpenSearchTestCase { TestThreadPool threadPool; TaskResourceTrackingService mockTaskResourceTrackingService; QueryGroupResourceUsageTrackerService queryGroupResourceUsageTrackerService; + WorkloadManagementSettings settings; + + public static class TestClock { + long time; + + public void fastForwardBy(long nanos) { + time += nanos; + } + + public long getTime() { + return time; + } + } + + TestClock clock; @Before public void setup() { + clock = new TestClock(); + settings = mock(WorkloadManagementSettings.class); threadPool = new TestThreadPool(getTestName()); mockTaskResourceTrackingService = mock(TaskResourceTrackingService.class); queryGroupResourceUsageTrackerService = new QueryGroupResourceUsageTrackerService(mockTaskResourceTrackingService); @@ -55,16 +74,24 @@ public void cleanup() { public void testConstructQueryGroupLevelViews_CreatesQueryGroupLevelUsageView_WhenTasksArePresent() { List queryGroupIds = List.of("queryGroup1", "queryGroup2", "queryGroup3"); + clock.fastForwardBy(2000); Map activeSearchShardTasks = createActiveSearchShardTasks(queryGroupIds); when(mockTaskResourceTrackingService.getResourceAwareTasks()).thenReturn(activeSearchShardTasks); + Map stringQueryGroupLevelResourceUsageViewMap = queryGroupResourceUsageTrackerService .constructQueryGroupLevelUsageViews(); for (String queryGroupId : queryGroupIds) { assertEquals( - 400, - (long) stringQueryGroupLevelResourceUsageViewMap.get(queryGroupId).getResourceUsageData().get(ResourceType.MEMORY) + (400 * 1.0f) / HEAP_SIZE_BYTES, + stringQueryGroupLevelResourceUsageViewMap.get(queryGroupId).getResourceUsageData().get(ResourceType.MEMORY), + MIN_VALUE + ); + assertEquals( + (200 * 1.0f) / (PROCESSOR_COUNT * 2000), + stringQueryGroupLevelResourceUsageViewMap.get(queryGroupId).getResourceUsageData().get(ResourceType.CPU), + MIN_VALUE ); assertEquals(2, stringQueryGroupLevelResourceUsageViewMap.get(queryGroupId).getActiveTasks().size()); } @@ -78,14 +105,23 @@ public void testConstructQueryGroupLevelViews_CreatesQueryGroupLevelUsageView_Wh public void testConstructQueryGroupLevelUsageViews_WithTasksHavingDifferentResourceUsage() { Map activeSearchShardTasks = new HashMap<>(); + clock.fastForwardBy(2000); activeSearchShardTasks.put(1L, createMockTask(SearchShardTask.class, 100, 200, "queryGroup1")); activeSearchShardTasks.put(2L, createMockTask(SearchShardTask.class, 200, 400, "queryGroup1")); 
when(mockTaskResourceTrackingService.getResourceAwareTasks()).thenReturn(activeSearchShardTasks); - Map queryGroupViews = queryGroupResourceUsageTrackerService .constructQueryGroupLevelUsageViews(); - assertEquals(600, (long) queryGroupViews.get("queryGroup1").getResourceUsageData().get(ResourceType.MEMORY)); + assertEquals( + (double) 600 / HEAP_SIZE_BYTES, + queryGroupViews.get("queryGroup1").getResourceUsageData().get(ResourceType.MEMORY), + MIN_VALUE + ); + assertEquals( + ((double) 300) / (PROCESSOR_COUNT * 2000), + queryGroupViews.get("queryGroup1").getResourceUsageData().get(ResourceType.CPU), + MIN_VALUE + ); assertEquals(2, queryGroupViews.get("queryGroup1").getActiveTasks().size()); } @@ -100,19 +136,16 @@ private Map createActiveSearchShardTasks(List queryGroupIds) return activeSearchShardTasks; } - private T createMockTask(Class type, long cpuUsage, long heapUsage, String queryGroupId) { + private T createMockTask(Class type, long cpuUsage, long heapUsage, String queryGroupId) { T task = mock(type); - if (task instanceof SearchTask || task instanceof SearchShardTask) { - // Stash the current thread context to ensure that any existing context is preserved and restored after setting the query group - // ID. - try (ThreadContext.StoredContext ignore = threadPool.getThreadContext().stashContext()) { - threadPool.getThreadContext().putHeader(QUERY_GROUP_ID_HEADER, queryGroupId); - ((QueryGroupTask) task).setQueryGroupId(threadPool.getThreadContext()); - } + try (ThreadContext.StoredContext ignore = threadPool.getThreadContext().stashContext()) { + threadPool.getThreadContext().putHeader(QUERY_GROUP_ID_HEADER, queryGroupId); + task.setQueryGroupId(threadPool.getThreadContext()); } when(task.getTotalResourceUtilization(ResourceStats.CPU)).thenReturn(cpuUsage); when(task.getTotalResourceUtilization(ResourceStats.MEMORY)).thenReturn(heapUsage); when(task.getStartTimeNanos()).thenReturn((long) 0); + when(task.getElapsedTime()).thenReturn(clock.getTime()); AtomicBoolean isCancelled = new AtomicBoolean(false); doAnswer(invocation -> { diff --git a/test/framework/src/main/java/org/opensearch/action/support/replication/ClusterStateCreationUtils.java b/test/framework/src/main/java/org/opensearch/action/support/replication/ClusterStateCreationUtils.java index 182b2c9288a3d..0c4e871b1330c 100644 --- a/test/framework/src/main/java/org/opensearch/action/support/replication/ClusterStateCreationUtils.java +++ b/test/framework/src/main/java/org/opensearch/action/support/replication/ClusterStateCreationUtils.java @@ -35,6 +35,7 @@ import org.opensearch.Version; import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.metadata.Context; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; @@ -62,6 +63,7 @@ import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_CREATION_DATE; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_VERSION_CREATED; import static org.opensearch.test.OpenSearchTestCase.randomFrom; @@ -324,7 +326,18 @@ public static ClusterState stateWithAssignedPrimariesAndOneReplica(String index, * Creates cluster state with several indexes, 
shards and replicas and all shards STARTED. */ public static ClusterState stateWithAssignedPrimariesAndReplicas(String[] indices, int numberOfShards, int numberOfReplicas) { + return stateWithAssignedPrimariesAndReplicas(indices, numberOfShards, numberOfReplicas, 0); + } + /** + * Creates cluster state with several indexes, shards and replicas and all shards STARTED. + */ + public static ClusterState stateWithAssignedPrimariesAndReplicas( + String[] indices, + int numberOfShards, + int numberOfReplicas, + int numberOfSearchReplicas + ) { int numberOfDataNodes = numberOfReplicas + 1; DiscoveryNodes.Builder discoBuilder = DiscoveryNodes.builder(); for (int i = 0; i < numberOfDataNodes + 1; i++) { @@ -346,6 +359,7 @@ public static ClusterState stateWithAssignedPrimariesAndReplicas(String[] indice .put(SETTING_VERSION_CREATED, Version.CURRENT) .put(SETTING_NUMBER_OF_SHARDS, numberOfShards) .put(SETTING_NUMBER_OF_REPLICAS, numberOfReplicas) + .put(SETTING_NUMBER_OF_SEARCH_REPLICAS, numberOfSearchReplicas) .put(SETTING_CREATION_DATE, System.currentTimeMillis()) ) .build(); @@ -362,6 +376,19 @@ public static ClusterState stateWithAssignedPrimariesAndReplicas(String[] indice TestShardRouting.newShardRouting(index, i, newNode(replica + 1).getId(), null, false, ShardRoutingState.STARTED) ); } + for (int replica = numberOfReplicas; replica < numberOfSearchReplicas + numberOfReplicas; replica++) { + indexShardRoutingBuilder.addShard( + TestShardRouting.newShardRouting( + new ShardId(index, IndexMetadata.INDEX_UUID_NA_VALUE, i), + newNode(replica + 1).getId(), + null, + false, + true, + ShardRoutingState.STARTED, + null + ) + ); + } indexRoutingTableBuilder.addIndexShard(indexShardRoutingBuilder.build()); } routingTableBuilder.add(indexRoutingTableBuilder.build()); @@ -371,6 +398,44 @@ public static ClusterState stateWithAssignedPrimariesAndReplicas(String[] indice return state.build(); } + public static ClusterState stateWithContext(String index, final int numberOfNodes, final int numberOfPrimaries, Context context) { + DiscoveryNodes.Builder discoBuilder = DiscoveryNodes.builder(); + Set nodes = new HashSet<>(); + for (int i = 0; i < numberOfNodes; i++) { + final DiscoveryNode node = newNode(i); + discoBuilder = discoBuilder.add(node); + nodes.add(node.getId()); + } + discoBuilder.localNodeId(newNode(0).getId()); + discoBuilder.clusterManagerNodeId(randomFrom(nodes)); + IndexMetadata indexMetadata = IndexMetadata.builder(index) + .settings( + Settings.builder() + .put(SETTING_VERSION_CREATED, Version.CURRENT) + .put(SETTING_NUMBER_OF_SHARDS, numberOfPrimaries) + .put(SETTING_NUMBER_OF_REPLICAS, 0) + .put(SETTING_CREATION_DATE, System.currentTimeMillis()) + ) + .context(context) + .build(); + + IndexRoutingTable.Builder indexRoutingTable = IndexRoutingTable.builder(indexMetadata.getIndex()); + for (int i = 0; i < numberOfPrimaries; i++) { + ShardId shardId = new ShardId(indexMetadata.getIndex(), i); + IndexShardRoutingTable.Builder indexShardRoutingBuilder = new IndexShardRoutingTable.Builder(shardId); + indexShardRoutingBuilder.addShard( + TestShardRouting.newShardRouting(shardId, randomFrom(nodes), true, ShardRoutingState.STARTED) + ); + indexRoutingTable.addIndexShard(indexShardRoutingBuilder.build()); + } + + ClusterState.Builder state = ClusterState.builder(new ClusterName("test")); + state.nodes(discoBuilder); + state.metadata(Metadata.builder().put(indexMetadata, false).generateClusterUuidIfNeeded()); + state.routingTable(RoutingTable.builder().add(indexRoutingTable).build()); + return 
state.build(); + } + /** * Creates cluster state with and index that has one shard and as many replicas as numberOfReplicas. * Primary will be STARTED in cluster state but replicas will be one of UNASSIGNED, INITIALIZING, STARTED or RELOCATING. diff --git a/test/framework/src/main/java/org/opensearch/cluster/OpenSearchAllocationTestCase.java b/test/framework/src/main/java/org/opensearch/cluster/OpenSearchAllocationTestCase.java index f54ba36203684..bc42993ac2096 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/OpenSearchAllocationTestCase.java +++ b/test/framework/src/main/java/org/opensearch/cluster/OpenSearchAllocationTestCase.java @@ -180,6 +180,19 @@ protected static DiscoveryNode newNode(String nodeId, Version version) { protected static ClusterState startRandomInitializingShard(ClusterState clusterState, AllocationService strategy) { List initializingShards = clusterState.getRoutingNodes().shardsWithState(INITIALIZING); + return startInitialisingShardsAndReroute(strategy, clusterState, initializingShards); + } + + protected static ClusterState startRandomInitializingShard(ClusterState clusterState, AllocationService strategy, String index) { + List initializingShards = clusterState.getRoutingNodes().shardsWithState(index, INITIALIZING); + return startInitialisingShardsAndReroute(strategy, clusterState, initializingShards); + } + + private static ClusterState startInitialisingShardsAndReroute( + AllocationService strategy, + ClusterState clusterState, + List initializingShards + ) { if (initializingShards.isEmpty()) { return clusterState; } diff --git a/test/framework/src/main/java/org/opensearch/cluster/routing/TestShardRouting.java b/test/framework/src/main/java/org/opensearch/cluster/routing/TestShardRouting.java index c7c71f0f569e5..9a000a4eeda72 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/routing/TestShardRouting.java +++ b/test/framework/src/main/java/org/opensearch/cluster/routing/TestShardRouting.java @@ -319,4 +319,27 @@ public static RecoverySource randomRecoverySource() { ) ); } + + public static ShardRouting newShardRouting( + ShardId shardId, + String currentNodeId, + String relocatingNodeId, + boolean primary, + boolean searchOnly, + ShardRoutingState state, + UnassignedInfo unassignedInfo + ) { + return new ShardRouting( + shardId, + currentNodeId, + relocatingNodeId, + primary, + searchOnly, + state, + buildRecoveryTarget(primary, state), + unassignedInfo, + buildAllocationId(state), + -1 + ); + } } diff --git a/test/framework/src/main/java/org/opensearch/index/mapper/FieldTypeTestCase.java b/test/framework/src/main/java/org/opensearch/index/mapper/FieldTypeTestCase.java index 7ed0da8509fab..5d85844f3218d 100644 --- a/test/framework/src/main/java/org/opensearch/index/mapper/FieldTypeTestCase.java +++ b/test/framework/src/main/java/org/opensearch/index/mapper/FieldTypeTestCase.java @@ -46,16 +46,18 @@ /** Base test case for subclasses of MappedFieldType */ public abstract class FieldTypeTestCase extends OpenSearchTestCase { - public static final QueryShardContext MOCK_QSC = createMockQueryShardContext(true); - public static final QueryShardContext MOCK_QSC_DISALLOW_EXPENSIVE = createMockQueryShardContext(false); + public static final QueryShardContext MOCK_QSC = createMockQueryShardContext(true, false); + public static final QueryShardContext MOCK_QSC_DISALLOW_EXPENSIVE = createMockQueryShardContext(false, false); + public static final QueryShardContext MOCK_QSC_ENABLE_INDEX_DOC_VALUES = createMockQueryShardContext(true, true); 
protected QueryShardContext randomMockShardContext() { return randomFrom(MOCK_QSC, MOCK_QSC_DISALLOW_EXPENSIVE); } - static QueryShardContext createMockQueryShardContext(boolean allowExpensiveQueries) { + static QueryShardContext createMockQueryShardContext(boolean allowExpensiveQueries, boolean keywordIndexOrDocValuesEnabled) { QueryShardContext queryShardContext = mock(QueryShardContext.class); when(queryShardContext.allowExpensiveQueries()).thenReturn(allowExpensiveQueries); + when(queryShardContext.keywordFieldIndexOrDocValuesEnabled()).thenReturn(keywordIndexOrDocValuesEnabled); return queryShardContext; } diff --git a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestUtils.java b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestUtils.java index abf8f2a4da6c1..50a1751546293 100644 --- a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestUtils.java +++ b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestUtils.java @@ -21,6 +21,7 @@ import java.util.Map; public class IndexShardTestUtils { + public static final String MOCK_STATE_REPO_NAME = "state-test-repo"; public static final String MOCK_SEGMENT_REPO_NAME = "segment-test-repo"; public static final String MOCK_TLOG_REPO_NAME = "tlog-test-repo"; @@ -37,6 +38,7 @@ public static DiscoveryNode getFakeDiscoNode(String id) { public static DiscoveryNode getFakeRemoteEnabledNode(String id) { Map remoteNodeAttributes = new HashMap(); + remoteNodeAttributes.put(RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, MOCK_STATE_REPO_NAME); remoteNodeAttributes.put(RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, MOCK_SEGMENT_REPO_NAME); remoteNodeAttributes.put(RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY, MOCK_TLOG_REPO_NAME); return new DiscoveryNode( diff --git a/test/framework/src/main/java/org/opensearch/index/shard/RestoreOnlyRepository.java b/test/framework/src/main/java/org/opensearch/index/shard/RestoreOnlyRepository.java index 1ca1a6969ab2d..846f54bd13b40 100644 --- a/test/framework/src/main/java/org/opensearch/index/shard/RestoreOnlyRepository.java +++ b/test/framework/src/main/java/org/opensearch/index/shard/RestoreOnlyRepository.java @@ -118,6 +118,19 @@ public void getRepositoryData(ActionListener listener) { ); } + @Override + public void finalizeSnapshot( + ShardGenerations shardGenerations, + long repositoryStateId, + Metadata clusterMetadata, + SnapshotInfo snapshotInfo, + Version repositoryMetaVersion, + Function stateTransformer, + ActionListener listener + ) { + listener.onResponse(null); + } + @Override public void finalizeSnapshot( ShardGenerations shardGenerations, diff --git a/test/framework/src/main/java/org/opensearch/node/MockNode.java b/test/framework/src/main/java/org/opensearch/node/MockNode.java index ecaee1ccc59b8..97c06962ca2e7 100644 --- a/test/framework/src/main/java/org/opensearch/node/MockNode.java +++ b/test/framework/src/main/java/org/opensearch/node/MockNode.java @@ -57,6 +57,7 @@ import org.opensearch.script.ScriptService; import org.opensearch.search.MockSearchService; import org.opensearch.search.SearchService; +import org.opensearch.search.deciders.ConcurrentSearchRequestDecider; import org.opensearch.search.fetch.FetchPhase; import org.opensearch.search.query.QueryPhase; import org.opensearch.tasks.TaskResourceTrackingService; @@ -156,7 +157,8 @@ protected SearchService newSearchService( ResponseCollectorService responseCollectorService, 
CircuitBreakerService circuitBreakerService, Executor indexSearcherExecutor, - TaskResourceTrackingService taskResourceTrackingService + TaskResourceTrackingService taskResourceTrackingService, + Collection concurrentSearchDeciderFactories ) { if (getPluginsService().filterPlugins(MockSearchService.TestPlugin.class).isEmpty()) { return super.newSearchService( @@ -170,7 +172,8 @@ protected SearchService newSearchService( responseCollectorService, circuitBreakerService, indexSearcherExecutor, - taskResourceTrackingService + taskResourceTrackingService, + concurrentSearchDeciderFactories ); } return new MockSearchService( diff --git a/test/framework/src/main/java/org/opensearch/repositories/blobstore/BlobStoreTestUtil.java b/test/framework/src/main/java/org/opensearch/repositories/blobstore/BlobStoreTestUtil.java index 187c30be0044e..027b1bef84e7f 100644 --- a/test/framework/src/main/java/org/opensearch/repositories/blobstore/BlobStoreTestUtil.java +++ b/test/framework/src/main/java/org/opensearch/repositories/blobstore/BlobStoreTestUtil.java @@ -51,6 +51,8 @@ import org.opensearch.common.blobstore.BlobMetadata; import org.opensearch.common.blobstore.BlobPath; import org.opensearch.common.blobstore.BlobStore; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.common.xcontent.LoggingDeprecationHandler; import org.opensearch.core.common.Strings; @@ -142,7 +144,7 @@ public static void assertConsistency(BlobStoreRepository repository, Executor ex } assertIndexUUIDs(repository, repositoryData); assertSnapshotUUIDs(repository, repositoryData); - assertShardIndexGenerations(blobContainer, repositoryData); + assertShardIndexGenerations(repository, repositoryData); return null; } catch (AssertionError e) { return e; @@ -166,14 +168,12 @@ private static void assertIndexGenerations(BlobContainer repoRoot, long latestGe assertTrue(indexGenerations.length <= 2); } - private static void assertShardIndexGenerations(BlobContainer repoRoot, RepositoryData repositoryData) throws IOException { + private static void assertShardIndexGenerations(BlobStoreRepository repository, RepositoryData repositoryData) throws IOException { final ShardGenerations shardGenerations = repositoryData.shardGenerations(); - final BlobContainer indicesContainer = repoRoot.children().get("indices"); for (IndexId index : shardGenerations.indices()) { final List gens = shardGenerations.getGens(index); if (gens.isEmpty() == false) { - final BlobContainer indexContainer = indicesContainer.children().get(index.getId()); - final Map shardContainers = indexContainer.children(); + final Map shardContainers = getShardContainers(index, repository, repositoryData); for (int i = 0; i < gens.size(); i++) { final String generation = gens.get(i); assertThat(generation, not(ShardGenerations.DELETED_SHARD_GEN)); @@ -190,6 +190,20 @@ private static void assertShardIndexGenerations(BlobContainer repoRoot, Reposito } } + private static Map getShardContainers( + IndexId indexId, + BlobStoreRepository repository, + RepositoryData repositoryData + ) { + final Map shardContainers = new HashMap<>(); + int shardCount = repositoryData.shardGenerations().getGens(indexId).size(); + for (int i = 0; i < shardCount; i++) { + final BlobContainer shardContainer = repository.shardContainer(indexId, i); + shardContainers.put(String.valueOf(i), shardContainer); + } + return shardContainers; + } + private static void assertIndexUUIDs(BlobStoreRepository 
repository, RepositoryData repositoryData) throws IOException { final List expectedIndexUUIDs = repositoryData.getIndices() .values() @@ -451,6 +465,9 @@ private static ClusterService mockClusterService(ClusterState initialState) { return null; }).when(clusterService).addStateApplier(any(ClusterStateApplier.class)); when(clusterApplierService.threadPool()).thenReturn(threadPool); + when(clusterService.getSettings()).thenReturn(Settings.EMPTY); + ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + when(clusterService.getClusterSettings()).thenReturn(clusterSettings); return clusterService; } diff --git a/test/framework/src/main/java/org/opensearch/repositories/blobstore/OpenSearchBlobStoreRepositoryIntegTestCase.java b/test/framework/src/main/java/org/opensearch/repositories/blobstore/OpenSearchBlobStoreRepositoryIntegTestCase.java index 789858ca38fad..4e0c9270cd627 100644 --- a/test/framework/src/main/java/org/opensearch/repositories/blobstore/OpenSearchBlobStoreRepositoryIntegTestCase.java +++ b/test/framework/src/main/java/org/opensearch/repositories/blobstore/OpenSearchBlobStoreRepositoryIntegTestCase.java @@ -110,9 +110,7 @@ protected final String createRepository(final String name, final Settings settin final boolean verify = randomBoolean(); logger.debug("--> creating repository [name: {}, verify: {}, settings: {}]", name, verify, settings); - assertAcked( - client().admin().cluster().preparePutRepository(name).setType(repositoryType()).setVerify(verify).setSettings(settings) - ); + OpenSearchIntegTestCase.putRepository(client().admin().cluster(), name, repositoryType(), verify, Settings.builder().put(settings)); internalCluster().getDataOrClusterManagerNodeInstances(RepositoriesService.class).forEach(repositories -> { assertThat(repositories.repository(name), notNullValue()); diff --git a/test/framework/src/main/java/org/opensearch/search/MockSearchService.java b/test/framework/src/main/java/org/opensearch/search/MockSearchService.java index 6c9ace06c8219..28e202e783c4e 100644 --- a/test/framework/src/main/java/org/opensearch/search/MockSearchService.java +++ b/test/framework/src/main/java/org/opensearch/search/MockSearchService.java @@ -45,6 +45,7 @@ import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.threadpool.ThreadPool; +import java.util.Collections; import java.util.HashMap; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; @@ -111,7 +112,8 @@ public MockSearchService( null, circuitBreakerService, indexSearcherExecutor, - taskResourceTrackingService + taskResourceTrackingService, + Collections.emptyList() ); } diff --git a/test/framework/src/main/java/org/opensearch/snapshots/AbstractSnapshotIntegTestCase.java b/test/framework/src/main/java/org/opensearch/snapshots/AbstractSnapshotIntegTestCase.java index ec9cd5b64353e..a8bb10fe20752 100644 --- a/test/framework/src/main/java/org/opensearch/snapshots/AbstractSnapshotIntegTestCase.java +++ b/test/framework/src/main/java/org/opensearch/snapshots/AbstractSnapshotIntegTestCase.java @@ -66,6 +66,7 @@ import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.index.IndexModule; import org.opensearch.index.store.RemoteBufferedOutputDirectory; +import org.opensearch.indices.RemoteStoreSettings; import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.node.NodeClosedException; import org.opensearch.plugins.Plugin; @@ -104,7 +105,6 @@ import static 
org.opensearch.index.remote.RemoteStoreEnums.DataCategory.SEGMENTS; import static org.opensearch.index.remote.RemoteStoreEnums.DataType.LOCK_FILES; -import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; @@ -382,16 +382,6 @@ public void unblockNode(final String repository, final String node) { ((MockRepository) internalCluster().getInstance(RepositoriesService.class, node).repository(repository)).unblock(); } - protected void createRepository(String repoName, String type, Settings.Builder settings) { - logger.info("--> creating repository [{}] [{}]", repoName, type); - assertAcked(clusterAdmin().preparePutRepository(repoName).setType(type).setSettings(settings)); - } - - protected void updateRepository(String repoName, String type, Settings.Builder settings) { - logger.info("--> updating repository [{}] [{}]", repoName, type); - assertAcked(clusterAdmin().preparePutRepository(repoName).setType(type).setSettings(settings)); - } - protected void createRepository(String repoName, String type, Path location) { createRepository(repoName, type, Settings.builder().put("location", location)); } @@ -564,13 +554,15 @@ protected void assertDocCount(String index, long count) { protected String[] getLockFilesInRemoteStore(String remoteStoreIndex, String remoteStoreRepositoryName) throws IOException { final RepositoriesService repositoriesService = internalCluster().getCurrentClusterManagerNodeInstance(RepositoriesService.class); final BlobStoreRepository remoteStoreRepository = (BlobStoreRepository) repositoriesService.repository(remoteStoreRepositoryName); + String segmentsPathFixedPrefix = RemoteStoreSettings.CLUSTER_REMOTE_STORE_SEGMENTS_PATH_PREFIX.get(getNodeSettings()); BlobPath shardLevelBlobPath = getShardLevelBlobPath( client(), remoteStoreIndex, remoteStoreRepository.basePath(), "0", SEGMENTS, - LOCK_FILES + LOCK_FILES, + segmentsPathFixedPrefix ); BlobContainer blobContainer = remoteStoreRepository.blobStore().blobContainer(shardLevelBlobPath); try (RemoteBufferedOutputDirectory lockDirectory = new RemoteBufferedOutputDirectory(blobContainer)) { diff --git a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java index ec88002317284..7adff82e72245 100644 --- a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java +++ b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java @@ -2143,6 +2143,17 @@ public synchronized void fullRestart(RestartCallback callback) throws Exception } } + /** + * Returns the name of all the cluster managers in the cluster + */ + public Set getClusterManagerNames() { + return nodes.entrySet() + .stream() + .filter(entry -> CLUSTER_MANAGER_NODE_PREDICATE.test(entry.getValue())) + .map(entry -> entry.getKey()) + .collect(Collectors.toSet()); + } + /** * Returns the name of the current cluster-manager node in the cluster. 
*/ diff --git a/test/framework/src/main/java/org/opensearch/test/NotEqualMessageBuilder.java b/test/framework/src/main/java/org/opensearch/test/NotEqualMessageBuilder.java index a70e3f15a4bf5..9524b76f255a6 100644 --- a/test/framework/src/main/java/org/opensearch/test/NotEqualMessageBuilder.java +++ b/test/framework/src/main/java/org/opensearch/test/NotEqualMessageBuilder.java @@ -181,18 +181,9 @@ public void compare(String field, boolean hadKey, @Nullable Object actual, Objec field(field, "same [" + expected + "]"); return; } - field( - field, - "expected " - + expected.getClass().getSimpleName() - + " [" - + expected - + "] but was " - + actual.getClass().getSimpleName() - + " [" - + actual - + "]" - ); + String expectedClass = expected == null ? "null object" : expected.getClass().getSimpleName(); + String actualClass = actual == null ? "null object" : actual.getClass().getSimpleName(); + field(field, "expected " + expectedClass + " [" + expected + "] but was " + actualClass + " [" + actual + "]"); } private void indent() { diff --git a/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java b/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java index 911aa92340de6..e474ef202b235 100644 --- a/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java +++ b/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java @@ -48,6 +48,7 @@ import org.opensearch.action.admin.cluster.health.ClusterHealthResponse; import org.opensearch.action.admin.cluster.node.hotthreads.NodeHotThreads; import org.opensearch.action.admin.cluster.node.info.NodesInfoResponse; +import org.opensearch.action.admin.cluster.repositories.put.PutRepositoryRequestBuilder; import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; import org.opensearch.action.admin.cluster.state.ClusterStateResponse; import org.opensearch.action.admin.cluster.tasks.PendingClusterTasksResponse; @@ -70,6 +71,7 @@ import org.opensearch.action.search.SearchResponse; import org.opensearch.action.support.IndicesOptions; import org.opensearch.action.support.WriteRequest; +import org.opensearch.action.support.master.AcknowledgedResponse; import org.opensearch.client.AdminClient; import org.opensearch.client.Client; import org.opensearch.client.ClusterAdminClient; @@ -79,6 +81,7 @@ import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.coordination.OpenSearchNodeCommand; import org.opensearch.cluster.health.ClusterHealthStatus; +import org.opensearch.cluster.metadata.Context; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.routing.IndexRoutingTable; @@ -93,6 +96,7 @@ import org.opensearch.cluster.service.applicationtemplates.TestSystemTemplatesRepositoryPlugin; import org.opensearch.common.Nullable; import org.opensearch.common.Priority; +import org.opensearch.common.blobstore.BlobPath; import org.opensearch.common.collect.Tuple; import org.opensearch.common.concurrent.GatedCloseable; import org.opensearch.common.network.NetworkModule; @@ -137,7 +141,9 @@ import org.opensearch.index.engine.Segment; import org.opensearch.index.mapper.CompletionFieldMapper; import org.opensearch.index.mapper.MockFieldFilterPlugin; +import org.opensearch.index.remote.RemoteStoreEnums; import org.opensearch.index.remote.RemoteStoreEnums.PathType; +import org.opensearch.index.remote.RemoteStorePathStrategy; import org.opensearch.index.shard.IndexShard; import 
org.opensearch.index.store.Store; import org.opensearch.index.translog.Translog; @@ -152,6 +158,7 @@ import org.opensearch.node.remotestore.RemoteStoreNodeService; import org.opensearch.plugins.NetworkPlugin; import org.opensearch.plugins.Plugin; +import org.opensearch.repositories.IndexId; import org.opensearch.repositories.blobstore.BlobStoreRepository; import org.opensearch.repositories.fs.FsRepository; import org.opensearch.repositories.fs.ReloadableFsRepository; @@ -217,6 +224,7 @@ import static org.opensearch.index.IndexSettings.INDEX_DOC_ID_FUZZY_SET_FALSE_POSITIVE_PROBABILITY_SETTING; import static org.opensearch.index.IndexSettings.INDEX_SOFT_DELETES_RETENTION_LEASE_PERIOD_SETTING; import static org.opensearch.index.query.QueryBuilders.matchAllQuery; +import static org.opensearch.index.remote.RemoteStoreEnums.PathHashAlgorithm.FNV_1A_COMPOSITE_1; import static org.opensearch.indices.IndicesService.CLUSTER_REPLICATION_TYPE_SETTING; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX; @@ -391,6 +399,12 @@ public abstract class OpenSearchIntegTestCase extends OpenSearchTestCase { private static Boolean prefixModeVerificationEnable; + private static Boolean translogPathFixedPrefix; + + private static Boolean segmentsPathFixedPrefix; + + private static Boolean snapshotShardPathFixedPrefix; + private Path remoteStoreRepositoryPath; private ReplicationType randomReplicationType; @@ -400,6 +414,9 @@ public abstract class OpenSearchIntegTestCase extends OpenSearchTestCase { @BeforeClass public static void beforeClass() throws Exception { prefixModeVerificationEnable = randomBoolean(); + translogPathFixedPrefix = randomBoolean(); + segmentsPathFixedPrefix = randomBoolean(); + snapshotShardPathFixedPrefix = randomBoolean(); testClusterRule.beforeClass(); } @@ -746,6 +763,13 @@ public final void createIndex(String name, Settings indexSettings) { assertAcked(prepareCreate(name).setSettings(indexSettings)); } + /** + * creates an index with the given setting + */ + public final void createIndex(String name, Context context) { + assertAcked(prepareCreate(name).setContext(context)); + } + /** * Creates a new {@link CreateIndexRequestBuilder} with the settings obtained from {@link #indexSettings()}. 
*/ @@ -2555,6 +2579,91 @@ protected long getLatestSegmentInfoVersion(IndexShard shard) { } } + protected void createRepository(String repoName, String type, Settings.Builder settings, String timeout) { + logger.info("--> creating repository [{}] [{}]", repoName, type); + putRepository(clusterAdmin(), repoName, type, timeout, settings); + } + + protected void createRepository(String repoName, String type, Settings.Builder settings) { + logger.info("--> creating repository [{}] [{}]", repoName, type); + putRepository(clusterAdmin(), repoName, type, null, settings); + } + + protected void updateRepository(String repoName, String type, Settings.Builder settings) { + logger.info("--> updating repository [{}] [{}]", repoName, type); + putRepository(clusterAdmin(), repoName, type, null, settings); + } + + public Settings getNodeSettings() { + InternalTestCluster internalTestCluster = internalCluster(); + ClusterService clusterService = internalTestCluster.getInstance(ClusterService.class, internalTestCluster.getClusterManagerName()); + return clusterService.getSettings(); + } + + public static void putRepository(ClusterAdminClient adminClient, String repoName, String type, Settings.Builder settings) { + assertAcked(putRepositoryRequestBuilder(adminClient, repoName, type, true, settings, null, false)); + } + + public static void putRepository( + ClusterAdminClient adminClient, + String repoName, + String type, + String timeout, + Settings.Builder settings + ) { + assertAcked(putRepositoryRequestBuilder(adminClient, repoName, type, true, settings, timeout, false)); + } + + public static void putRepository( + ClusterAdminClient adminClient, + String repoName, + String type, + boolean verify, + Settings.Builder settings + ) { + assertAcked(putRepositoryRequestBuilder(adminClient, repoName, type, verify, settings, null, false)); + } + + public static void putRepositoryWithNoSettingOverrides( + ClusterAdminClient adminClient, + String repoName, + String type, + boolean verify, + Settings.Builder settings + ) { + assertAcked(putRepositoryRequestBuilder(adminClient, repoName, type, verify, settings, null, true)); + } + + public static void putRepository( + ClusterAdminClient adminClient, + String repoName, + String type, + Settings.Builder settings, + ActionListener listener + ) { + putRepositoryRequestBuilder(adminClient, repoName, type, true, settings, null, false).execute(listener); + } + + public static PutRepositoryRequestBuilder putRepositoryRequestBuilder( + ClusterAdminClient adminClient, + String repoName, + String type, + boolean verify, + Settings.Builder settings, + String timeout, + boolean finalSettings + ) { + PutRepositoryRequestBuilder builder = adminClient.preparePutRepository(repoName).setType(type).setVerify(verify); + if (timeout != null) { + builder.setTimeout(timeout); + } + if (finalSettings == false) { + settings.put(BlobStoreRepository.SHARD_PATH_TYPE.getKey(), randomFrom(PathType.values())); + } + builder.setSettings(settings); + return builder; + } + public static Settings remoteStoreClusterSettings(String name, Path path) { return remoteStoreClusterSettings(name, path, name, path); } @@ -2793,7 +2902,21 @@ private static Settings buildRemoteStoreNodeAttributes( settings.put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.getKey(), randomFrom(PathType.values())); settings.put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_TRANSLOG_METADATA.getKey(), randomBoolean()); settings.put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED.getKey(), randomBoolean()); 
+ settings.put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_SEGMENTS_PATH_PREFIX.getKey(), segmentsPathFixedPrefix ? "a" : ""); + settings.put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_TRANSLOG_PATH_PREFIX.getKey(), translogPathFixedPrefix ? "b" : ""); + settings.put(BlobStoreRepository.SNAPSHOT_SHARD_PATH_PREFIX_SETTING.getKey(), snapshotShardPathFixedPrefix ? "c" : ""); return settings.build(); } + public static String resolvePath(IndexId indexId, String shardId) { + PathType pathType = PathType.fromCode(indexId.getShardPathType()); + RemoteStorePathStrategy.SnapshotShardPathInput shardPathInput = new RemoteStorePathStrategy.SnapshotShardPathInput.Builder() + .basePath(BlobPath.cleanPath()) + .indexUUID(indexId.getId()) + .shardId(shardId) + .build(); + RemoteStoreEnums.PathHashAlgorithm pathHashAlgorithm = pathType != PathType.FIXED ? FNV_1A_COMPOSITE_1 : null; + BlobPath blobPath = pathType.path(shardPathInput, pathHashAlgorithm); + return blobPath.buildAsString(); + } } diff --git a/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java b/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java index 6afc7c23d9e66..b180187303a60 100644 --- a/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java +++ b/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java @@ -815,6 +815,14 @@ public static long randomNonNegativeLong() { return randomLong == Long.MIN_VALUE ? 0 : Math.abs(randomLong); } + /** + * @return an int between 0 and Integer.MAX_VALUE (inclusive) chosen uniformly at random. + */ + public static int randomNonNegativeInt() { + int randomInt = randomInt(); + return randomInt == Integer.MIN_VALUE ? 0 : Math.abs(randomInt); + } + public static float randomFloat() { return random().nextFloat(); } @@ -1809,7 +1817,8 @@ public static BlobPath getShardLevelBlobPath( BlobPath basePath, String shardId, RemoteStoreEnums.DataCategory dataCategory, - RemoteStoreEnums.DataType dataType + RemoteStoreEnums.DataType dataType, + String fixedPrefix ) { String indexUUID = client.admin() .indices() @@ -1834,6 +1843,7 @@ public static BlobPath getShardLevelBlobPath( .shardId(shardId) .dataCategory(dataCategory) .dataType(dataType) + .fixedPrefix(fixedPrefix) .build(); return type.path(pathInput, hashAlgorithm); } diff --git a/test/framework/src/main/java/org/opensearch/test/ParameterizedOpenSearchIntegTestCase.java b/test/framework/src/main/java/org/opensearch/test/ParameterizedOpenSearchIntegTestCase.java index 23316adf6a2d7..ea0331cbfb9a3 100644 --- a/test/framework/src/main/java/org/opensearch/test/ParameterizedOpenSearchIntegTestCase.java +++ b/test/framework/src/main/java/org/opensearch/test/ParameterizedOpenSearchIntegTestCase.java @@ -10,7 +10,10 @@ import org.opensearch.common.settings.Settings; +import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE; import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_ALL; +import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_AUTO; /** * Base class for running the tests with parameterization of the settings. @@ -35,7 +38,9 @@ abstract class ParameterizedOpenSearchIntegTestCase extends OpenSearchIntegTestC // This method shouldn't be called in setupSuiteScopeCluster(). Only call this method inside single test. public void indexRandomForConcurrentSearch(String... 
indices) throws InterruptedException { - if (CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.get(settings)) { + if (CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.get(settings) + || CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.get(settings).equals(CONCURRENT_SEGMENT_SEARCH_MODE_AUTO) + || CLUSTER_CONCURRENT_SEGMENT_SEARCH_MODE.get(settings).equals(CONCURRENT_SEGMENT_SEARCH_MODE_ALL)) { indexRandomForMultipleSlices(indices); } } diff --git a/test/framework/src/main/java/org/opensearch/test/gateway/TestShardBatchGatewayAllocator.java b/test/framework/src/main/java/org/opensearch/test/gateway/TestShardBatchGatewayAllocator.java index 156b1d7c620e6..c2ff228a6bf3a 100644 --- a/test/framework/src/main/java/org/opensearch/test/gateway/TestShardBatchGatewayAllocator.java +++ b/test/framework/src/main/java/org/opensearch/test/gateway/TestShardBatchGatewayAllocator.java @@ -10,6 +10,7 @@ import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.RerouteService; import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision; import org.opensearch.cluster.routing.allocation.RoutingAllocation; @@ -39,12 +40,13 @@ public TestShardBatchGatewayAllocator() { } - public TestShardBatchGatewayAllocator(CountDownLatch latch) { + public TestShardBatchGatewayAllocator(CountDownLatch latch, long maxBatchSize, RerouteService rerouteService) { + super(maxBatchSize, rerouteService); this.latch = latch; } public TestShardBatchGatewayAllocator(long maxBatchSize) { - super(maxBatchSize); + super(maxBatchSize, null); } Map> knownAllocations = new HashMap<>();