diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2dacd128745f5..3fe4c7ab7a4d2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -24,7 +24,10 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 - Added missing no-jdk distributions ([#4722](https://github.com/opensearch-project/OpenSearch/pull/4722))
 - Copy `build.sh` over from opensearch-build ([#4887](https://github.com/opensearch-project/OpenSearch/pull/4887))
 - Update GeoGrid base class access modifier to support extensibility ([#4921](https://github.com/opensearch-project/OpenSearch/pull/4921))
+- Recommission API changes for service layer ([#4320](https://github.com/opensearch-project/OpenSearch/pull/4320))
+- Recommissioning of zone. REST layer support. ([#4604](https://github.com/opensearch-project/OpenSearch/pull/4604))
 - Build no-jdk distributions as part of release build ([#4902](https://github.com/opensearch-project/OpenSearch/pull/4902))
+- Use getParameterCount instead of getParameterTypes ([#4821](https://github.com/opensearch-project/OpenSearch/pull/4821))
 ### Dependencies
 - Bumps `com.diffplug.spotless` from 6.9.1 to 6.10.0
@@ -40,6 +43,8 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 - Update Apache Lucene to 9.4.1 ([#4922](https://github.com/opensearch-project/OpenSearch/pull/4922))
 - Bump `woodstox-core` to 6.4.0 ([#4951](https://github.com/opensearch-project/OpenSearch/pull/4951))
 - Upgrade jetty-http, kotlin-stdlib and snakeyaml ([#4982](https://github.com/opensearch-project/OpenSearch/pull/4982))
+- OpenJDK Update (October 2022 Patch releases) ([#4997](https://github.com/opensearch-project/OpenSearch/pull/4997))
+- Upgrade zookeeper dependency in hdfs-fixture ([#5007](https://github.com/opensearch-project/OpenSearch/pull/5007))
 ### Changed
 - Dependency updates (httpcore, mockito, slf4j, httpasyncclient, commons-codec) ([#4308](https://github.com/opensearch-project/OpenSearch/pull/4308))
@@ -57,6 +62,9 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 - GET api for weighted shard routing([#4275](https://github.com/opensearch-project/OpenSearch/pull/4275/))
 - Delete api for weighted shard routing([#4400](https://github.com/opensearch-project/OpenSearch/pull/4400/))
 - Fix weighted routing metadata deserialization error on process restart ([#4691](https://github.com/opensearch-project/OpenSearch/pull/4691))
+- Add DecommissionService and helper to execute awareness attribute decommissioning ([#4084](https://github.com/opensearch-project/OpenSearch/pull/4084))
+- Add APIs (GET/PUT) to decommission awareness attribute ([#4261](https://github.com/opensearch-project/OpenSearch/pull/4261))
+- Controlling discovery for decommissioned nodes ([#4590](https://github.com/opensearch-project/OpenSearch/pull/4590))
 ### Deprecated
 ### Removed
@@ -82,9 +90,12 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 - [Bug]: Fixed invalid location of JDK dependency for arm64 architecture([#4613](https://github.com/opensearch-project/OpenSearch/pull/4613))
 - [Bug]: Alias filter lost after rollover ([#4499](https://github.com/opensearch-project/OpenSearch/pull/4499))
 - Fixing Gradle warnings associated with publishPluginZipPublicationToXxx tasks ([#4696](https://github.com/opensearch-project/OpenSearch/pull/4696))
+- [BUG]: Remove redundant field from GetDecommissionStateResponse ([#4751](https://github.com/opensearch-project/OpenSearch/pull/4751))
 - Fixed randomly failing test ([4774](https://github.com/opensearch-project/OpenSearch/pull/4774))
 - Fix recovery path for searchable snapshots ([4813](https://github.com/opensearch-project/OpenSearch/pull/4813))
 - Fix a bug on handling an invalid array value for point type field #4900([#4900](https://github.com/opensearch-project/OpenSearch/pull/4900))
+- Fix decommission status update to non leader nodes ([#4800](https://github.com/opensearch-project/OpenSearch/pull/4800))
+- Fix bug in AwarenessAttributeDecommissionIT ([#4822](https://github.com/opensearch-project/OpenSearch/pull/4822))
 - Fix for failing checkExtraction, checkLicense and checkNotice tasks for windows gradle check ([#4941](https://github.com/opensearch-project/OpenSearch/pull/4941))
 ### Security
 - CVE-2022-25857 org.yaml:snakeyaml DOS vulnerability ([#4341](https://github.com/opensearch-project/OpenSearch/pull/4341))
@@ -106,12 +117,17 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 - Add dev guide for dealing with flaky tests ([4894](https://github.com/opensearch-project/OpenSearch/pull/4894))
 - Renamed flaky tests ([#4935](https://github.com/opensearch-project/OpenSearch/pull/4935))
 - Bump current version to 2.5.0 on 2.x branch ([#5002](https://github.com/opensearch-project/OpenSearch/pull/5002))
+- Remote shard balancer support for searchable snapshots ([#5024](https://github.com/opensearch-project/OpenSearch/pull/5024))
+- Implement Searchable Snapshot using OnDemandBlockIndexInput ([#4892](https://github.com/opensearch-project/OpenSearch/pull/4892))
 ### Dependencies
 - Update Jackson Databind to 2.13.4.2 (addressing CVE-2022-42003) ([#4781](https://github.com/opensearch-project/OpenSearch/pull/4781))
 - Install and configure Log4j JUL Adapter for Lucene 9.4 ([#4754](https://github.com/opensearch-project/OpenSearch/pull/4754))
 - Added feature to ignore indexes starting with dot during shard limit validation.([#4695](https://github.com/opensearch-project/OpenSearch/pull/4695))
+- Bump protobuf-java to 3.21.8 ([#5005](https://github.com/opensearch-project/OpenSearch/pull/5005))
+- Upgrade zookeeper dependency in hdfs-fixture ([#5047](https://github.com/opensearch-project/OpenSearch/pull/5047))
 ### Changed
 - Refactored BalancedAllocator.Balancer to LocalShardsBalancer ([#4818](https://github.com/opensearch-project/OpenSearch/pull/4818))
+- Skip SymbolicLinkPreservingTarIT when running on Windows ([#5023](https://github.com/opensearch-project/OpenSearch/pull/5023))
 ### Deprecated
 ### Removed
 - Remove RepositoryData.MIN_VERSION support for next major release ([4729](https://github.com/opensearch-project/OpenSearch/pull/4729))
@@ -131,6 +147,8 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 - Better plural stemmer than minimal_english ([#4738](https://github.com/opensearch-project/OpenSearch/pull/4738))
 - Fix AbstractStringFieldDataTestCase tests to account for TotalHits lower bound ([4867](https://github.com/opensearch-project/OpenSearch/pull/4867))
 - [Segment Replication] Fix bug of replica shard's translog not purging on index flush when segment replication is enabled ([4975](https://github.com/opensearch-project/OpenSearch/pull/4975))
+- Fix bug in SlicedInputStream with zero length ([#4863](https://github.com/opensearch-project/OpenSearch/pull/4863))
+- Fixing PIT flaky tests ([#4632](https://github.com/opensearch-project/OpenSearch/pull/4632))
 -
 ### Security
diff --git a/buildSrc/src/integTest/java/org/opensearch/gradle/tar/SymbolicLinkPreservingTarIT.java b/buildSrc/src/integTest/java/org/opensearch/gradle/tar/SymbolicLinkPreservingTarIT.java
index b70574c507f70..61aa55b9c6b53 100644
--- a/buildSrc/src/integTest/java/org/opensearch/gradle/tar/SymbolicLinkPreservingTarIT.java
+++ b/buildSrc/src/integTest/java/org/opensearch/gradle/tar/SymbolicLinkPreservingTarIT.java
@@ -35,6 +35,7 @@
 import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
 import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
 import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
+import org.apache.tools.ant.taskdefs.condition.Os;
 import org.opensearch.gradle.test.GradleIntegrationTestCase;
 import org.gradle.api.GradleException;
 import org.gradle.testkit.runner.GradleRunner;
@@ -52,6 +53,7 @@
 import static org.hamcrest.CoreMatchers.anyOf;
 import static org.hamcrest.CoreMatchers.equalTo;
+import static org.junit.Assume.assumeFalse;
 
 public class SymbolicLinkPreservingTarIT extends GradleIntegrationTestCase {
 
@@ -60,6 +62,7 @@ public class SymbolicLinkPreservingTarIT extends GradleIntegrationTestCase {
 
     @Before
     public void before() throws IOException {
+        assumeFalse("Skip tar tests on windows.", Os.isFamily(Os.FAMILY_WINDOWS));
         final Path realFolder = temporaryFolder.getRoot().toPath().resolve("real-folder");
         Files.createDirectory(realFolder);
         Files.createFile(realFolder.resolve("file"));
diff --git a/buildSrc/src/main/java/org/opensearch/gradle/test/DistroTestPlugin.java b/buildSrc/src/main/java/org/opensearch/gradle/test/DistroTestPlugin.java
index b14e93ecfd22d..e7c907dfdf000 100644
--- a/buildSrc/src/main/java/org/opensearch/gradle/test/DistroTestPlugin.java
+++ b/buildSrc/src/main/java/org/opensearch/gradle/test/DistroTestPlugin.java
@@ -75,9 +75,9 @@ import java.util.stream.Stream;
 
 public class DistroTestPlugin implements Plugin<Project> {
-    private static final String SYSTEM_JDK_VERSION = "11.0.16+8";
+    private static final String SYSTEM_JDK_VERSION = "11.0.17+8";
     private static final String SYSTEM_JDK_VENDOR = "adoptium";
-    private static final String GRADLE_JDK_VERSION = "17.0.4+8";
+    private static final String GRADLE_JDK_VERSION = "17.0.5+8";
     private static final String GRADLE_JDK_VENDOR = "adoptium";
 
     // all distributions used by distro tests. this is temporary until tests are per distribution
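A note on the skip mechanism used in `SymbolicLinkPreservingTarIT` above: JUnit's `Assume.assumeFalse` aborts with an assumption failure rather than an assertion failure, so runners report the test as skipped; placed in a `@Before` method it skips every test in the class. A minimal self-contained sketch of the same pattern (the class and test names here are illustrative, not part of the OpenSearch tree; JUnit 4 and Ant on the classpath are assumed):

```java
import static org.junit.Assume.assumeFalse;

import org.apache.tools.ant.taskdefs.condition.Os;
import org.junit.Before;
import org.junit.Test;

public class PlatformSkipExampleTest {
    @Before
    public void setUp() {
        // On Windows this throws AssumptionViolatedException, so JUnit
        // marks every test in the class as skipped instead of failed.
        assumeFalse("Skip tar tests on windows.", Os.isFamily(Os.FAMILY_WINDOWS));
    }

    @Test
    public void runsOnlyOnNonWindowsPlatforms() {
        // symlink-dependent assertions would go here
    }
}
```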
diff --git a/buildSrc/src/testFixtures/java/org/opensearch/gradle/test/JUnit3MethodProvider.java b/buildSrc/src/testFixtures/java/org/opensearch/gradle/test/JUnit3MethodProvider.java
index 0c01b6d519d62..163a903d31832 100644
--- a/buildSrc/src/testFixtures/java/org/opensearch/gradle/test/JUnit3MethodProvider.java
+++ b/buildSrc/src/testFixtures/java/org/opensearch/gradle/test/JUnit3MethodProvider.java
@@ -59,7 +59,7 @@ public Collection<Method> getTestMethods(Class<?> suiteClass, ClassModel classMo
             if (m.getName().startsWith("test")
                 && Modifier.isPublic(m.getModifiers())
                 && !Modifier.isStatic(m.getModifiers())
-                && m.getParameterTypes().length == 0) {
+                && m.getParameterCount() == 0) {
                 result.add(m);
             }
         }
diff --git a/buildSrc/version.properties b/buildSrc/version.properties
index c5f190c9ca206..cd8e6dd64fbe4 100644
--- a/buildSrc/version.properties
+++ b/buildSrc/version.properties
@@ -2,7 +2,7 @@ opensearch = 2.5.0
 lucene = 9.4.1
 
 bundled_jdk_vendor = adoptium
-bundled_jdk = 17.0.4+8
+bundled_jdk = 17.0.5+8
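The `getParameterCount()` swap here (and in the client and Painless changes below) is a small but real win: `Method.getParameterTypes()` returns a fresh defensive copy of the parameter array on every call, while `getParameterCount()` just returns the arity. A short sketch of the difference, using only JDK reflection:

```java
import java.lang.reflect.Method;

public class ParameterCountDemo {
    public static void main(String[] args) throws NoSuchMethodException {
        Method substring = String.class.getMethod("substring", int.class, int.class);

        // getParameterTypes() allocates and returns a defensive copy of the
        // parameter array on every call.
        Class<?>[] types = substring.getParameterTypes();

        // getParameterCount() (Java 8+) reads the arity with no allocation,
        // which is all that call sites like JUnit3MethodProvider ever needed.
        int count = substring.getParameterCount();

        System.out.println(types.length == count); // true: both report 2
    }
}
```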
diff --git a/client/rest-high-level/src/test/java/org/opensearch/client/PitIT.java b/client/rest-high-level/src/test/java/org/opensearch/client/PitIT.java
index cbb4db10cd519..be9b614a8720f 100644
--- a/client/rest-high-level/src/test/java/org/opensearch/client/PitIT.java
+++ b/client/rest-high-level/src/test/java/org/opensearch/client/PitIT.java
@@ -24,6 +24,7 @@
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
 
@@ -71,7 +72,7 @@ public void testCreateAndDeletePit() throws IOException {
         assertTrue(deletePitResponse.getDeletePitResults().get(0).getPitId().equals(createPitResponse.getId()));
     }
 
-    public void testDeleteAllAndListAllPits() throws IOException {
+    public void testDeleteAllAndListAllPits() throws IOException, InterruptedException {
         CreatePitRequest pitRequest = new CreatePitRequest(new TimeValue(1, TimeUnit.DAYS), true, "index");
         CreatePitResponse pitResponse = execute(pitRequest, highLevelClient()::createPit, highLevelClient()::createPitAsync);
         CreatePitResponse pitResponse1 = execute(pitRequest, highLevelClient()::createPit, highLevelClient()::createPitAsync);
@@ -90,9 +91,11 @@ public void testDeleteAllAndListAllPits() throws IOException {
         List<String> pits = getAllPitResponse.getPitInfos().stream().map(r -> r.getPitId()).collect(Collectors.toList());
         assertTrue(pits.contains(pitResponse.getId()));
         assertTrue(pits.contains(pitResponse1.getId()));
+        CountDownLatch countDownLatch = new CountDownLatch(1);
         ActionListener<DeletePitResponse> deletePitListener = new ActionListener<>() {
             @Override
             public void onResponse(DeletePitResponse response) {
+                countDownLatch.countDown();
                 for (DeletePitInfo deletePitInfo : response.getDeletePitResults()) {
                     assertTrue(deletePitInfo.isSuccessful());
                 }
@@ -100,6 +103,7 @@ public void onResponse(DeletePitResponse response) {
 
             @Override
             public void onFailure(Exception e) {
+                countDownLatch.countDown();
                 if (!(e instanceof OpenSearchStatusException)) {
                     throw new AssertionError("Delete all failed");
                 }
@@ -123,6 +127,7 @@ public void onFailure(Exception e) {
         };
         highLevelClient().getAllPitsAsync(RequestOptions.DEFAULT, getPitsListener);
         highLevelClient().deleteAllPitsAsync(RequestOptions.DEFAULT, deletePitListener);
+        assertTrue(countDownLatch.await(10, TimeUnit.SECONDS));
         // validate no pits case
         getAllPitResponse = highLevelClient().getAllPits(RequestOptions.DEFAULT);
         assertTrue(getAllPitResponse.getPitInfos().size() == 0);
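The `CountDownLatch` added to `PitIT` above closes a race: the async delete listener used to run concurrently with the final synchronous assertions, so the test could still observe PITs that were not yet deleted. A stripped-down sketch of the pattern, with a stand-in listener interface since the OpenSearch `ActionListener` isn't needed to show the idea:

```java
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

public class LatchAwaitDemo {
    interface Listener<T> {
        void onResponse(T response);

        void onFailure(Exception e);
    }

    public static void main(String[] args) throws InterruptedException {
        CountDownLatch latch = new CountDownLatch(1);

        Listener<String> listener = new Listener<>() {
            @Override
            public void onResponse(String response) {
                latch.countDown(); // release the waiting test thread on success...
            }

            @Override
            public void onFailure(Exception e) {
                latch.countDown(); // ...and on failure, so the test never hangs
            }
        };

        // Simulated async completion; in PitIT this is deleteAllPitsAsync(...).
        new Thread(() -> listener.onResponse("done")).start();

        // Bound the wait so a lost callback fails the test instead of blocking forever.
        if (!latch.await(10, TimeUnit.SECONDS)) {
            throw new AssertionError("listener was never invoked");
        }
    }
}
```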
diff --git a/client/rest-high-level/src/test/java/org/opensearch/client/RestHighLevelClientTests.java b/client/rest-high-level/src/test/java/org/opensearch/client/RestHighLevelClientTests.java
index 2965e34dab419..a50cc811a87dc 100644
--- a/client/rest-high-level/src/test/java/org/opensearch/client/RestHighLevelClientTests.java
+++ b/client/rest-high-level/src/test/java/org/opensearch/client/RestHighLevelClientTests.java
@@ -891,7 +891,10 @@ public void testApiNamingConventions() throws Exception {
             "remote_store.restore",
             "cluster.put_weighted_routing",
             "cluster.get_weighted_routing",
-            "cluster.delete_weighted_routing", };
+            "cluster.delete_weighted_routing",
+            "cluster.put_decommission_awareness",
+            "cluster.get_decommission_awareness",
+            "cluster.delete_decommission_awareness", };
         List<String> booleanReturnMethods = Arrays.asList("security.enable_user", "security.disable_user", "security.change_password");
         Set<String> deprecatedMethods = new HashSet<>();
         deprecatedMethods.add("indices.force_merge");
@@ -1004,37 +1007,34 @@ private static void assertSyncMethod(Method method, String apiName, List<String>
         }
         assertEquals("incorrect number of exceptions for method [" + method + "]", 1, method.getExceptionTypes().length);
 
+        final Class<?>[] parameterTypes = method.getParameterTypes();
         // a few methods don't accept a request object as argument
         if (APIS_WITHOUT_REQUEST_OBJECT.contains(apiName)) {
-            assertEquals("incorrect number of arguments for method [" + method + "]", 1, method.getParameterTypes().length);
-            assertThat(
-                "the parameter to method [" + method + "] is the wrong type",
-                method.getParameterTypes()[0],
-                equalTo(RequestOptions.class)
-            );
+            assertEquals("incorrect number of arguments for method [" + method + "]", 1, method.getParameterCount());
+            assertThat("the parameter to method [" + method + "] is the wrong type", parameterTypes[0], equalTo(RequestOptions.class));
         } else {
-            assertEquals("incorrect number of arguments for method [" + method + "]", 2, method.getParameterTypes().length);
+            assertEquals("incorrect number of arguments for method [" + method + "]", 2, method.getParameterCount());
             // This is no longer true for all methods. Some methods can contain these 2 args backwards because of deprecation
-            if (method.getParameterTypes()[0].equals(RequestOptions.class)) {
+            if (parameterTypes[0].equals(RequestOptions.class)) {
                 assertThat(
                     "the first parameter to method [" + method + "] is the wrong type",
-                    method.getParameterTypes()[0],
+                    parameterTypes[0],
                     equalTo(RequestOptions.class)
                 );
                 assertThat(
                     "the second parameter to method [" + method + "] is the wrong type",
-                    method.getParameterTypes()[1].getSimpleName(),
+                    parameterTypes[1].getSimpleName(),
                     endsWith("Request")
                 );
             } else {
                 assertThat(
                     "the first parameter to method [" + method + "] is the wrong type",
-                    method.getParameterTypes()[0].getSimpleName(),
+                    parameterTypes[0].getSimpleName(),
                     endsWith("Request")
                 );
                 assertThat(
                     "the second parameter to method [" + method + "] is the wrong type",
-                    method.getParameterTypes()[1],
+                    parameterTypes[1],
                     equalTo(RequestOptions.class)
                 );
             }
@@ -1048,39 +1048,40 @@ private static void assertAsyncMethod(Map<String, Set<Method>> methods, Method m
         );
         assertThat("async method [" + method + "] should return Cancellable", method.getReturnType(), equalTo(Cancellable.class));
         assertEquals("async method [" + method + "] should not throw any exceptions", 0, method.getExceptionTypes().length);
+        final Class<?>[] parameterTypes = method.getParameterTypes();
         if (APIS_WITHOUT_REQUEST_OBJECT.contains(apiName.replaceAll("_async$", ""))) {
-            assertEquals(2, method.getParameterTypes().length);
-            assertThat(method.getParameterTypes()[0], equalTo(RequestOptions.class));
-            assertThat(method.getParameterTypes()[1], equalTo(ActionListener.class));
+            assertEquals(2, parameterTypes.length);
+            assertThat(parameterTypes[0], equalTo(RequestOptions.class));
+            assertThat(parameterTypes[1], equalTo(ActionListener.class));
         } else {
-            assertEquals("async method [" + method + "] has the wrong number of arguments", 3, method.getParameterTypes().length);
+            assertEquals("async method [" + method + "] has the wrong number of arguments", 3, method.getParameterCount());
             // This is no longer true for all methods. Some methods can contain these 2 args backwards because of deprecation
-            if (method.getParameterTypes()[0].equals(RequestOptions.class)) {
+            if (parameterTypes[0].equals(RequestOptions.class)) {
                 assertThat(
                     "the first parameter to async method [" + method + "] should be a request type",
-                    method.getParameterTypes()[0],
+                    parameterTypes[0],
                     equalTo(RequestOptions.class)
                 );
                 assertThat(
                     "the second parameter to async method [" + method + "] is the wrong type",
-                    method.getParameterTypes()[1].getSimpleName(),
+                    parameterTypes[1].getSimpleName(),
                     endsWith("Request")
                 );
             } else {
                 assertThat(
                     "the first parameter to async method [" + method + "] should be a request type",
-                    method.getParameterTypes()[0].getSimpleName(),
+                    parameterTypes[0].getSimpleName(),
                     endsWith("Request")
                 );
                 assertThat(
                     "the second parameter to async method [" + method + "] is the wrong type",
-                    method.getParameterTypes()[1],
+                    parameterTypes[1],
                     equalTo(RequestOptions.class)
                 );
             }
             assertThat(
                 "the third parameter to async method [" + method + "] is the wrong type",
-                method.getParameterTypes()[2],
+                parameterTypes[2],
                 equalTo(ActionListener.class)
             );
         }
@@ -1093,16 +1094,17 @@ private static void assertSubmitTaskMethod(
         ClientYamlSuiteRestSpec restSpec
     ) {
         String methodName = extractMethodName(apiName);
+        final Class<?>[] parameterTypes = method.getParameterTypes();
         assertTrue("submit task method [" + method.getName() + "] doesn't have corresponding sync method", methods.containsKey(methodName));
-        assertEquals("submit task method [" + method + "] has the wrong number of arguments", 2, method.getParameterTypes().length);
+        assertEquals("submit task method [" + method + "] has the wrong number of arguments", 2, method.getParameterCount());
         assertThat(
             "the first parameter to submit task method [" + method + "] is the wrong type",
-            method.getParameterTypes()[0].getSimpleName(),
+            parameterTypes[0].getSimpleName(),
             endsWith("Request")
         );
         assertThat(
             "the second parameter to submit task method [" + method + "] is the wrong type",
-            method.getParameterTypes()[1],
+            parameterTypes[1],
             equalTo(RequestOptions.class)
         );
diff --git a/modules/lang-painless/src/main/java/org/opensearch/painless/Compiler.java b/modules/lang-painless/src/main/java/org/opensearch/painless/Compiler.java
index eca931d87b68c..1f57c5cbd1149 100644
--- a/modules/lang-painless/src/main/java/org/opensearch/painless/Compiler.java
+++ b/modules/lang-painless/src/main/java/org/opensearch/painless/Compiler.java
@@ -212,8 +212,9 @@ private static void addFactoryMethod(Map<String, Class<?>> additionalClasses, Cl
         }
 
         additionalClasses.put(factoryClass.getName(), factoryClass);
-        for (int i = 0; i < factoryMethod.getParameterTypes().length; ++i) {
-            Class<?> parameterClazz = factoryMethod.getParameterTypes()[i];
+        final Class<?>[] parameterTypes = factoryMethod.getParameterTypes();
+        for (int i = 0; i < parameterTypes.length; ++i) {
+            Class<?> parameterClazz = parameterTypes[i];
             additionalClasses.put(parameterClazz.getName(), parameterClazz);
         }
     }
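The `Compiler` hunk above, and the `PainlessScriptEngine` changes that follow, apply the companion fix to the `getParameterCount()` one: when the element types are actually needed, call `getParameterTypes()` once and reuse the array rather than paying for a defensive copy on every loop iteration. A sketch of the before/after shape (method names here are illustrative, not from the OpenSearch tree):

```java
import java.lang.reflect.Method;

public class HoistParameterTypesDemo {
    // Before: getParameterTypes() clones the array on every iteration,
    // and once more inside the loop body.
    static void before(Method factoryMethod) {
        for (int i = 0; i < factoryMethod.getParameterTypes().length; ++i) {
            System.out.println(factoryMethod.getParameterTypes()[i].getName());
        }
    }

    // After (the shape used in Compiler.addFactoryMethod above): clone once,
    // then index into the local array.
    static void after(Method factoryMethod) {
        final Class<?>[] parameterTypes = factoryMethod.getParameterTypes();
        for (int i = 0; i < parameterTypes.length; ++i) {
            System.out.println(parameterTypes[i].getName());
        }
    }

    public static void main(String[] args) throws NoSuchMethodException {
        Method substring = String.class.getMethod("substring", int.class, int.class);
        before(substring);
        after(substring);
    }
}
```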
diff --git a/modules/lang-painless/src/main/java/org/opensearch/painless/PainlessScriptEngine.java b/modules/lang-painless/src/main/java/org/opensearch/painless/PainlessScriptEngine.java
index 1663185ae7c4e..4555ce5cb1ed0 100644
--- a/modules/lang-painless/src/main/java/org/opensearch/painless/PainlessScriptEngine.java
+++ b/modules/lang-painless/src/main/java/org/opensearch/painless/PainlessScriptEngine.java
@@ -195,11 +195,12 @@ private <T> Type generateStatefulFactory(Loader loader, ScriptContext<T> context
             }
         }
 
-        for (int count = 0; count < newFactory.getParameterTypes().length; ++count) {
+        final Class<?>[] parameterTypes = newFactory.getParameterTypes();
+        for (int count = 0; count < parameterTypes.length; ++count) {
             writer.visitField(
                 Opcodes.ACC_PRIVATE | Opcodes.ACC_FINAL,
                 "$arg" + count,
-                Type.getType(newFactory.getParameterTypes()[count]).getDescriptor(),
+                Type.getType(parameterTypes[count]).getDescriptor(),
                 null,
                 null
             ).visitEnd();
@@ -211,7 +212,7 @@ private <T> Type generateStatefulFactory(Loader loader, ScriptContext<T> context
         );
         org.objectweb.asm.commons.Method init = new org.objectweb.asm.commons.Method(
             "<init>",
-            MethodType.methodType(void.class, newFactory.getParameterTypes()).toMethodDescriptorString()
+            MethodType.methodType(void.class, parameterTypes).toMethodDescriptorString()
         );
 
         GeneratorAdapter constructor = new GeneratorAdapter(
@@ -223,10 +224,10 @@ private <T> Type generateStatefulFactory(Loader loader, ScriptContext<T> context
         constructor.loadThis();
         constructor.invokeConstructor(OBJECT_TYPE, base);
 
-        for (int count = 0; count < newFactory.getParameterTypes().length; ++count) {
+        for (int count = 0; count < parameterTypes.length; ++count) {
             constructor.loadThis();
             constructor.loadArg(count);
-            constructor.putField(Type.getType("L" + className + ";"), "$arg" + count, Type.getType(newFactory.getParameterTypes()[count]));
+            constructor.putField(Type.getType("L" + className + ";"), "$arg" + count, Type.getType(parameterTypes[count]));
         }
 
         constructor.returnValue();
@@ -247,7 +248,7 @@ private <T> Type generateStatefulFactory(Loader loader, ScriptContext<T> context
             MethodType.methodType(newInstance.getReturnType(), newInstance.getParameterTypes()).toMethodDescriptorString()
         );
 
-        List<Class<?>> parameters = new ArrayList<>(Arrays.asList(newFactory.getParameterTypes()));
+        List<Class<?>> parameters = new ArrayList<>(Arrays.asList(parameterTypes));
         parameters.addAll(Arrays.asList(newInstance.getParameterTypes()));
 
         org.objectweb.asm.commons.Method constru = new org.objectweb.asm.commons.Method(
@@ -264,9 +265,9 @@ private <T> Type generateStatefulFactory(Loader loader, ScriptContext<T> context
         adapter.newInstance(WriterConstants.CLASS_TYPE);
         adapter.dup();
 
-        for (int count = 0; count < newFactory.getParameterTypes().length; ++count) {
+        for (int count = 0; count < parameterTypes.length; ++count) {
             adapter.loadThis();
-            adapter.getField(Type.getType("L" + className + ";"), "$arg" + count, Type.getType(newFactory.getParameterTypes()[count]));
+            adapter.getField(Type.getType("L" + className + ";"), "$arg" + count, Type.getType(parameterTypes[count]));
         }
 
         adapter.loadArgs();
@@ -334,13 +335,14 @@ private <T> T generateFactory(Loader loader, ScriptContext<T> context, Type clas
             }
         }
 
+        final Class<?>[] parameterTypes = reflect.getParameterTypes();
         org.objectweb.asm.commons.Method instance = new org.objectweb.asm.commons.Method(
             reflect.getName(),
-            MethodType.methodType(reflect.getReturnType(), reflect.getParameterTypes()).toMethodDescriptorString()
+            MethodType.methodType(reflect.getReturnType(), parameterTypes).toMethodDescriptorString()
         );
         org.objectweb.asm.commons.Method constru = new org.objectweb.asm.commons.Method(
             "<init>",
-            MethodType.methodType(void.class, reflect.getParameterTypes()).toMethodDescriptorString()
+            MethodType.methodType(void.class, parameterTypes).toMethodDescriptorString()
         );
 
         GeneratorAdapter adapter = new GeneratorAdapter(
@@ -421,9 +423,7 @@ private <T> T generateFactory(Loader loader, ScriptContext<T> context, Type clas
 
     private void writeNeedsMethods(Class<?> clazz, ClassWriter writer, Set<String> extractedVariables) {
         for (Method method : clazz.getMethods()) {
-            if (method.getName().startsWith("needs")
-                && method.getReturnType().equals(boolean.class)
-                && method.getParameterTypes().length == 0) {
+            if (method.getName().startsWith("needs") && method.getReturnType().equals(boolean.class) && method.getParameterCount() == 0) {
                 String name = method.getName();
                 name = name.substring(5);
                 name = Character.toLowerCase(name.charAt(0)) + name.substring(1);
diff --git a/modules/lang-painless/src/main/java/org/opensearch/painless/ScriptClassInfo.java b/modules/lang-painless/src/main/java/org/opensearch/painless/ScriptClassInfo.java
index a9d5a7abbb150..69aa3f157951e 100644
--- a/modules/lang-painless/src/main/java/org/opensearch/painless/ScriptClassInfo.java
+++ b/modules/lang-painless/src/main/java/org/opensearch/painless/ScriptClassInfo.java
@@ -88,7 +88,7 @@ public ScriptClassInfo(PainlessLookup painlessLookup, Class<?> baseClass) {
                         + "] has more than one."
                 );
             }
-        } else if (m.getName().startsWith("needs") && m.getReturnType() == boolean.class && m.getParameterTypes().length == 0) {
+        } else if (m.getName().startsWith("needs") && m.getReturnType() == boolean.class && m.getParameterCount() == 0) {
             needsMethods.add(new org.objectweb.asm.commons.Method(m.getName(), NEEDS_PARAMETER_METHOD_TYPE.toMethodDescriptorString()));
         } else if (m.getName().startsWith("get")
             && m.getName().equals("getClass") == false
@@ -124,7 +124,7 @@ public ScriptClassInfo(PainlessLookup painlessLookup, Class<?> baseClass) {
         FunctionTable.LocalFunction defConverter = null;
         for (java.lang.reflect.Method m : baseClass.getMethods()) {
             if (m.getName().startsWith("convertFrom")
-                && m.getParameterTypes().length == 1
+                && m.getParameterCount() == 1
                 && m.getReturnType() == returnType
                 && Modifier.isStatic(m.getModifiers())) {
diff --git a/modules/lang-painless/src/main/java/org/opensearch/painless/lookup/PainlessLookupBuilder.java b/modules/lang-painless/src/main/java/org/opensearch/painless/lookup/PainlessLookupBuilder.java
index ff3fbc640e990..54484e6685996 100644
--- a/modules/lang-painless/src/main/java/org/opensearch/painless/lookup/PainlessLookupBuilder.java
+++ b/modules/lang-painless/src/main/java/org/opensearch/painless/lookup/PainlessLookupBuilder.java
@@ -2168,9 +2168,10 @@ private void generateBridgeMethod(PainlessClassBuilder painlessClassBuilder, Pai
             bridgeMethodWriter.loadArg(0);
         }
 
-        for (int typeParameterCount = 0; typeParameterCount < javaMethod.getParameterTypes().length; ++typeParameterCount) {
+        final Class<?>[] typeParameters = javaMethod.getParameterTypes();
+        for (int typeParameterCount = 0; typeParameterCount < typeParameters.length; ++typeParameterCount) {
             bridgeMethodWriter.loadArg(typeParameterCount + bridgeTypeParameterOffset);
-            Class<?> typeParameter = javaMethod.getParameterTypes()[typeParameterCount];
+            Class<?> typeParameter = typeParameters[typeParameterCount];
 
             if (typeParameter == Byte.class) bridgeMethodWriter.invokeStatic(DEF_UTIL_TYPE, DEF_TO_B_BYTE_IMPLICIT);
             else if (typeParameter == Short.class) bridgeMethodWriter.invokeStatic(DEF_UTIL_TYPE, DEF_TO_B_SHORT_IMPLICIT);
diff --git a/plugins/repository-hdfs/build.gradle b/plugins/repository-hdfs/build.gradle
index 1d4772b9c8076..5e48afbf9bb66 100644
--- a/plugins/repository-hdfs/build.gradle
+++ b/plugins/repository-hdfs/build.gradle
@@ -83,7 +83,6 @@ dependencies {
   api "org.slf4j:slf4j-api:${versions.slf4j}"
   api "org.apache.logging.log4j:log4j-slf4j-impl:${versions.log4j}"
   api 'net.minidev:json-smart:2.4.7'
-  api 'org.apache.zookeeper:zookeeper:3.7.0'
   api "io.netty:netty-all:${versions.netty}"
"com.fasterxml.woodstox:woodstox-core:${versions.woodstox}" implementation 'org.codehaus.woodstox:stax2-api:4.2.1' diff --git a/plugins/repository-hdfs/licenses/zookeeper-3.7.0.jar.sha1 b/plugins/repository-hdfs/licenses/zookeeper-3.7.0.jar.sha1 deleted file mode 100644 index 88a6106a68710..0000000000000 --- a/plugins/repository-hdfs/licenses/zookeeper-3.7.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -1360048c7ca057df627b7267ff7360870e987ab0 \ No newline at end of file diff --git a/plugins/repository-hdfs/licenses/zookeeper-LICENSE.txt b/plugins/repository-hdfs/licenses/zookeeper-LICENSE.txt deleted file mode 100644 index 7a4a3ea2424c0..0000000000000 --- a/plugins/repository-hdfs/licenses/zookeeper-LICENSE.txt +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. 
The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file diff --git a/plugins/repository-hdfs/licenses/zookeeper-NOTICE.txt b/plugins/repository-hdfs/licenses/zookeeper-NOTICE.txt deleted file mode 100644 index b853f3e85f32f..0000000000000 --- a/plugins/repository-hdfs/licenses/zookeeper-NOTICE.txt +++ /dev/null @@ -1,11 +0,0 @@ -Apache ZooKeeper -Copyright 2009-2021 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - -This product includes software components originally -developed for Airlift (https://github.com/airlift/airlift), -licensed under the Apache 2.0 license. The licensing terms -for Airlift code can be found at: -https://github.com/airlift/airlift/blob/master/LICENSE \ No newline at end of file diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.delete_decommission_awareness.json b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.delete_decommission_awareness.json new file mode 100644 index 0000000000000..13ea101169e60 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.delete_decommission_awareness.json @@ -0,0 +1,19 @@ +{ + "cluster.delete_decommission_awareness": { + "documentation": { + "url": "https://opensearch.org/docs/latest/opensearch/rest-api/decommission/", + "description": "Delete any existing decommission." 
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.delete_decommission_awareness.json b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.delete_decommission_awareness.json
new file mode 100644
index 0000000000000..13ea101169e60
--- /dev/null
+++ b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.delete_decommission_awareness.json
@@ -0,0 +1,19 @@
+{
+  "cluster.delete_decommission_awareness": {
+    "documentation": {
+      "url": "https://opensearch.org/docs/latest/opensearch/rest-api/decommission/",
+      "description": "Delete any existing decommission."
+    },
+    "stability": "experimental",
+    "url": {
+      "paths": [
+        {
+          "path": "/_cluster/decommission/awareness/",
+          "methods": [
+            "DELETE"
+          ]
+        }
+      ]
+    }
+  }
+}
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.get_decommission_awareness.json b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.get_decommission_awareness.json
new file mode 100644
index 0000000000000..302dea4ec31a7
--- /dev/null
+++ b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.get_decommission_awareness.json
@@ -0,0 +1,25 @@
+{
+  "cluster.get_decommission_awareness": {
+    "documentation": {
+      "url": "https://opensearch.org/docs/latest/opensearch/rest-api/decommission/",
+      "description": "Get details and status of decommissioned attribute"
+    },
+    "stability": "experimental",
+    "url": {
+      "paths": [
+        {
+          "path":"/_cluster/decommission/awareness/{awareness_attribute_name}/_status",
+          "methods":[
+            "GET"
+          ],
+          "parts":{
+            "awareness_attribute_name":{
+              "type":"string",
+              "description":"Awareness attribute name"
+            }
+          }
+        }
+      ]
+    }
+  }
+}
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.put_decommission_awareness.json b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.put_decommission_awareness.json
new file mode 100644
index 0000000000000..bf4ffd454d9df
--- /dev/null
+++ b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.put_decommission_awareness.json
@@ -0,0 +1,29 @@
+{
+  "cluster.put_decommission_awareness": {
+    "documentation": {
+      "url": "https://opensearch.org/docs/latest/opensearch/rest-api/decommission/",
+      "description": "Decommissions an awareness attribute"
+    },
+    "stability": "experimental",
+    "url": {
+      "paths": [
+        {
+          "path": "/_cluster/decommission/awareness/{awareness_attribute_name}/{awareness_attribute_value}",
+          "methods": [
+            "PUT"
+          ],
+          "parts": {
+            "awareness_attribute_name": {
+              "type": "string",
+              "description": "Awareness attribute name"
+            },
+            "awareness_attribute_value": {
+              "type": "string",
+              "description": "Awareness attribute value"
+            }
+          }
+        }
+      ]
+    }
+  }
+}
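These three specs only declare the REST surface: paths, verbs, and path parts. Server-side, the same operations flow through the transport actions registered in `ActionModule` further down. A hedged sketch of driving them through a node-internal `Client`, mirroring the integration test that follows (the wrapper class is illustrative; the request/response types are the ones added in this diff, and the code only runs inside a cluster context):

```java
import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateAction;
import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateRequest;
import org.opensearch.action.admin.cluster.decommission.awareness.get.GetDecommissionStateAction;
import org.opensearch.action.admin.cluster.decommission.awareness.get.GetDecommissionStateRequest;
import org.opensearch.action.admin.cluster.decommission.awareness.get.GetDecommissionStateResponse;
import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionAction;
import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionRequest;
import org.opensearch.client.Client;
import org.opensearch.cluster.decommission.DecommissionAttribute;

public final class DecommissionCalls {
    private DecommissionCalls() {}

    // Transport-level equivalents of the three REST paths declared above.
    public static void exercise(Client client) {
        // PUT /_cluster/decommission/awareness/zone/c
        boolean acked = client.execute(
            DecommissionAction.INSTANCE,
            new DecommissionRequest(new DecommissionAttribute("zone", "c"))
        ).actionGet().isAcknowledged();

        // GET /_cluster/decommission/awareness/zone/_status
        GetDecommissionStateResponse status = client.execute(
            GetDecommissionStateAction.INSTANCE,
            new GetDecommissionStateRequest("zone")
        ).actionGet();

        // DELETE /_cluster/decommission/awareness/ (recommission)
        boolean recommissioned = client.execute(
            DeleteDecommissionStateAction.INSTANCE,
            new DeleteDecommissionStateRequest()
        ).actionGet().isAcknowledged();

        System.out.println(acked + " / " + status.getDecommissionStatus() + " / " + recommissioned);
    }
}
```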
diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/AwarenessAttributeDecommissionIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/AwarenessAttributeDecommissionIT.java
new file mode 100644
index 0000000000000..a2270d63ba6fa
--- /dev/null
+++ b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/AwarenessAttributeDecommissionIT.java
@@ -0,0 +1,165 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.cluster.coordination;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.junit.After;
+import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateAction;
+import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateRequest;
+import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateResponse;
+import org.opensearch.action.admin.cluster.decommission.awareness.get.GetDecommissionStateAction;
+import org.opensearch.action.admin.cluster.decommission.awareness.get.GetDecommissionStateRequest;
+import org.opensearch.action.admin.cluster.decommission.awareness.get.GetDecommissionStateResponse;
+import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionAction;
+import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionRequest;
+import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionResponse;
+import org.opensearch.cluster.ClusterState;
+import org.opensearch.cluster.decommission.DecommissionAttribute;
+import org.opensearch.cluster.decommission.DecommissionStatus;
+import org.opensearch.cluster.node.DiscoveryNode;
+import org.opensearch.cluster.node.DiscoveryNodeRole;
+import org.opensearch.cluster.service.ClusterService;
+import org.opensearch.common.Priority;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.common.unit.TimeValue;
+import org.opensearch.plugins.Plugin;
+import org.opensearch.test.OpenSearchIntegTestCase;
+import org.opensearch.test.transport.MockTransportService;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.concurrent.ExecutionException;
+
+import static org.opensearch.test.NodeRoles.onlyRole;
+import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertNoTimeout;
+
+@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0)
+public class AwarenessAttributeDecommissionIT extends OpenSearchIntegTestCase {
+    private final Logger logger = LogManager.getLogger(AwarenessAttributeDecommissionIT.class);
+
+    @Override
+    protected Collection<Class<? extends Plugin>> nodePlugins() {
+        return Collections.singletonList(MockTransportService.TestPlugin.class);
+    }
+
+    @After
+    public void cleanup() throws Exception {
+        assertNoTimeout(client().admin().cluster().prepareHealth().get());
+    }
+
+    public void testDecommissionStatusUpdatePublishedToAllNodes() throws ExecutionException, InterruptedException {
+        Settings commonSettings = Settings.builder()
+            .put("cluster.routing.allocation.awareness.attributes", "zone")
+            .put("cluster.routing.allocation.awareness.force.zone.values", "a,b,c")
+            .build();
+
+        logger.info("--> start 3 cluster manager nodes on zones 'a' & 'b' & 'c'");
+        List<String> clusterManagerNodes = internalCluster().startNodes(
+            Settings.builder()
+                .put(commonSettings)
+                .put("node.attr.zone", "a")
+                .put(onlyRole(commonSettings, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE))
+                .build(),
+            Settings.builder()
+                .put(commonSettings)
+                .put("node.attr.zone", "b")
+                .put(onlyRole(commonSettings, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE))
+                .build(),
+            Settings.builder()
+                .put(commonSettings)
+                .put("node.attr.zone", "c")
+                .put(onlyRole(commonSettings, DiscoveryNodeRole.CLUSTER_MANAGER_ROLE))
+                .build()
+        );
+
+        logger.info("--> start 3 data nodes on zones 'a' & 'b' & 'c'");
+        List<String> dataNodes = internalCluster().startNodes(
+            Settings.builder()
+                .put(commonSettings)
+                .put("node.attr.zone", "a")
+                .put(onlyRole(commonSettings, DiscoveryNodeRole.DATA_ROLE))
+                .build(),
+            Settings.builder()
+                .put(commonSettings)
+                .put("node.attr.zone", "b")
+                .put(onlyRole(commonSettings, DiscoveryNodeRole.DATA_ROLE))
+                .build(),
+            Settings.builder()
+                .put(commonSettings)
+                .put("node.attr.zone", "c")
+                .put(onlyRole(commonSettings, DiscoveryNodeRole.DATA_ROLE))
+                .build()
+        );
+
+        ensureStableCluster(6);
+
+        logger.info("--> starting decommissioning nodes in zone {}", 'c');
+        DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "c");
+        DecommissionRequest decommissionRequest = new DecommissionRequest(decommissionAttribute);
+        DecommissionResponse decommissionResponse = client().execute(DecommissionAction.INSTANCE, decommissionRequest).get();
+        assertTrue(decommissionResponse.isAcknowledged());
+
+        // Will wait for all events to complete
+        client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).get();
+
+        // assert that decommission status is successful
+        GetDecommissionStateResponse response = client().execute(
+            GetDecommissionStateAction.INSTANCE,
+            new GetDecommissionStateRequest(decommissionAttribute.attributeName())
+        ).get();
+        assertEquals(response.getAttributeValue(), decommissionAttribute.attributeValue());
+        assertEquals(response.getDecommissionStatus(), DecommissionStatus.SUCCESSFUL);
+
+        ClusterState clusterState = client(clusterManagerNodes.get(0)).admin().cluster().prepareState().execute().actionGet().getState();
+        assertEquals(4, clusterState.nodes().getSize());
+
+        // assert status on the nodes that are currently part of the cluster
+        Iterator<DiscoveryNode> discoveryNodeIterator = clusterState.nodes().getNodes().valuesIt();
+        while (discoveryNodeIterator.hasNext()) {
+            // assert no node has the decommissioned attribute
+            DiscoveryNode node = discoveryNodeIterator.next();
+            assertNotEquals(node.getAttributes().get("zone"), "c");
+
+            // assert all the nodes have status as SUCCESSFUL
+            ClusterService localNodeClusterService = internalCluster().getInstance(ClusterService.class, node.getName());
+            assertEquals(
+                localNodeClusterService.state().metadata().decommissionAttributeMetadata().status(),
+                DecommissionStatus.SUCCESSFUL
+            );
+        }
+
+        // assert status on the decommissioned node
+        // verify that, until it was kicked out, it received the appropriate status updates;
+        // a decommissioned node is removed before the status moves to SUCCESSFUL, so it
+        // should still report IN_PROGRESS
+        String randomDecommissionedNode = randomFrom(clusterManagerNodes.get(2), dataNodes.get(2));
+        ClusterService decommissionedNodeClusterService = internalCluster().getInstance(ClusterService.class, randomDecommissionedNode);
+        assertEquals(
+            decommissionedNodeClusterService.state().metadata().decommissionAttributeMetadata().status(),
+            DecommissionStatus.IN_PROGRESS
+        );
+
+        // Will wait for all events to complete
+        client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).get();
+
+        // Recommission the zone so the test winds down with a stable, fully joined cluster
+        DeleteDecommissionStateResponse deleteDecommissionStateResponse = client(clusterManagerNodes.get(0)).execute(
+            DeleteDecommissionStateAction.INSTANCE,
+            new DeleteDecommissionStateRequest()
+        ).get();
+        assertTrue(deleteDecommissionStateResponse.isAcknowledged());
+
+        // wait for the cluster to stabilise with a 2 minute timeout (the findPeerInterval for
+        // decommissioned nodes), by which time all nodes should have rejoined the cluster
+        ensureStableCluster(6, TimeValue.timeValueMinutes(2));
+    }
+}
diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/SearchableSnapshotIT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/SearchableSnapshotIT.java
index c528a311e0761..0e964fdf14109 100644
--- a/server/src/internalClusterTest/java/org/opensearch/snapshots/SearchableSnapshotIT.java
+++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/SearchableSnapshotIT.java
@@ -53,6 +53,13 @@ protected boolean addMockInternalEngine() {
         return false;
     }
 
+    @Override
+    protected Settings.Builder randomRepositorySettings() {
+        final Settings.Builder settings = Settings.builder();
+        settings.put("location", randomRepoPath()).put("compress", randomBoolean());
+        return settings;
+    }
+
     public void testCreateSearchableSnapshot() throws Exception {
         final int numReplicasIndex1 = randomIntBetween(1, 4);
         final int numReplicasIndex2 = randomIntBetween(0, 2);
@@ -92,6 +99,8 @@ public void testCreateSearchableSnapshot() throws Exception {
 
         assertTrue(client.admin().indices().prepareDelete("test-idx-1", "test-idx-2").get().isAcknowledged());
 
+        internalCluster().ensureAtLeastNumSearchNodes(Math.max(numReplicasIndex1, numReplicasIndex2) + 1);
+
         logger.info("--> restore indices as 'remote_snapshot'");
         client.admin()
             .cluster()
diff --git a/server/src/main/java/org/opensearch/OpenSearchException.java b/server/src/main/java/org/opensearch/OpenSearchException.java
index 1641f00a82f5c..76a24cf88970c 100644
--- a/server/src/main/java/org/opensearch/OpenSearchException.java
+++ b/server/src/main/java/org/opensearch/OpenSearchException.java
@@ -69,6 +69,7 @@
 import static java.util.Collections.unmodifiableMap;
 import static org.opensearch.Version.V_2_1_0;
 import static org.opensearch.Version.V_2_3_0;
+import static org.opensearch.Version.V_2_4_0;
 import static org.opensearch.cluster.metadata.IndexMetadata.INDEX_UUID_NA_VALUE;
 import static org.opensearch.common.xcontent.XContentParserUtils.ensureExpectedToken;
 import static org.opensearch.common.xcontent.XContentParserUtils.ensureFieldName;
@@ -1608,6 +1609,18 @@
             org.opensearch.index.shard.PrimaryShardClosedException::new,
             162,
             V_2_3_0
+        ),
+        DECOMMISSIONING_FAILED_EXCEPTION(
+            org.opensearch.cluster.decommission.DecommissioningFailedException.class,
+            org.opensearch.cluster.decommission.DecommissioningFailedException::new,
+            163,
+            V_2_4_0
+        ),
+        NODE_DECOMMISSIONED_EXCEPTION(
+            org.opensearch.cluster.decommission.NodeDecommissionedException.class,
+            org.opensearch.cluster.decommission.NodeDecommissionedException::new,
+            164,
+            V_2_4_0
         );
 
         final Class<? extends OpenSearchException> exceptionClass;
diff --git a/server/src/main/java/org/opensearch/action/ActionModule.java b/server/src/main/java/org/opensearch/action/ActionModule.java
index 89eb82de97da8..841085946bc13 100644
--- a/server/src/main/java/org/opensearch/action/ActionModule.java
+++ b/server/src/main/java/org/opensearch/action/ActionModule.java
@@ -40,6 +40,12 @@
 import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsAction;
 import org.opensearch.action.admin.cluster.configuration.TransportAddVotingConfigExclusionsAction;
 import org.opensearch.action.admin.cluster.configuration.TransportClearVotingConfigExclusionsAction;
+import org.opensearch.action.admin.cluster.decommission.awareness.get.GetDecommissionStateAction;
+import org.opensearch.action.admin.cluster.decommission.awareness.get.TransportGetDecommissionStateAction;
+import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateAction;
+import org.opensearch.action.admin.cluster.decommission.awareness.delete.TransportDeleteDecommissionStateAction;
+import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionAction;
+import org.opensearch.action.admin.cluster.decommission.awareness.put.TransportDecommissionAction;
 import org.opensearch.action.admin.cluster.health.ClusterHealthAction;
 import org.opensearch.action.admin.cluster.health.TransportClusterHealthAction;
 import org.opensearch.action.admin.cluster.node.hotthreads.NodesHotThreadsAction;
@@ -309,9 +315,11 @@
 import org.opensearch.rest.action.admin.cluster.RestClusterStatsAction;
 import org.opensearch.rest.action.admin.cluster.RestClusterUpdateSettingsAction;
 import org.opensearch.rest.action.admin.cluster.RestCreateSnapshotAction;
+import org.opensearch.rest.action.admin.cluster.RestDeleteDecommissionStateAction;
 import org.opensearch.rest.action.admin.cluster.RestDeleteRepositoryAction;
 import org.opensearch.rest.action.admin.cluster.RestDeleteSnapshotAction;
 import org.opensearch.rest.action.admin.cluster.RestDeleteStoredScriptAction;
+import org.opensearch.rest.action.admin.cluster.RestGetDecommissionStateAction;
 import org.opensearch.rest.action.admin.cluster.RestGetRepositoriesAction;
 import org.opensearch.rest.action.admin.cluster.RestGetScriptContextAction;
 import org.opensearch.rest.action.admin.cluster.RestGetScriptLanguageAction;
@@ -324,6 +332,7 @@
 import org.opensearch.rest.action.admin.cluster.RestNodesStatsAction;
 import org.opensearch.rest.action.admin.cluster.RestNodesUsageAction;
 import org.opensearch.rest.action.admin.cluster.RestPendingClusterTasksAction;
+import org.opensearch.rest.action.admin.cluster.RestDecommissionAction;
 import org.opensearch.rest.action.admin.cluster.RestPutRepositoryAction;
 import org.opensearch.rest.action.admin.cluster.RestPutStoredScriptAction;
 import org.opensearch.rest.action.admin.cluster.RestReloadSecureSettingsAction;
@@ -694,6 +703,10 @@ public void reg
         actions.register(PitSegmentsAction.INSTANCE, TransportPitSegmentsAction.class);
         actions.register(GetAllPitsAction.INSTANCE, TransportGetAllPitsAction.class);
 
+        // Decommission actions
+        actions.register(DecommissionAction.INSTANCE, TransportDecommissionAction.class);
+        actions.register(GetDecommissionStateAction.INSTANCE, TransportGetDecommissionStateAction.class);
+        actions.register(DeleteDecommissionStateAction.INSTANCE, TransportDeleteDecommissionStateAction.class);
         return unmodifiableMap(actions.getRegistry());
     }
 
@@ -875,6 +888,7 @@ public void initRestHandlers(Supplier<DiscoveryNodes> nodesInCluster) {
         registerHandler.accept(new RestDeletePitAction());
         registerHandler.accept(new RestGetAllPitsAction(nodesInCluster));
         registerHandler.accept(new RestPitSegmentsAction(nodesInCluster));
+        registerHandler.accept(new RestDeleteDecommissionStateAction());
 
         for (ActionPlugin plugin : actionPlugins) {
             for (RestHandler handler : plugin.getRestHandlers(
@@ -890,6 +904,8 @@ public void initRestHandlers(Supplier<DiscoveryNodes> nodesInCluster) {
             }
         }
         registerHandler.accept(new RestCatAction(catActions));
+        registerHandler.accept(new RestDecommissionAction());
+        registerHandler.accept(new RestGetDecommissionStateAction());
 
         // Remote Store APIs
         if (FeatureFlags.isEnabled(FeatureFlags.REMOTE_STORE)) {
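On the `OpenSearchException` hunk above: each `OpenSearchExceptionHandle` entry binds a never-reused wire id (163 and 164 here) and a minimum readable version (`V_2_4_0`) to a `StreamInput` constructor reference that acts as the deserializer. A simplified, self-contained sketch of that registration contract, using stand-in types rather than the real OpenSearch classes:

```java
import java.io.DataInput;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

// Stand-in for the Writeable.Reader role played by references like
// NodeDecommissionedException::new in the enum above.
interface Reader<T> {
    T read(DataInput in) throws IOException;
}

class WireExceptionRegistry {
    private final Map<Integer, Reader<? extends Exception>> readers = new HashMap<>();

    // Mirrors an OpenSearchExceptionHandle entry: unique id -> deserializer.
    <T extends Exception> void register(int id, Reader<T> reader) {
        if (readers.putIfAbsent(id, reader) != null) {
            throw new IllegalArgumentException("wire id " + id + " already in use");
        }
    }

    // On receipt, the id read off the wire selects the constructor to invoke.
    Exception deserialize(int id, DataInput in) throws IOException {
        return readers.get(id).read(in);
    }
}
```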
a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/DeleteDecommissionStateAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/DeleteDecommissionStateAction.java new file mode 100644 index 0000000000000..3aff666d388be --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/DeleteDecommissionStateAction.java @@ -0,0 +1,25 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.decommission.awareness.delete; + +import org.opensearch.action.ActionType; + +/** + * Delete decommission state action. + * + * @opensearch.internal + */ +public class DeleteDecommissionStateAction extends ActionType { + public static final DeleteDecommissionStateAction INSTANCE = new DeleteDecommissionStateAction(); + public static final String NAME = "cluster:admin/decommission/awareness/delete"; + + private DeleteDecommissionStateAction() { + super(NAME, DeleteDecommissionStateResponse::new); + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/DeleteDecommissionStateRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/DeleteDecommissionStateRequest.java new file mode 100644 index 0000000000000..205be54a36c33 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/DeleteDecommissionStateRequest.java @@ -0,0 +1,40 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.decommission.awareness.delete; + +import org.opensearch.action.ActionRequestValidationException; +import org.opensearch.action.support.clustermanager.ClusterManagerNodeRequest; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; + +import java.io.IOException; + +/** + * Request for deleting decommission request. + * + * @opensearch.internal + */ +public class DeleteDecommissionStateRequest extends ClusterManagerNodeRequest { + + public DeleteDecommissionStateRequest() {} + + public DeleteDecommissionStateRequest(StreamInput in) throws IOException { + super(in); + } + + @Override + public ActionRequestValidationException validate() { + return null; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/DeleteDecommissionStateRequestBuilder.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/DeleteDecommissionStateRequestBuilder.java new file mode 100644 index 0000000000000..08f194c53f18e --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/DeleteDecommissionStateRequestBuilder.java @@ -0,0 +1,27 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
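The delete request deliberately carries no payload: validate() returns null and writeTo only forwards the base-class fields, since "clear whatever decommission metadata exists" needs no parameters. Clearing it through the builder added below is therefore a one-liner (sketch, assuming a connected client):

    DeleteDecommissionStateResponse response =
        client.admin().cluster().prepareDeleteDecommissionRequest().get();
    assert response.isAcknowledged();
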
+ */ + +package org.opensearch.action.admin.cluster.decommission.awareness.delete; + +import org.opensearch.action.support.clustermanager.ClusterManagerNodeOperationRequestBuilder; +import org.opensearch.client.OpenSearchClient; + +/** + * Builder for Delete decommission request. + * + * @opensearch.internal + */ +public class DeleteDecommissionStateRequestBuilder extends ClusterManagerNodeOperationRequestBuilder< + DeleteDecommissionStateRequest, + DeleteDecommissionStateResponse, + DeleteDecommissionStateRequestBuilder> { + + public DeleteDecommissionStateRequestBuilder(OpenSearchClient client, DeleteDecommissionStateAction action) { + super(client, action, new DeleteDecommissionStateRequest()); + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/DeleteDecommissionStateResponse.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/DeleteDecommissionStateResponse.java new file mode 100644 index 0000000000000..2ff634966586a --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/DeleteDecommissionStateResponse.java @@ -0,0 +1,36 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.decommission.awareness.delete; + +import org.opensearch.action.support.master.AcknowledgedResponse; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; + +import java.io.IOException; + +/** + * Response returned after deletion of decommission request. + * + * @opensearch.internal + */ +public class DeleteDecommissionStateResponse extends AcknowledgedResponse { + + public DeleteDecommissionStateResponse(StreamInput in) throws IOException { + super(in); + } + + public DeleteDecommissionStateResponse(boolean acknowledged) { + super(acknowledged); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/TransportDeleteDecommissionStateAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/TransportDeleteDecommissionStateAction.java new file mode 100644 index 0000000000000..7d8f4bdd8304c --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/TransportDeleteDecommissionStateAction.java @@ -0,0 +1,86 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.action.admin.cluster.decommission.awareness.delete; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.action.ActionListener; +import org.opensearch.action.support.ActionFilters; +import org.opensearch.action.support.clustermanager.TransportClusterManagerNodeAction; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.block.ClusterBlockException; +import org.opensearch.cluster.block.ClusterBlockLevel; +import org.opensearch.cluster.decommission.DecommissionService; +import org.opensearch.cluster.metadata.IndexNameExpressionResolver; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.inject.Inject; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.TransportService; + +import java.io.IOException; + +/** + * Transport action for delete decommission. + * + * @opensearch.internal + */ +public class TransportDeleteDecommissionStateAction extends TransportClusterManagerNodeAction< + DeleteDecommissionStateRequest, + DeleteDecommissionStateResponse> { + + private static final Logger logger = LogManager.getLogger(TransportDeleteDecommissionStateAction.class); + private final DecommissionService decommissionService; + + @Inject + public TransportDeleteDecommissionStateAction( + TransportService transportService, + ClusterService clusterService, + ThreadPool threadPool, + ActionFilters actionFilters, + IndexNameExpressionResolver indexNameExpressionResolver, + DecommissionService decommissionService + ) { + super( + DeleteDecommissionStateAction.NAME, + transportService, + clusterService, + threadPool, + actionFilters, + DeleteDecommissionStateRequest::new, + indexNameExpressionResolver + ); + this.decommissionService = decommissionService; + } + + @Override + protected String executor() { + return ThreadPool.Names.SAME; + } + + @Override + protected DeleteDecommissionStateResponse read(StreamInput in) throws IOException { + return new DeleteDecommissionStateResponse(in); + } + + @Override + protected ClusterBlockException checkBlock(DeleteDecommissionStateRequest request, ClusterState state) { + return state.blocks().globalBlockedException(ClusterBlockLevel.METADATA_WRITE); + } + + @Override + protected void clusterManagerOperation( + DeleteDecommissionStateRequest request, + ClusterState state, + ActionListener listener + ) { + logger.info("Received delete decommission Request [{}]", request); + this.decommissionService.startRecommissionAction(listener); + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/package-info.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/package-info.java new file mode 100644 index 0000000000000..c2cfc03baa45e --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/delete/package-info.java @@ -0,0 +1,10 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** Delete decommission transport handlers. 
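 * <p>
 * The transport action above delegates to {@code DecommissionService#startRecommissionAction},
 * so deleting the decommission state is what actually recommissions a previously
 * drained attribute value; the caller just sees an acknowledged-style response.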
*/ +package org.opensearch.action.admin.cluster.decommission.awareness.delete; diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateAction.java new file mode 100644 index 0000000000000..72fd1a26cb860 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateAction.java @@ -0,0 +1,26 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.decommission.awareness.get; + +import org.opensearch.action.ActionType; + +/** + * Get decommission action + * + * @opensearch.internal + */ +public class GetDecommissionStateAction extends ActionType { + + public static final GetDecommissionStateAction INSTANCE = new GetDecommissionStateAction(); + public static final String NAME = "cluster:admin/decommission/awareness/get"; + + private GetDecommissionStateAction() { + super(NAME, GetDecommissionStateResponse::new); + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateRequest.java new file mode 100644 index 0000000000000..1f301aa2b5273 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateRequest.java @@ -0,0 +1,80 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.action.admin.cluster.decommission.awareness.get; + +import org.opensearch.action.ActionRequestValidationException; +import org.opensearch.action.support.clustermanager.ClusterManagerNodeReadRequest; +import org.opensearch.common.Strings; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; + +import java.io.IOException; + +import static org.opensearch.action.ValidateActions.addValidationError; + +/** + * Get Decommissioned attribute request + * + * @opensearch.internal + */ +public class GetDecommissionStateRequest extends ClusterManagerNodeReadRequest { + + private String attributeName; + + public GetDecommissionStateRequest() {} + + /** + * Constructs a new get decommission state request with given attribute name + * + * @param attributeName name of the attribute + */ + public GetDecommissionStateRequest(String attributeName) { + this.attributeName = attributeName; + } + + public GetDecommissionStateRequest(StreamInput in) throws IOException { + super(in); + attributeName = in.readString(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeString(attributeName); + } + + @Override + public ActionRequestValidationException validate() { + ActionRequestValidationException validationException = null; + if (attributeName == null || Strings.isEmpty(attributeName)) { + validationException = addValidationError("attribute name is missing", validationException); + } + return validationException; + } + + /** + * Sets attribute name + * + * @param attributeName attribute name + * @return this request + */ + public GetDecommissionStateRequest attributeName(String attributeName) { + this.attributeName = attributeName; + return this; + } + + /** + * Returns attribute name + * + * @return attributeName name of attribute + */ + public String attributeName() { + return this.attributeName; + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateRequestBuilder.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateRequestBuilder.java new file mode 100644 index 0000000000000..e766e9c674ff7 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateRequestBuilder.java @@ -0,0 +1,39 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
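GetDecommissionStateRequest is the only request in this family with a mandatory field; validation rejects a missing or empty attribute name. A quick sketch of the contract, where validate() returning null means the request is valid:

    GetDecommissionStateRequest request = new GetDecommissionStateRequest();
    assert request.validate() != null;                        // "attribute name is missing"
    assert request.attributeName("zone").validate() == null;  // valid once the name is set
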
+ */ + +package org.opensearch.action.admin.cluster.decommission.awareness.get; + +import org.opensearch.action.support.clustermanager.ClusterManagerNodeReadOperationRequestBuilder; +import org.opensearch.client.OpenSearchClient; + +/** + * Get decommission request builder + * + * @opensearch.internal + */ +public class GetDecommissionStateRequestBuilder extends ClusterManagerNodeReadOperationRequestBuilder< + GetDecommissionStateRequest, + GetDecommissionStateResponse, + GetDecommissionStateRequestBuilder> { + + /** + * Creates new get decommissioned attributes request builder + */ + public GetDecommissionStateRequestBuilder(OpenSearchClient client, GetDecommissionStateAction action) { + super(client, action, new GetDecommissionStateRequest()); + } + + /** + * @param attributeName name of attribute + * @return current object + */ + public GetDecommissionStateRequestBuilder setAttributeName(String attributeName) { + request.attributeName(attributeName); + return this; + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateResponse.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateResponse.java new file mode 100644 index 0000000000000..ec0bd7cf7e7eb --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateResponse.java @@ -0,0 +1,121 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.decommission.awareness.get; + +import org.opensearch.OpenSearchParseException; +import org.opensearch.action.ActionResponse; +import org.opensearch.cluster.decommission.DecommissionStatus; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; +import org.opensearch.common.xcontent.ToXContentObject; +import org.opensearch.common.xcontent.XContentBuilder; +import org.opensearch.common.xcontent.XContentParser; + +import java.io.IOException; +import java.util.Locale; +import java.util.Objects; + +import static org.opensearch.common.xcontent.XContentParserUtils.ensureExpectedToken; + +/** + * Response for decommission status + * + * @opensearch.internal + */ +public class GetDecommissionStateResponse extends ActionResponse implements ToXContentObject { + + private String attributeValue; + private DecommissionStatus status; + + GetDecommissionStateResponse() { + this(null, null); + } + + GetDecommissionStateResponse(String attributeValue, DecommissionStatus status) { + this.attributeValue = attributeValue; + this.status = status; + } + + GetDecommissionStateResponse(StreamInput in) throws IOException { + // read decommissioned attribute and status only if it is present + if (in.readBoolean()) { + this.attributeValue = in.readString(); + this.status = DecommissionStatus.fromString(in.readString()); + } + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + // if decommissioned attribute value is null or status is null then mark its absence + if (attributeValue == null || status == null) { + out.writeBoolean(false); + } else { + out.writeBoolean(true); + out.writeString(attributeValue); + out.writeString(status.status()); + } + } + + public String getAttributeValue() { + return attributeValue; + } + + public DecommissionStatus 
getDecommissionStatus() { + return status; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + if (attributeValue != null && status != null) { + builder.field(attributeValue, status); + } + builder.endObject(); + return builder; + } + + public static GetDecommissionStateResponse fromXContent(XContentParser parser) throws IOException { + ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.nextToken(), parser); + XContentParser.Token token; + String attributeValue = null; + DecommissionStatus status = null; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + attributeValue = parser.currentName(); + if (parser.nextToken() != XContentParser.Token.VALUE_STRING) { + throw new OpenSearchParseException("failed to parse status of decommissioning, expected string but found unknown type"); + } + status = DecommissionStatus.fromString(parser.text().toLowerCase(Locale.ROOT)); + } else { + throw new OpenSearchParseException( + "failed to parse decommission state, expected [{}] but found [{}]", + XContentParser.Token.FIELD_NAME, + token + ); + } + } + return new GetDecommissionStateResponse(attributeValue, status); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + GetDecommissionStateResponse that = (GetDecommissionStateResponse) o; + if (!Objects.equals(attributeValue, that.attributeValue)) { + return false; + } + return Objects.equals(status, that.status); + } + + @Override + public int hashCode() { + return Objects.hash(attributeValue, status); + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/TransportGetDecommissionStateAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/TransportGetDecommissionStateAction.java new file mode 100644 index 0000000000000..d811ab8cf6948 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/TransportGetDecommissionStateAction.java @@ -0,0 +1,89 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
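The response serializes as a single-field object keyed by the decommissioned attribute value, which is exactly the shape fromXContent above parses back. A sketch of producing that shape; note the constructors are package-private, so this assumes same-package test code, and the rendered status string is approximate:

    // renders roughly as: {"zone-a":"in_progress"}
    XContentBuilder builder = XContentFactory.jsonBuilder();
    new GetDecommissionStateResponse("zone-a", DecommissionStatus.IN_PROGRESS)
        .toXContent(builder, ToXContent.EMPTY_PARAMS);
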
+ */ + +package org.opensearch.action.admin.cluster.decommission.awareness.get; + +import org.opensearch.action.ActionListener; +import org.opensearch.action.support.ActionFilters; +import org.opensearch.action.support.clustermanager.TransportClusterManagerNodeReadAction; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.block.ClusterBlockException; +import org.opensearch.cluster.block.ClusterBlockLevel; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; +import org.opensearch.cluster.metadata.IndexNameExpressionResolver; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.inject.Inject; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.TransportService; + +import java.io.IOException; + +/** + * Transport action for getting decommission status + * + * @opensearch.internal + */ +public class TransportGetDecommissionStateAction extends TransportClusterManagerNodeReadAction< + GetDecommissionStateRequest, + GetDecommissionStateResponse> { + + @Inject + public TransportGetDecommissionStateAction( + TransportService transportService, + ClusterService clusterService, + ThreadPool threadPool, + ActionFilters actionFilters, + IndexNameExpressionResolver indexNameExpressionResolver + ) { + super( + GetDecommissionStateAction.NAME, + transportService, + clusterService, + threadPool, + actionFilters, + GetDecommissionStateRequest::new, + indexNameExpressionResolver + ); + } + + @Override + protected String executor() { + return ThreadPool.Names.SAME; + } + + @Override + protected GetDecommissionStateResponse read(StreamInput in) throws IOException { + return new GetDecommissionStateResponse(in); + } + + @Override + protected void clusterManagerOperation( + GetDecommissionStateRequest request, + ClusterState state, + ActionListener listener + ) throws Exception { + DecommissionAttributeMetadata decommissionAttributeMetadata = state.metadata().decommissionAttributeMetadata(); + if (decommissionAttributeMetadata != null + && request.attributeName().equals(decommissionAttributeMetadata.decommissionAttribute().attributeName())) { + listener.onResponse( + new GetDecommissionStateResponse( + decommissionAttributeMetadata.decommissionAttribute().attributeValue(), + decommissionAttributeMetadata.status() + ) + ); + } else { + listener.onResponse(new GetDecommissionStateResponse()); + } + } + + @Override + protected ClusterBlockException checkBlock(GetDecommissionStateRequest request, ClusterState state) { + return state.blocks().globalBlockedException(ClusterBlockLevel.METADATA_READ); + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/package-info.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/package-info.java new file mode 100644 index 0000000000000..5b88e91cf4f9d --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/get/package-info.java @@ -0,0 +1,10 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +/** Transport handlers for getting status of decommission request */ +package org.opensearch.action.admin.cluster.decommission.awareness.get; diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/package-info.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/package-info.java new file mode 100644 index 0000000000000..e1260e638c91d --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/package-info.java @@ -0,0 +1,10 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** Decommission transport handlers. */ +package org.opensearch.action.admin.cluster.decommission.awareness; diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionAction.java new file mode 100644 index 0000000000000..56678650f6e35 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionAction.java @@ -0,0 +1,25 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.decommission.awareness.put; + +import org.opensearch.action.ActionType; + +/** + * Register decommission action + * + * @opensearch.internal + */ +public final class DecommissionAction extends ActionType { + public static final DecommissionAction INSTANCE = new DecommissionAction(); + public static final String NAME = "cluster:admin/decommission/awareness/put"; + + private DecommissionAction() { + super(NAME, DecommissionResponse::new); + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionRequest.java new file mode 100644 index 0000000000000..79a6688dc6049 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionRequest.java @@ -0,0 +1,84 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.decommission.awareness.put; + +import org.opensearch.action.ActionRequestValidationException; +import org.opensearch.action.support.clustermanager.ClusterManagerNodeRequest; +import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.common.Strings; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; + +import java.io.IOException; + +import static org.opensearch.action.ValidateActions.addValidationError; + +/** + * Register decommission request. + *
<p>
+ * Registers a decommission request with decommission attribute and timeout + * + * @opensearch.internal + */ +public class DecommissionRequest extends ClusterManagerNodeRequest { + + private DecommissionAttribute decommissionAttribute; + + public DecommissionRequest() {} + + public DecommissionRequest(DecommissionAttribute decommissionAttribute) { + this.decommissionAttribute = decommissionAttribute; + } + + public DecommissionRequest(StreamInput in) throws IOException { + super(in); + decommissionAttribute = new DecommissionAttribute(in); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + decommissionAttribute.writeTo(out); + } + + /** + * Sets decommission attribute for decommission request + * + * @param decommissionAttribute attribute key-value that needs to be decommissioned + * @return this request + */ + public DecommissionRequest setDecommissionAttribute(DecommissionAttribute decommissionAttribute) { + this.decommissionAttribute = decommissionAttribute; + return this; + } + + /** + * @return Returns the decommission attribute key-value + */ + public DecommissionAttribute getDecommissionAttribute() { + return this.decommissionAttribute; + } + + @Override + public ActionRequestValidationException validate() { + ActionRequestValidationException validationException = null; + if (decommissionAttribute.attributeName() == null || Strings.isEmpty(decommissionAttribute.attributeName())) { + validationException = addValidationError("attribute name is missing", validationException); + } + if (decommissionAttribute.attributeValue() == null || Strings.isEmpty(decommissionAttribute.attributeValue())) { + validationException = addValidationError("attribute value is missing", validationException); + } + return validationException; + } + + @Override + public String toString() { + return "DecommissionRequest{" + "decommissionAttribute=" + decommissionAttribute + '}'; + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionRequestBuilder.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionRequestBuilder.java new file mode 100644 index 0000000000000..47af3b952c895 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionRequestBuilder.java @@ -0,0 +1,38 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
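Both halves of the attribute are mandatory; validate() flags an empty name or value up front rather than letting the service layer fail later. Sketch:

    DecommissionRequest request = new DecommissionRequest(new DecommissionAttribute("zone", ""));
    ActionRequestValidationException e = request.validate();
    assert e != null && e.validationErrors().contains("attribute value is missing");

Note that validate() dereferences the attribute directly, so it assumes the attribute was supplied via the constructor or setter; the bare no-arg constructor path is only meant for stream deserialization.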
+ */ + +package org.opensearch.action.admin.cluster.decommission.awareness.put; + +import org.opensearch.action.ActionType; +import org.opensearch.action.support.clustermanager.ClusterManagerNodeOperationRequestBuilder; +import org.opensearch.client.OpenSearchClient; +import org.opensearch.cluster.decommission.DecommissionAttribute; + +/** + * Register decommission request builder + * + * @opensearch.internal + */ +public class DecommissionRequestBuilder extends ClusterManagerNodeOperationRequestBuilder< + DecommissionRequest, + DecommissionResponse, + DecommissionRequestBuilder> { + + public DecommissionRequestBuilder(OpenSearchClient client, ActionType action, DecommissionRequest request) { + super(client, action, request); + } + + /** + * @param decommissionAttribute decommission attribute + * @return current object + */ + public DecommissionRequestBuilder setDecommissionedAttribute(DecommissionAttribute decommissionAttribute) { + request.setDecommissionAttribute(decommissionAttribute); + return this; + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionResponse.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionResponse.java new file mode 100644 index 0000000000000..499f403c8cd64 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionResponse.java @@ -0,0 +1,37 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.cluster.decommission.awareness.put; + +import org.opensearch.action.support.master.AcknowledgedResponse; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; +import org.opensearch.common.xcontent.ToXContentObject; + +import java.io.IOException; + +/** + * Response for decommission request + * + * @opensearch.internal + */ +public class DecommissionResponse extends AcknowledgedResponse implements ToXContentObject { + + public DecommissionResponse(StreamInput in) throws IOException { + super(in); + } + + public DecommissionResponse(boolean acknowledged) { + super(acknowledged); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/TransportDecommissionAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/TransportDecommissionAction.java new file mode 100644 index 0000000000000..3a067d2f110b9 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/TransportDecommissionAction.java @@ -0,0 +1,81 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
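The builder rides on the generic ClusterManagerNodeOperationRequestBuilder plumbing, so a synchronous put looks like this (sketch; the attribute is illustrative):

    DecommissionResponse response = new DecommissionRequestBuilder(
            client, DecommissionAction.INSTANCE, new DecommissionRequest())
        .setDecommissionedAttribute(new DecommissionAttribute("zone", "zone-a"))
        .get();
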
+ */ + +package org.opensearch.action.admin.cluster.decommission.awareness.put; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.action.ActionListener; +import org.opensearch.action.support.ActionFilters; +import org.opensearch.action.support.clustermanager.TransportClusterManagerNodeAction; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.block.ClusterBlockException; +import org.opensearch.cluster.block.ClusterBlockLevel; +import org.opensearch.cluster.decommission.DecommissionService; +import org.opensearch.cluster.metadata.IndexNameExpressionResolver; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.inject.Inject; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.TransportService; + +import java.io.IOException; + +/** + * Transport action for registering decommission + * + * @opensearch.internal + */ +public class TransportDecommissionAction extends TransportClusterManagerNodeAction { + + private static final Logger logger = LogManager.getLogger(TransportDecommissionAction.class); + private final DecommissionService decommissionService; + + @Inject + public TransportDecommissionAction( + TransportService transportService, + ClusterService clusterService, + DecommissionService decommissionService, + ThreadPool threadPool, + ActionFilters actionFilters, + IndexNameExpressionResolver indexNameExpressionResolver + ) { + super( + DecommissionAction.NAME, + transportService, + clusterService, + threadPool, + actionFilters, + DecommissionRequest::new, + indexNameExpressionResolver + ); + this.decommissionService = decommissionService; + } + + @Override + protected String executor() { + return ThreadPool.Names.SAME; + } + + @Override + protected DecommissionResponse read(StreamInput in) throws IOException { + return new DecommissionResponse(in); + } + + @Override + protected ClusterBlockException checkBlock(DecommissionRequest request, ClusterState state) { + return state.blocks().globalBlockedException(ClusterBlockLevel.METADATA_WRITE); + } + + @Override + protected void clusterManagerOperation(DecommissionRequest request, ClusterState state, ActionListener listener) + throws Exception { + logger.info("starting awareness attribute [{}] decommissioning", request.getDecommissionAttribute().toString()); + decommissionService.startDecommissionAction(request.getDecommissionAttribute(), listener); + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/package-info.java b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/package-info.java new file mode 100644 index 0000000000000..c361f4b95a484 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/admin/cluster/decommission/awareness/put/package-info.java @@ -0,0 +1,10 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +/** Transport handlers for putting a new decommission request */ +package org.opensearch.action.admin.cluster.decommission.awareness.put; diff --git a/server/src/main/java/org/opensearch/client/ClusterAdminClient.java b/server/src/main/java/org/opensearch/client/ClusterAdminClient.java index 47fb5bd8d4600..4ab438ec064f1 100644 --- a/server/src/main/java/org/opensearch/client/ClusterAdminClient.java +++ b/server/src/main/java/org/opensearch/client/ClusterAdminClient.java @@ -37,6 +37,15 @@ import org.opensearch.action.admin.cluster.allocation.ClusterAllocationExplainRequest; import org.opensearch.action.admin.cluster.allocation.ClusterAllocationExplainRequestBuilder; import org.opensearch.action.admin.cluster.allocation.ClusterAllocationExplainResponse; +import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateRequest; +import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateRequestBuilder; +import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateResponse; +import org.opensearch.action.admin.cluster.decommission.awareness.get.GetDecommissionStateRequest; +import org.opensearch.action.admin.cluster.decommission.awareness.get.GetDecommissionStateRequestBuilder; +import org.opensearch.action.admin.cluster.decommission.awareness.get.GetDecommissionStateResponse; +import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionRequest; +import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionRequestBuilder; +import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionResponse; import org.opensearch.action.admin.cluster.health.ClusterHealthRequest; import org.opensearch.action.admin.cluster.health.ClusterHealthRequestBuilder; import org.opensearch.action.admin.cluster.health.ClusterHealthResponse; @@ -846,4 +855,48 @@ public interface ClusterAdminClient extends OpenSearchClient { */ ClusterDeleteWeightedRoutingRequestBuilder prepareDeleteWeightedRouting(); + /** + * Decommission awareness attribute + */ + ActionFuture decommission(DecommissionRequest request); + + /** + * Decommission awareness attribute + */ + void decommission(DecommissionRequest request, ActionListener listener); + + /** + * Decommission awareness attribute + */ + DecommissionRequestBuilder prepareDecommission(DecommissionRequest request); + + /** + * Get Decommissioned attribute + */ + ActionFuture getDecommissionState(GetDecommissionStateRequest request); + + /** + * Get Decommissioned attribute + */ + void getDecommissionState(GetDecommissionStateRequest request, ActionListener listener); + + /** + * Get Decommissioned attribute + */ + GetDecommissionStateRequestBuilder prepareGetDecommissionState(); + + /** + * Deletes the decommission metadata. + */ + ActionFuture deleteDecommissionState(DeleteDecommissionStateRequest request); + + /** + * Deletes the decommission metadata. + */ + void deleteDecommissionState(DeleteDecommissionStateRequest request, ActionListener listener); + + /** + * Deletes the decommission metadata. 
+ */ + DeleteDecommissionStateRequestBuilder prepareDeleteDecommissionRequest(); } diff --git a/server/src/main/java/org/opensearch/client/Requests.java b/server/src/main/java/org/opensearch/client/Requests.java index 107cfe5f04d06..21f2a2d906602 100644 --- a/server/src/main/java/org/opensearch/client/Requests.java +++ b/server/src/main/java/org/opensearch/client/Requests.java @@ -32,6 +32,9 @@ package org.opensearch.client; +import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateRequest; +import org.opensearch.action.admin.cluster.decommission.awareness.get.GetDecommissionStateRequest; +import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionRequest; import org.opensearch.action.admin.cluster.health.ClusterHealthRequest; import org.opensearch.action.admin.cluster.node.info.NodesInfoRequest; import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest; @@ -578,4 +581,29 @@ public static ClusterGetWeightedRoutingRequest getWeightedRoutingRequest(String public static ClusterDeleteWeightedRoutingRequest deleteWeightedRoutingRequest() { return new ClusterDeleteWeightedRoutingRequest(); } + + /** + * Creates a new decommission request. + * + * @return returns put decommission request + */ + public static DecommissionRequest decommissionRequest() { + return new DecommissionRequest(); + } + + /** + * Get decommissioned attribute from metadata + * + * @return returns get decommission request + */ + public static GetDecommissionStateRequest getDecommissionStateRequest() { + return new GetDecommissionStateRequest(); + } + + /** + * Creates a new delete decommission request. + */ + public static DeleteDecommissionStateRequest deleteDecommissionStateRequest() { + return new DeleteDecommissionStateRequest(); + } } diff --git a/server/src/main/java/org/opensearch/client/support/AbstractClient.java b/server/src/main/java/org/opensearch/client/support/AbstractClient.java index 80d04156f475b..828ca5f8083ee 100644 --- a/server/src/main/java/org/opensearch/client/support/AbstractClient.java +++ b/server/src/main/java/org/opensearch/client/support/AbstractClient.java @@ -43,6 +43,18 @@ import org.opensearch.action.admin.cluster.allocation.ClusterAllocationExplainRequest; import org.opensearch.action.admin.cluster.allocation.ClusterAllocationExplainRequestBuilder; import org.opensearch.action.admin.cluster.allocation.ClusterAllocationExplainResponse; +import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateAction; +import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateRequest; +import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateRequestBuilder; +import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateResponse; +import org.opensearch.action.admin.cluster.decommission.awareness.get.GetDecommissionStateAction; +import org.opensearch.action.admin.cluster.decommission.awareness.get.GetDecommissionStateRequest; +import org.opensearch.action.admin.cluster.decommission.awareness.get.GetDecommissionStateRequestBuilder; +import org.opensearch.action.admin.cluster.decommission.awareness.get.GetDecommissionStateResponse; +import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionAction; +import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionRequest; +import 
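The three static factories keep Requests consistent with the existing weighted-routing helpers in the same file. Typical composition (sketch):

    DecommissionRequest put = Requests.decommissionRequest()
        .setDecommissionAttribute(new DecommissionAttribute("zone", "zone-a"));
    GetDecommissionStateRequest get = Requests.getDecommissionStateRequest().attributeName("zone");
    DeleteDecommissionStateRequest delete = Requests.deleteDecommissionStateRequest();
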
org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionRequestBuilder; +import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionResponse; import org.opensearch.action.admin.cluster.health.ClusterHealthAction; import org.opensearch.action.admin.cluster.health.ClusterHealthRequest; import org.opensearch.action.admin.cluster.health.ClusterHealthRequestBuilder; @@ -1388,6 +1400,54 @@ public DeleteStoredScriptRequestBuilder prepareDeleteStoredScript() { public DeleteStoredScriptRequestBuilder prepareDeleteStoredScript(String id) { return prepareDeleteStoredScript().setId(id); } + + @Override + public ActionFuture decommission(DecommissionRequest request) { + return execute(DecommissionAction.INSTANCE, request); + } + + @Override + public void decommission(DecommissionRequest request, ActionListener listener) { + execute(DecommissionAction.INSTANCE, request, listener); + } + + @Override + public DecommissionRequestBuilder prepareDecommission(DecommissionRequest request) { + return new DecommissionRequestBuilder(this, DecommissionAction.INSTANCE, request); + } + + @Override + public ActionFuture getDecommissionState(GetDecommissionStateRequest request) { + return execute(GetDecommissionStateAction.INSTANCE, request); + } + + @Override + public void getDecommissionState(GetDecommissionStateRequest request, ActionListener listener) { + execute(GetDecommissionStateAction.INSTANCE, request, listener); + } + + @Override + public GetDecommissionStateRequestBuilder prepareGetDecommissionState() { + return new GetDecommissionStateRequestBuilder(this, GetDecommissionStateAction.INSTANCE); + } + + @Override + public ActionFuture deleteDecommissionState(DeleteDecommissionStateRequest request) { + return execute(DeleteDecommissionStateAction.INSTANCE, request); + } + + @Override + public void deleteDecommissionState( + DeleteDecommissionStateRequest request, + ActionListener listener + ) { + execute(DeleteDecommissionStateAction.INSTANCE, request, listener); + } + + @Override + public DeleteDecommissionStateRequestBuilder prepareDeleteDecommissionRequest() { + return new DeleteDecommissionStateRequestBuilder(this, DeleteDecommissionStateAction.INSTANCE); + } } static class IndicesAdmin implements IndicesAdminClient { diff --git a/server/src/main/java/org/opensearch/cluster/ClusterModule.java b/server/src/main/java/org/opensearch/cluster/ClusterModule.java index 46552bb5d6a03..b7e72949ca05c 100644 --- a/server/src/main/java/org/opensearch/cluster/ClusterModule.java +++ b/server/src/main/java/org/opensearch/cluster/ClusterModule.java @@ -35,6 +35,7 @@ import org.opensearch.cluster.action.index.MappingUpdatedAction; import org.opensearch.cluster.action.index.NodeMappingRefreshAction; import org.opensearch.cluster.action.shard.ShardStateAction; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; import org.opensearch.cluster.metadata.ComponentTemplateMetadata; import org.opensearch.cluster.metadata.ComposableIndexTemplateMetadata; import org.opensearch.cluster.metadata.DataStreamMetadata; @@ -73,6 +74,7 @@ import org.opensearch.cluster.routing.allocation.decider.SameShardAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.SnapshotInProgressAllocationDecider; +import org.opensearch.cluster.routing.allocation.decider.TargetPoolAllocationDecider; import org.opensearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider; import 
org.opensearch.cluster.service.ClusterService; import org.opensearch.common.ParseField; @@ -84,6 +86,7 @@ import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Setting.Property; import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.util.concurrent.ThreadContext; import org.opensearch.common.util.set.Sets; import org.opensearch.common.xcontent.NamedXContentRegistry; @@ -193,6 +196,12 @@ public static List getNamedWriteables() { ); registerMetadataCustom(entries, DataStreamMetadata.TYPE, DataStreamMetadata::new, DataStreamMetadata::readDiffFrom); registerMetadataCustom(entries, WeightedRoutingMetadata.TYPE, WeightedRoutingMetadata::new, WeightedRoutingMetadata::readDiffFrom); + registerMetadataCustom( + entries, + DecommissionAttributeMetadata.TYPE, + DecommissionAttributeMetadata::new, + DecommissionAttributeMetadata::readDiffFrom + ); // Task Status (not Diffable) entries.add(new Entry(Task.Status.class, PersistentTasksNodeService.Status.NAME, PersistentTasksNodeService.Status::new)); return entries; @@ -283,6 +292,13 @@ public static List getNamedXWriteables() { WeightedRoutingMetadata::fromXContent ) ); + entries.add( + new NamedXContentRegistry.Entry( + Metadata.Custom.class, + new ParseField(DecommissionAttributeMetadata.TYPE), + DecommissionAttributeMetadata::fromXContent + ) + ); return entries; } @@ -346,6 +362,9 @@ public static Collection createAllocationDeciders( addAllocationDecider(deciders, new ShardsLimitAllocationDecider(settings, clusterSettings)); addAllocationDecider(deciders, new AwarenessAllocationDecider(settings, clusterSettings)); addAllocationDecider(deciders, new NodeLoadAwareAllocationDecider(settings, clusterSettings)); + if (FeatureFlags.isEnabled(FeatureFlags.SEARCHABLE_SNAPSHOT)) { + addAllocationDecider(deciders, new TargetPoolAllocationDecider()); + } clusterPlugins.stream() .flatMap(p -> p.createAllocationDeciders(settings, clusterSettings).stream()) diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index ca1950df81d68..23bc9c37899e3 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -106,6 +106,7 @@ import java.util.stream.StreamSupport; import static org.opensearch.cluster.coordination.NoClusterManagerBlockService.NO_CLUSTER_MANAGER_BLOCK_ID; +import static org.opensearch.cluster.decommission.DecommissionService.nodeCommissioned; import static org.opensearch.gateway.ClusterStateUpdaters.hideStateIfNotRecovered; import static org.opensearch.gateway.GatewayService.STATE_NOT_RECOVERED_BLOCK; import static org.opensearch.monitor.StatusInfo.Status.UNHEALTHY; @@ -139,6 +140,7 @@ public class Coordinator extends AbstractLifecycleComponent implements Discovery private final Settings settings; private final boolean singleNodeDiscovery; + private volatile boolean localNodeCommissioned; private final ElectionStrategy electionStrategy; private final TransportService transportService; private final ClusterManagerService clusterManagerService; @@ -219,7 +221,8 @@ public Coordinator( this::joinLeaderInTerm, this.onJoinValidators, rerouteService, - nodeHealthService + nodeHealthService, + this::onNodeCommissionStatusChange ); this.persistedStateSupplier = persistedStateSupplier; this.noClusterManagerBlockService = new 
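Registering DecommissionAttributeMetadata as both a named writeable and a named XContent entry is what lets this custom section survive cluster-state serialization over the wire and persistence to disk. Reading it back is then straightforward (sketch, using the typed accessor this patch references elsewhere):

    DecommissionAttributeMetadata metadata = clusterState.metadata().decommissionAttributeMetadata();
    if (metadata != null) {
        logger.info("attribute [{}] status [{}]", metadata.decommissionAttribute(), metadata.status());
    }
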
NoClusterManagerBlockService(settings, clusterSettings); @@ -282,6 +285,7 @@ public Coordinator( joinHelper::logLastFailedJoinAttempt ); this.nodeHealthService = nodeHealthService; + this.localNodeCommissioned = true; } private ClusterFormationState getClusterFormationState() { @@ -597,6 +601,9 @@ private void handleJoinRequest(JoinRequest joinRequest, JoinHelper.JoinCallback joinRequest.getSourceNode().getVersion(), stateForJoinValidation.getNodes().getMinNodeVersion() ); + // we are checking source node commission status here to reject any join request coming from a decommissioned node + // even before executing the join task to fail fast + JoinTaskExecutor.ensureNodeCommissioned(joinRequest.getSourceNode(), stateForJoinValidation.metadata()); } sendValidateJoinRequest(stateForJoinValidation, joinRequest, joinCallback); } else { @@ -1425,6 +1432,17 @@ protected void onFoundPeersUpdated() { } } + // package-visible for testing + synchronized void onNodeCommissionStatusChange(boolean localNodeCommissioned) { + this.localNodeCommissioned = localNodeCommissioned; + peerFinder.onNodeCommissionStatusChange(localNodeCommissioned); + } + + // package-visible for testing + boolean localNodeCommissioned() { + return localNodeCommissioned; + } + private void startElectionScheduler() { assert electionScheduler == null : electionScheduler; @@ -1451,6 +1469,14 @@ public void run() { return; } + // if either the localNodeCommissioned flag or the last accepted state thinks it should skip pre voting, we will + // acknowledge it + if (nodeCommissioned(lastAcceptedState.nodes().getLocalNode(), lastAcceptedState.metadata()) == false + || localNodeCommissioned == false) { + logger.debug("skip prevoting as local node is decommissioned"); + return; + } + if (prevotingRound != null) { prevotingRound.close(); } diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinHelper.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinHelper.java index 656e6d220720f..a66152b8016ee 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinHelper.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinHelper.java @@ -42,6 +42,7 @@ import org.opensearch.cluster.ClusterStateTaskListener; import org.opensearch.cluster.NotClusterManagerException; import org.opensearch.cluster.coordination.Coordinator.Mode; +import org.opensearch.cluster.decommission.NodeDecommissionedException; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.routing.RerouteService; @@ -57,6 +58,7 @@ import org.opensearch.monitor.StatusInfo; import org.opensearch.threadpool.ThreadPool; import org.opensearch.threadpool.ThreadPool.Names; +import org.opensearch.transport.RemoteTransportException; import org.opensearch.transport.TransportChannel; import org.opensearch.transport.TransportException; import org.opensearch.transport.TransportRequest; @@ -78,6 +80,7 @@ import java.util.Set; import java.util.concurrent.atomic.AtomicReference; import java.util.function.BiConsumer; +import java.util.function.Consumer; import java.util.function.Function; import java.util.function.LongSupplier; import java.util.function.Supplier; @@ -118,6 +121,7 @@ public class JoinHelper { private final AtomicReference lastFailedJoinAttempt = new AtomicReference<>(); private final Supplier joinTaskExecutorGenerator; + private final Consumer nodeCommissioned; JoinHelper( Settings settings, @@ -130,12 +134,14 @@ public class JoinHelper { Function 
joinLeaderInTerm, Collection> joinValidators, RerouteService rerouteService, - NodeHealthService nodeHealthService + NodeHealthService nodeHealthService, + Consumer nodeCommissioned ) { this.clusterManagerService = clusterManagerService; this.transportService = transportService; this.nodeHealthService = nodeHealthService; this.joinTimeout = JOIN_TIMEOUT_SETTING.get(settings); + this.nodeCommissioned = nodeCommissioned; this.joinTaskExecutorGenerator = () -> new JoinTaskExecutor(settings, allocationService, logger, rerouteService, transportService) { private final long term = currentTermSupplier.getAsLong(); @@ -342,6 +348,7 @@ public void handleResponse(Empty response) { pendingOutgoingJoins.remove(dedupKey); logger.debug("successfully joined {} with {}", destination, joinRequest); lastFailedJoinAttempt.set(null); + nodeCommissioned.accept(true); onCompletion.run(); } @@ -352,6 +359,13 @@ public void handleException(TransportException exp) { FailedJoinAttempt attempt = new FailedJoinAttempt(destination, joinRequest, exp); attempt.logNow(); lastFailedJoinAttempt.set(attempt); + if (exp instanceof RemoteTransportException && (exp.getCause() instanceof NodeDecommissionedException)) { + logger.info( + "local node is decommissioned [{}]. Will not be able to join the cluster", + exp.getCause().getMessage() + ); + nodeCommissioned.accept(false); + } onCompletion.run(); } diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java index 5afdb5b12db23..ac237db85ee5b 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java @@ -39,6 +39,7 @@ import org.opensearch.cluster.ClusterStateTaskExecutor; import org.opensearch.cluster.NotClusterManagerException; import org.opensearch.cluster.block.ClusterBlocks; +import org.opensearch.cluster.decommission.NodeDecommissionedException; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; @@ -60,6 +61,7 @@ import java.util.function.BiConsumer; import java.util.stream.Collectors; +import static org.opensearch.cluster.decommission.DecommissionService.nodeCommissioned; import static org.opensearch.gateway.GatewayService.STATE_NOT_RECOVERED_BLOCK; /** @@ -192,6 +194,9 @@ public ClusterTasksResult execute(ClusterState currentState, List jo // we do this validation quite late to prevent race conditions between nodes joining and importing dangling indices // we have to reject nodes that don't support all indices we have in this cluster ensureIndexCompatibility(node.getVersion(), currentState.getMetadata()); + // we have added the same check in handleJoinRequest method and adding it here as this method + // would guarantee that a decommissioned node would never be able to join the cluster and ensures correctness + ensureNodeCommissioned(node, currentState.metadata()); nodesBuilder.add(node); nodesChanged = true; minClusterNodeVersion = Version.min(minClusterNodeVersion, node.getVersion()); @@ -199,7 +204,7 @@ public ClusterTasksResult execute(ClusterState currentState, List jo if (node.isClusterManagerNode()) { joiniedNodeNameIds.put(node.getName(), node.getId()); } - } catch (IllegalArgumentException | IllegalStateException e) { + } catch (IllegalArgumentException | IllegalStateException | NodeDecommissionedException e) { 
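// A decommissioned node's join is failed individually here; the remaining
// join tasks in the batch still execute, mirroring how incompatible-version
// joins have always been handled.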
results.failure(joinTask, e); continue; } @@ -358,6 +363,7 @@ public boolean runOnlyOnClusterManager() { /** * a task indicates that the current node should become master + * * @deprecated As of 2.0, because supporting inclusive language, replaced by {@link #newBecomeClusterManagerTask()} */ @Deprecated @@ -384,8 +390,9 @@ public static Task newFinishElectionTask() { * Ensures that all indices are compatible with the given node version. This will ensure that all indices in the given metadata * will not be created with a newer version of opensearch as well as that all indices are newer or equal to the minimum index * compatibility version. - * @see Version#minimumIndexCompatibilityVersion() + * * @throws IllegalStateException if any index is incompatible with the given version + * @see Version#minimumIndexCompatibilityVersion() */ public static void ensureIndexCompatibility(final Version nodeVersion, Metadata metadata) { Version supportedIndexVersion = nodeVersion.minimumIndexCompatibilityVersion(); @@ -415,14 +422,18 @@ public static void ensureIndexCompatibility(final Version nodeVersion, Metadata } } - /** ensures that the joining node has a version that's compatible with all current nodes*/ + /** + * ensures that the joining node has a version that's compatible with all current nodes + */ public static void ensureNodesCompatibility(final Version joiningNodeVersion, DiscoveryNodes currentNodes) { final Version minNodeVersion = currentNodes.getMinNodeVersion(); final Version maxNodeVersion = currentNodes.getMaxNodeVersion(); ensureNodesCompatibility(joiningNodeVersion, minNodeVersion, maxNodeVersion); } - /** ensures that the joining node has a version that's compatible with a given version range */ + /** + * ensures that the joining node has a version that's compatible with a given version range + */ public static void ensureNodesCompatibility(Version joiningNodeVersion, Version minClusterNodeVersion, Version maxClusterNodeVersion) { assert minClusterNodeVersion.onOrBefore(maxClusterNodeVersion) : minClusterNodeVersion + " > " + maxClusterNodeVersion; if (joiningNodeVersion.isCompatible(maxClusterNodeVersion) == false) { @@ -466,6 +477,17 @@ public static void ensureMajorVersionBarrier(Version joiningNodeVersion, Version } } + public static void ensureNodeCommissioned(DiscoveryNode node, Metadata metadata) { + if (nodeCommissioned(node, metadata) == false) { + throw new NodeDecommissionedException( + "node [{}] has decommissioned attribute [{}] with current status of decommissioning [{}]", + node.toString(), + metadata.decommissionAttributeMetadata().decommissionAttribute().toString(), + metadata.decommissionAttributeMetadata().status().status() + ); + } + } + public static Collection> addBuiltInJoinValidators( Collection> onJoinValidators ) { @@ -473,6 +495,7 @@ public static Collection> addBuiltInJoin validators.add((node, state) -> { ensureNodesCompatibility(node.getVersion(), state.getNodes()); ensureIndexCompatibility(node.getVersion(), state.getMetadata()); + ensureNodeCommissioned(node, state.getMetadata()); }); validators.addAll(onJoinValidators); return Collections.unmodifiableCollection(validators); diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java new file mode 100644 index 0000000000000..bf2487a1a0e18 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java @@ -0,0 +1,92 @@ +/* + * 
SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.decommission; + +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; +import org.opensearch.common.io.stream.Writeable; + +import java.io.IOException; +import java.util.Objects; + +/** + * {@link DecommissionAttribute} encapsulates information about decommissioned node attribute like attribute name, attribute value. + * + * @opensearch.internal + */ +public final class DecommissionAttribute implements Writeable { + private final String attributeName; + private final String attributeValue; + + /** + * Constructs new decommission attribute name value pair + * + * @param attributeName attribute name + * @param attributeValue attribute value + */ + public DecommissionAttribute(String attributeName, String attributeValue) { + this.attributeName = attributeName; + this.attributeValue = attributeValue; + } + + /** + * Returns attribute name + * + * @return attributeName + */ + public String attributeName() { + return this.attributeName; + } + + /** + * Returns attribute value + * + * @return attributeValue + */ + public String attributeValue() { + return this.attributeValue; + } + + public DecommissionAttribute(StreamInput in) throws IOException { + attributeName = in.readString(); + attributeValue = in.readString(); + } + + /** + * Writes decommission attribute name value to stream output + * + * @param out stream output + */ + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(attributeName); + out.writeString(attributeValue); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + DecommissionAttribute that = (DecommissionAttribute) o; + + if (!attributeName.equals(that.attributeName)) return false; + return attributeValue.equals(that.attributeValue); + } + + @Override + public int hashCode() { + return Objects.hash(attributeName, attributeValue); + } + + @Override + public String toString() { + return "DecommissionAttribute{" + "attributeName='" + attributeName + '\'' + ", attributeValue='" + attributeValue + '\'' + '}'; + } +} diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java new file mode 100644 index 0000000000000..7f605d0710a38 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java @@ -0,0 +1,258 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
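For orientation, an editor's sketch (not part of the change) of how the value type above round-trips through the stream layer; BytesStreamOutput is OpenSearch's in-memory StreamOutput, and the name/value pair is read back in the same order writeTo emits it.

    // requires org.opensearch.common.io.stream.BytesStreamOutput
    DecommissionAttribute original = new DecommissionAttribute("zone", "zone-1");
    BytesStreamOutput out = new BytesStreamOutput();
    original.writeTo(out);                          // writes attributeName, then attributeValue
    DecommissionAttribute copy = new DecommissionAttribute(out.bytes().streamInput());
    assert original.equals(copy) && original.hashCode() == copy.hashCode();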
+ */
+
+package org.opensearch.cluster.decommission;
+
+import org.opensearch.OpenSearchParseException;
+import org.opensearch.Version;
+import org.opensearch.cluster.AbstractNamedDiffable;
+import org.opensearch.cluster.NamedDiff;
+import org.opensearch.cluster.metadata.Metadata;
+import org.opensearch.cluster.metadata.Metadata.Custom;
+import org.opensearch.common.Strings;
+import org.opensearch.common.io.stream.StreamInput;
+import org.opensearch.common.io.stream.StreamOutput;
+import org.opensearch.common.xcontent.ToXContent;
+import org.opensearch.common.xcontent.XContentBuilder;
+import org.opensearch.common.xcontent.XContentParser;
+
+import java.io.IOException;
+import java.util.EnumSet;
+import java.util.Objects;
+
+/**
+ * Contains metadata about the decommission attribute
+ *
+ * @opensearch.internal
+ */
+public class DecommissionAttributeMetadata extends AbstractNamedDiffable<Custom> implements Custom {
+
+    public static final String TYPE = "decommissionedAttribute";
+
+    private final DecommissionAttribute decommissionAttribute;
+    private DecommissionStatus status;
+    public static final String attributeType = "awareness";
+
+    /**
+     * Constructs new decommission attribute metadata with given status
+     *
+     * @param decommissionAttribute attribute details
+     * @param status current status of the attribute decommission
+     */
+    public DecommissionAttributeMetadata(DecommissionAttribute decommissionAttribute, DecommissionStatus status) {
+        this.decommissionAttribute = decommissionAttribute;
+        this.status = status;
+    }
+
+    /**
+     * Constructs new decommission attribute metadata with status as {@link DecommissionStatus#INIT}
+     *
+     * @param decommissionAttribute attribute details
+     */
+    public DecommissionAttributeMetadata(DecommissionAttribute decommissionAttribute) {
+        this(decommissionAttribute, DecommissionStatus.INIT);
+    }
+
+    /**
+     * Returns the current decommissioned attribute
+     *
+     * @return decommissioned attribute
+     */
+    public DecommissionAttribute decommissionAttribute() {
+        return this.decommissionAttribute;
+    }
+
+    /**
+     * Returns the current status of the attribute decommission
+     *
+     * @return current decommission status
+     */
+    public DecommissionStatus status() {
+        return this.status;
+    }
+
+    /**
+     * Validates that the metadata can transition to the given status from the currently registered one
+     *
+     * @param newStatus status the metadata is expected to move to
+     */
+    // synchronized is strictly speaking not needed (this is called by a single thread), but just to be safe
+    public synchronized void validateNewStatus(DecommissionStatus newStatus) {
+        // if the current status is the expected status already or new status is FAILED, we let the check pass
+        if (newStatus.equals(status) || newStatus.equals(DecommissionStatus.FAILED)) {
+            return;
+        }
+        // We don't expect that INIT will be new status, as it is registered only when starting the decommission action
+        switch (newStatus) {
+            case IN_PROGRESS:
+                validateStatus(DecommissionStatus.INIT, newStatus);
+                break;
+            case SUCCESSFUL:
+                validateStatus(DecommissionStatus.IN_PROGRESS, newStatus);
+                break;
+            default:
+                throw new IllegalArgumentException(
+                    "illegal decommission status [" + newStatus.status() + "] requested for updating metadata"
+                );
+        }
+    }
+
+    private void validateStatus(DecommissionStatus expected, DecommissionStatus next) {
+        if (status.equals(expected) == false) {
+            assert false : "can't move decommission status to ["
+                + next
+                + "]. current status: ["
+                + status
+                + "] (expected ["
+                + expected
+                + "])";
+            throw new IllegalStateException(
+                "can't move decommission status to [" + next + "]. 
current status: [" + status + "] (expected [" + expected + "])" + ); + } + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + DecommissionAttributeMetadata that = (DecommissionAttributeMetadata) o; + + if (!status.equals(that.status)) return false; + return decommissionAttribute.equals(that.decommissionAttribute); + } + + @Override + public int hashCode() { + return Objects.hash(attributeType, decommissionAttribute, status); + } + + /** + * {@inheritDoc} + */ + @Override + public String getWriteableName() { + return TYPE; + } + + @Override + public Version getMinimalSupportedVersion() { + return Version.V_2_4_0; + } + + public DecommissionAttributeMetadata(StreamInput in) throws IOException { + this.decommissionAttribute = new DecommissionAttribute(in); + this.status = DecommissionStatus.fromString(in.readString()); + } + + public static NamedDiff readDiffFrom(StreamInput in) throws IOException { + return readDiffFrom(Custom.class, TYPE, in); + } + + /** + * {@inheritDoc} + */ + @Override + public void writeTo(StreamOutput out) throws IOException { + decommissionAttribute.writeTo(out); + out.writeString(status.status()); + } + + public static DecommissionAttributeMetadata fromXContent(XContentParser parser) throws IOException { + XContentParser.Token token; + DecommissionAttribute decommissionAttribute = null; + DecommissionStatus status = null; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + String currentFieldName = parser.currentName(); + if (attributeType.equals(currentFieldName)) { + if (parser.nextToken() != XContentParser.Token.START_OBJECT) { + throw new OpenSearchParseException( + "failed to parse decommission attribute type [{}], expected object", + attributeType + ); + } + token = parser.nextToken(); + if (token != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + String fieldName = parser.currentName(); + String value; + token = parser.nextToken(); + if (token == XContentParser.Token.VALUE_STRING) { + value = parser.text(); + } else { + throw new OpenSearchParseException( + "failed to parse attribute [{}], expected string for attribute value", + fieldName + ); + } + decommissionAttribute = new DecommissionAttribute(fieldName, value); + parser.nextToken(); + } else { + throw new OpenSearchParseException("failed to parse attribute type [{}], unexpected type", attributeType); + } + } else { + throw new OpenSearchParseException("failed to parse attribute type [{}]", attributeType); + } + } else if ("status".equals(currentFieldName)) { + if (parser.nextToken() != XContentParser.Token.VALUE_STRING) { + throw new OpenSearchParseException( + "failed to parse status of decommissioning, expected string but found unknown type" + ); + } + status = DecommissionStatus.fromString(parser.text()); + } else { + throw new OpenSearchParseException( + "unknown field found [{}], failed to parse the decommission attribute", + currentFieldName + ); + } + } + } + return new DecommissionAttributeMetadata(decommissionAttribute, status); + } + + /** + * {@inheritDoc} + */ + @Override + public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) throws IOException { + toXContent(decommissionAttribute, status, attributeType, builder, params); + return builder; + } + + @Override + public EnumSet context() { + return Metadata.API_AND_GATEWAY; + } + + /** + * @param 
decommissionAttribute decommission attribute + * @param status decommission status + * @param attributeType attribute type + * @param builder XContent builder + * @param params serialization parameters + */ + public static void toXContent( + DecommissionAttribute decommissionAttribute, + DecommissionStatus status, + String attributeType, + XContentBuilder builder, + ToXContent.Params params + ) throws IOException { + builder.startObject(attributeType); + builder.field(decommissionAttribute.attributeName(), decommissionAttribute.attributeValue()); + builder.endObject(); + builder.field("status", status.status()); + } + + @Override + public String toString() { + return Strings.toString(this); + } +} diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java new file mode 100644 index 0000000000000..b58d99a9d59db --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java @@ -0,0 +1,274 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.decommission; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.OpenSearchTimeoutException; +import org.opensearch.action.ActionListener; +import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsAction; +import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsRequest; +import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsResponse; +import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsAction; +import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsRequest; +import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsResponse; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.ClusterStateObserver; +import org.opensearch.cluster.ClusterStateTaskConfig; +import org.opensearch.cluster.ClusterStateTaskListener; +import org.opensearch.cluster.ClusterStateUpdateTask; +import org.opensearch.cluster.coordination.NodeRemovalClusterStateTaskExecutor; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.routing.allocation.AllocationService; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.Priority; +import org.opensearch.common.Strings; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.TransportException; +import org.opensearch.transport.TransportResponseHandler; +import org.opensearch.transport.TransportService; + +import java.io.IOException; +import java.util.Arrays; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Set; +import java.util.function.Predicate; +import java.util.stream.Collectors; + +/** + * Helper controller class to remove list of nodes from the cluster and update status + * + * @opensearch.internal + */ + +public class DecommissionController { + + private static final Logger logger = LogManager.getLogger(DecommissionController.class); + + private final 
NodeRemovalClusterStateTaskExecutor nodeRemovalExecutor; + private final ClusterService clusterService; + private final TransportService transportService; + private final ThreadPool threadPool; + + DecommissionController( + ClusterService clusterService, + TransportService transportService, + AllocationService allocationService, + ThreadPool threadPool + ) { + this.clusterService = clusterService; + this.transportService = transportService; + this.nodeRemovalExecutor = new NodeRemovalClusterStateTaskExecutor(allocationService, logger); + this.threadPool = threadPool; + } + + /** + * Transport call to add nodes to voting config exclusion + * + * @param nodes set of nodes Ids to be added to voting config exclusion list + * @param listener callback for response or failure + */ + public void excludeDecommissionedNodesFromVotingConfig(Set nodes, ActionListener listener) { + transportService.sendRequest( + transportService.getLocalNode(), + AddVotingConfigExclusionsAction.NAME, + new AddVotingConfigExclusionsRequest( + Strings.EMPTY_ARRAY, + nodes.toArray(String[]::new), + Strings.EMPTY_ARRAY, + TimeValue.timeValueSeconds(120) // giving a larger timeout of 120 sec as cluster might already be in stress when + // decommission is triggered + ), + new TransportResponseHandler() { + @Override + public void handleResponse(AddVotingConfigExclusionsResponse response) { + listener.onResponse(null); + } + + @Override + public void handleException(TransportException exp) { + listener.onFailure(exp); + } + + @Override + public String executor() { + return ThreadPool.Names.SAME; + } + + @Override + public AddVotingConfigExclusionsResponse read(StreamInput in) throws IOException { + return new AddVotingConfigExclusionsResponse(in); + } + } + ); + } + + /** + * Transport call to clear voting config exclusion + * + * @param listener callback for response or failure + */ + public void clearVotingConfigExclusion(ActionListener listener, boolean waitForRemoval) { + final ClearVotingConfigExclusionsRequest clearVotingConfigExclusionsRequest = new ClearVotingConfigExclusionsRequest(); + clearVotingConfigExclusionsRequest.setWaitForRemoval(waitForRemoval); + transportService.sendRequest( + transportService.getLocalNode(), + ClearVotingConfigExclusionsAction.NAME, + clearVotingConfigExclusionsRequest, + new TransportResponseHandler() { + @Override + public void handleResponse(ClearVotingConfigExclusionsResponse response) { + listener.onResponse(null); + } + + @Override + public void handleException(TransportException exp) { + listener.onFailure(exp); + } + + @Override + public String executor() { + return ThreadPool.Names.SAME; + } + + @Override + public ClearVotingConfigExclusionsResponse read(StreamInput in) throws IOException { + return new ClearVotingConfigExclusionsResponse(in); + } + } + ); + } + + /** + * This method triggers batch of tasks for nodes to be decommissioned using executor {@link NodeRemovalClusterStateTaskExecutor} + * Once the tasks are submitted, it waits for an expected cluster state to guarantee + * that the expected decommissioned nodes are removed from the cluster + * + * @param nodesToBeDecommissioned set of the node to be decommissioned + * @param reason reason of removal + * @param timeout timeout for the request + * @param nodesRemovedListener callback for the success or failure + */ + public synchronized void removeDecommissionedNodes( + Set nodesToBeDecommissioned, + String reason, + TimeValue timeout, + ActionListener nodesRemovedListener + ) { + final Map nodesDecommissionTasks = new 
LinkedHashMap<>( + nodesToBeDecommissioned.size() + ); + nodesToBeDecommissioned.forEach(discoveryNode -> { + final NodeRemovalClusterStateTaskExecutor.Task task = new NodeRemovalClusterStateTaskExecutor.Task(discoveryNode, reason); + nodesDecommissionTasks.put(task, nodeRemovalExecutor); + }); + + logger.info("submitting state update task to remove [{}] nodes due to decommissioning", nodesToBeDecommissioned.toString()); + clusterService.submitStateUpdateTasks( + "node-decommissioned", + nodesDecommissionTasks, + ClusterStateTaskConfig.build(Priority.URGENT), + nodeRemovalExecutor + ); + + Predicate allDecommissionedNodesRemovedPredicate = clusterState -> { + Set intersection = Arrays.stream(clusterState.nodes().getNodes().values().toArray(DiscoveryNode.class)) + .collect(Collectors.toSet()); + intersection.retainAll(nodesToBeDecommissioned); + return intersection.size() == 0; + }; + + final ClusterStateObserver observer = new ClusterStateObserver(clusterService, timeout, logger, threadPool.getThreadContext()); + + final ClusterStateObserver.Listener removalListener = new ClusterStateObserver.Listener() { + @Override + public void onNewClusterState(ClusterState state) { + logger.info("successfully removed all decommissioned nodes [{}] from the cluster", nodesToBeDecommissioned.toString()); + nodesRemovedListener.onResponse(null); + } + + @Override + public void onClusterServiceClose() { + logger.warn( + "cluster service closed while waiting for removal of decommissioned nodes [{}]", + nodesToBeDecommissioned.toString() + ); + } + + @Override + public void onTimeout(TimeValue timeout) { + logger.info( + "timed out [{}] while waiting for removal of decommissioned nodes [{}]", + timeout.toString(), + nodesToBeDecommissioned.toString() + ); + nodesRemovedListener.onFailure( + new OpenSearchTimeoutException( + "timed out [{}] while waiting for removal of decommissioned nodes [{}]", + timeout.toString(), + nodesToBeDecommissioned.toString() + ) + ); + } + }; + + if (allDecommissionedNodesRemovedPredicate.test(clusterService.getClusterApplierService().state())) { + removalListener.onNewClusterState(clusterService.getClusterApplierService().state()); + } else { + observer.waitForNextChange(removalListener, allDecommissionedNodesRemovedPredicate); + } + } + + /** + * This method updates the status in the currently registered metadata. 
+ * + * @param decommissionStatus status to update decommission metadata with + * @param listener listener for response and failure + */ + public void updateMetadataWithDecommissionStatus(DecommissionStatus decommissionStatus, ActionListener listener) { + clusterService.submitStateUpdateTask("update-decommission-status", new ClusterStateUpdateTask(Priority.URGENT) { + @Override + public ClusterState execute(ClusterState currentState) { + DecommissionAttributeMetadata decommissionAttributeMetadata = currentState.metadata().decommissionAttributeMetadata(); + assert decommissionAttributeMetadata != null && decommissionAttributeMetadata.decommissionAttribute() != null; + logger.info( + "attempting to update current decommission status [{}] with expected status [{}]", + decommissionAttributeMetadata.status(), + decommissionStatus + ); + // validateNewStatus can throw IllegalStateException if the sequence of update is not valid + decommissionAttributeMetadata.validateNewStatus(decommissionStatus); + decommissionAttributeMetadata = new DecommissionAttributeMetadata( + decommissionAttributeMetadata.decommissionAttribute(), + decommissionStatus + ); + return ClusterState.builder(currentState) + .metadata(Metadata.builder(currentState.metadata()).decommissionAttributeMetadata(decommissionAttributeMetadata)) + .build(); + } + + @Override + public void onFailure(String source, Exception e) { + listener.onFailure(e); + } + + @Override + public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { + DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata().decommissionAttributeMetadata(); + assert decommissionAttributeMetadata != null; + assert decommissionAttributeMetadata.status().equals(decommissionStatus); + listener.onResponse(decommissionAttributeMetadata.status()); + } + }); + } +} diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java new file mode 100644 index 0000000000000..b2c8bfbc0cdc8 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -0,0 +1,564 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
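A condensed editorial sketch of the wait pattern removeDecommissionedNodes uses above (the predicate and node id here are hypothetical; listener and observer stand for the ClusterStateObserver.Listener and ClusterStateObserver built in that method): the predicate is tested against the currently applied state first, and the observer is registered only if the condition is not yet met, so a removal that has already been applied is not missed.

    // requires org.opensearch.cluster.ClusterState and java.util.function.Predicate
    Predicate<ClusterState> nodesGone = cs -> cs.nodes().get("decommissioned-node-id") == null; // hypothetical id
    ClusterState current = clusterService.getClusterApplierService().state();
    if (nodesGone.test(current)) {
        listener.onNewClusterState(current);             // condition already satisfied
    } else {
        observer.waitForNextChange(listener, nodesGone); // fires once a matching state is applied
    }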
+ */
+
+package org.opensearch.cluster.decommission;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.apache.logging.log4j.message.ParameterizedMessage;
+import org.opensearch.OpenSearchTimeoutException;
+import org.opensearch.action.ActionListener;
+import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateResponse;
+import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionResponse;
+import org.opensearch.cluster.ClusterState;
+import org.opensearch.cluster.ClusterStateObserver;
+import org.opensearch.cluster.ClusterStateUpdateTask;
+import org.opensearch.cluster.NotClusterManagerException;
+import org.opensearch.cluster.metadata.Metadata;
+import org.opensearch.cluster.node.DiscoveryNode;
+import org.opensearch.cluster.routing.allocation.AllocationService;
+import org.opensearch.cluster.service.ClusterService;
+import org.opensearch.common.Priority;
+import org.opensearch.common.inject.Inject;
+import org.opensearch.common.settings.ClusterSettings;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.common.unit.TimeValue;
+import org.opensearch.threadpool.ThreadPool;
+import org.opensearch.transport.TransportService;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.function.Predicate;
+import java.util.stream.Collectors;
+
+import static org.opensearch.cluster.routing.allocation.decider.AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING;
+import static org.opensearch.cluster.routing.allocation.decider.AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP_SETTING;
+
+/**
+ * Service responsible for the entire lifecycle of decommissioning and recommissioning an awareness attribute.
+ * <p>
+ * Whenever a cluster manager initiates an operation to decommission an awareness attribute,
+ * the service makes the best attempt to perform the following tasks -
+ * <ul>
+ * <li>Initiates node decommissioning by adding custom metadata with the attribute and state as {@link DecommissionStatus#INIT}</li>
+ * <li>Removes to-be-decommissioned cluster-manager eligible nodes from the voting config, waiting for abdication if one of them is the active leader</li>
+ * <li>Triggers weigh-away for nodes having the given awareness attribute so that they drain</li>
+ * <li>Once weighed away, triggers node decommission and marks the decommission status as {@link DecommissionStatus#IN_PROGRESS}</li>
+ * <li>Once the decommission is successful, clears the voting config and marks the status as {@link DecommissionStatus#SUCCESSFUL}</li>
+ * <li>If the service fails at any step, it makes the best attempt to mark the status as {@link DecommissionStatus#FAILED} and to clear the voting config exclusion</li>
+ * </ul>
+ * + * @opensearch.internal + */ +public class DecommissionService { + + private static final Logger logger = LogManager.getLogger(DecommissionService.class); + + private final ClusterService clusterService; + private final TransportService transportService; + private final ThreadPool threadPool; + private final DecommissionController decommissionController; + private volatile List awarenessAttributes; + private volatile Map> forcedAwarenessAttributes; + + @Inject + public DecommissionService( + Settings settings, + ClusterSettings clusterSettings, + ClusterService clusterService, + TransportService transportService, + ThreadPool threadPool, + AllocationService allocationService + ) { + this.clusterService = clusterService; + this.transportService = transportService; + this.threadPool = threadPool; + this.decommissionController = new DecommissionController(clusterService, transportService, allocationService, threadPool); + this.awarenessAttributes = CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING.get(settings); + clusterSettings.addSettingsUpdateConsumer(CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING, this::setAwarenessAttributes); + + setForcedAwarenessAttributes(CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP_SETTING.get(settings)); + clusterSettings.addSettingsUpdateConsumer( + CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP_SETTING, + this::setForcedAwarenessAttributes + ); + } + + private void setAwarenessAttributes(List awarenessAttributes) { + this.awarenessAttributes = awarenessAttributes; + } + + private void setForcedAwarenessAttributes(Settings forceSettings) { + Map> forcedAwarenessAttributes = new HashMap<>(); + Map forceGroups = forceSettings.getAsGroups(); + for (Map.Entry entry : forceGroups.entrySet()) { + List aValues = entry.getValue().getAsList("values"); + if (aValues.size() > 0) { + forcedAwarenessAttributes.put(entry.getKey(), aValues); + } + } + this.forcedAwarenessAttributes = forcedAwarenessAttributes; + } + + /** + * Starts the new decommission request and registers the metadata with status as {@link DecommissionStatus#INIT} + * Once the status is updated, it tries to exclude to-be-decommissioned cluster manager eligible nodes from Voting Configuration + * + * @param decommissionAttribute register decommission attribute in the metadata request + * @param listener register decommission listener + */ + public void startDecommissionAction( + final DecommissionAttribute decommissionAttribute, + final ActionListener listener + ) { + // register the metadata with status as INIT as first step + clusterService.submitStateUpdateTask("decommission [" + decommissionAttribute + "]", new ClusterStateUpdateTask(Priority.URGENT) { + @Override + public ClusterState execute(ClusterState currentState) { + // validates if correct awareness attributes and forced awareness attribute set to the cluster before starting action + validateAwarenessAttribute(decommissionAttribute, awarenessAttributes, forcedAwarenessAttributes); + DecommissionAttributeMetadata decommissionAttributeMetadata = currentState.metadata().decommissionAttributeMetadata(); + // check that request is eligible to proceed + ensureEligibleRequest(decommissionAttributeMetadata, decommissionAttribute); + decommissionAttributeMetadata = new DecommissionAttributeMetadata(decommissionAttribute); + logger.info("registering decommission metadata [{}] to execute action", decommissionAttributeMetadata.toString()); + return ClusterState.builder(currentState) + 
.metadata(Metadata.builder(currentState.metadata()).decommissionAttributeMetadata(decommissionAttributeMetadata)) + .build(); + } + + @Override + public void onFailure(String source, Exception e) { + logger.error( + () -> new ParameterizedMessage( + "failed to start decommission action for attribute [{}]", + decommissionAttribute.toString() + ), + e + ); + listener.onFailure(e); + } + + @Override + public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { + DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata().decommissionAttributeMetadata(); + assert decommissionAttribute.equals(decommissionAttributeMetadata.decommissionAttribute()); + logger.info( + "registered decommission metadata for attribute [{}] with status [{}]", + decommissionAttributeMetadata.decommissionAttribute(), + decommissionAttributeMetadata.status() + ); + decommissionClusterManagerNodes(decommissionAttributeMetadata.decommissionAttribute(), listener); + } + }); + } + + private synchronized void decommissionClusterManagerNodes( + final DecommissionAttribute decommissionAttribute, + ActionListener listener + ) { + ClusterState state = clusterService.getClusterApplierService().state(); + // since here metadata is already registered with INIT, we can guarantee that no new node with decommission attribute can further + // join the cluster + // and hence in further request lifecycle we are sure that no new to-be-decommission leader will join the cluster + Set clusterManagerNodesToBeDecommissioned = filterNodesWithDecommissionAttribute(state, decommissionAttribute, true); + logger.info( + "resolved cluster manager eligible nodes [{}] that should be removed from Voting Configuration", + clusterManagerNodesToBeDecommissioned.toString() + ); + + // remove all 'to-be-decommissioned' cluster manager eligible nodes from voting config + Set nodeIdsToBeExcluded = clusterManagerNodesToBeDecommissioned.stream() + .map(DiscoveryNode::getId) + .collect(Collectors.toSet()); + + final Predicate allNodesRemovedAndAbdicated = clusterState -> { + final Set votingConfigNodeIds = clusterState.getLastCommittedConfiguration().getNodeIds(); + return nodeIdsToBeExcluded.stream().noneMatch(votingConfigNodeIds::contains) + && nodeIdsToBeExcluded.contains(clusterState.nodes().getClusterManagerNodeId()) == false + && clusterState.nodes().getClusterManagerNodeId() != null; + }; + + ActionListener exclusionListener = new ActionListener() { + @Override + public void onResponse(Void unused) { + if (clusterService.getClusterApplierService().state().nodes().isLocalNodeElectedClusterManager()) { + if (nodeHasDecommissionedAttribute(clusterService.localNode(), decommissionAttribute)) { + // this is an unexpected state, as after exclusion of nodes having decommission attribute, + // this local node shouldn't have had the decommission attribute. 
Will send the failure response to the user + String errorMsg = + "unexpected state encountered [local node is to-be-decommissioned leader] while executing decommission request"; + logger.error(errorMsg); + // will go ahead and clear the voting config and mark the status as false + clearVotingConfigExclusionAndUpdateStatus(false, false); + // we can send the failure response to the user here + listener.onFailure(new IllegalStateException(errorMsg)); + } else { + logger.info("will attempt to fail decommissioned nodes as local node is eligible to process the request"); + // we are good here to send the response now as the request is processed by an eligible active leader + // and to-be-decommissioned cluster manager is no more part of Voting Configuration and no more to-be-decommission + // nodes can be part of Voting Config + listener.onResponse(new DecommissionResponse(true)); + failDecommissionedNodes(clusterService.getClusterApplierService().state()); + } + } else { + // explicitly calling listener.onFailure with NotClusterManagerException as the local node is not the cluster manager + // this will ensures that request is retried until cluster manager times out + logger.info( + "local node is not eligible to process the request, " + + "throwing NotClusterManagerException to attempt a retry on an eligible node" + ); + listener.onFailure( + new NotClusterManagerException( + "node [" + + transportService.getLocalNode().toString() + + "] not eligible to execute decommission request. Will retry until timeout." + ) + ); + } + } + + @Override + public void onFailure(Exception e) { + listener.onFailure(e); + // attempting to mark the status as FAILED + decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.FAILED, statusUpdateListener()); + } + }; + + if (allNodesRemovedAndAbdicated.test(state)) { + exclusionListener.onResponse(null); + } else { + logger.debug("sending transport request to remove nodes [{}] from voting config", nodeIdsToBeExcluded.toString()); + // send a transport request to exclude to-be-decommissioned cluster manager eligible nodes from voting config + decommissionController.excludeDecommissionedNodesFromVotingConfig(nodeIdsToBeExcluded, new ActionListener() { + @Override + public void onResponse(Void unused) { + logger.info( + "successfully removed decommissioned cluster manager eligible nodes [{}] from voting config ", + clusterManagerNodesToBeDecommissioned.toString() + ); + final ClusterStateObserver abdicationObserver = new ClusterStateObserver( + clusterService, + TimeValue.timeValueSeconds(60L), + logger, + threadPool.getThreadContext() + ); + final ClusterStateObserver.Listener abdicationListener = new ClusterStateObserver.Listener() { + @Override + public void onNewClusterState(ClusterState state) { + logger.debug("to-be-decommissioned node is no more the active leader"); + exclusionListener.onResponse(null); + } + + @Override + public void onClusterServiceClose() { + String errorMsg = "cluster service closed while waiting for abdication of to-be-decommissioned leader"; + logger.warn(errorMsg); + listener.onFailure(new DecommissioningFailedException(decommissionAttribute, errorMsg)); + } + + @Override + public void onTimeout(TimeValue timeout) { + logger.info("timed out while waiting for abdication of to-be-decommissioned leader"); + clearVotingConfigExclusionAndUpdateStatus(false, false); + listener.onFailure( + new OpenSearchTimeoutException( + "timed out [{}] while waiting for abdication of to-be-decommissioned leader", + timeout.toString() 
+ ) + ); + } + }; + // In case the cluster state is already processed even before this code is executed + // therefore testing first before attaching the listener + ClusterState currentState = clusterService.getClusterApplierService().state(); + if (allNodesRemovedAndAbdicated.test(currentState)) { + abdicationListener.onNewClusterState(currentState); + } else { + logger.debug("waiting to abdicate to-be-decommissioned leader"); + abdicationObserver.waitForNextChange(abdicationListener, allNodesRemovedAndAbdicated); + } + } + + @Override + public void onFailure(Exception e) { + logger.error( + new ParameterizedMessage( + "failure in removing to-be-decommissioned cluster manager eligible nodes [{}] from voting config", + nodeIdsToBeExcluded.toString() + ), + e + ); + exclusionListener.onFailure(e); + } + }); + } + } + + private void failDecommissionedNodes(ClusterState state) { + // this method ensures no matter what, we always exit from this function after clearing the voting config exclusion + DecommissionAttributeMetadata decommissionAttributeMetadata = state.metadata().decommissionAttributeMetadata(); + DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); + decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.IN_PROGRESS, new ActionListener<>() { + @Override + public void onResponse(DecommissionStatus status) { + logger.info("updated the decommission status to [{}]", status); + // execute nodes decommissioning + decommissionController.removeDecommissionedNodes( + filterNodesWithDecommissionAttribute(clusterService.getClusterApplierService().state(), decommissionAttribute, false), + "nodes-decommissioned", + TimeValue.timeValueSeconds(120L), + new ActionListener() { + @Override + public void onResponse(Void unused) { + clearVotingConfigExclusionAndUpdateStatus(true, true); + } + + @Override + public void onFailure(Exception e) { + clearVotingConfigExclusionAndUpdateStatus(false, false); + } + } + ); + } + + @Override + public void onFailure(Exception e) { + logger.error( + () -> new ParameterizedMessage( + "failed to update decommission status for attribute [{}] to [{}]", + decommissionAttribute.toString(), + DecommissionStatus.IN_PROGRESS + ), + e + ); + // since we are not able to update the status, we will clear the voting config exclusion we have set earlier + clearVotingConfigExclusionAndUpdateStatus(false, false); + } + }); + } + + private void clearVotingConfigExclusionAndUpdateStatus(boolean decommissionSuccessful, boolean waitForRemoval) { + decommissionController.clearVotingConfigExclusion(new ActionListener() { + @Override + public void onResponse(Void unused) { + logger.info( + "successfully cleared voting config exclusion after completing decommission action, proceeding to update metadata" + ); + DecommissionStatus updateStatusWith = decommissionSuccessful ? 
DecommissionStatus.SUCCESSFUL : DecommissionStatus.FAILED; + decommissionController.updateMetadataWithDecommissionStatus(updateStatusWith, statusUpdateListener()); + } + + @Override + public void onFailure(Exception e) { + logger.debug( + new ParameterizedMessage("failure in clearing voting config exclusion after processing decommission request"), + e + ); + decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.FAILED, statusUpdateListener()); + } + }, waitForRemoval); + } + + private Set filterNodesWithDecommissionAttribute( + ClusterState clusterState, + DecommissionAttribute decommissionAttribute, + boolean onlyClusterManagerNodes + ) { + Set nodesWithDecommissionAttribute = new HashSet<>(); + Iterator nodesIter = onlyClusterManagerNodes + ? clusterState.nodes().getClusterManagerNodes().valuesIt() + : clusterState.nodes().getNodes().valuesIt(); + + while (nodesIter.hasNext()) { + final DiscoveryNode node = nodesIter.next(); + if (nodeHasDecommissionedAttribute(node, decommissionAttribute)) { + nodesWithDecommissionAttribute.add(node); + } + } + return nodesWithDecommissionAttribute; + } + + private static void validateAwarenessAttribute( + final DecommissionAttribute decommissionAttribute, + List awarenessAttributes, + Map> forcedAwarenessAttributes + ) { + String msg = null; + if (awarenessAttributes == null) { + msg = "awareness attribute not set to the cluster."; + } else if (forcedAwarenessAttributes == null) { + msg = "forced awareness attribute not set to the cluster."; + } else if (awarenessAttributes.contains(decommissionAttribute.attributeName()) == false) { + msg = "invalid awareness attribute requested for decommissioning"; + } else if (forcedAwarenessAttributes.containsKey(decommissionAttribute.attributeName()) == false) { + msg = "forced awareness attribute [" + forcedAwarenessAttributes.toString() + "] doesn't have the decommissioning attribute"; + } else if (forcedAwarenessAttributes.get(decommissionAttribute.attributeName()) + .contains(decommissionAttribute.attributeValue()) == false) { + msg = "invalid awareness attribute value requested for decommissioning. Set forced awareness values before to decommission"; + } + + if (msg != null) { + throw new DecommissioningFailedException(decommissionAttribute, msg); + } + } + + private static void ensureEligibleRequest( + DecommissionAttributeMetadata decommissionAttributeMetadata, + DecommissionAttribute requestedDecommissionAttribute + ) { + String msg = null; + if (decommissionAttributeMetadata != null) { + // check if the same attribute is registered and handle it accordingly + if (decommissionAttributeMetadata.decommissionAttribute().equals(requestedDecommissionAttribute)) { + switch (decommissionAttributeMetadata.status()) { + // for INIT and FAILED - we are good to process it again + case INIT: + case FAILED: + break; + case IN_PROGRESS: + case SUCCESSFUL: + msg = "same request is already in status [" + decommissionAttributeMetadata.status() + "]"; + break; + default: + throw new IllegalStateException( + "unknown status [" + decommissionAttributeMetadata.status() + "] currently registered in metadata" + ); + } + } else { + switch (decommissionAttributeMetadata.status()) { + case SUCCESSFUL: + // one awareness attribute is already decommissioned. 
We will reject the new request + msg = "one awareness attribute [" + + decommissionAttributeMetadata.decommissionAttribute().toString() + + "] already successfully decommissioned, recommission before triggering another decommission"; + break; + case IN_PROGRESS: + case INIT: + // it means the decommission has been initiated or is inflight. In that case, will fail new request + msg = "there's an inflight decommission request for attribute [" + + decommissionAttributeMetadata.decommissionAttribute().toString() + + "] is in progress, cannot process this request"; + break; + case FAILED: + break; + default: + throw new IllegalStateException( + "unknown status [" + decommissionAttributeMetadata.status() + "] currently registered in metadata" + ); + } + } + } + + if (msg != null) { + throw new DecommissioningFailedException(requestedDecommissionAttribute, msg); + } + } + + private ActionListener statusUpdateListener() { + return new ActionListener<>() { + @Override + public void onResponse(DecommissionStatus status) { + logger.info("updated the decommission status to [{}]", status); + } + + @Override + public void onFailure(Exception e) { + logger.error("unexpected failure occurred during decommission status update", e); + } + }; + } + + public void startRecommissionAction(final ActionListener listener) { + /* + * For abandoned requests, we might not really know if it actually restored the exclusion list. + * And can land up in cases where even after recommission, exclusions are set(which is unexpected). + * And by definition of OpenSearch - Clusters should have no voting configuration exclusions in normal operation. + * Once the excluded nodes have stopped, clear the voting configuration exclusions with DELETE /_cluster/voting_config_exclusions. + * And hence it is safe to remove the exclusion if any. User should make conscious choice before decommissioning awareness attribute. + */ + decommissionController.clearVotingConfigExclusion(new ActionListener() { + @Override + public void onResponse(Void unused) { + logger.info("successfully cleared voting config exclusion for deleting the decommission."); + deleteDecommissionState(listener); + } + + @Override + public void onFailure(Exception e) { + logger.error("Failure in clearing voting config during delete_decommission request.", e); + listener.onFailure(e); + } + }, false); + } + + void deleteDecommissionState(ActionListener listener) { + clusterService.submitStateUpdateTask("delete_decommission_state", new ClusterStateUpdateTask(Priority.URGENT) { + @Override + public ClusterState execute(ClusterState currentState) { + logger.info("Deleting the decommission attribute from the cluster state"); + Metadata metadata = currentState.metadata(); + Metadata.Builder mdBuilder = Metadata.builder(metadata); + mdBuilder.removeCustom(DecommissionAttributeMetadata.TYPE); + return ClusterState.builder(currentState).metadata(mdBuilder).build(); + } + + @Override + public void onFailure(String source, Exception e) { + logger.error(() -> new ParameterizedMessage("Failed to clear decommission attribute. [{}]", source), e); + listener.onFailure(e); + } + + @Override + public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { + // Cluster state processed for deleting the decommission attribute. 
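+                // (Editorial note) execute() above removed the custom entry via
+                // mdBuilder.removeCustom(DecommissionAttributeMetadata.TYPE), so the lookup below
+                // resolves to null and nodeCommissioned(...) treats every node as commissioned again.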
+ assert newState.metadata().decommissionAttributeMetadata() == null; + listener.onResponse(new DeleteDecommissionStateResponse(true)); + } + }); + } + + /** + * Utility method to check if the node has decommissioned attribute + * + * @param discoveryNode node to check on + * @param decommissionAttribute attribute to be checked with + * @return true or false based on whether node has decommissioned attribute + */ + public static boolean nodeHasDecommissionedAttribute(DiscoveryNode discoveryNode, DecommissionAttribute decommissionAttribute) { + String nodeAttributeValue = discoveryNode.getAttributes().get(decommissionAttribute.attributeName()); + return nodeAttributeValue != null && nodeAttributeValue.equals(decommissionAttribute.attributeValue()); + } + + /** + * Utility method to check if the node is commissioned or not + * + * @param discoveryNode node to check on + * @param metadata metadata present current which will be used to check the commissioning status of the node + * @return if the node is commissioned or not + */ + public static boolean nodeCommissioned(DiscoveryNode discoveryNode, Metadata metadata) { + DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.decommissionAttributeMetadata(); + if (decommissionAttributeMetadata != null) { + DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); + DecommissionStatus status = decommissionAttributeMetadata.status(); + if (decommissionAttribute != null && status != null) { + if (nodeHasDecommissionedAttribute(discoveryNode, decommissionAttribute) + && (status.equals(DecommissionStatus.IN_PROGRESS) || status.equals(DecommissionStatus.SUCCESSFUL))) { + return false; + } + } + } + return true; + } +} diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java new file mode 100644 index 0000000000000..af88b0d0f5902 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java @@ -0,0 +1,68 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
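An editorial aside before the status enum, showing two call sites of the pieces just defined; this is paraphrased, not quoted, from the diff. First, the built-in join validator path (JoinTaskExecutor.ensureNodeCommissioned) rejects a joining node whose attribute decommission is IN_PROGRESS or SUCCESSFUL; second, on the joining node itself, JoinHelper unwraps the transport exception to detect that rejection.

    // requires java.util.function.BiConsumer; helpers are the ones defined above
    BiConsumer<DiscoveryNode, ClusterState> validator = (node, state) -> {
        if (DecommissionService.nodeCommissioned(node, state.metadata()) == false) {
            // OpenSearchException-style {} placeholders
            throw new NodeDecommissionedException("node [{}] is decommissioned", node.getId());
        }
    };

    // On the joining side the rejection arrives wrapped, so the cause is unwrapped first
    // (exp is the TransportException from the join response handler; nodeCommissioned is
    // the Consumer<Boolean> wired into JoinHelper at the top of this diff):
    if (exp instanceof RemoteTransportException && exp.getCause() instanceof NodeDecommissionedException) {
        nodeCommissioned.accept(false);
    }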
+ */ + +package org.opensearch.cluster.decommission; + +/** + * An enumeration of the states during decommissioning + */ +public enum DecommissionStatus { + /** + * Decommission process is initiated, and to-be-decommissioned leader is excluded from voting config + */ + INIT("init"), + /** + * Decommission process has started, decommissioned nodes should be removed + */ + IN_PROGRESS("in_progress"), + /** + * Decommission action completed + */ + SUCCESSFUL("successful"), + /** + * Decommission request failed + */ + FAILED("failed"); + + private final String status; + + DecommissionStatus(String status) { + this.status = status; + } + + /** + * Returns status that represents the decommission state + * + * @return status + */ + public String status() { + return status; + } + + /** + * Generate decommission status from given string + * + * @param status status in string + * @return status + */ + public static DecommissionStatus fromString(String status) { + if (status == null) { + throw new IllegalArgumentException("decommission status cannot be null"); + } + if (status.equals(INIT.status())) { + return INIT; + } else if (status.equals(IN_PROGRESS.status())) { + return IN_PROGRESS; + } else if (status.equals(SUCCESSFUL.status())) { + return SUCCESSFUL; + } else if (status.equals(FAILED.status())) { + return FAILED; + } + throw new IllegalStateException("Decommission status [" + status + "] not recognized."); + } +} diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissioningFailedException.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissioningFailedException.java new file mode 100644 index 0000000000000..fe1b9368ac712 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissioningFailedException.java @@ -0,0 +1,55 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.decommission; + +import org.opensearch.OpenSearchException; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; + +import java.io.IOException; + +/** + * This exception is thrown whenever a failure occurs in decommission request @{@link DecommissionService} + * + * @opensearch.internal + */ + +public class DecommissioningFailedException extends OpenSearchException { + + private final DecommissionAttribute decommissionAttribute; + + public DecommissioningFailedException(DecommissionAttribute decommissionAttribute, String msg) { + this(decommissionAttribute, msg, null); + } + + public DecommissioningFailedException(DecommissionAttribute decommissionAttribute, String msg, Throwable cause) { + super("[" + (decommissionAttribute == null ? 
"_na" : decommissionAttribute.toString()) + "] " + msg, cause); + this.decommissionAttribute = decommissionAttribute; + } + + public DecommissioningFailedException(StreamInput in) throws IOException { + super(in); + decommissionAttribute = new DecommissionAttribute(in); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + decommissionAttribute.writeTo(out); + } + + /** + * Returns decommission attribute + * + * @return decommission attribute + */ + public DecommissionAttribute decommissionAttribute() { + return decommissionAttribute; + } +} diff --git a/server/src/main/java/org/opensearch/cluster/decommission/NodeDecommissionedException.java b/server/src/main/java/org/opensearch/cluster/decommission/NodeDecommissionedException.java new file mode 100644 index 0000000000000..847d5a527b017 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/decommission/NodeDecommissionedException.java @@ -0,0 +1,31 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.decommission; + +import org.opensearch.OpenSearchException; +import org.opensearch.common.io.stream.StreamInput; + +import java.io.IOException; + +/** + * This exception is thrown if the node is decommissioned by @{@link DecommissionService} + * and this nodes needs to be removed from the cluster + * + * @opensearch.internal + */ +public class NodeDecommissionedException extends OpenSearchException { + + public NodeDecommissionedException(String msg, Object... args) { + super(msg, args); + } + + public NodeDecommissionedException(StreamInput in) throws IOException { + super(in); + } +} diff --git a/server/src/main/java/org/opensearch/cluster/decommission/package-info.java b/server/src/main/java/org/opensearch/cluster/decommission/package-info.java new file mode 100644 index 0000000000000..256c2f22253cc --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/decommission/package-info.java @@ -0,0 +1,12 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +/** + * Decommission lifecycle classes + */ +package org.opensearch.cluster.decommission; diff --git a/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java b/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java index 086865d2170c3..eb5e8bbc2d49b 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java @@ -51,6 +51,7 @@ import org.opensearch.cluster.block.ClusterBlock; import org.opensearch.cluster.block.ClusterBlockLevel; import org.opensearch.cluster.coordination.CoordinationMetadata; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; import org.opensearch.common.Nullable; import org.opensearch.common.Strings; import org.opensearch.common.UUIDs; @@ -795,6 +796,10 @@ public Map dataStreams() { .orElse(Collections.emptyMap()); } + public DecommissionAttributeMetadata decommissionAttributeMetadata() { + return custom(DecommissionAttributeMetadata.TYPE); + } + public ImmutableOpenMap customs() { return this.customs; } @@ -1336,6 +1341,15 @@ public IndexGraveyard indexGraveyard() { return graveyard; } + public Builder decommissionAttributeMetadata(final DecommissionAttributeMetadata decommissionAttributeMetadata) { + putCustom(DecommissionAttributeMetadata.TYPE, decommissionAttributeMetadata); + return this; + } + + public DecommissionAttributeMetadata decommissionAttributeMetadata() { + return (DecommissionAttributeMetadata) getCustom(DecommissionAttributeMetadata.TYPE); + } + public Builder updateSettings(Settings settings, String... indices) { if (indices == null || indices.length == 0) { indices = this.indices.keys().toArray(String.class); diff --git a/server/src/main/java/org/opensearch/cluster/routing/RoutingNodes.java b/server/src/main/java/org/opensearch/cluster/routing/RoutingNodes.java index 986df494917c0..17267d5474738 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/RoutingNodes.java +++ b/server/src/main/java/org/opensearch/cluster/routing/RoutingNodes.java @@ -702,6 +702,23 @@ assert node(failedShard.currentNodeId()).getByShardId(failedShard.shardId()) == + " was matched but wasn't removed"; } + public void swapPrimaryWithReplica( + Logger logger, + ShardRouting primaryShard, + ShardRouting replicaShard, + RoutingChangesObserver changes + ) { + assert primaryShard.primary() : "Invalid primary shard provided"; + assert !replicaShard.primary() : "Invalid Replica shard provided"; + + ShardRouting newPrimary = primaryShard.moveActivePrimaryToReplica(); + ShardRouting newReplica = replicaShard.moveActiveReplicaToPrimary(); + updateAssigned(primaryShard, newPrimary); + updateAssigned(replicaShard, newReplica); + logger.info("Swap relocation performed for shard [{}]", newPrimary.shortSummary()); + changes.replicaPromoted(newPrimary); + } + private void unassignPrimaryAndPromoteActiveReplicaIfExists( ShardRouting failedShard, UnassignedInfo unassignedInfo, @@ -1127,6 +1144,18 @@ public ShardRouting[] drain() { primaries = 0; return mutableShardRoutings; } + + /** + * Drains all ignored shards and returns it. + * This method will not drain unassigned shards. 
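+     * (Editorial note) The companion to {@link #drain()}: it empties only the ignored list and
+     * resets the ignored-primaries counter, handing the previously ignored shards back to the caller.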
+ */ + public ShardRouting[] drainIgnored() { + nodes.ensureMutable(); + ShardRouting[] mutableShardRoutings = ignored.toArray(new ShardRouting[ignored.size()]); + ignored.clear(); + ignoredPrimaries = 0; + return mutableShardRoutings; + } } /** diff --git a/server/src/main/java/org/opensearch/cluster/routing/RoutingPool.java b/server/src/main/java/org/opensearch/cluster/routing/RoutingPool.java new file mode 100644 index 0000000000000..1a3c366694221 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/routing/RoutingPool.java @@ -0,0 +1,69 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.routing; + +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.routing.allocation.RoutingAllocation; +import org.opensearch.common.settings.Settings; +import org.opensearch.index.IndexModule; + +/** + * {@link RoutingPool} defines the different node types based on the assigned capabilities. The methods + * help decide the capabilities of a specific node as well as an index or shard based on the index configuration. + * These methods help with allocation decisions and determining shard classification with the allocation process. + * + * @opensearch.internal + */ +public enum RoutingPool { + LOCAL_ONLY, + REMOTE_CAPABLE; + + /** + * Helps to determine the appropriate {@link RoutingPool} for a given node from the {@link RoutingNode} + */ + public static RoutingPool getNodePool(RoutingNode node) { + return getNodePool(node.node()); + } + + /** + * Helps to determine the appropriate {@link RoutingPool} for a given node from the {@link DiscoveryNode} + */ + public static RoutingPool getNodePool(DiscoveryNode node) { + if (node.isSearchNode()) { + return REMOTE_CAPABLE; + } + return LOCAL_ONLY; + } + + /** + * Can determine the appropriate {@link RoutingPool} for a given shard using the {@link IndexMetadata} for the + * index using the {@link RoutingAllocation}. + * @param shard the shard routing for which {@link RoutingPool} has to be determined. + * @param allocation the current allocation of the cluster + * @return {@link RoutingPool} for the given shard. + */ + public static RoutingPool getShardPool(ShardRouting shard, RoutingAllocation allocation) { + IndexMetadata indexMetadata = allocation.metadata().getIndexSafe(shard.index()); + return getIndexPool(indexMetadata); + } + + /** + * Can determine the appropriate {@link RoutingPool} for a given index using the {@link IndexMetadata}. + * @param indexMetadata the index metadata object for which {@link RoutingPool} has to be determined. + * @return {@link RoutingPool} for the given index. 
diff --git a/server/src/main/java/org/opensearch/cluster/routing/ShardRouting.java b/server/src/main/java/org/opensearch/cluster/routing/ShardRouting.java
index e3aa2a666d454..5945c7d17a69a 100644
--- a/server/src/main/java/org/opensearch/cluster/routing/ShardRouting.java
+++ b/server/src/main/java/org/opensearch/cluster/routing/ShardRouting.java
@@ -534,6 +534,29 @@ public ShardRouting moveToStarted() {
         );
     }
 
+    /**
+     * Makes the active primary shard a replica
+     *
+     * @throws IllegalShardRoutingStateException if shard is already a replica
+     */
+    public ShardRouting moveActivePrimaryToReplica() {
+        assert active() : "expected an active shard " + this;
+        if (!primary) {
+            throw new IllegalShardRoutingStateException(this, "Not a primary shard, can't move to replica");
+        }
+        return new ShardRouting(
+            shardId,
+            currentNodeId,
+            relocatingNodeId,
+            false,
+            state,
+            recoverySource,
+            unassignedInfo,
+            allocationId,
+            expectedShardSize
+        );
+    }
+
     /**
      * Make the active shard primary unless it's not primary
      *
diff --git a/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java b/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java
index 42c8f7987bf3d..d8761e9b1a78e 100644
--- a/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java
+++ b/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java
@@ -50,6 +50,7 @@
 import org.opensearch.common.settings.Setting;
 import org.opensearch.common.settings.Setting.Property;
 import org.opensearch.common.settings.Settings;
+import org.opensearch.common.util.FeatureFlags;
 
 import java.util.HashMap;
 import java.util.HashSet;
@@ -145,6 +146,13 @@ public void allocate(RoutingAllocation allocation) {
         localShardsBalancer.allocateUnassigned();
         localShardsBalancer.moveShards();
         localShardsBalancer.balance();
+
+        if (FeatureFlags.isEnabled(FeatureFlags.SEARCHABLE_SNAPSHOT)) {
+            final ShardsBalancer remoteShardsBalancer = new RemoteShardsBalancer(logger, allocation);
+            remoteShardsBalancer.allocateUnassigned();
+            remoteShardsBalancer.moveShards();
+            remoteShardsBalancer.balance();
+        }
     }
 
     @Override
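The remote balancing pass above only runs behind the searchable-snapshot feature flag, so each pool's balancer owns its own shards and the two passes never fight over the same shard. For experimentation, FeatureFlags consults JVM system properties; the exact property key shown below follows the FeatureFlags naming convention and is an assumption here, not something this diff states:

    // Hypothetical test-style toggle; the property must be set before the node
    // (or test cluster) starts so FeatureFlags sees it during allocation.
    System.setProperty("opensearch.experimental.feature.searchable_snapshot.enabled", "true");
    assert FeatureFlags.isEnabled(FeatureFlags.SEARCHABLE_SNAPSHOT);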
diff --git a/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/LocalShardsBalancer.java b/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/LocalShardsBalancer.java
index 53d7c827392d5..3c5e4013748af 100644
--- a/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/LocalShardsBalancer.java
+++ b/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/LocalShardsBalancer.java
@@ -15,6 +15,7 @@
 import org.opensearch.cluster.metadata.Metadata;
 import org.opensearch.cluster.routing.RoutingNode;
 import org.opensearch.cluster.routing.RoutingNodes;
+import org.opensearch.cluster.routing.RoutingPool;
 import org.opensearch.cluster.routing.ShardRouting;
 import org.opensearch.cluster.routing.ShardRoutingState;
 import org.opensearch.cluster.routing.UnassignedInfo;
@@ -27,9 +28,11 @@
 import org.opensearch.cluster.routing.allocation.decider.Decision;
 import org.opensearch.cluster.routing.allocation.decider.DiskThresholdDecider;
 import org.opensearch.common.collect.Tuple;
+import org.opensearch.common.util.FeatureFlags;
 import org.opensearch.gateway.PriorityComparator;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
@@ -38,6 +41,7 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.stream.Collectors;
 import java.util.stream.StreamSupport;
 
 import static org.opensearch.cluster.routing.ShardRoutingState.RELOCATING;
@@ -102,6 +106,10 @@ public float avgShardsPerNode(String index) {
      */
     @Override
     public float avgShardsPerNode() {
+        if (FeatureFlags.isEnabled(FeatureFlags.SEARCHABLE_SNAPSHOT)) {
+            float totalShards = nodes.values().stream().map(BalancedShardsAllocator.ModelNode::numShards).reduce(0, Integer::sum);
+            return totalShards / nodes.size();
+        }
         return avgShardsPerNode;
     }
 
@@ -172,6 +180,11 @@ void balance() {
      */
     @Override
     MoveDecision decideRebalance(final ShardRouting shard) {
+        if (FeatureFlags.isEnabled(FeatureFlags.SEARCHABLE_SNAPSHOT)
+            && RoutingPool.REMOTE_CAPABLE.equals(RoutingPool.getShardPool(shard, allocation))) {
+            return MoveDecision.NOT_TAKEN;
+        }
+
         if (shard.started() == false) {
             // we can only rebalance started shards
             return MoveDecision.NOT_TAKEN;
@@ -441,7 +454,19 @@ private void balanceByWeights() {
      * to the nodes we relocated them from.
      */
     private String[] buildWeightOrderedIndices() {
-        final String[] indices = allocation.routingTable().indicesRouting().keys().toArray(String.class);
+        final String[] indices;
+        if (FeatureFlags.isEnabled(FeatureFlags.SEARCHABLE_SNAPSHOT)) {
+            final List<String> localIndices = new ArrayList<>();
+            for (String index : allocation.routingTable().indicesRouting().keys().toArray(String.class)) {
+                if (RoutingPool.LOCAL_ONLY.equals(RoutingPool.getIndexPool(metadata.index(index)))) {
+                    localIndices.add(index);
+                }
+            }
+            indices = localIndices.toArray(new String[0]);
+        } else {
+            indices = allocation.routingTable().indicesRouting().keys().toArray(String.class);
+        }
+
         final float[] deltas = new float[indices.length];
         for (int i = 0; i < deltas.length; i++) {
             sorter.reset(indices[i]);
@@ -507,7 +532,7 @@ void moveShards() {
         // shard movements, the goal of this iteration order is to achieve a fairer movement of shards from the nodes that are
         // offloading the shards.
- // Trying to eliminate target nodes so that we donot unnecessarily iterate over source nodes + // Trying to eliminate target nodes so that we do not unnecessarily iterate over source nodes // when no target is eligible for (BalancedShardsAllocator.ModelNode currentNode : sorter.modelNodes) { checkAndAddInEligibleTargetNode(currentNode.getRoutingNode()); @@ -533,6 +558,11 @@ void moveShards() { ShardRouting shardRouting = it.next(); + if (FeatureFlags.isEnabled(FeatureFlags.SEARCHABLE_SNAPSHOT) + && RoutingPool.REMOTE_CAPABLE.equals(RoutingPool.getShardPool(shardRouting, allocation))) { + continue; + } + // Ensure that replicas don't relocate if primaries are being throttled and primary first is enabled if (movePrimaryFirst && primariesThrottled && !shardRouting.primary()) { logger.info( @@ -593,6 +623,11 @@ void moveShards() { */ @Override MoveDecision decideMove(final ShardRouting shardRouting) { + if (FeatureFlags.isEnabled(FeatureFlags.SEARCHABLE_SNAPSHOT) + && RoutingPool.REMOTE_CAPABLE.equals(RoutingPool.getShardPool(shardRouting, allocation))) { + return MoveDecision.NOT_TAKEN; + } + if (shardRouting.started() == false) { // we can only move started shards return MoveDecision.NOT_TAKEN; @@ -680,7 +715,7 @@ private Map buildModelFromAssigned() for (ShardRouting shard : rn) { assert rn.nodeId().equals(shard.currentNodeId()); /* we skip relocating shards here since we expect an initializing shard with the same id coming in */ - if (shard.state() != RELOCATING) { + if (RoutingPool.LOCAL_ONLY.equals(RoutingPool.getShardPool(shard, allocation)) && shard.state() != RELOCATING) { node.addShard(shard); if (logger.isTraceEnabled()) { logger.trace("Assigned shard [{}] to node [{}]", shard, node.getNodeId()); @@ -735,7 +770,17 @@ void allocateUnassigned() { * if we allocate for instance (0, R, IDX1) we move the second replica to the secondary array and proceed with * the next replica. If we could not find a node to allocate (0,R,IDX1) we move all it's replicas to ignoreUnassigned. */ - ShardRouting[] primary = unassigned.drain(); + ShardRouting[] unassignedShards = unassigned.drain(); + if (FeatureFlags.isEnabled(FeatureFlags.SEARCHABLE_SNAPSHOT)) { + List allUnassignedShards = Arrays.stream(unassignedShards).collect(Collectors.toList()); + List localUnassignedShards = allUnassignedShards.stream() + .filter(shard -> RoutingPool.LOCAL_ONLY.equals(RoutingPool.getShardPool(shard, allocation))) + .collect(Collectors.toList()); + allUnassignedShards.removeAll(localUnassignedShards); + allUnassignedShards.forEach(shard -> routingNodes.unassigned().add(shard)); + unassignedShards = localUnassignedShards.toArray(new ShardRouting[localUnassignedShards.size()]); + } + ShardRouting[] primary = unassignedShards; ShardRouting[] secondary = new ShardRouting[primary.length]; int secondaryLength = 0; int primaryLength = primary.length; diff --git a/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/RemoteShardsBalancer.java b/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/RemoteShardsBalancer.java new file mode 100644 index 0000000000000..efb2c5a27a12f --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/routing/allocation/allocator/RemoteShardsBalancer.java @@ -0,0 +1,646 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.cluster.routing.allocation.allocator; + +import com.carrotsearch.hppc.ObjectIntHashMap; +import org.apache.logging.log4j.Logger; +import org.opensearch.cluster.routing.RoutingNode; +import org.opensearch.cluster.routing.RoutingNodes; +import org.opensearch.cluster.routing.RoutingPool; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.UnassignedInfo; +import org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision; +import org.opensearch.cluster.routing.allocation.MoveDecision; +import org.opensearch.cluster.routing.allocation.RoutingAllocation; +import org.opensearch.cluster.routing.allocation.decider.Decision; +import org.opensearch.cluster.routing.allocation.decider.DiskThresholdDecider; +import org.opensearch.common.Randomness; + +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Queue; +import java.util.Set; + +/** + * A {@link RemoteShardsBalancer} used by the {@link BalancedShardsAllocator} to perform allocation operations + * for remote shards within the cluster. + * + * @opensearch.internal + */ +public final class RemoteShardsBalancer extends ShardsBalancer { + private final Logger logger; + private final RoutingAllocation allocation; + private final RoutingNodes routingNodes; + + public RemoteShardsBalancer(Logger logger, RoutingAllocation allocation) { + this.logger = logger; + this.allocation = allocation; + this.routingNodes = allocation.routingNodes(); + } + + /** + * Allocates unassigned remote shards on the routing node which are filtered using + * {@link #groupUnassignedShardsByIndex} + */ + @Override + void allocateUnassigned() { + if (routingNodes.unassigned().isEmpty()) { + logger.debug("No unassigned remote shards found."); + return; + } + + Queue nodeQueue = getShuffledRemoteNodes(); + if (nodeQueue.isEmpty()) { + logger.debug("No remote searcher nodes available for unassigned remote shards."); + failUnattemptedShards(); + return; + } + + Map unassignedShardMap = groupUnassignedShardsByIndex(); + allocateUnassignedPrimaries(nodeQueue, unassignedShardMap); + allocateUnassignedReplicas(nodeQueue, unassignedShardMap); + ignoreRemainingShards(unassignedShardMap); + } + + /** + * Performs shard movement for incompatible remote shards + */ + @Override + void moveShards() { + Queue eligibleNodes = new ArrayDeque<>(); + Queue excludedNodes = new ArrayDeque<>(); + classifyNodesForShardMovement(eligibleNodes, excludedNodes); + + if (excludedNodes.isEmpty()) { + logger.debug("No excluded nodes found. 
Returning..."); + return; + } + + while (!eligibleNodes.isEmpty() && !excludedNodes.isEmpty()) { + RoutingNode sourceNode = excludedNodes.poll(); + for (ShardRouting ineligibleShard : sourceNode) { + if (ineligibleShard.started() == false) { + continue; + } + + if (!RoutingPool.REMOTE_CAPABLE.equals(RoutingPool.getShardPool(ineligibleShard, allocation))) { + continue; + } + + if (eligibleNodes.isEmpty()) { + break; + } + + tryShardMovementToEligibleNode(eligibleNodes, ineligibleShard); + } + } + } + + /** + * Classifies the nodes into eligible and excluded depending on whether node is able or unable for shard assignment + * @param eligibleNodes contains the list of classified nodes eligible to accept shards + * @param excludedNodes contains the list of classified nodes that are unable for assigning shards + */ + private void classifyNodesForShardMovement(Queue eligibleNodes, Queue excludedNodes) { + List remoteRoutingNodes = getRemoteRoutingNodes(); + int throttledNodeCount = 0; + for (RoutingNode node : remoteRoutingNodes) { + Decision nodeDecision = allocation.deciders().canAllocateAnyShardToNode(node, allocation); + /* canAllocateAnyShardToNode decision can be THROTTLE for throttled nodes. To classify + * as excluded nodes, we look for Decision.Type.NO + */ + if (nodeDecision.type() == Decision.Type.NO) { + excludedNodes.add(node); + } else if (nodeDecision.type() == Decision.Type.YES) { + eligibleNodes.add(node); + } else { + throttledNodeCount++; + } + logger.debug( + "Excluded Node Count: [{}], Eligible Node Count: [{}], Throttled Node Count: [{}]", + excludedNodes.size(), + eligibleNodes.size(), + throttledNodeCount + ); + } + } + + /** + * Tries to move a shard assigned to an excluded node to an eligible node. + * + * @param eligibleNodes set of nodes that are still accepting shards + * @param shard the ineligible shard to be moved + */ + private void tryShardMovementToEligibleNode(Queue eligibleNodes, ShardRouting shard) { + Set nodesCheckedForShard = new HashSet<>(); + while (!eligibleNodes.isEmpty()) { + RoutingNode targetNode = eligibleNodes.poll(); + Decision currentShardDecision = allocation.deciders().canAllocate(shard, targetNode, allocation); + + if (currentShardDecision.type() == Decision.Type.YES) { + if (logger.isDebugEnabled()) { + logger.debug( + "Moving shard: {} from node: [{}] to node: [{}]", + shardShortSummary(shard), + shard.currentNodeId(), + targetNode.nodeId() + ); + } + routingNodes.relocateShard( + shard, + targetNode.nodeId(), + allocation.clusterInfo().getShardSize(shard, ShardRouting.UNAVAILABLE_EXPECTED_SHARD_SIZE), + allocation.changes() + ); + eligibleNodes.offer(targetNode); + break; + } else { + if (logger.isTraceEnabled()) { + logger.trace( + "Cannot move shard: {} to node: [{}]. Decisions: [{}]", + shardShortSummary(shard), + targetNode.nodeId(), + currentShardDecision.getDecisions() + ); + } + + Decision nodeLevelDecision = allocation.deciders().canAllocateAnyShardToNode(targetNode, allocation); + if (nodeLevelDecision.type() == Decision.Type.YES) { + logger.debug("Node: [{}] can still accept shards. Adding it back to the queue.", targetNode.nodeId()); + eligibleNodes.offer(targetNode); + nodesCheckedForShard.add(targetNode.nodeId()); + } else { + logger.debug("Node: [{}] cannot accept any more shards. 
Removing it from queue.", targetNode.nodeId());
+                }
+
+                // Break out if all nodes in the queue have been checked for this shard
+                if (eligibleNodes.stream().allMatch(rn -> nodesCheckedForShard.contains(rn.nodeId()))) {
+                    break;
+                }
+            }
+        }
+    }
+
+    /**
+     * Performs a heuristic, naive weight-based balancing of remote shards within the cluster, using the
+     * average number of primary shards per node as the target for shard distribution.
+     * It does so without accounting for the local shards located on any nodes within the cluster.
+     */
+    @Override
+    void balance() {
+        List<RoutingNode> remoteRoutingNodes = getRemoteRoutingNodes();
+        logger.trace("Performing balancing for remote shards.");
+
+        if (remoteRoutingNodes.isEmpty()) {
+            logger.info("No eligible remote nodes found to perform balancing");
+            return;
+        }
+
+        ObjectIntHashMap<String> nodePrimaryShardCount = calculateNodePrimaryShardCount(remoteRoutingNodes);
+        int totalPrimaryShardCount = Arrays.stream(nodePrimaryShardCount.values).sum();
+
+        totalPrimaryShardCount += routingNodes.unassigned().getNumPrimaries();
+        int avgPrimaryPerNode = (totalPrimaryShardCount + routingNodes.size() - 1) / routingNodes.size();
+
+        ArrayDeque<RoutingNode> sourceNodes = new ArrayDeque<>();
+        ArrayDeque<RoutingNode> targetNodes = new ArrayDeque<>();
+        for (RoutingNode node : remoteRoutingNodes) {
+            if (nodePrimaryShardCount.get(node.nodeId()) > avgPrimaryPerNode) {
+                sourceNodes.add(node);
+            } else if (nodePrimaryShardCount.get(node.nodeId()) < avgPrimaryPerNode) {
+                targetNodes.add(node);
+            }
+        }
+
+        while (!sourceNodes.isEmpty() && !targetNodes.isEmpty()) {
+            RoutingNode sourceNode = sourceNodes.poll();
+            tryRebalanceNode(sourceNode, targetNodes, avgPrimaryPerNode, nodePrimaryShardCount);
+        }
+    }
+
+    /**
+     * Calculates the total number of primary shards per node.
+     * @param remoteRoutingNodes routing nodes for which the aggregation needs to be performed
+     * @return map of node id to primary shard count
+     */
+    private ObjectIntHashMap<String> calculateNodePrimaryShardCount(List<RoutingNode> remoteRoutingNodes) {
+        ObjectIntHashMap<String> primaryShardCount = new ObjectIntHashMap<>();
+        for (RoutingNode node : remoteRoutingNodes) {
+            int totalPrimaryShardsPerNode = 0;
+            for (ShardRouting shard : node) {
+                if (RoutingPool.REMOTE_CAPABLE.equals(RoutingPool.getShardPool(shard, allocation))
+                    && shard.primary()
+                    && (shard.initializing() || shard.started())) {
+                    totalPrimaryShardsPerNode++;
+                }
+            }
+            primaryShardCount.put(node.nodeId(), totalPrimaryShardsPerNode);
+        }
+        return primaryShardCount;
+    }
+
+    @Override
+    AllocateUnassignedDecision decideAllocateUnassigned(ShardRouting shardRouting) {
+        throw new UnsupportedOperationException("remote shards balancer does not support decision operations");
+    }
+
+    @Override
+    MoveDecision decideMove(ShardRouting shardRouting) {
+        throw new UnsupportedOperationException("remote shards balancer does not support decision operations");
+    }
+
+    @Override
+    MoveDecision decideRebalance(ShardRouting shardRouting) {
+        throw new UnsupportedOperationException("remote shards balancer does not support decision operations");
+    }
+
+    /**
+     * Groups unassigned shards within the allocation based on the index.
+ * @return {@link UnassignedIndexShards} grouped by index name + */ + public Map groupUnassignedShardsByIndex() { + HashMap unassignedShardMap = new HashMap<>(); + unassignIgnoredRemoteShards(allocation); + for (ShardRouting shard : routingNodes.unassigned().drain()) { + String index = shard.getIndexName(); + if (!RoutingPool.REMOTE_CAPABLE.equals(RoutingPool.getShardPool(shard, allocation))) { + routingNodes.unassigned().add(shard); + continue; + } + if (!unassignedShardMap.containsKey(index)) { + unassignedShardMap.put(index, new UnassignedIndexShards()); + } + unassignedShardMap.get(index).addShard(shard); + } + return unassignedShardMap; + } + + /** + * Unassigned shards from {@link LocalShardsBalancer} are ignored since the balancer cannot allocate remote shards. + * Prior to allocation operations done by {@link RemoteShardsBalancer}, the ignored remote shards are moved back to + * unassigned status. + */ + private void unassignIgnoredRemoteShards(RoutingAllocation routingAllocation) { + RoutingNodes.UnassignedShards unassignedShards = routingAllocation.routingNodes().unassigned(); + for (ShardRouting shard : unassignedShards.drainIgnored()) { + RoutingPool pool = RoutingPool.getShardPool(shard, routingAllocation); + if (pool == RoutingPool.REMOTE_CAPABLE && shard.unassigned() && (shard.primary() || !shard.unassignedInfo().isDelayed())) { + unassignedShards.add(shard); + } else { + unassignedShards.ignoreShard(shard, shard.unassignedInfo().getLastAllocationStatus(), routingAllocation.changes()); + } + } + } + + private void allocateUnassignedPrimaries(Queue nodeQueue, Map unassignedShardMap) { + allocateUnassignedShards(true, nodeQueue, unassignedShardMap); + } + + private void allocateUnassignedReplicas(Queue nodeQueue, Map unassignedShardMap) { + allocateUnassignedShards(false, nodeQueue, unassignedShardMap); + } + + private void ignoreRemainingShards(Map unassignedShardMap) { + for (UnassignedIndexShards indexShards : unassignedShardMap.values()) { + for (ShardRouting shard : indexShards.getPrimaries()) { + routingNodes.unassigned().ignoreShard(shard, UnassignedInfo.AllocationStatus.DECIDERS_NO, allocation.changes()); + } + for (ShardRouting shard : indexShards.getReplicas()) { + routingNodes.unassigned().ignoreShard(shard, UnassignedInfo.AllocationStatus.DECIDERS_NO, allocation.changes()); + } + } + } + + private void allocateUnassignedShards( + boolean primaries, + Queue nodeQueue, + Map unassignedShardMap + ) { + logger.debug("Allocating unassigned {}. Nodes available in queue: [{}]", (primaries ? "primaries" : "replicas"), nodeQueue.size()); + + // Iterate through all shards index by index and allocate them + for (String index : unassignedShardMap.keySet()) { + if (nodeQueue.isEmpty()) { + break; + } + + UnassignedIndexShards indexShards = unassignedShardMap.get(index); + Queue shardsToAllocate = primaries ? 
indexShards.getPrimaries() : indexShards.getReplicas();
+            if (shardsToAllocate.isEmpty()) {
+                continue;
+            }
+            logger.debug("Allocating shards for index: [{}]", index);
+
+            while (!shardsToAllocate.isEmpty() && !nodeQueue.isEmpty()) {
+                ShardRouting shard = shardsToAllocate.poll();
+                if (shard.assignedToNode()) {
+                    if (logger.isDebugEnabled()) {
+                        logger.debug("Shard: {} already assigned to node: [{}]", shardShortSummary(shard), shard.currentNodeId());
+                    }
+                    continue;
+                }
+
+                Decision shardLevelDecision = allocation.deciders().canAllocate(shard, allocation);
+                if (shardLevelDecision.type() == Decision.Type.NO) {
+                    if (logger.isDebugEnabled()) {
+                        logger.debug(
+                            "Ignoring shard: [{}] as it cannot be allocated to any node. Shard level decisions: [{}][{}].",
+                            shardShortSummary(shard),
+                            shardLevelDecision.getDecisions(),
+                            shardLevelDecision.getExplanation()
+                        );
+                    }
+                    routingNodes.unassigned().ignoreShard(shard, UnassignedInfo.AllocationStatus.DECIDERS_NO, allocation.changes());
+                    continue;
+                }
+
+                tryAllocateUnassignedShard(nodeQueue, shard);
+            }
+        }
+    }
+
+    /**
+     * Tries to allocate an unassigned shard to one of the nodes within the node queue.
+     * @param nodeQueue ordered list of nodes to try allocation
+     * @param shard the unassigned shard which needs to be allocated
+     */
+    private void tryAllocateUnassignedShard(Queue<RoutingNode> nodeQueue, ShardRouting shard) {
+        boolean allocated = false;
+        boolean throttled = false;
+        Set<String> nodesCheckedForShard = new HashSet<>();
+        while (!nodeQueue.isEmpty()) {
+            RoutingNode node = nodeQueue.poll();
+            Decision allocateDecision = allocation.deciders().canAllocate(shard, node, allocation);
+            nodesCheckedForShard.add(node.nodeId());
+            if (allocateDecision.type() == Decision.Type.YES) {
+                if (logger.isTraceEnabled()) {
+                    logger.trace("Assigned shard [{}] to [{}]", shardShortSummary(shard), node.nodeId());
+                }
+                final long shardSize = DiskThresholdDecider.getExpectedShardSize(
+                    shard,
+                    ShardRouting.UNAVAILABLE_EXPECTED_SHARD_SIZE,
+                    allocation.clusterInfo(),
+                    allocation.snapshotShardSizeInfo(),
+                    allocation.metadata(),
+                    allocation.routingTable()
+                );
+                ShardRouting initShard = routingNodes.initializeShard(shard, node.nodeId(), null, shardSize, allocation.changes());
+                nodeQueue.offer(node);
+                allocated = true;
+                break;
+            } else {
+                if (logger.isTraceEnabled()) {
+                    logger.trace(
+                        "Cannot allocate shard: {} on node [{}]. Decisions: [{}]",
+                        shardShortSummary(shard),
+                        node.nodeId(),
+                        allocateDecision.getDecisions()
+                    );
+                }
+                throttled = throttled || allocateDecision.type() == Decision.Type.THROTTLE;
+
+                Decision nodeLevelDecision = allocation.deciders().canAllocateAnyShardToNode(node, allocation);
+                if (nodeLevelDecision.type() == Decision.Type.YES) {
+                    if (logger.isTraceEnabled()) {
+                        logger.trace(
+                            "Node: [{}] can still accept shards, retaining it in queue - [{}]",
+                            node.nodeId(),
+                            nodeLevelDecision.getDecisions()
+                        );
+                    }
+                    nodeQueue.offer(node);
+                } else {
+                    if (logger.isTraceEnabled()) {
+                        logger.trace(
+                            "Cannot allocate any shard to node: [{}]. Removing from queue. Node level decisions: [{}],[{}]",
+                            node.nodeId(),
+                            nodeLevelDecision.getDecisions(),
+                            nodeLevelDecision.getExplanation()
+                        );
+                    }
+                }
+
+                // Break out if all nodes in the queue have been checked for this shard
+                if (nodeQueue.stream().allMatch(rn -> nodesCheckedForShard.contains(rn.nodeId()))) {
+                    throttled = true;
+                    break;
+                }
+            }
+        }
+
+        if (!allocated) {
+            UnassignedInfo.AllocationStatus status = throttled
+                ? UnassignedInfo.AllocationStatus.DECIDERS_THROTTLED
+                : UnassignedInfo.AllocationStatus.DECIDERS_NO;
+            routingNodes.unassigned().ignoreShard(shard, status, allocation.changes());
+        }
+    }
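The node-queue rotation above (poll a node, try it, conditionally re-offer it, stop once every remaining node has been checked for this shard) is the same pattern used by tryShardMovementToEligibleNode earlier. A generic distillation of that pattern, with hypothetical names and assuming java.util and java.util.function imports:

    // Poll candidates round-robin; a successful attempt keeps the candidate in
    // rotation, a failed one retains it only while it remains usable, and the
    // loop terminates once every remaining candidate has already been tried.
    static <T> boolean tryOnRotatingQueue(Queue<T> queue, Predicate<T> attempt, Predicate<T> stillUsable) {
        Set<T> tried = new HashSet<>();
        while (!queue.isEmpty()) {
            T candidate = queue.poll();
            tried.add(candidate);
            if (attempt.test(candidate)) {
                queue.offer(candidate); // success: keep the candidate available for later work
                return true;
            }
            if (stillUsable.test(candidate)) {
                queue.offer(candidate); // failed for this item only; retain for others
            }
            if (queue.stream().allMatch(tried::contains)) {
                break; // every remaining candidate has been tried for this item
            }
        }
        return false;
    }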
+
+    private void tryRebalanceNode(
+        RoutingNode sourceNode,
+        ArrayDeque<RoutingNode> targetNodes,
+        int avgPrimary,
+        ObjectIntHashMap<String> primaryCount
+    ) {
+        long shardsToBalance = primaryCount.get(sourceNode.nodeId()) - avgPrimary;
+        assert shardsToBalance >= 0 : "Shards to balance should be non-negative, but a negative value was found";
+        Iterator<ShardRouting> shardIterator = sourceNode.copyShards().iterator();
+        Set<String> nodesCheckedForRelocation = new HashSet<>();
+
+        // Try to relocate the valid shards on the sourceNode, one at a time, until either the sourceNode is
+        // balanced OR no more active primary shards are available OR all the target nodes are exhausted
+        while (shardsToBalance > 0 && shardIterator.hasNext() && !targetNodes.isEmpty()) {
+            // Find an active primary shard to relocate
+            ShardRouting shard = shardIterator.next();
+            if (!shard.started() || !shard.primary() || !RoutingPool.REMOTE_CAPABLE.equals(RoutingPool.getShardPool(shard, allocation))) {
+                continue;
+            }
+
+            while (!targetNodes.isEmpty()) {
+                // Find a valid target node that can accommodate the current shard relocation
+                RoutingNode targetNode = targetNodes.poll();
+                if (primaryCount.get(targetNode.nodeId()) >= avgPrimary) {
+                    logger.trace("Avg shard limit reached for node: [{}]. Removing from queue.", targetNode.nodeId());
+                    continue;
+                }
+
+                // Try to relocate the shard to the target node
+                Decision rebalanceDecision = tryRelocateShard(shard, targetNode);
+
+                if (rebalanceDecision.type() == Decision.Type.YES) {
+                    shardsToBalance--;
+                    primaryCount.addTo(targetNode.nodeId(), 1);
+                    targetNodes.offer(targetNode);
+                    break;
+
+                // If the relocation attempt failed for the shard, check if the target node can accommodate any other shard; else remove
+                // the target node from the target list
+                } else {
+                    Decision nodeDecision = allocation.deciders().canAllocateAnyShardToNode(targetNode, allocation);
+                    if (nodeDecision.type() == Decision.Type.YES) {
+                        targetNodes.offer(targetNode);
+                        nodesCheckedForRelocation.add(targetNode.nodeId());
+                    } else {
+                        if (logger.isTraceEnabled()) {
+                            logger.trace(
+                                "Cannot allocate any shard to node: [{}]. Removing from queue. Node level decisions: [{}],[{}]",
+                                targetNode.nodeId(),
+                                nodeDecision.getDecisions(),
+                                nodeDecision.toString()
+                            );
+                        }
+                    }
+                }
+
+                // If all the target nodes are exhausted for the current shard, skip to the next shard
+                if (targetNodes.stream().allMatch(node -> nodesCheckedForRelocation.contains(node.nodeId()))) {
+                    break;
+                }
+            }
+        }
+    }
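The avgPrimary target that tryRebalanceNode enforces comes from the ceiling average computed in balance() above. A worked example of that integer arithmetic:

    // With 10 remote primaries (assigned plus unassigned) and 3 nodes,
    // (10 + 3 - 1) / 3 == 4 in integer math, i.e. ceil(10 / 3.0).
    // Nodes above 4 primaries become relocation sources; nodes below 4 are targets.
    int totalPrimaryShardCount = 10;
    int nodeCount = 3;
    int avgPrimaryPerNode = (totalPrimaryShardCount + nodeCount - 1) / nodeCount; // == 4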
+
+    /**
+     * For every primary shard for which this method is invoked, a swap with a replica on the
+     * destination node is attempted if such a replica is present.
+     * If no replica is present, relocation of the shard is performed.
+     */
+    private Decision tryRelocateShard(ShardRouting shard, RoutingNode destinationNode) {
+        // Check if there is already a replica for the shard on the destination node.
+        // Then we can directly swap the replica with the primary shards.
+        // Invariant: We only allow swap relocation on remote shards.
+        ShardRouting replicaShard = destinationNode.getByShardId(shard.shardId());
+        if (replicaShard != null) {
+            assert !replicaShard.primary() : "Primary Shard found while expected Replica during shard rebalance";
+            return executeSwapShard(shard, replicaShard, allocation);
+        }
+
+        // Since no replica is present on the destinationNode, try relocating the shard to the destination node
+        Decision allocationDecision = allocation.deciders().canAllocate(shard, destinationNode, allocation);
+        Decision rebalanceDecision = allocation.deciders().canRebalance(shard, allocation);
+        logger.trace(
+            "Relocating shard [{}] from node [{}] to node [{}]. AllocationDecision: [{}]. AllocationExplanation: [{}]. "
+                + "RebalanceDecision: [{}]. RebalanceExplanation: [{}]",
+            shard.id(),
+            shard.currentNodeId(),
+            destinationNode.nodeId(),
+            allocationDecision.type(),
+            allocationDecision.toString(),
+            rebalanceDecision.type(),
+            rebalanceDecision.toString()
+        );
+
+        // Perform the relocation if both the allocation and rebalance decisions are YES
+        if ((allocationDecision.type() == Decision.Type.YES) && (rebalanceDecision.type() == Decision.Type.YES)) {
+            final long shardSize = allocation.clusterInfo().getShardSize(shard, ShardRouting.UNAVAILABLE_EXPECTED_SHARD_SIZE);
+            ShardRouting targetShard = routingNodes.relocateShard(shard, destinationNode.nodeId(), shardSize, allocation.changes()).v2();
+            logger.info("Relocated shard [{}] to node [{}] during primary rebalance", shard, targetShard.currentNodeId());
+            return Decision.YES;
+        }
+
+        if ((allocationDecision.type() == Decision.Type.THROTTLE) || (rebalanceDecision.type() == Decision.Type.THROTTLE)) {
+            return Decision.THROTTLE;
+        }
+
+        return Decision.NO;
+    }
+
+    private Decision executeSwapShard(ShardRouting primaryShard, ShardRouting replicaShard, RoutingAllocation allocation) {
+        if (!replicaShard.started()) {
+            return new Decision.Single(Decision.Type.NO);
+        }
+
+        allocation.routingNodes().swapPrimaryWithReplica(logger, primaryShard, replicaShard, allocation.changes());
+        return new Decision.Single(Decision.Type.YES);
+    }
+
+    private void failUnattemptedShards() {
+        RoutingNodes.UnassignedShards.UnassignedIterator unassignedIterator = routingNodes.unassigned().iterator();
+        while (unassignedIterator.hasNext()) {
+            ShardRouting shard = unassignedIterator.next();
+            UnassignedInfo unassignedInfo = shard.unassignedInfo();
+            if (shard.primary() && unassignedInfo.getLastAllocationStatus() == UnassignedInfo.AllocationStatus.NO_ATTEMPT) {
+                unassignedIterator.updateUnassigned(
+                    new UnassignedInfo(
+                        unassignedInfo.getReason(),
+                        unassignedInfo.getMessage(),
+                        unassignedInfo.getFailure(),
+                        unassignedInfo.getNumFailedAllocations(),
+                        unassignedInfo.getUnassignedTimeInNanos(),
+                        unassignedInfo.getUnassignedTimeInMillis(),
+                        unassignedInfo.isDelayed(),
+                        UnassignedInfo.AllocationStatus.DECIDERS_NO,
+                        Collections.emptySet()
+                    ),
+                    shard.recoverySource(),
+                    allocation.changes()
+                );
+            }
+        }
+    }
+
+    private Queue<RoutingNode> getShuffledRemoteNodes() {
+        List<RoutingNode> nodeList = getRemoteRoutingNodes();
+        Randomness.shuffle(nodeList);
+        return new ArrayDeque<>(nodeList);
+    }
+
+    /**
+     * Filters out and returns the list of {@link RoutingPool#REMOTE_CAPABLE} nodes from the routing nodes in the cluster.
+     * @return list of {@link RoutingPool#REMOTE_CAPABLE} routing nodes.
+ */ + private List getRemoteRoutingNodes() { + List nodeList = new ArrayList<>(); + for (RoutingNode rNode : routingNodes) { + if (RoutingPool.REMOTE_CAPABLE.equals(RoutingPool.getNodePool(rNode))) { + nodeList.add(rNode); + } + } + return nodeList; + } + + /** + * {@link UnassignedIndexShards} maintains a queue of unassigned remote shards for allocation operations within + * the cluster. + * + * @opensearch.internal + */ + public static class UnassignedIndexShards { + private final Queue primaries = new ArrayDeque<>(); + private final Queue replicas = new ArrayDeque<>(); + + public void addShard(ShardRouting shard) { + if (shard.primary()) { + primaries.add(shard); + } else { + replicas.add(shard); + } + } + + public Queue getPrimaries() { + return primaries; + } + + public Queue getReplicas() { + return replicas; + } + } + + private String shardShortSummary(ShardRouting shard) { + return "[" + shard.getIndexName() + "]" + "[" + shard.getId() + "]" + "[" + (shard.primary() ? "p" : "r") + "]"; + } + +} diff --git a/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/TargetPoolAllocationDecider.java b/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/TargetPoolAllocationDecider.java new file mode 100644 index 0000000000000..c87f7d16079e9 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/TargetPoolAllocationDecider.java @@ -0,0 +1,104 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.routing.allocation.decider; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.routing.RoutingNode; +import org.opensearch.cluster.routing.RoutingPool; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.allocation.RoutingAllocation; + +/** + * {@link TargetPoolAllocationDecider} ensures that the different shard types are assigned to the nodes with + * appropriate capabilities. The node pools with respective capabilities are defined within {@link RoutingPool}. + * + * @opensearch.internal + */ +public class TargetPoolAllocationDecider extends AllocationDecider { + private static final Logger logger = LogManager.getLogger(TargetPoolAllocationDecider.class); + + public static final String NAME = "target_pool"; + + @Override + public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + RoutingPool shardPool = RoutingPool.getShardPool(shardRouting, allocation); + RoutingPool targetNodePool = RoutingPool.getNodePool(node); + if (RoutingPool.REMOTE_CAPABLE.equals(shardPool) && RoutingPool.LOCAL_ONLY.equals(targetNodePool)) { + logger.debug( + "Shard: [{}] has target pool: [{}]. Cannot allocate on node: [{}] with target pool: [{}]", + shardRouting.shortSummary(), + shardPool, + node.node(), + targetNodePool + ); + return allocation.decision( + Decision.NO, + NAME, + "Routing pools are incompatible. Shard pool: [%s], Node Pool: [%s]", + shardPool, + targetNodePool + ); + } + return allocation.decision( + Decision.YES, + NAME, + "Routing pools are compatible. 
Shard pool: [%s], Node Pool: [%s]",
+            shardPool,
+            targetNodePool
+        );
+    }
+
+    @Override
+    public Decision canAllocate(IndexMetadata indexMetadata, RoutingNode node, RoutingAllocation allocation) {
+        return canAllocateInTargetPool(indexMetadata, node.node(), allocation);
+    }
+
+    @Override
+    public Decision canForceAllocatePrimary(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
+        logger.debug("Evaluating force allocation for primary shard.");
+        return canAllocate(shardRouting, node, allocation);
+    }
+
+    public Decision shouldAutoExpandToNode(IndexMetadata indexMetadata, DiscoveryNode node, RoutingAllocation allocation) {
+        logger.debug("Evaluating node: {} for autoExpandReplica eligibility of index: {}", node, indexMetadata.getIndex());
+        return canAllocateInTargetPool(indexMetadata, node, allocation);
+    }
+
+    private Decision canAllocateInTargetPool(IndexMetadata indexMetadata, DiscoveryNode node, RoutingAllocation allocation) {
+        RoutingPool indexPool = RoutingPool.getIndexPool(indexMetadata);
+        RoutingPool targetNodePool = RoutingPool.getNodePool(node);
+        if (RoutingPool.REMOTE_CAPABLE.equals(indexPool) && RoutingPool.LOCAL_ONLY.equals(targetNodePool)) {
+            logger.debug(
+                "Index: [{}] has target pool: [{}]. Cannot allocate on node: [{}] with target pool: [{}]",
+                indexMetadata.getIndex().getName(),
+                indexPool,
+                node,
+                targetNodePool
+            );
+            return allocation.decision(
+                Decision.NO,
+                NAME,
+                "Routing pools are incompatible. Index pool: [%s], Node Pool: [%s]",
+                indexPool,
+                targetNodePool
+            );
+        }
+        return allocation.decision(
+            Decision.YES,
+            NAME,
+            "Routing pools are compatible. Index pool: [%s], Node Pool: [%s]",
+            indexPool,
+            targetNodePool
+        );
+    }
+
+}
diff --git a/server/src/main/java/org/opensearch/common/lucene/store/ByteArrayIndexInput.java b/server/src/main/java/org/opensearch/common/lucene/store/ByteArrayIndexInput.java
index 0113d238869c5..6eb613daf5133 100644
--- a/server/src/main/java/org/opensearch/common/lucene/store/ByteArrayIndexInput.java
+++ b/server/src/main/java/org/opensearch/common/lucene/store/ByteArrayIndexInput.java
@@ -32,6 +32,7 @@
 package org.opensearch.common.lucene.store;
 
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.RandomAccessInput;
 
 import java.io.EOFException;
 import java.io.IOException;
@@ -41,14 +42,14 @@
  *
  * @opensearch.internal
  */
-public class ByteArrayIndexInput extends IndexInput {
+public class ByteArrayIndexInput extends IndexInput implements RandomAccessInput {
     private final byte[] bytes;
 
-    private int pos;
+    private final int offset;
 
-    private int offset;
+    private final int length;
 
-    private int length;
+    private int pos;
 
     public ByteArrayIndexInput(String resourceDesc, byte[] bytes) {
         this(resourceDesc, bytes, 0, bytes.length);
@@ -106,18 +107,66 @@ public IndexInput slice(String sliceDescription, long offset, long length) throw
     @Override
     public byte readByte() throws IOException {
-        if (pos >= offset + length) {
-            throw new EOFException("seek past EOF");
-        }
+        validatePos(pos, Byte.BYTES);
         return bytes[offset + pos++];
     }
 
     @Override
     public void readBytes(final byte[] b, final int offset, int len) throws IOException {
-        if (pos + len > this.offset + length) {
-            throw new EOFException("seek past EOF");
-        }
+        validatePos(pos, len);
         System.arraycopy(bytes, this.offset + pos, b, offset, len);
         pos += len;
     }
+
+    @Override
+    public byte readByte(long pos) throws IOException {
+        validatePos(pos, Byte.BYTES);
+        return internalReadByte(pos);
+    }
+
+    @Override
+    public short readShort(long pos) throws IOException {
+        validatePos(pos, Short.BYTES);
+        return internalReadShort(pos);
+    }
+
+    @Override
+    public int readInt(long pos) throws IOException {
+        validatePos(pos, Integer.BYTES);
+        return internalReadInt(pos);
+    }
+
+    @Override
+    public long readLong(long pos) throws IOException {
+        validatePos(pos, Long.BYTES);
+        return internalReadLong(pos);
+    }
+
+    private byte internalReadByte(long pos) {
+        return bytes[offset + (int) pos];
+    }
+
+    private short internalReadShort(long pos) {
+        final byte p1 = internalReadByte(pos);
+        final byte p2 = internalReadByte(pos + 1);
+        return (short) (((p2 & 0xFF) << 8) | (p1 & 0xFF));
+    }
+
+    private int internalReadInt(long pos) {
+        final short p1 = internalReadShort(pos);
+        final short p2 = internalReadShort(pos + Short.BYTES);
+        return ((p2 & 0xFFFF) << 16) | (p1 & 0xFFFF);
+    }
+
+    public long internalReadLong(long pos) {
+        final int p1 = internalReadInt(pos);
+        final int p2 = internalReadInt(pos + Integer.BYTES);
+        return (((long) p2) << 32) | (p1 & 0xFFFFFFFFL);
+    }
+
+    private void validatePos(long pos, int len) throws EOFException {
+        if (pos < 0 || pos + len > length + offset) {
+            throw new EOFException("seek past EOF");
+        }
+    }
+}
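The readShort/readInt/readLong helpers above stitch wider values together least-significant byte first. A small check of that little-endian layout, using only the constructor and random-access reads shown in this diff:

    // Bytes {0x01, 0x02, 0x03, 0x04} read as an int at position 0 yield
    // 0x04030201, because each wider read places the later byte in the
    // higher-order bits.
    static void littleEndianDemo() throws IOException {
        byte[] data = { 0x01, 0x02, 0x03, 0x04 };
        ByteArrayIndexInput in = new ByteArrayIndexInput("demo", data);
        assert in.readShort(0) == (short) 0x0201;
        assert in.readInt(0) == 0x04030201;
    }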
diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java
index 415dee4118639..2107a27846993 100644
--- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java
+++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java
@@ -536,6 +536,7 @@ public void apply(Settings value, Settings current, Settings previous) {
             PersistentTasksClusterService.CLUSTER_TASKS_ALLOCATION_RECHECK_INTERVAL_SETTING,
             EnableAssignmentDecider.CLUSTER_TASKS_ALLOCATION_ENABLE_SETTING,
             PeerFinder.DISCOVERY_FIND_PEERS_INTERVAL_SETTING,
+            PeerFinder.DISCOVERY_FIND_PEERS_INTERVAL_DURING_DECOMMISSION_SETTING,
             PeerFinder.DISCOVERY_REQUEST_PEERS_TIMEOUT_SETTING,
             ClusterFormationFailureHelper.DISCOVERY_CLUSTER_FORMATION_WARNING_TIMEOUT_SETTING,
             ElectionSchedulerFactory.ELECTION_INITIAL_TIMEOUT_SETTING,
diff --git a/server/src/main/java/org/opensearch/discovery/PeerFinder.java b/server/src/main/java/org/opensearch/discovery/PeerFinder.java
index a601a6fbe4d82..e8b6c72c512a2 100644
--- a/server/src/main/java/org/opensearch/discovery/PeerFinder.java
+++ b/server/src/main/java/org/opensearch/discovery/PeerFinder.java
@@ -84,6 +84,14 @@ public abstract class PeerFinder {
         Setting.Property.NodeScope
     );
 
+    // the time between attempts to find all peers when the node is in a decommissioned state; defaults to 2 minutes
+    public static final Setting<TimeValue> DISCOVERY_FIND_PEERS_INTERVAL_DURING_DECOMMISSION_SETTING = Setting.timeSetting(
+        "discovery.find_peers_interval_during_decommission",
+        TimeValue.timeValueSeconds(120L),
+        TimeValue.timeValueMillis(1000),
+        Setting.Property.NodeScope
+    );
+
     public static final Setting<TimeValue> DISCOVERY_REQUEST_PEERS_TIMEOUT_SETTING = Setting.timeSetting(
         "discovery.request_peers_timeout",
         TimeValue.timeValueMillis(3000),
@@ -91,7 +99,8 @@ public abstract class PeerFinder {
         Setting.Property.NodeScope
     );
 
-    private final TimeValue findPeersInterval;
+    private final Settings settings;
+    private TimeValue findPeersInterval;
     private final TimeValue requestPeersTimeout;
 
     private final Object mutex = new Object();
@@ -112,6 +121,7 @@ public PeerFinder(
         TransportAddressConnector transportAddressConnector,
         ConfiguredHostsResolver configuredHostsResolver
     ) {
+        this.settings = settings;
        findPeersInterval =
DISCOVERY_FIND_PEERS_INTERVAL_SETTING.get(settings); requestPeersTimeout = DISCOVERY_REQUEST_PEERS_TIMEOUT_SETTING.get(settings); this.transportService = transportService; @@ -128,6 +138,23 @@ public PeerFinder( ); } + public synchronized void onNodeCommissionStatusChange(boolean localNodeCommissioned) { + findPeersInterval = localNodeCommissioned + ? DISCOVERY_FIND_PEERS_INTERVAL_SETTING.get(settings) + : DISCOVERY_FIND_PEERS_INTERVAL_DURING_DECOMMISSION_SETTING.get(settings); + logger.info( + "setting findPeersInterval to [{}] as node commission status = [{}] for local node [{}]", + findPeersInterval, + localNodeCommissioned, + transportService.getLocalNode() + ); + } + + // package private for tests + TimeValue getFindPeersInterval() { + return findPeersInterval; + } + public void activate(final DiscoveryNodes lastAcceptedNodes) { logger.trace("activating with {}", lastAcceptedNodes); diff --git a/server/src/main/java/org/opensearch/index/IndexModule.java b/server/src/main/java/org/opensearch/index/IndexModule.java index 2a462f6165184..9f7e3e9fb5eee 100644 --- a/server/src/main/java/org/opensearch/index/IndexModule.java +++ b/server/src/main/java/org/opensearch/index/IndexModule.java @@ -70,7 +70,7 @@ import org.opensearch.index.shard.SearchOperationListener; import org.opensearch.index.similarity.SimilarityService; import org.opensearch.index.store.FsDirectoryFactory; -import org.opensearch.index.store.RemoteSnapshotDirectoryFactory; +import org.opensearch.index.store.remote.directory.RemoteSnapshotDirectoryFactory; import org.opensearch.indices.IndicesQueryCache; import org.opensearch.indices.breaker.CircuitBreakerService; import org.opensearch.indices.fielddata.cache.IndicesFieldDataCache; @@ -648,7 +648,8 @@ private void ensureNotFrozen() { } public static Map createBuiltInDirectoryFactories( - Supplier repositoriesService + Supplier repositoriesService, + ThreadPool threadPool ) { final Map factories = new HashMap<>(); for (Type type : Type.values()) { @@ -661,7 +662,7 @@ public static Map createBuiltInDirect factories.put(type.getSettingsKey(), DEFAULT_DIRECTORY_FACTORY); break; case REMOTE_SNAPSHOT: - factories.put(type.getSettingsKey(), new RemoteSnapshotDirectoryFactory(repositoriesService)); + factories.put(type.getSettingsKey(), new RemoteSnapshotDirectoryFactory(repositoriesService, threadPool)); break; default: throw new IllegalStateException("No directory factory mapping for built-in type " + type); diff --git a/server/src/main/java/org/opensearch/index/snapshots/blobstore/SlicedInputStream.java b/server/src/main/java/org/opensearch/index/snapshots/blobstore/SlicedInputStream.java index b9b27547fe50c..ae2e33d41c6ce 100644 --- a/server/src/main/java/org/opensearch/index/snapshots/blobstore/SlicedInputStream.java +++ b/server/src/main/java/org/opensearch/index/snapshots/blobstore/SlicedInputStream.java @@ -35,6 +35,7 @@ import java.io.IOException; import java.io.InputStream; +import java.util.Objects; /** * A {@link SlicedInputStream} is a logical @@ -100,6 +101,11 @@ public final int read() throws IOException { @Override public final int read(byte[] buffer, int offset, int length) throws IOException { + Objects.checkFromIndexSize(offset, length, buffer.length); + if (length == 0) { + return 0; + } + final InputStream stream = currentStream(); if (stream == null) { return -1; diff --git a/server/src/main/java/org/opensearch/index/store/InMemoryRemoteSnapshotDirectory.java b/server/src/main/java/org/opensearch/index/store/InMemoryRemoteSnapshotDirectory.java deleted file mode 
100644 index 0757d88a4099a..0000000000000 --- a/server/src/main/java/org/opensearch/index/store/InMemoryRemoteSnapshotDirectory.java +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.index.store; - -import java.io.IOException; -import java.io.InputStream; -import java.util.Collection; -import java.util.Collections; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; - -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.store.Lock; -import org.apache.lucene.store.NoLockFactory; -import org.apache.lucene.util.SetOnce; -import org.opensearch.common.blobstore.BlobContainer; -import org.opensearch.common.blobstore.BlobPath; -import org.opensearch.common.lucene.store.ByteArrayIndexInput; -import org.opensearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot; -import org.opensearch.repositories.blobstore.BlobStoreRepository; -import org.opensearch.snapshots.SnapshotId; - -/** - * Trivial in-memory implementation of a Directory that reads from a snapshot - * in a repository. This is functional but is only temporary to demonstrate - * functional searchable snapshot functionality. The proper implementation will - * be implemented per https://github.com/opensearch-project/OpenSearch/issues/3114. - * - * @opensearch.internal - */ -public final class InMemoryRemoteSnapshotDirectory extends Directory { - - private final BlobStoreRepository blobStoreRepository; - private final SnapshotId snapshotId; - private final BlobPath blobPath; - private final SetOnce blobContainer = new SetOnce<>(); - private final SetOnce> fileInfoMap = new SetOnce<>(); - - public InMemoryRemoteSnapshotDirectory(BlobStoreRepository blobStoreRepository, BlobPath blobPath, SnapshotId snapshotId) { - this.blobStoreRepository = blobStoreRepository; - this.snapshotId = snapshotId; - this.blobPath = blobPath; - } - - @Override - public String[] listAll() throws IOException { - return fileInfoMap().keySet().toArray(new String[0]); - } - - @Override - public void deleteFile(String name) throws IOException {} - - @Override - public IndexOutput createOutput(String name, IOContext context) { - return NoopIndexOutput.INSTANCE; - } - - @Override - public IndexInput openInput(String name, IOContext context) throws IOException { - final BlobStoreIndexShardSnapshot.FileInfo fileInfo = fileInfoMap().get(name); - - // Virtual files are contained entirely in the metadata hash field - if (fileInfo.name().startsWith("v__")) { - return new ByteArrayIndexInput(name, fileInfo.metadata().hash().bytes); - } - - try (InputStream is = blobContainer().readBlob(fileInfo.name())) { - return new ByteArrayIndexInput(name, is.readAllBytes()); - } - } - - @Override - public void close() throws IOException {} - - @Override - public long fileLength(String name) throws IOException { - initializeIfNecessary(); - return fileInfoMap.get().get(name).length(); - } - - @Override - public Set getPendingDeletions() throws IOException { - return Collections.emptySet(); - } - - @Override - public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) { - throw new UnsupportedOperationException(); - } - - @Override - public void sync(Collection names) throws IOException {} - - @Override - public void syncMetaData() {} - - @Override - public void rename(String source, String dest) 
throws IOException {} - - @Override - public Lock obtainLock(String name) throws IOException { - return NoLockFactory.INSTANCE.obtainLock(null, null); - } - - static class NoopIndexOutput extends IndexOutput { - - final static NoopIndexOutput INSTANCE = new NoopIndexOutput(); - - NoopIndexOutput() { - super("noop", "noop"); - } - - @Override - public void close() throws IOException { - - } - - @Override - public long getFilePointer() { - return 0; - } - - @Override - public long getChecksum() throws IOException { - return 0; - } - - @Override - public void writeByte(byte b) throws IOException { - - } - - @Override - public void writeBytes(byte[] b, int offset, int length) throws IOException { - - } - } - - private BlobContainer blobContainer() { - initializeIfNecessary(); - return blobContainer.get(); - } - - private Map fileInfoMap() { - initializeIfNecessary(); - return fileInfoMap.get(); - } - - /** - * Bit of a hack to lazily initialize the blob store to avoid running afoul - * of the assertion in {@code BlobStoreRepository#assertSnapshotOrGenericThread}. - */ - private void initializeIfNecessary() { - if (blobContainer.get() == null || fileInfoMap.get() == null) { - blobContainer.set(blobStoreRepository.blobStore().blobContainer(blobPath)); - final BlobStoreIndexShardSnapshot snapshot = blobStoreRepository.loadShardSnapshot(blobContainer.get(), snapshotId); - fileInfoMap.set( - snapshot.indexFiles().stream().collect(Collectors.toMap(BlobStoreIndexShardSnapshot.FileInfo::physicalName, f -> f)) - ); - } - } -} diff --git a/server/src/main/java/org/opensearch/index/store/RemoteSnapshotDirectoryFactory.java b/server/src/main/java/org/opensearch/index/store/RemoteSnapshotDirectoryFactory.java deleted file mode 100644 index bf7806b836b65..0000000000000 --- a/server/src/main/java/org/opensearch/index/store/RemoteSnapshotDirectoryFactory.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.index.store; - -import java.io.IOException; -import java.util.function.Supplier; - -import org.apache.lucene.store.Directory; -import org.opensearch.common.blobstore.BlobPath; -import org.opensearch.index.IndexSettings; -import org.opensearch.index.shard.ShardPath; -import org.opensearch.plugins.IndexStorePlugin; -import org.opensearch.repositories.RepositoriesService; -import org.opensearch.repositories.Repository; -import org.opensearch.repositories.blobstore.BlobStoreRepository; -import org.opensearch.snapshots.SnapshotId; - -/** - * Factory for a Directory implementation that can read directly from index - * data stored remotely in a repository. 
- * - * @opensearch.internal - */ -public final class RemoteSnapshotDirectoryFactory implements IndexStorePlugin.DirectoryFactory { - private final Supplier repositoriesService; - - public RemoteSnapshotDirectoryFactory(Supplier repositoriesService) { - this.repositoriesService = repositoriesService; - } - - @Override - public Directory newDirectory(IndexSettings indexSettings, ShardPath shardPath) throws IOException { - final String repositoryName = IndexSettings.SEARCHABLE_SNAPSHOT_REPOSITORY.get(indexSettings.getSettings()); - final Repository repository = repositoriesService.get().repository(repositoryName); - assert repository instanceof BlobStoreRepository : "repository should be instance of BlobStoreRepository"; - final BlobStoreRepository blobStoreRepository = (BlobStoreRepository) repository; - final BlobPath blobPath = new BlobPath().add("indices") - .add(IndexSettings.SEARCHABLE_SNAPSHOT_INDEX_ID.get(indexSettings.getSettings())) - .add(Integer.toString(shardPath.getShardId().getId())); - final SnapshotId snapshotId = new SnapshotId( - IndexSettings.SEARCHABLE_SNAPSHOT_ID_NAME.get(indexSettings.getSettings()), - IndexSettings.SEARCHABLE_SNAPSHOT_ID_UUID.get(indexSettings.getSettings()) - ); - return new InMemoryRemoteSnapshotDirectory(blobStoreRepository, blobPath, snapshotId); - } -} diff --git a/server/src/main/java/org/opensearch/index/store/Store.java b/server/src/main/java/org/opensearch/index/store/Store.java index 9122c950a6ab6..3354f7e8dbacb 100644 --- a/server/src/main/java/org/opensearch/index/store/Store.java +++ b/server/src/main/java/org/opensearch/index/store/Store.java @@ -1407,7 +1407,7 @@ public static String digestToString(long digest) { * * @opensearch.internal */ - static class LuceneVerifyingIndexOutput extends VerifyingIndexOutput { + public static class LuceneVerifyingIndexOutput extends VerifyingIndexOutput { private final StoreFileMetadata metadata; private long writtenBytes; @@ -1415,7 +1415,7 @@ static class LuceneVerifyingIndexOutput extends VerifyingIndexOutput { private String actualChecksum; private final byte[] footerChecksum = new byte[8]; // this holds the actual footer checksum data written by to this output - LuceneVerifyingIndexOutput(StoreFileMetadata metadata, IndexOutput out) { + public LuceneVerifyingIndexOutput(StoreFileMetadata metadata, IndexOutput out) { super(out); this.metadata = metadata; checksumPosition = metadata.length() - 8; // the last 8 bytes are the checksum - we store it in footerChecksum diff --git a/server/src/main/java/org/opensearch/index/store/remote/directory/RemoteSnapshotDirectory.java b/server/src/main/java/org/opensearch/index/store/remote/directory/RemoteSnapshotDirectory.java new file mode 100644 index 0000000000000..3a2749a6d325b --- /dev/null +++ b/server/src/main/java/org/opensearch/index/store/remote/directory/RemoteSnapshotDirectory.java @@ -0,0 +1,142 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */
+
+package org.opensearch.index.store.remote.directory;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.Lock;
+import org.apache.lucene.store.NoLockFactory;
+import org.opensearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot;
+import org.opensearch.index.store.remote.file.OnDemandBlockSnapshotIndexInput;
+import org.opensearch.index.store.remote.file.OnDemandVirtualFileSnapshotIndexInput;
+import org.opensearch.index.store.remote.utils.TransferManager;
+import org.opensearch.repositories.blobstore.BlobStoreRepository;
+
+/**
+ * A Directory implementation that can read directly from an index snapshot stored remotely in a blob store repository.
+ * This implementation follows the design described in https://github.com/opensearch-project/OpenSearch/issues/4033
+ *
+ * @opensearch.internal
+ */
+public final class RemoteSnapshotDirectory extends Directory {
+    private static final String VIRTUAL_FILE_PREFIX = BlobStoreRepository.VIRTUAL_DATA_BLOB_PREFIX;
+
+    private final Map<String, BlobStoreIndexShardSnapshot.FileInfo> fileInfoMap;
+    private final FSDirectory localStoreDir;
+    private final TransferManager transferManager;
+
+    public RemoteSnapshotDirectory(BlobStoreIndexShardSnapshot snapshot, FSDirectory localStoreDir, TransferManager transferManager) {
+        this.fileInfoMap = snapshot.indexFiles()
+            .stream()
+            .collect(Collectors.toMap(BlobStoreIndexShardSnapshot.FileInfo::physicalName, f -> f));
+        this.localStoreDir = localStoreDir;
+        this.transferManager = transferManager;
+    }
+
+    @Override
+    public String[] listAll() throws IOException {
+        return fileInfoMap.keySet().toArray(new String[0]);
+    }
+
+    @Override
+    public void deleteFile(String name) throws IOException {}
+
+    @Override
+    public IndexOutput createOutput(String name, IOContext context) {
+        return NoopIndexOutput.INSTANCE;
+    }
+
+    @Override
+    public IndexInput openInput(String name, IOContext context) throws IOException {
+        final BlobStoreIndexShardSnapshot.FileInfo fileInfo = fileInfoMap.get(name);
+
+        if (fileInfo.name().startsWith(VIRTUAL_FILE_PREFIX)) {
+            return new OnDemandVirtualFileSnapshotIndexInput(fileInfo, localStoreDir, transferManager);
+        }
+        return new OnDemandBlockSnapshotIndexInput(fileInfo, localStoreDir, transferManager);
+    }
+
+    @Override
+    public void close() throws IOException {
+        localStoreDir.close();
+    }
+
+    @Override
+    public long fileLength(String name) throws IOException {
+        return fileInfoMap.get(name).length();
+    }
+
+    @Override
+    public Set<String> getPendingDeletions() throws IOException {
+        return Collections.emptySet();
+    }
+
+    @Override
+    public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void sync(Collection<String> names) throws IOException {}
+
+    @Override
+    public void syncMetaData() {}
+
+    @Override
+    public void rename(String source, String dest) throws IOException {}
+
+    @Override
+    public Lock obtainLock(String name) throws IOException {
+        return NoLockFactory.INSTANCE.obtainLock(null, null);
+    }
+
+    static class NoopIndexOutput extends IndexOutput {
+
+        final static NoopIndexOutput INSTANCE = new NoopIndexOutput();
+
+        NoopIndexOutput() {
+            super("noop", "noop");
+        }
+
+        @Override
+        public void close() throws IOException {
+
+        }
+
+        @Override
+        public long getFilePointer() {
+            return 0;
+        }
+
+        @Override
+        public long getChecksum() throws IOException {
+            return 0;
+        }
+
+        @Override
+        public void writeByte(byte b) throws IOException {
+
+        }
+
+        @Override
+        public void writeBytes(byte[] b, int offset, int length) throws IOException {
+
+        }
+    }
+}
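A hedged usage sketch for the directory above; snapshot, localFsDir, and transferManager are assumed stand-ins, built the way the factory below builds them:

    // Reads are served lazily: virtual (v__) files come straight from snapshot
    // metadata, everything else is fetched block by block on first access.
    RemoteSnapshotDirectory directory = new RemoteSnapshotDirectory(snapshot, localFsDir, transferManager);
    String[] snapshotFiles = directory.listAll();             // physical file names from the snapshot metadata
    IndexInput input = directory.openInput(snapshotFiles[0], IOContext.READ);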
+        }
+
+        @Override
+        public void close() throws IOException {
+
+        }
+
+        @Override
+        public long getFilePointer() {
+            return 0;
+        }
+
+        @Override
+        public long getChecksum() throws IOException {
+            return 0;
+        }
+
+        @Override
+        public void writeByte(byte b) throws IOException {
+
+        }
+
+        @Override
+        public void writeBytes(byte[] b, int offset, int length) throws IOException {
+
+        }
+    }
+}
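For context, a minimal consumption sketch (not part of this diff): Lucene treats a `RemoteSnapshotDirectory` like any other `Directory`, so a reader can be opened over it while the actual bytes are pulled from the blob store on demand. All three parameters are assumed to be supplied by the factory introduced below.

```java
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.store.FSDirectory;
import org.opensearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot;
import org.opensearch.index.store.remote.directory.RemoteSnapshotDirectory;
import org.opensearch.index.store.remote.utils.TransferManager;

class RemoteSnapshotDirectoryUsageSketch {
    // Hedged usage sketch only; `snapshot`, `localDir`, and `transferManager`
    // are assumed to come from RemoteSnapshotDirectoryFactory.
    static void listAndOpen(BlobStoreIndexShardSnapshot snapshot, FSDirectory localDir, TransferManager transferManager)
        throws Exception {
        RemoteSnapshotDirectory remoteDir = new RemoteSnapshotDirectory(snapshot, localDir, transferManager);
        for (String file : remoteDir.listAll()) {
            // fileLength() is answered from snapshot metadata; no remote read happens here
            System.out.println(file + " -> " + remoteDir.fileLength(file) + " bytes");
        }
        try (DirectoryReader reader = DirectoryReader.open(remoteDir)) {
            // searches against this reader drive openInput(), which fetches blocks on demand
            System.out.println("docs visible in snapshot: " + reader.numDocs());
        }
    }
}
```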
diff --git a/server/src/main/java/org/opensearch/index/store/remote/directory/RemoteSnapshotDirectoryFactory.java b/server/src/main/java/org/opensearch/index/store/remote/directory/RemoteSnapshotDirectoryFactory.java
new file mode 100644
index 0000000000000..fed1f127d113f
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/store/remote/directory/RemoteSnapshotDirectoryFactory.java
@@ -0,0 +1,88 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.store.remote.directory;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Future;
+import java.util.function.Supplier;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.opensearch.common.blobstore.BlobContainer;
+import org.opensearch.common.blobstore.BlobPath;
+import org.opensearch.index.IndexSettings;
+import org.opensearch.index.shard.ShardPath;
+import org.opensearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot;
+import org.opensearch.index.store.remote.utils.TransferManager;
+import org.opensearch.plugins.IndexStorePlugin;
+import org.opensearch.repositories.RepositoriesService;
+import org.opensearch.repositories.Repository;
+import org.opensearch.repositories.blobstore.BlobStoreRepository;
+import org.opensearch.snapshots.SnapshotId;
+import org.opensearch.threadpool.ThreadPool;
+
+/**
+ * Factory for a Directory implementation that can read directly from index
+ * data stored remotely in a blob store repository.
+ *
+ * @opensearch.internal
+ */
+public final class RemoteSnapshotDirectoryFactory implements IndexStorePlugin.DirectoryFactory {
+    public static final String LOCAL_STORE_LOCATION = "RemoteLocalStore";
+
+    private final Supplier<RepositoriesService> repositoriesService;
+    private final ThreadPool threadPool;
+
+    public RemoteSnapshotDirectoryFactory(Supplier<RepositoriesService> repositoriesService, ThreadPool threadPool) {
+        this.repositoriesService = repositoriesService;
+        this.threadPool = threadPool;
+    }
+
+    @Override
+    public Directory newDirectory(IndexSettings indexSettings, ShardPath localShardPath) throws IOException {
+        final String repositoryName = IndexSettings.SEARCHABLE_SNAPSHOT_REPOSITORY.get(indexSettings.getSettings());
+        final Repository repository = repositoriesService.get().repository(repositoryName);
+        assert repository instanceof BlobStoreRepository : "repository should be instance of BlobStoreRepository";
+        final BlobStoreRepository blobStoreRepository = (BlobStoreRepository) repository;
+        try {
+            return createRemoteSnapshotDirectoryFromSnapshot(indexSettings, localShardPath, blobStoreRepository).get();
+        } catch (InterruptedException | ExecutionException e) {
+            throw new IllegalStateException(e);
+        }
+    }
+
+    private Future<RemoteSnapshotDirectory> createRemoteSnapshotDirectoryFromSnapshot(
+        IndexSettings indexSettings,
+        ShardPath localShardPath,
+        BlobStoreRepository blobStoreRepository
+    ) throws IOException {
+        final BlobPath blobPath = new BlobPath().add("indices")
+            .add(IndexSettings.SEARCHABLE_SNAPSHOT_INDEX_ID.get(indexSettings.getSettings()))
+            .add(Integer.toString(localShardPath.getShardId().getId()));
+        final SnapshotId snapshotId = new SnapshotId(
+            IndexSettings.SEARCHABLE_SNAPSHOT_ID_NAME.get(indexSettings.getSettings()),
+            IndexSettings.SEARCHABLE_SNAPSHOT_ID_UUID.get(indexSettings.getSettings())
+        );
+        Path localStorePath = localShardPath.getDataPath().resolve(LOCAL_STORE_LOCATION);
+        FSDirectory localStoreDir = FSDirectory.open(Files.createDirectories(localStorePath));
+        // make sure directory is flushed to persistent storage
+        localStoreDir.syncMetaData();
+        // this trick is needed to bypass assertions in BlobStoreRepository::assertAllowableThreadPools when a node restarts and a remote
+        // index restore is invoked
+        return threadPool.executor(ThreadPool.Names.SNAPSHOT).submit(() -> {
+            final BlobContainer blobContainer = blobStoreRepository.blobStore().blobContainer(blobPath);
+            final BlobStoreIndexShardSnapshot snapshot = blobStoreRepository.loadShardSnapshot(blobContainer, snapshotId);
+            TransferManager transferManager = new TransferManager(blobContainer, threadPool.executor(ThreadPool.Names.SEARCH));
+            return new RemoteSnapshotDirectory(snapshot, localStoreDir, transferManager);
+        });
+    }
+}
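A hedged sketch of the factory contract, to make the call flow concrete. In practice the wiring happens inside `IndexModule`/`Node`; every variable here is an assumption for illustration.

```java
import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.shard.ShardPath;
import org.opensearch.index.store.remote.directory.RemoteSnapshotDirectoryFactory;
import org.opensearch.repositories.RepositoriesService;
import org.opensearch.threadpool.ThreadPool;

class RemoteSnapshotDirectoryFactoryUsageSketch {
    static Directory open(RepositoriesService repositoriesService, ThreadPool threadPool,
                          IndexSettings indexSettings, ShardPath shardPath) throws IOException {
        RemoteSnapshotDirectoryFactory factory = new RemoteSnapshotDirectoryFactory(() -> repositoriesService, threadPool);
        // Resolves the repository and snapshot from the index settings, loads the shard
        // snapshot metadata on the SNAPSHOT pool, and blocks until the directory is ready.
        return factory.newDirectory(indexSettings, shardPath);
    }
}
```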
diff --git a/server/src/main/java/org/opensearch/index/store/remote/directory/package-info.java b/server/src/main/java/org/opensearch/index/store/remote/directory/package-info.java
new file mode 100644
index 0000000000000..02c53943ecef8
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/store/remote/directory/package-info.java
@@ -0,0 +1,12 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/**
+ * Package containing classes to create remote snapshot directories
+ */
+package org.opensearch.index.store.remote.directory;
diff --git a/server/src/main/java/org/opensearch/index/store/remote/file/OnDemandBlockIndexInput.java b/server/src/main/java/org/opensearch/index/store/remote/file/OnDemandBlockIndexInput.java
new file mode 100644
index 0000000000000..df05d33e8fde9
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/store/remote/file/OnDemandBlockIndexInput.java
@@ -0,0 +1,411 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.store.remote.file;
+
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.RandomAccessInput;
+
+import java.io.EOFException;
+import java.io.IOException;
+
+/**
+ * This class acts as a virtual file mechanism for the accessed files and only fetches the required blocks of the actual file.
+ * The original/main IndexInput file will be split using {@link OnDemandBlockIndexInput.Builder#DEFAULT_BLOCK_SIZE_SHIFT}. This class has all the
+ * logic of how and when to fetch a specific block of the main file. Each block is identified by {@link OnDemandBlockIndexInput#currentBlockId}.
+ *
+ * This class delegates the responsibility of actually fetching the block when demanded to its subclasses using
+ * {@link OnDemandBlockIndexInput#fetchBlock(int)}.
+ *
+ * @opensearch.internal
+ */
+abstract class OnDemandBlockIndexInput extends IndexInput implements RandomAccessInput {
+    /**
+     * Start offset of the virtual file : non-zero in the slice case
+     */
+    protected final long offset;
+    /**
+     * Length of the virtual file, smaller than actual file size if it's a slice
+     */
+    protected final long length;
+
+    /**
+     * Whether this index input is a clone or otherwise the root file before slicing
+     */
+    protected final boolean isClone;
+
+    // Variables needed for block calculation and fetching logic
+    /**
+     * Block size shift (default value is 13 = 8KB)
+     */
+    protected final int blockSizeShift;
+
+    /**
+     * Fixed block size
+     */
+    protected final int blockSize;
+
+    /**
+     * Block mask
+     */
+    protected final int blockMask;
+
+    // Variables for actual held open block
+    /**
+     * Current block for read, it should be a cloned block always
+     */
+    protected IndexInput currentBlock;
+
+    /**
+     * ID of the current block
+     */
+    protected int currentBlockId;
+
+    OnDemandBlockIndexInput(Builder builder) {
+        super(builder.resourceDescription);
+        this.isClone = builder.isClone;
+        this.offset = builder.offset;
+        this.length = builder.length;
+        this.blockSizeShift = builder.blockSizeShift;
+        this.blockSize = builder.blockSize;
+        this.blockMask = builder.blockMask;
+    }
+
+    /**
+     * Builds the actual sliced IndexInput (may apply extra offset in subclasses).
+     **/
+    protected abstract OnDemandBlockIndexInput buildSlice(String sliceDescription, long offset, long length);
+
+    /**
+     * Given a blockId, fetch its IndexInput, which might be a partial/split/cloned one
+     * @param blockId to fetch for
+     * @return fetched IndexInput
+     */
+    protected abstract IndexInput fetchBlock(int blockId) throws IOException;
+
+    @Override
+    public OnDemandBlockIndexInput clone() {
+        OnDemandBlockIndexInput clone = buildSlice("clone", offset, length());
+        // Ensures that clones may be positioned at the same point as the blocked file they were cloned from
+        if (currentBlock != null) {
+            clone.currentBlock = currentBlock.clone();
+            clone.currentBlockId = currentBlockId;
+        }
+
+        return clone;
+    }
+
+    @Override
+    public IndexInput slice(String sliceDescription, long offset, long length) throws IOException {
+        if (offset < 0 || length < 0 || offset + length > this.length()) {
+            throw new IllegalArgumentException(
+                "slice() "
+                    + sliceDescription
+                    + " out of bounds: offset="
+                    + offset
+                    + ",length="
+                    + length
+                    + ",fileLength="
+                    + this.length()
+                    + ": "
+                    + this
+            );
+        }
+
+        // The slice is seeked to the beginning.
+        return buildSlice(sliceDescription, offset, length);
+    }
+
+    @Override
+    public void close() throws IOException {
+        // current block
+        if (currentBlock != null) {
+            currentBlock.close();
+            currentBlock = null;
+            currentBlockId = 0;
+        }
+    }
+
+    @Override
+    public long getFilePointer() {
+        if (currentBlock == null) return 0L;
+        return currentBlockStart() + currentBlockPosition() - offset;
+    }
+
+    @Override
+    public long length() {
+        return length;
+    }
+
+    @Override
+    public byte readByte() throws IOException {
+        if (currentBlock == null) {
+            // seek to the beginning
+            seek(0);
+        } else if (currentBlockPosition() >= blockSize) {
+            int blockId = currentBlockId + 1;
+            demandBlock(blockId);
+        }
+        return currentBlock.readByte();
+    }
+
+    @Override
+    public short readShort() throws IOException {
+        if (currentBlock != null && Short.BYTES <= (blockSize - currentBlockPosition())) {
+            return currentBlock.readShort();
+        } else {
+            return super.readShort();
+        }
+    }
+
+    @Override
+    public int readInt() throws IOException {
+        if (currentBlock != null && Integer.BYTES <= (blockSize - currentBlockPosition())) {
+            return currentBlock.readInt();
+        } else {
+            return super.readInt();
+        }
+    }
+
+    @Override
+    public long readLong() throws IOException {
+        if (currentBlock != null && Long.BYTES <= (blockSize - currentBlockPosition())) {
+            return currentBlock.readLong();
+        } else {
+            return super.readLong();
+        }
+    }
+
+    @Override
+    public final int readVInt() throws IOException {
+        if (currentBlock != null && 5 <= (blockSize - currentBlockPosition())) {
+            return currentBlock.readVInt();
+        } else {
+            return super.readVInt();
+        }
+    }
+
+    @Override
+    public final long readVLong() throws IOException {
+        if (currentBlock != null && 9 <= (blockSize - currentBlockPosition())) {
+            return currentBlock.readVLong();
+        } else {
+            return super.readVLong();
+        }
+    }
+
+    @Override
+    public void seek(long pos) throws IOException {
+        if (pos > length()) {
+            throw new EOFException("read past EOF: pos=" + pos + " vs length=" + length() + ": " + this);
+        }
+
+        seekInternal(pos + offset);
+    }
+
+    @Override
+    public final byte readByte(long pos) throws IOException {
+        // adjust the pos if it's sliced
+        pos = pos + offset;
+        if (currentBlock != null && isInCurrentBlockRange(pos)) {
+            // the block contains the byte
+            return ((RandomAccessInput) currentBlock).readByte(getBlockOffset(pos));
+        } else {
+            // the block does not have the byte, seek to the pos first
+            seekInternal(pos);
+            // then read the byte
+            return currentBlock.readByte();
+        }
+    }
+
+    @Override
+    public short readShort(long pos) throws IOException {
+        // adjust the pos if it's sliced
+        pos = pos + offset;
+        if (currentBlock != null && isInCurrentBlockRange(pos, Short.BYTES)) {
+            // the block contains enough data to satisfy this request
+            return ((RandomAccessInput) currentBlock).readShort(getBlockOffset(pos));
+        } else {
+            // the block does not have enough data, seek to the pos first
+            seekInternal(pos);
+            // then read the data
+            return super.readShort();
+        }
+    }
+
+    @Override
+    public int readInt(long pos) throws IOException {
+        // adjust the pos if it's sliced
+        pos = pos + offset;
+        if (currentBlock != null && isInCurrentBlockRange(pos, Integer.BYTES)) {
+            // the block contains enough data to satisfy this request
+            return ((RandomAccessInput) currentBlock).readInt(getBlockOffset(pos));
+        } else {
+            // the block does not have enough data, seek to the pos first
+            seekInternal(pos);
+            // then read the data
+            return super.readInt();
+        }
+    }
+
+    @Override
+    public long readLong(long pos) throws IOException {
+        // adjust the pos if it's sliced
+        pos = pos + offset;
+        if (currentBlock != null && isInCurrentBlockRange(pos, Long.BYTES)) {
+            // the block contains enough data to satisfy this request
+            return ((RandomAccessInput) currentBlock).readLong(getBlockOffset(pos));
+        } else {
+            // the block does not have enough data, seek to the pos first
+            seekInternal(pos);
+            // then read the data
+            return super.readLong();
+        }
+    }
+
+    @Override
+    public final void readBytes(byte[] b, int offset, int len) throws IOException {
+        if (currentBlock == null) {
+            // lazy seek to the beginning
+            seek(0);
+        }
+
+        int available = blockSize - currentBlockPosition();
+        if (len <= available) {
+            // the block contains enough data to satisfy this request
+            currentBlock.readBytes(b, offset, len);
+        } else {
+            // the block does not have enough data. First serve all we've got.
+            if (available > 0) {
+                currentBlock.readBytes(b, offset, available);
+                offset += available;
+                len -= available;
+            }
+
+            // and now, read the remaining 'len' bytes:
+            // len > blocksize example: FST
+            while (len > 0) {
+                int blockId = currentBlockId + 1;
+                int toRead = Math.min(len, blockSize);
+                demandBlock(blockId);
+                currentBlock.readBytes(b, offset, toRead);
+                offset += toRead;
+                len -= toRead;
+            }
+        }
+
+    }
+
+    /**
+     * Seek to a block position, download the block if it's necessary
+     * NOTE: the pos should be an adjusted position for slices
+     */
+    private void seekInternal(long pos) throws IOException {
+        if (currentBlock == null || !isInCurrentBlockRange(pos)) {
+            demandBlock(getBlock(pos));
+        }
+        currentBlock.seek(getBlockOffset(pos));
+    }
+
+    /**
+     * Check if pos is in the current block range
+     * NOTE: the pos should be an adjusted position for slices
+     */
+    private boolean isInCurrentBlockRange(long pos) {
+        long offset = pos - currentBlockStart();
+        return offset >= 0 && offset < blockSize;
+    }
+
+    /**
+     * Check if [pos, pos + len) is in the current block range
+     * NOTE: the pos should be an adjusted position for slices
+     */
+    private boolean isInCurrentBlockRange(long pos, int len) {
+        long offset = pos - currentBlockStart();
+        return offset >= 0 && (offset + len) <= blockSize;
+    }
+
+    private void demandBlock(int blockId) throws IOException {
+        if (currentBlock != null && currentBlockId == blockId) return;
+
+        // close the current block before jumping to the new block
+        if (currentBlock != null) {
+            currentBlock.close();
+        }
+
+        currentBlock = fetchBlock(blockId).clone();
+        currentBlockId = blockId;
+    }
+
+    protected int getBlock(long pos) {
+        return (int) (pos >>> blockSizeShift);
+    }
+
+    protected int getBlockOffset(long pos) {
+        return (int) (pos & blockMask);
+    }
+
+    protected long getBlockStart(int blockId) {
+        return (long) blockId << blockSizeShift;
+    }
+
+    protected long currentBlockStart() {
+        return getBlockStart(currentBlockId);
+    }
+
+    protected int currentBlockPosition() {
+        return (int) currentBlock.getFilePointer();
+    }
+
+    public static Builder builder() {
+        return new Builder();
+    }
+
+    static class Builder {
+        // Block size shift (default value is 13 = 8KB)
+        public static final int DEFAULT_BLOCK_SIZE_SHIFT = 13;
+
+        private String resourceDescription;
+        private boolean isClone;
+        private long offset;
+        private long length;
+        private int blockSizeShift = DEFAULT_BLOCK_SIZE_SHIFT;
+        private int blockSize = 1 << blockSizeShift;
+        private int blockMask = blockSize - 1;
+
+        private Builder() {}
+
+        public Builder resourceDescription(String resourceDescription) {
+            this.resourceDescription = resourceDescription;
+            return this;
+        }
+
+        public Builder isClone(boolean clone) {
+            isClone = clone;
+            return this;
+        }
+
+        public Builder offset(long offset) {
+            this.offset = offset;
+            return this;
+        }
+
+        public Builder length(long length) {
+            this.length = length;
+            return this;
+        }
+
+        public Builder blockSizeShift(int blockSizeShift) {
+            assert blockSizeShift < 31 : "blockSizeShift must be < 31";
+            this.blockSizeShift = blockSizeShift;
+            this.blockSize = 1 << blockSizeShift;
+            this.blockMask = blockSize - 1;
+            return this;
+        }
+    }
+}
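The shift/mask arithmetic above is the heart of the class. A standalone sketch with the default 8 KB blocks makes it concrete; the position value is picked arbitrarily for illustration.

```java
// Plain Java, runnable standalone; mirrors getBlock/getBlockOffset/getBlockStart above.
public class BlockMathDemo {
    public static void main(String[] args) {
        final int blockSizeShift = 13;                       // 2^13 = 8192 bytes per block
        final int blockSize = 1 << blockSizeShift;
        final int blockMask = blockSize - 1;

        long pos = 20_000L;                                  // an absolute file position
        int blockId = (int) (pos >>> blockSizeShift);        // getBlock(pos)       -> 2
        int offsetInBlock = (int) (pos & blockMask);         // getBlockOffset(pos) -> 3616
        long blockStart = (long) blockId << blockSizeShift;  // getBlockStart(id)   -> 16384

        assert blockStart + offsetInBlock == pos;
        System.out.printf("pos=%d -> block=%d, offset=%d, blockStart=%d%n", pos, blockId, offsetInBlock, blockStart);
    }
}
```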
diff --git a/server/src/main/java/org/opensearch/index/store/remote/file/OnDemandBlockSnapshotIndexInput.java b/server/src/main/java/org/opensearch/index/store/remote/file/OnDemandBlockSnapshotIndexInput.java
new file mode 100644
index 0000000000000..6d5fb20f23fb1
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/store/remote/file/OnDemandBlockSnapshotIndexInput.java
@@ -0,0 +1,172 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.store.remote.file;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.apache.logging.log4j.message.ParameterizedMessage;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.IndexInput;
+import org.opensearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot.FileInfo;
+import org.opensearch.index.store.remote.utils.BlobFetchRequest;
+import org.opensearch.index.store.remote.utils.TransferManager;
+
+import java.io.IOException;
+import java.util.concurrent.ExecutionException;
+
+/**
+ * This is an implementation of {@link OnDemandBlockIndexInput} where this class provides the main IndexInput using shard snapshot files.
+ *
+ * This class relies on {@link TransferManager} to fetch the snapshot files from the remote blob store and may cache them
+ *
+ * @opensearch.internal
+ */
+public class OnDemandBlockSnapshotIndexInput extends OnDemandBlockIndexInput {
+    private static final Logger logger = LogManager.getLogger(OnDemandBlockSnapshotIndexInput.class);
+
+    /**
+     * Where this class fetches IndexInput parts from
+     */
+    final TransferManager transferManager;
+
+    /**
+     * FileInfo contains snapshot metadata references for this IndexInput
+     */
+    protected final FileInfo fileInfo;
+
+    /**
+     * Underlying lucene directory to open blocks and for caching
+     */
+    protected final FSDirectory directory;
+    /**
+     * File Name
+     */
+    protected final String fileName;
+
+    /**
+     * part size in bytes
+     */
+    protected final long partSize;
+
+    /**
+     * Size of the file, larger than length if it's a slice
+     */
+    protected final long originalFileSize;
+
+    public OnDemandBlockSnapshotIndexInput(FileInfo fileInfo, FSDirectory directory, TransferManager transferManager) {
+        this(
+            "BlockedSnapshotIndexInput(path=\""
+                + directory.getDirectory().toString()
+                + "/"
+                + fileInfo.physicalName()
+                + "\", "
+                + "offset="
+                + 0
+                + ", length= "
+                + fileInfo.length()
+                + ")",
+            fileInfo,
+            0L,
+            fileInfo.length(),
+            false,
+            directory,
+            transferManager
+        );
+    }
+
+    public OnDemandBlockSnapshotIndexInput(
+        String resourceDescription,
+        FileInfo fileInfo,
+        long offset,
+        long length,
+        boolean isClone,
+        FSDirectory directory,
+        TransferManager transferManager
+    ) {
+        this(
+            OnDemandBlockIndexInput.builder().resourceDescription(resourceDescription).isClone(isClone).offset(offset).length(length),
+            fileInfo,
+            directory,
+            transferManager
+        );
+    }
+
+    OnDemandBlockSnapshotIndexInput(
+        OnDemandBlockIndexInput.Builder builder,
+        FileInfo fileInfo,
+        FSDirectory directory,
+        TransferManager transferManager
+    ) {
+        super(builder);
+        this.transferManager = transferManager;
+        this.fileInfo = fileInfo;
+        this.partSize = fileInfo.partSize().getBytes();
+        this.fileName = fileInfo.physicalName();
+        this.directory = directory;
+        this.originalFileSize = fileInfo.length();
+    }
+
+    @Override
+    protected OnDemandBlockSnapshotIndexInput buildSlice(String sliceDescription, long offset, long length) {
+        return new OnDemandBlockSnapshotIndexInput(
+            OnDemandBlockIndexInput.builder()
+                .blockSizeShift(blockSizeShift)
+                .isClone(true)
+                .offset(this.offset + offset)
+                .length(length)
+                .resourceDescription(sliceDescription),
+            fileInfo,
+            directory,
+            transferManager
+        );
+    }
+
+    @Override
+    protected IndexInput fetchBlock(int blockId) throws IOException {
+        final String blockFileName = fileName + "." + blockId;
+
+        final long blockStart = getBlockStart(blockId);
+        final long blockEnd = blockStart + getActualBlockSize(blockId);
+        final int part = (int) (blockStart / partSize);
+        final long partStart = part * partSize;
+
+        // position of the block within its part blob, and the number of bytes to fetch
+        final long position = blockStart - partStart;
+        final long length = blockEnd - blockStart;
+
+        BlobFetchRequest blobFetchRequest = BlobFetchRequest.builder()
+            .position(position)
+            .length(length)
+            .blobName(fileInfo.partName(part))
+            .directory(directory)
+            .fileName(blockFileName)
+            .build();
+        try {
+            return transferManager.asyncFetchBlob(blobFetchRequest).get();
+        } catch (InterruptedException | ExecutionException e) {
+            logger.error(() -> new ParameterizedMessage("unexpected failure while fetching [{}]", blobFetchRequest), e);
+            throw new IllegalStateException(e);
+        }
+    }
+
+    @Override
+    public OnDemandBlockSnapshotIndexInput clone() {
+        OnDemandBlockSnapshotIndexInput clone = buildSlice("clone", 0L, this.length);
+        // ensures that clones may be positioned at the same point as the blocked file they were cloned from
+        if (currentBlock != null) {
+            clone.currentBlock = currentBlock.clone();
+            clone.currentBlockId = currentBlockId;
+        }
+
+        return clone;
+    }
+
+    protected long getActualBlockSize(int blockId) {
+        return (blockId != getBlock(originalFileSize - 1)) ? blockSize : getBlockOffset(originalFileSize - 1) + 1;
+    }
+}
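To see how `fetchBlock()` maps a logical block onto a snapshot part blob, here is a standalone sketch with hypothetical sizes (8 KB blocks, 16 KB parts); note the fetch length is simply `blockEnd - blockStart`, while `position` is the block's offset inside its part.

```java
// Plain Java, runnable standalone; sizes are assumptions for illustration.
public class BlockToPartDemo {
    public static void main(String[] args) {
        final long partSize = 16 * 1024;                     // hypothetical snapshot part size
        final int blockSizeShift = 13;                       // 8 KB blocks, as in the default
        final long blockSize = 1L << blockSizeShift;

        int blockId = 3;                                     // the fourth 8 KB block
        long blockStart = (long) blockId << blockSizeShift;  // 24576
        long blockEnd = blockStart + blockSize;              // 32768 (a full-size block)
        int part = (int) (blockStart / partSize);            // part 1
        long partStart = part * partSize;                    // 16384

        long position = blockStart - partStart;              // 8192: offset inside the part blob
        long length = blockEnd - blockStart;                 // 8192: bytes to fetch
        System.out.printf("block %d -> part %d, position=%d, length=%d%n", blockId, part, position, length);
    }
}
```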
diff --git a/server/src/main/java/org/opensearch/index/store/remote/file/OnDemandVirtualFileSnapshotIndexInput.java b/server/src/main/java/org/opensearch/index/store/remote/file/OnDemandVirtualFileSnapshotIndexInput.java
new file mode 100644
index 0000000000000..da3478561a2c7
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/store/remote/file/OnDemandVirtualFileSnapshotIndexInput.java
@@ -0,0 +1,139 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.store.remote.file;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.apache.logging.log4j.message.ParameterizedMessage;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.IndexInput;
+import org.opensearch.common.lucene.store.ByteArrayIndexInput;
+import org.opensearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot.FileInfo;
+import org.opensearch.index.store.remote.utils.TransferManager;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.concurrent.ExecutionException;
+
+/**
+ * This is an implementation of {@link OnDemandBlockIndexInput} where this class provides the main IndexInput using shard snapshot virtual
+ * files; it reads the virtual file from memory and writes it to disk.
+ *
+ * @opensearch.internal
+ */
+public class OnDemandVirtualFileSnapshotIndexInput extends OnDemandBlockIndexInput {
+    private static final Logger logger = LogManager.getLogger(OnDemandVirtualFileSnapshotIndexInput.class);
+
+    // 2^30 should keep the virtual file in memory un-partitioned when written to disk
+    private static final int BLOCK_SIZE_SHIFT = 30;
+
+    /**
+     * Where this class fetches IndexInput parts from
+     */
+    private final TransferManager transferManager;
+
+    /**
+     * FileInfo contains snapshot metadata references for this IndexInput
+     */
+    private final FileInfo fileInfo;
+
+    /**
+     * underlying lucene directory to open file
+     */
+    protected final FSDirectory directory;
+
+    /**
+     * file name
+     */
+    protected final String fileName;
+
+    public OnDemandVirtualFileSnapshotIndexInput(FileInfo fileInfo, FSDirectory directory, TransferManager transferManager) {
+        this(
+            "VirtualFileSnapshotIndexInput(path=\""
+                + directory.getDirectory().toString()
+                + "/"
+                + fileInfo.physicalName()
+                + "\", "
+                + "offset="
+                + 0
+                + ", length= "
+                + fileInfo.length()
+                + ")",
+            fileInfo,
+            directory,
+            transferManager,
+            fileInfo.physicalName(),
+            0L,
+            fileInfo.length(),
+            false
+        );
+    }
+
+    public OnDemandVirtualFileSnapshotIndexInput(
+        String resourceDescription,
+        FileInfo fileInfo,
+        FSDirectory directory,
+        TransferManager transferManager,
+        String fileName,
+        long offset,
+        long length,
+        boolean isClone
+    ) {
+        super(
+            OnDemandBlockIndexInput.builder()
+                .resourceDescription(resourceDescription)
+                .isClone(isClone)
+                .offset(offset)
+                .length(length)
+                .blockSizeShift(BLOCK_SIZE_SHIFT)
+        );
+        this.fileInfo = fileInfo;
+        this.directory = directory;
+        this.fileName = fileName;
+        this.transferManager = transferManager;
+    }
+
+    @Override
+    protected OnDemandVirtualFileSnapshotIndexInput buildSlice(String sliceDescription, long offset, long length) {
+        return new OnDemandVirtualFileSnapshotIndexInput(
+            sliceDescription,
+            this.fileInfo,
+            this.directory,
+            this.transferManager,
+            this.fileName,
+            offset,
+            length,
+            true
+        );
+    }
+
+    @Override
+    public OnDemandVirtualFileSnapshotIndexInput clone() {
+        OnDemandVirtualFileSnapshotIndexInput clone = buildSlice("clone", 0L, this.length);
+        // ensures that clones may be positioned at the same point as the blocked file they were cloned from
+        if (currentBlock != null) {
+            clone.currentBlock = currentBlock.clone();
+            clone.currentBlockId = currentBlockId;
+        }
+        return clone;
+    }
+
+    @Override
+    protected IndexInput fetchBlock(int blockId) throws IOException {
+        // will always have one block.
+        final Path filePath = directory.getDirectory().resolve(fileName);
+        try {
+            return transferManager.asyncFetchBlob(filePath, () -> new ByteArrayIndexInput(fileName, fileInfo.metadata().hash().bytes))
+                .get();
+        } catch (InterruptedException | ExecutionException e) {
+            logger.error(() -> new ParameterizedMessage("unexpected failure while fetching [{}]", filePath), e);
+            throw new IllegalStateException(e);
+        }
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/store/remote/file/package-info.java b/server/src/main/java/org/opensearch/index/store/remote/file/package-info.java
new file mode 100644
index 0000000000000..469df9a2f893f
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/store/remote/file/package-info.java
@@ -0,0 +1,12 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/**
+ * Package containing classes that abstract IndexInput as on-demand blocked (partitioned) files backed by a remote blob store
+ */
+package org.opensearch.index.store.remote.file;
diff --git a/server/src/main/java/org/opensearch/index/store/remote/package-info.java b/server/src/main/java/org/opensearch/index/store/remote/package-info.java
new file mode 100644
index 0000000000000..69e01748a5231
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/store/remote/package-info.java
@@ -0,0 +1,12 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/**
+ * Package containing classes that abstract access to IndexInput as on-demand block files and as remotely stored files
+ */
+package org.opensearch.index.store.remote;
diff --git a/server/src/main/java/org/opensearch/index/store/remote/utils/BlobFetchRequest.java b/server/src/main/java/org/opensearch/index/store/remote/utils/BlobFetchRequest.java
new file mode 100644
index 0000000000000..745c9e701cda4
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/store/remote/utils/BlobFetchRequest.java
@@ -0,0 +1,133 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.store.remote.utils;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+
+import java.nio.file.Path;
+
+/**
+ * The specification to fetch a specific block from the blob store
+ *
+ * @opensearch.internal
+ */
+public class BlobFetchRequest {
+
+    private final long position;
+
+    private final long length;
+
+    private final String blobName;
+
+    private final Path filePath;
+
+    private final Directory directory;
+
+    private final String fileName;
+
+    private BlobFetchRequest(Builder builder) {
+        this.position = builder.position;
+        this.length = builder.length;
+        this.blobName = builder.blobName;
+        this.fileName = builder.fileName;
+        this.filePath = builder.directory.getDirectory().resolve(fileName);
+        this.directory = builder.directory;
+    }
+
+    public long getPosition() {
+        return position;
+    }
+
+    public long getLength() {
+        return length;
+    }
+
+    public String getBlobName() {
+        return blobName;
+    }
+
+    public Path getFilePath() {
+        return filePath;
+    }
+
+    public Directory getDirectory() {
+        return directory;
+    }
+
+    public String getFileName() {
+        return fileName;
+    }
+
+    public static Builder builder() {
+        return new Builder();
+    }
+
+    @Override
+    public String toString() {
+        return "BlobFetchRequest{"
+            + "position="
+            + position
+            + ", length="
+            + length
+            + ", blobName='"
+            + blobName
+            + '\''
+            + ", filePath="
+            + filePath
+            + ", directory="
+            + directory
+            + ", fileName='"
+            + fileName
+            + '\''
+            + '}';
+    }
+
+    /**
+     * Builder for BlobFetchRequest
+     */
+    public static final class Builder {
+        private long position;
+        private long length;
+        private String blobName;
+        private FSDirectory directory;
+        private String fileName;
+
+        private Builder() {}
+
+        public Builder position(long position) {
+            this.position = position;
+            return this;
+        }
+
+        public Builder length(long length) {
+            this.length = length;
+            return this;
+        }
+
+        public Builder blobName(String blobName) {
+            this.blobName = blobName;
+            return this;
+        }
+
+        public Builder directory(FSDirectory directory) {
+            this.directory = directory;
+            return this;
+        }
+
+        public Builder fileName(String fileName) {
+            this.fileName = fileName;
+            return this;
+        }
+
+        public BlobFetchRequest build() {
+            return new BlobFetchRequest(this);
+        }
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/store/remote/utils/ConcurrentInvocationLinearizer.java b/server/src/main/java/org/opensearch/index/store/remote/utils/ConcurrentInvocationLinearizer.java
new file mode 100644
index 0000000000000..60e5a4a62c7bb
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/store/remote/utils/ConcurrentInvocationLinearizer.java
@@ -0,0 +1,56 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.store.remote.utils;
+
+import java.util.Collections;
+import java.util.Map;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.ExecutorService;
+import java.util.function.Function;
+
+/**
+ * A utility class which can be used to serialize concurrent invocations and to achieve "invoke simultaneously once at any time" semantic
+ *
+ * @param <METHOD_PARAM_TYPE> the method parameter type where this method invocation will be linearized
+ * @param <RETURN_TYPE> return type of the method
+ * @opensearch.internal
+ */
+public class ConcurrentInvocationLinearizer<METHOD_PARAM_TYPE, RETURN_TYPE> {
+    private final ConcurrentMap<METHOD_PARAM_TYPE, CompletableFuture<RETURN_TYPE>> invokeOnceCache;
+    private final ExecutorService executorService;
+
+    /**
+     * Constructs the object
+     *
+     * @param executorService which will be used to execute the concurrent invocations
+     */
+    public ConcurrentInvocationLinearizer(ExecutorService executorService) {
+        this.invokeOnceCache = new ConcurrentHashMap<>();
+        this.executorService = executorService;
+    }
+
+    /**
+     * @param input the argument to the method
+     * @param function delegate to actual function/method
+     * @return return value of the function
+     */
+    public CompletableFuture<RETURN_TYPE> linearize(METHOD_PARAM_TYPE input, Function<METHOD_PARAM_TYPE, RETURN_TYPE> function) {
+        return invokeOnceCache.computeIfAbsent(input, in -> CompletableFuture.supplyAsync(() -> function.apply(in), executorService))
+            .whenComplete((ret, throwable) -> {
+                // whenComplete will always be executed (when run normally or when exception happen)
+                invokeOnceCache.remove(input);
+            });
+    }
+
+    Map<METHOD_PARAM_TYPE, CompletableFuture<RETURN_TYPE>> getInvokeOnceCache() {
+        return Collections.unmodifiableMap(invokeOnceCache);
+    }
+}
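A small standalone sketch of the "invoke simultaneously once" behaviour: two concurrent requests for the same key share one underlying invocation. The slow loader is an assumed stand-in for a remote blob read.

```java
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.function.Function;

import org.opensearch.index.store.remote.utils.ConcurrentInvocationLinearizer;

public class LinearizerDemo {
    public static void main(String[] args) throws Exception {
        ExecutorService pool = Executors.newFixedThreadPool(4);
        ConcurrentInvocationLinearizer<String, String> linearizer = new ConcurrentInvocationLinearizer<>(pool);

        // Assumed stand-in for a slow remote read
        Function<String, String> slowLoad = key -> {
            try {
                Thread.sleep(100);
            } catch (InterruptedException ignored) {}
            return "blob-for-" + key;
        };

        // Both calls arrive while the first invocation is still in flight, so
        // slowLoad runs once for "block-0" and both futures see its result; the
        // cache entry is removed again once the invocation completes.
        CompletableFuture<String> f1 = linearizer.linearize("block-0", slowLoad);
        CompletableFuture<String> f2 = linearizer.linearize("block-0", slowLoad);
        System.out.println(f1.get() + " / " + f2.get());
        pool.shutdown();
    }
}
```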
diff --git a/server/src/main/java/org/opensearch/index/store/remote/utils/TransferManager.java b/server/src/main/java/org/opensearch/index/store/remote/utils/TransferManager.java
new file mode 100644
index 0000000000000..b0581215017b0
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/store/remote/utils/TransferManager.java
@@ -0,0 +1,68 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.store.remote.utils;
+
+import org.apache.lucene.store.IndexInput;
+import org.opensearch.common.blobstore.BlobContainer;
+import org.opensearch.common.lucene.store.ByteArrayIndexInput;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Path;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ExecutorService;
+import java.util.function.Supplier;
+
+/**
+ * This acts as the entry point to fetch a {@link BlobFetchRequest} and return the actual {@link IndexInput}. It utilizes the BlobContainer
+ * interface to read snapshot files located within a repository, essentially adapting BlobContainer snapshot files into IndexInput
+ *
+ * @opensearch.internal
+ */
+public class TransferManager {
+    private final BlobContainer blobContainer;
+    private final ConcurrentInvocationLinearizer<Path, IndexInput> invocationLinearizer;
+
+    public TransferManager(final BlobContainer blobContainer, final ExecutorService remoteStoreExecutorService) {
+        this.blobContainer = blobContainer;
+        this.invocationLinearizer = new ConcurrentInvocationLinearizer<>(remoteStoreExecutorService);
+    }
+
+    /**
+     * Given a blobFetchRequest, return its corresponding IndexInput.
+     * @param blobFetchRequest to fetch
+     * @return future of IndexInput augmented with internal caching maintenance tasks
+     */
+    public CompletableFuture<IndexInput> asyncFetchBlob(BlobFetchRequest blobFetchRequest) {
+        return asyncFetchBlob(blobFetchRequest.getFilePath(), () -> {
+            try {
+                return fetchBlob(blobFetchRequest);
+            } catch (IOException e) {
+                throw new IllegalStateException(e);
+            }
+        });
+    }
+
+    public CompletableFuture<IndexInput> asyncFetchBlob(Path path, Supplier<IndexInput> indexInputSupplier) {
+        return invocationLinearizer.linearize(path, p -> indexInputSupplier.get());
+    }
+
+    private IndexInput fetchBlob(BlobFetchRequest blobFetchRequest) throws IOException {
+        // for first phase, this is a simple remote repo blob read with no caching at all
+        try (
+            InputStream snapshotFileInputStream = blobContainer.readBlob(
+                blobFetchRequest.getBlobName(),
+                blobFetchRequest.getPosition(),
+                blobFetchRequest.getLength()
+            );
+        ) {
+            return new ByteArrayIndexInput(blobFetchRequest.getBlobName(), snapshotFileInputStream.readAllBytes());
+        }
+    }
+}
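Tying the two utilities together, a hedged usage sketch: the blob name, byte range, and local file name below are assumptions chosen to mirror how `OnDemandBlockSnapshotIndexInput` builds its requests.

```java
import java.util.concurrent.ExecutorService;

import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IndexInput;
import org.opensearch.common.blobstore.BlobContainer;
import org.opensearch.index.store.remote.utils.BlobFetchRequest;
import org.opensearch.index.store.remote.utils.TransferManager;

class TransferManagerUsageSketch {
    // All parameters are assumed to be provided by the caller.
    static IndexInput fetchOneBlock(BlobContainer blobContainer, ExecutorService executor, FSDirectory dir) throws Exception {
        TransferManager transferManager = new TransferManager(blobContainer, executor);
        BlobFetchRequest request = BlobFetchRequest.builder()
            .blobName("__part0")       // hypothetical snapshot part blob name
            .position(8192)            // block start within the part
            .length(8192)              // one default-sized 8 KB block
            .directory(dir)
            .fileName("_0.cfs.1")      // hypothetical local block file: <file>.<blockId>
            .build();
        return transferManager.asyncFetchBlob(request).get(); // blocks until fetched
    }
}
```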
diff --git a/server/src/main/java/org/opensearch/index/store/remote/utils/package-info.java b/server/src/main/java/org/opensearch/index/store/remote/utils/package-info.java
new file mode 100644
index 0000000000000..2b59166fed313
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/store/remote/utils/package-info.java
@@ -0,0 +1,12 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/**
+ * Package containing utilities used by the remote store
+ */
+package org.opensearch.index.store.remote.utils;
diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java
index fd77d9a7ba0b9..907e21d861582 100644
--- a/server/src/main/java/org/opensearch/node/Node.java
+++ b/server/src/main/java/org/opensearch/node/Node.java
@@ -607,7 +607,8 @@ protected Node(
             .collect(Collectors.toList());
 
         final Map<String, IndexStorePlugin.DirectoryFactory> builtInDirectoryFactories = IndexModule.createBuiltInDirectoryFactories(
-            repositoriesServiceReference::get
+            repositoriesServiceReference::get,
+            threadPool
         );
         final Map<String, IndexStorePlugin.DirectoryFactory> directoryFactories = new HashMap<>();
         pluginsService.filterPlugins(IndexStorePlugin.class)
diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java
index c36d92abcf498..3513cd6d00ce5 100644
--- a/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java
+++ b/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java
@@ -203,7 +203,7 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
      * {@link BlobStoreIndexShardSnapshots}. This is the case for files for which {@link StoreFileMetadata#hashEqualsContents()} is
      * {@code true}.
      */
-    private static final String VIRTUAL_DATA_BLOB_PREFIX = "v__";
+    public static final String VIRTUAL_DATA_BLOB_PREFIX = "v__";
 
     /**
      * When set to {@code true}, {@link #bestEffortConsistency} will be set to {@code true} and concurrent modifications of the repository
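A hedged sketch of what the Node.java change enables; the `"remote_snapshot"` store-type key and the exact `IndexModule` internals are assumptions, not shown in this diff.

```java
import java.util.HashMap;
import java.util.Map;
import java.util.function.Supplier;

import org.opensearch.index.store.remote.directory.RemoteSnapshotDirectoryFactory;
import org.opensearch.plugins.IndexStorePlugin;
import org.opensearch.repositories.RepositoriesService;
import org.opensearch.threadpool.ThreadPool;

class BuiltInDirectoryFactoriesSketch {
    static Map<String, IndexStorePlugin.DirectoryFactory> create(
        Supplier<RepositoriesService> repositoriesService,
        ThreadPool threadPool
    ) {
        Map<String, IndexStorePlugin.DirectoryFactory> factories = new HashMap<>();
        // hypothetical store-type key; the real constant lives in IndexModule
        factories.put("remote_snapshot", new RemoteSnapshotDirectoryFactory(repositoriesService, threadPool));
        return factories;
    }
}
```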
diff --git a/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestDecommissionAction.java b/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestDecommissionAction.java
new file mode 100644
index 0000000000000..979bf05f537b7
--- /dev/null
+++ b/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestDecommissionAction.java
@@ -0,0 +1,54 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.rest.action.admin.cluster;
+
+import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionRequest;
+import org.opensearch.client.Requests;
+import org.opensearch.client.node.NodeClient;
+import org.opensearch.cluster.decommission.DecommissionAttribute;
+import org.opensearch.rest.BaseRestHandler;
+import org.opensearch.rest.RestRequest;
+import org.opensearch.rest.action.RestToXContentListener;
+
+import java.io.IOException;
+import java.util.List;
+
+import static java.util.Collections.singletonList;
+import static org.opensearch.rest.RestRequest.Method.PUT;
+
+/**
+ * Registers the decommission action
+ *
+ * @opensearch.api
+ */
+public class RestDecommissionAction extends BaseRestHandler {
+
+    @Override
+    public List<Route> routes() {
+        return singletonList(new Route(PUT, "/_cluster/decommission/awareness/{awareness_attribute_name}/{awareness_attribute_value}"));
+    }
+
+    @Override
+    public String getName() {
+        return "decommission_action";
+    }
+
+    @Override
+    protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) throws IOException {
+        DecommissionRequest decommissionRequest = createRequest(request);
+        return channel -> client.admin().cluster().decommission(decommissionRequest, new RestToXContentListener<>(channel));
+    }
+
+    DecommissionRequest createRequest(RestRequest request) throws IOException {
+        DecommissionRequest decommissionRequest = Requests.decommissionRequest();
+        String attributeName = request.param("awareness_attribute_name");
+        String attributeValue = request.param("awareness_attribute_value");
+        return decommissionRequest.setDecommissionAttribute(new DecommissionAttribute(attributeName, attributeValue));
+    }
+}
diff --git a/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestDeleteDecommissionStateAction.java b/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestDeleteDecommissionStateAction.java
new file mode 100644
index 0000000000000..9fd7ae2248c30
--- /dev/null
+++ b/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestDeleteDecommissionStateAction.java
@@ -0,0 +1,52 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.rest.action.admin.cluster;
+
+import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateRequest;
+import org.opensearch.client.Requests;
+import org.opensearch.client.node.NodeClient;
+import org.opensearch.rest.BaseRestHandler;
+import org.opensearch.rest.RestRequest;
+import org.opensearch.rest.action.RestToXContentListener;
+
+import java.io.IOException;
+import java.util.List;
+
+import static java.util.Collections.singletonList;
+import static org.opensearch.rest.RestRequest.Method.DELETE;
+
+/**
+ * Clears the decommission metadata.
+ *
+ * @opensearch.api
+ */
+public class RestDeleteDecommissionStateAction extends BaseRestHandler {
+
+    @Override
+    public List<Route> routes() {
+        return singletonList(new Route(DELETE, "/_cluster/decommission/awareness"));
+    }
+
+    @Override
+    public String getName() {
+        return "delete_decommission_state_action";
+    }
+
+    @Override
+    protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) throws IOException {
+        DeleteDecommissionStateRequest deleteDecommissionStateRequest = createRequest();
+        return channel -> client.admin()
+            .cluster()
+            .deleteDecommissionState(deleteDecommissionStateRequest, new RestToXContentListener<>(channel));
+    }
+
+    DeleteDecommissionStateRequest createRequest() {
+        return Requests.deleteDecommissionStateRequest();
+    }
+}
diff --git a/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestGetDecommissionStateAction.java b/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestGetDecommissionStateAction.java
new file mode 100644
index 0000000000000..5d72adbd6ae08
--- /dev/null
+++ b/server/src/main/java/org/opensearch/rest/action/admin/cluster/RestGetDecommissionStateAction.java
@@ -0,0 +1,48 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.rest.action.admin.cluster;
+
+import org.opensearch.action.admin.cluster.decommission.awareness.get.GetDecommissionStateRequest;
+import org.opensearch.client.Requests;
+import org.opensearch.client.node.NodeClient;
+import org.opensearch.rest.BaseRestHandler;
+import org.opensearch.rest.RestRequest;
+import org.opensearch.rest.action.RestToXContentListener;
+
+import java.io.IOException;
+import java.util.List;
+
+import static java.util.Collections.singletonList;
+import static org.opensearch.rest.RestRequest.Method.GET;
+
+/**
+ * Returns decommissioned attribute information
+ *
+ * @opensearch.api
+ */
+public class RestGetDecommissionStateAction extends BaseRestHandler {
+
+    @Override
+    public List<Route> routes() {
+        return singletonList(new Route(GET, "/_cluster/decommission/awareness/{awareness_attribute_name}/_status"));
+    }
+
+    @Override
+    public String getName() {
+        return "get_decommission_state_action";
+    }
+
+    @Override
+    public RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) throws IOException {
+        GetDecommissionStateRequest getDecommissionStateRequest = Requests.getDecommissionStateRequest();
+        String attributeName = request.param("awareness_attribute_name");
+        getDecommissionStateRequest.attributeName(attributeName);
+        return channel -> client.admin().cluster().getDecommissionState(getDecommissionStateRequest, new RestToXContentListener<>(channel));
+    }
+}
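A hedged sketch of driving the three handlers above through the client API they delegate to; `client` is an assumed `Client` instance, and the acknowledged-style responses are assumptions based on the request/response classes in this diff.

```java
import org.opensearch.action.ActionListener;
import org.opensearch.action.admin.cluster.decommission.awareness.get.GetDecommissionStateRequest;
import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionRequest;
import org.opensearch.client.Client;
import org.opensearch.client.Requests;
import org.opensearch.cluster.decommission.DecommissionAttribute;

class DecommissionApiUsageSketch {
    static void run(Client client) {
        // PUT /_cluster/decommission/awareness/zone/zone-1
        DecommissionRequest put = Requests.decommissionRequest()
            .setDecommissionAttribute(new DecommissionAttribute("zone", "zone-1"));
        client.admin().cluster().decommission(put, ActionListener.wrap(
            r -> System.out.println("decommission acknowledged"),
            Throwable::printStackTrace));

        // GET /_cluster/decommission/awareness/zone/_status
        GetDecommissionStateRequest get = Requests.getDecommissionStateRequest();
        get.attributeName("zone");
        client.admin().cluster().getDecommissionState(get, ActionListener.wrap(
            r -> System.out.println("decommission status fetched"),
            Throwable::printStackTrace));

        // DELETE /_cluster/decommission/awareness
        client.admin().cluster().deleteDecommissionState(Requests.deleteDecommissionStateRequest(), ActionListener.wrap(
            r -> System.out.println("decommission state cleared"),
            Throwable::printStackTrace));
    }
}
```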
diff --git a/server/src/test/java/org/opensearch/ExceptionSerializationTests.java b/server/src/test/java/org/opensearch/ExceptionSerializationTests.java
index 26b0ce7e9e20c..ff2bb77531486 100644
--- a/server/src/test/java/org/opensearch/ExceptionSerializationTests.java
+++ b/server/src/test/java/org/opensearch/ExceptionSerializationTests.java
@@ -49,6 +49,8 @@
 import org.opensearch.cluster.block.ClusterBlockException;
 import org.opensearch.cluster.coordination.CoordinationStateRejectedException;
 import org.opensearch.cluster.coordination.NoClusterManagerBlockService;
+import org.opensearch.cluster.decommission.DecommissioningFailedException;
+import org.opensearch.cluster.decommission.NodeDecommissionedException;
 import org.opensearch.cluster.node.DiscoveryNode;
 import org.opensearch.cluster.routing.IllegalShardRoutingStateException;
 import org.opensearch.cluster.routing.ShardRouting;
@@ -860,6 +862,8 @@ public void testIds() {
         ids.put(160, NoSeedNodeLeftException.class);
         ids.put(161, ReplicationFailedException.class);
         ids.put(162, PrimaryShardClosedException.class);
+        ids.put(163, DecommissioningFailedException.class);
+        ids.put(164, NodeDecommissionedException.class);
 
         Map<Class<? extends OpenSearchException>, Integer> reverse = new HashMap<>();
         for (Map.Entry<Integer, Class<? extends OpenSearchException>> entry : ids.entrySet()) {
diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/DeleteDecommissionStateRequestTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/DeleteDecommissionStateRequestTests.java
new file mode 100644
index 0000000000000..1a95b77cc1024
--- /dev/null
+++ b/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/DeleteDecommissionStateRequestTests.java
@@ -0,0 +1,32 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.action.admin.cluster.decommission.awareness;
+
+import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateRequest;
+import org.opensearch.common.io.stream.BytesStreamOutput;
+import org.opensearch.common.io.stream.StreamInput;
+import org.opensearch.test.OpenSearchTestCase;
+
+import java.io.IOException;
+
+public class DeleteDecommissionStateRequestTests extends OpenSearchTestCase {
+
+    public void testSerialization() throws IOException {
+        final DeleteDecommissionStateRequest originalRequest = new DeleteDecommissionStateRequest();
+
+        final DeleteDecommissionStateRequest cloneRequest;
+        try (BytesStreamOutput out = new BytesStreamOutput()) {
+            originalRequest.writeTo(out);
+            try (StreamInput in = out.bytes().streamInput()) {
+                cloneRequest = new DeleteDecommissionStateRequest(in);
+            }
+        }
+        assertEquals(cloneRequest.clusterManagerNodeTimeout(), originalRequest.clusterManagerNodeTimeout());
+    }
+}
diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/DeleteDecommissionStateResponseTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/DeleteDecommissionStateResponseTests.java
new file mode 100644
index 0000000000000..085eda3e9d0e7
--- /dev/null
+++ b/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/DeleteDecommissionStateResponseTests.java
@@ -0,0 +1,29 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.action.admin.cluster.decommission.awareness;
+
+import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateResponse;
+import org.opensearch.test.OpenSearchTestCase;
+
+import java.io.IOException;
+
+public class DeleteDecommissionStateResponseTests extends OpenSearchTestCase {
+
+    public void testSerialization() throws IOException {
+        final DeleteDecommissionStateResponse originalResponse = new DeleteDecommissionStateResponse(true);
+
+        final DeleteDecommissionStateResponse deserialized = copyWriteable(
+            originalResponse,
+            writableRegistry(),
+            DeleteDecommissionStateResponse::new
+        );
+        assertEquals(deserialized, originalResponse);
+    }
+}
diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateRequestTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateRequestTests.java
new file mode 100644
index 0000000000000..973485e1917f7
--- /dev/null
+++ b/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateRequestTests.java
@@ -0,0 +1,50 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.action.admin.cluster.decommission.awareness.get;
+
+import org.opensearch.action.ActionRequestValidationException;
+import org.opensearch.test.OpenSearchTestCase;
+
+import java.io.IOException;
+
+public class GetDecommissionStateRequestTests extends OpenSearchTestCase {
+    public void testSerialization() throws IOException {
+        String attributeName = "zone";
+        final GetDecommissionStateRequest originalRequest = new GetDecommissionStateRequest(attributeName);
+        final GetDecommissionStateRequest deserialized = copyWriteable(
+            originalRequest,
+            writableRegistry(),
+            GetDecommissionStateRequest::new
+        );
+        assertEquals(deserialized.attributeName(), originalRequest.attributeName());
+    }
+
+    public void testValidation() {
+        {
+            String attributeName = null;
+            final GetDecommissionStateRequest request = new GetDecommissionStateRequest(attributeName);
+            ActionRequestValidationException e = request.validate();
+            assertNotNull(e);
+            assertTrue(e.getMessage().contains("attribute name is missing"));
+        }
+        {
+            String attributeName = "";
+            final GetDecommissionStateRequest request = new GetDecommissionStateRequest(attributeName);
+            ActionRequestValidationException e = request.validate();
+            assertNotNull(e);
+            assertTrue(e.getMessage().contains("attribute name is missing"));
+        }
+        {
+            String attributeName = "zone";
+            final GetDecommissionStateRequest request = new GetDecommissionStateRequest(attributeName);
+            ActionRequestValidationException e = request.validate();
+            assertNull(e);
+        }
+    }
+}
diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateResponseTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateResponseTests.java
new file mode 100644
index 0000000000000..437faf2a75720
--- /dev/null
+++ b/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/get/GetDecommissionStateResponseTests.java
@@ -0,0 +1,38 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.action.admin.cluster.decommission.awareness.get;
+
+import org.opensearch.cluster.decommission.DecommissionStatus;
+import org.opensearch.common.xcontent.XContentParser;
+import org.opensearch.test.AbstractXContentTestCase;
+
+import java.io.IOException;
+
+public class GetDecommissionStateResponseTests extends AbstractXContentTestCase<GetDecommissionStateResponse> {
+    @Override
+    protected GetDecommissionStateResponse createTestInstance() {
+        DecommissionStatus status = null;
+        String attributeValue = null;
+        if (randomBoolean()) {
+            status = randomFrom(DecommissionStatus.values());
+            attributeValue = randomAlphaOfLength(10);
+        }
+        return new GetDecommissionStateResponse(attributeValue, status);
+    }
+
+    @Override
+    protected GetDecommissionStateResponse doParseInstance(XContentParser parser) throws IOException {
+        return GetDecommissionStateResponse.fromXContent(parser);
+    }
+
+    @Override
+    protected boolean supportsUnknownFields() {
+        return false;
+    }
+}
diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionRequestTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionRequestTests.java
new file mode 100644
index 0000000000000..c189b5702dea0
--- /dev/null
+++ b/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionRequestTests.java
@@ -0,0 +1,61 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.action.admin.cluster.decommission.awareness.put;
+
+import org.opensearch.action.ActionRequestValidationException;
+import org.opensearch.cluster.decommission.DecommissionAttribute;
+import org.opensearch.test.OpenSearchTestCase;
+
+import java.io.IOException;
+
+public class DecommissionRequestTests extends OpenSearchTestCase {
+
+    public void testSerialization() throws IOException {
+        String attributeName = "zone";
+        String attributeValue = "zone-1";
+        DecommissionAttribute decommissionAttribute = new DecommissionAttribute(attributeName, attributeValue);
+        final DecommissionRequest originalRequest = new DecommissionRequest(decommissionAttribute);
+
+        final DecommissionRequest deserialized = copyWriteable(originalRequest, writableRegistry(), DecommissionRequest::new);
+
+        assertEquals(deserialized.getDecommissionAttribute(), originalRequest.getDecommissionAttribute());
+    }
+
+    public void testValidation() {
+        {
+            String attributeName = null;
+            String attributeValue = "test";
+            DecommissionAttribute decommissionAttribute = new DecommissionAttribute(attributeName, attributeValue);
+
+            final DecommissionRequest request = new DecommissionRequest(decommissionAttribute);
+            ActionRequestValidationException e = request.validate();
+            assertNotNull(e);
+            assertTrue(e.getMessage().contains("attribute name is missing"));
+        }
+        {
+            String attributeName = "zone";
+            String attributeValue = "";
+            DecommissionAttribute decommissionAttribute = new DecommissionAttribute(attributeName, attributeValue);
+
+            final DecommissionRequest request = new DecommissionRequest(decommissionAttribute);
+            ActionRequestValidationException e = request.validate();
+            assertNotNull(e);
+            assertTrue(e.getMessage().contains("attribute value is missing"));
+        }
+        {
+            String attributeName = "zone";
+            String attributeValue = "test";
+            DecommissionAttribute decommissionAttribute = new DecommissionAttribute(attributeName, attributeValue);
+
+            final DecommissionRequest request = new DecommissionRequest(decommissionAttribute);
+            ActionRequestValidationException e = request.validate();
+            assertNull(e);
+        }
+    }
+}
diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionResponseTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionResponseTests.java
new file mode 100644
index 0000000000000..5ee5a5f3cf016
--- /dev/null
+++ b/server/src/test/java/org/opensearch/action/admin/cluster/decommission/awareness/put/DecommissionResponseTests.java
@@ -0,0 +1,21 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.action.admin.cluster.decommission.awareness.put;
+
+import org.opensearch.test.OpenSearchTestCase;
+
+import java.io.IOException;
+
+public class DecommissionResponseTests extends OpenSearchTestCase {
+    public void testSerialization() throws IOException {
+        final DecommissionResponse originalRequest = new DecommissionResponse(true);
+        copyWriteable(originalRequest, writableRegistry(), DecommissionResponse::new);
+        // there are no fields so we're just checking that this doesn't throw anything
+    }
+}
cluster.clusterNodes.add(decommissionedNode); + + assertFalse(decommissionedNode.coordinator.localNodeCommissioned()); + + cluster.stabilise( + // Interval is updated to decommissioned find peer interval + defaultMillis(DISCOVERY_FIND_PEERS_INTERVAL_DURING_DECOMMISSION_SETTING) + // One message delay to send a join + + DEFAULT_DELAY_VARIABILITY + // Commit a new cluster state with the new node(s). Might be split into multiple commits, and each might need a + // followup reconfiguration + + 3 * 2 * DEFAULT_CLUSTER_STATE_UPDATE_DELAY + ); + + // once cluster stabilises the node joins and would be commissioned + assertTrue(decommissionedNode.coordinator.localNodeCommissioned()); + } + } + private ClusterState buildNewClusterStateWithVotingConfigExclusion( ClusterState currentState, Set newVotingConfigExclusion diff --git a/server/src/test/java/org/opensearch/cluster/coordination/JoinHelperTests.java b/server/src/test/java/org/opensearch/cluster/coordination/JoinHelperTests.java index a3c945cdbac3a..7b21042b2ed4a 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/JoinHelperTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/JoinHelperTests.java @@ -90,7 +90,8 @@ public void testJoinDeduplication() { startJoinRequest -> { throw new AssertionError(); }, Collections.emptyList(), (s, p, r) -> {}, - () -> new StatusInfo(HEALTHY, "info") + () -> new StatusInfo(HEALTHY, "info"), + nodeCommissioned -> {} ); transportService.start(); @@ -230,7 +231,8 @@ private void assertJoinValidationRejectsMismatchedClusterUUID(String actionName, startJoinRequest -> { throw new AssertionError(); }, Collections.emptyList(), (s, p, r) -> {}, - null + null, + nodeCommissioned -> {} ); // registers request handler transportService.start(); transportService.acceptIncomingRequests(); @@ -284,7 +286,8 @@ public void testJoinFailureOnUnhealthyNodes() { startJoinRequest -> { throw new AssertionError(); }, Collections.emptyList(), (s, p, r) -> {}, - () -> nodeHealthServiceStatus.get() + () -> nodeHealthServiceStatus.get(), + nodeCommissioned -> {} ); transportService.start(); diff --git a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java index 479dbc3835b2f..5a0da5830ebf2 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java @@ -36,9 +36,14 @@ import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ClusterStateTaskExecutor; +import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; +import org.opensearch.cluster.decommission.DecommissionStatus; +import org.opensearch.cluster.decommission.NodeDecommissionedException; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodeRole; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.cluster.routing.RerouteService; import org.opensearch.cluster.routing.allocation.AllocationService; @@ -50,6 +55,7 @@ import org.opensearch.transport.TransportService; import java.util.HashMap; +import java.util.Collections; import java.util.HashSet; import java.util.Map; @@ -290,4 +296,112 @@ public void 
testIsBecomeClusterManagerTask() { JoinTaskExecutor.Task joinTaskOfClusterManager = JoinTaskExecutor.newBecomeClusterManagerTask(); assertThat(joinTaskOfClusterManager.isBecomeClusterManagerTask(), is(true)); } + + public void testJoinClusterWithNoDecommission() { + Metadata.Builder metaBuilder = Metadata.builder(); + Metadata metadata = metaBuilder.build(); + DiscoveryNode discoveryNode = newDiscoveryNode(Collections.singletonMap("zone", "zone-2")); + JoinTaskExecutor.ensureNodeCommissioned(discoveryNode, metadata); + } + + public void testPreventJoinClusterWithDecommission() { + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "zone-1"); + DecommissionStatus decommissionStatus = randomFrom(DecommissionStatus.IN_PROGRESS, DecommissionStatus.SUCCESSFUL); + DecommissionAttributeMetadata decommissionAttributeMetadata = new DecommissionAttributeMetadata( + decommissionAttribute, + decommissionStatus + ); + Metadata metadata = Metadata.builder().decommissionAttributeMetadata(decommissionAttributeMetadata).build(); + DiscoveryNode discoveryNode = newDiscoveryNode(Collections.singletonMap("zone", "zone-1")); + expectThrows(NodeDecommissionedException.class, () -> JoinTaskExecutor.ensureNodeCommissioned(discoveryNode, metadata)); + } + + public void testJoinClusterWithDifferentDecommission() { + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "zone-1"); + DecommissionStatus decommissionStatus = randomFrom( + DecommissionStatus.INIT, + DecommissionStatus.IN_PROGRESS, + DecommissionStatus.SUCCESSFUL + ); + DecommissionAttributeMetadata decommissionAttributeMetadata = new DecommissionAttributeMetadata( + decommissionAttribute, + decommissionStatus + ); + Metadata metadata = Metadata.builder().decommissionAttributeMetadata(decommissionAttributeMetadata).build(); + + DiscoveryNode discoveryNode = newDiscoveryNode(Collections.singletonMap("zone", "zone-2")); + JoinTaskExecutor.ensureNodeCommissioned(discoveryNode, metadata); + } + + public void testJoinFailedForDecommissionedNode() throws Exception { + final AllocationService allocationService = mock(AllocationService.class); + when(allocationService.adaptAutoExpandReplicas(any())).then(invocationOnMock -> invocationOnMock.getArguments()[0]); + final RerouteService rerouteService = (reason, priority, listener) -> listener.onResponse(null); + + final JoinTaskExecutor joinTaskExecutor = new JoinTaskExecutor(Settings.EMPTY, allocationService, logger, rerouteService, null); + + final DiscoveryNode clusterManagerNode = new DiscoveryNode(UUIDs.base64UUID(), buildNewFakeTransportAddress(), Version.CURRENT); + + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "zone1"); + DecommissionAttributeMetadata decommissionAttributeMetadata = new DecommissionAttributeMetadata( + decommissionAttribute, + DecommissionStatus.SUCCESSFUL + ); + final ClusterState clusterManagerClusterState = ClusterState.builder(ClusterName.DEFAULT) + .nodes( + DiscoveryNodes.builder() + .add(clusterManagerNode) + .localNodeId(clusterManagerNode.getId()) + .clusterManagerNodeId(clusterManagerNode.getId()) + ) + .metadata(Metadata.builder().decommissionAttributeMetadata(decommissionAttributeMetadata)) + .build(); + + final DiscoveryNode decommissionedNode = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + Collections.singletonMap("zone", "zone1"), + DiscoveryNodeRole.BUILT_IN_ROLES, +
Version.CURRENT + ); + + String decommissionedNodeID = decommissionedNode.getId(); + + final ClusterStateTaskExecutor.ClusterTasksResult<JoinTaskExecutor.Task> result = joinTaskExecutor.execute( + clusterManagerClusterState, + List.of(new JoinTaskExecutor.Task(decommissionedNode, "test")) + ); + assertThat(result.executionResults.entrySet(), hasSize(1)); + final ClusterStateTaskExecutor.TaskResult taskResult = result.executionResults.values().iterator().next(); + assertFalse(taskResult.isSuccess()); + assertTrue(taskResult.getFailure() instanceof NodeDecommissionedException); + assertFalse(result.resultingState.getNodes().nodeExists(decommissionedNodeID)); + } + + public void testJoinClusterWithDecommissionFailed() { + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "zone-1"); + DecommissionAttributeMetadata decommissionAttributeMetadata = new DecommissionAttributeMetadata( + decommissionAttribute, + DecommissionStatus.FAILED + ); + Metadata metadata = Metadata.builder().decommissionAttributeMetadata(decommissionAttributeMetadata).build(); + + DiscoveryNode discoveryNode = newDiscoveryNode(Collections.singletonMap("zone", "zone-1")); + JoinTaskExecutor.ensureNodeCommissioned(discoveryNode, metadata); + } + + private DiscoveryNode newDiscoveryNode(Map<String, String> attributes) { + return new DiscoveryNode( + randomAlphaOfLength(10), + randomAlphaOfLength(10), + buildNewFakeTransportAddress(), + attributes, + Collections.singleton(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE), + Version.CURRENT + ); + } } diff --git a/server/src/test/java/org/opensearch/cluster/coordination/NodeJoinTests.java b/server/src/test/java/org/opensearch/cluster/coordination/NodeJoinTests.java index c77baba5fe167..18a7b892a424c 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/NodeJoinTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/NodeJoinTests.java @@ -39,6 +39,10 @@ import org.opensearch.cluster.OpenSearchAllocationTestCase; import org.opensearch.cluster.block.ClusterBlocks; import org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfiguration; +import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; +import org.opensearch.cluster.decommission.DecommissionStatus; +import org.opensearch.cluster.decommission.NodeDecommissionedException; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodeRole; @@ -775,6 +779,60 @@ public void testJoinElectedLeaderWithDeprecatedMasterRole() { assertTrue(clusterStateHasNode(node1)); } + + public void testJoinFailsWhenDecommissioned() { + DiscoveryNode node0 = newNode(0, true); + DiscoveryNode node1 = newNode(1, true); + long initialTerm = randomLongBetween(1, 10); + long initialVersion = randomLongBetween(1, 10); + setupFakeClusterManagerServiceAndCoordinator( + initialTerm, + initialStateWithDecommissionedAttribute( + initialState(node0, initialTerm, initialVersion, VotingConfiguration.of(node0)), + new DecommissionAttribute("zone", "zone1") + ), + () -> new StatusInfo(HEALTHY, "healthy-info") + ); + assertFalse(isLocalNodeElectedMaster()); + long newTerm = initialTerm + randomLongBetween(1, 10); + joinNodeAndRun(new JoinRequest(node0, newTerm, Optional.of(new Join(node0, node0, newTerm, initialTerm, initialVersion)))); + assertTrue(isLocalNodeElectedMaster()); + assertFalse(clusterStateHasNode(node1)); + joinNodeAndRun(new
JoinRequest(node1, newTerm, Optional.of(new Join(node1, node0, newTerm, initialTerm, initialVersion)))); + assertTrue(isLocalNodeElectedMaster()); + assertTrue(clusterStateHasNode(node1)); + DiscoveryNode decommissionedNode = new DiscoveryNode( + "data_2", + 2 + "", + buildNewFakeTransportAddress(), + Collections.singletonMap("zone", "zone1"), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + long anotherTerm = newTerm + randomLongBetween(1, 10); + + assertThat( + expectThrows( + NodeDecommissionedException.class, + () -> joinNodeAndRun(new JoinRequest(decommissionedNode, anotherTerm, Optional.empty())) + ).getMessage(), + containsString("with current status of decommissioning") + ); + assertFalse(clusterStateHasNode(decommissionedNode)); + + DiscoveryNode node3 = new DiscoveryNode( + "data_3", + 3 + "", + buildNewFakeTransportAddress(), + Collections.singletonMap("zone", "zone2"), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + long termForNode3 = anotherTerm + randomLongBetween(1, 10); + + joinNodeAndRun(new JoinRequest(node3, termForNode3, Optional.empty())); + assertTrue(clusterStateHasNode(node3)); + } + private boolean isLocalNodeElectedMaster() { return MasterServiceTests.discoveryState(clusterManagerService).nodes().isLocalNodeElectedMaster(); } @@ -782,4 +840,17 @@ private boolean isLocalNodeElectedMaster() { private boolean clusterStateHasNode(DiscoveryNode node) { return node.equals(MasterServiceTests.discoveryState(clusterManagerService).nodes().get(node.getId())); } + + private static ClusterState initialStateWithDecommissionedAttribute( + ClusterState clusterState, + DecommissionAttribute decommissionAttribute + ) { + DecommissionAttributeMetadata decommissionAttributeMetadata = new DecommissionAttributeMetadata( + decommissionAttribute, + DecommissionStatus.SUCCESSFUL + ); + return ClusterState.builder(clusterState) + .metadata(Metadata.builder(clusterState.metadata()).decommissionAttributeMetadata(decommissionAttributeMetadata)) + .build(); + } } diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java new file mode 100644 index 0000000000000..8b5343184dabd --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java @@ -0,0 +1,365 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.cluster.decommission; + +import org.junit.After; +import org.junit.Before; +import org.opensearch.OpenSearchTimeoutException; +import org.opensearch.Version; +import org.opensearch.action.ActionListener; +import org.opensearch.action.admin.cluster.configuration.TransportAddVotingConfigExclusionsAction; +import org.opensearch.action.admin.cluster.configuration.TransportClearVotingConfigExclusionsAction; +import org.opensearch.action.support.ActionFilters; +import org.opensearch.cluster.ClusterName; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.ClusterStateObserver; +import org.opensearch.cluster.ClusterStateUpdateTask; +import org.opensearch.cluster.coordination.CoordinationMetadata; +import org.opensearch.cluster.metadata.IndexNameExpressionResolver; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodeRole; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.allocation.AllocationService; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.concurrent.ThreadContext; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.test.transport.MockTransport; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.TransportService; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; + +import static java.util.Collections.emptySet; +import static java.util.Collections.singletonMap; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.empty; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.sameInstance; +import static org.opensearch.cluster.ClusterState.builder; +import static org.opensearch.cluster.OpenSearchAllocationTestCase.createAllocationService; +import static org.opensearch.test.ClusterServiceUtils.createClusterService; +import static org.opensearch.test.ClusterServiceUtils.setState; + +public class DecommissionControllerTests extends OpenSearchTestCase { + + private static ThreadPool threadPool; + private static ClusterService clusterService; + private TransportService transportService; + private AllocationService allocationService; + private DecommissionController decommissionController; + private ClusterSettings clusterSettings; + + @Before + public void setTransportServiceAndDefaultClusterState() { + threadPool = new TestThreadPool("test", Settings.EMPTY); + allocationService = createAllocationService(); + ClusterState clusterState = ClusterState.builder(new ClusterName("test")).build(); + logger.info("--> adding five nodes on same zone_1"); + clusterState = addNodes(clusterState, "zone_1", "node1", "node2", "node3", "node4", "node5"); + logger.info("--> adding five nodes on same zone_2"); + clusterState = addNodes(clusterState, "zone_2", "node6", "node7", "node8", "node9", "node10"); + logger.info("--> adding five nodes on same zone_3"); + clusterState = addNodes(clusterState, "zone_3", "node11", "node12", 
"node13", "node14", "node15"); + clusterState = setLocalNodeAsClusterManagerNode(clusterState, "node1"); + clusterState = setThreeNodesInVotingConfig(clusterState); + final ClusterState.Builder builder = builder(clusterState); + clusterService = createClusterService(threadPool, clusterState.nodes().get("node1")); + setState(clusterService, builder); + final MockTransport transport = new MockTransport(); + transportService = transport.createTransportService( + Settings.EMPTY, + threadPool, + TransportService.NOOP_TRANSPORT_INTERCEPTOR, + boundTransportAddress -> clusterService.state().nodes().get("node1"), + null, + emptySet() + ); + + final Settings.Builder nodeSettingsBuilder = Settings.builder(); + final Settings nodeSettings = nodeSettingsBuilder.build(); + clusterSettings = new ClusterSettings(nodeSettings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + + new TransportAddVotingConfigExclusionsAction( + nodeSettings, + clusterSettings, + transportService, + clusterService, + threadPool, + new ActionFilters(emptySet()), + new IndexNameExpressionResolver(new ThreadContext(Settings.EMPTY)) + ); // registers action + + new TransportClearVotingConfigExclusionsAction( + transportService, + clusterService, + threadPool, + new ActionFilters(emptySet()), + new IndexNameExpressionResolver(new ThreadContext(Settings.EMPTY)) + ); // registers action + + transportService.start(); + transportService.acceptIncomingRequests(); + decommissionController = new DecommissionController(clusterService, transportService, allocationService, threadPool); + } + + @After + public void shutdownThreadPoolAndClusterService() { + clusterService.stop(); + threadPool.shutdown(); + } + + public void testAddNodesToVotingConfigExclusion() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(2); + + ClusterStateObserver clusterStateObserver = new ClusterStateObserver(clusterService, null, logger, threadPool.getThreadContext()); + clusterStateObserver.waitForNextChange(new AdjustConfigurationForExclusions(countDownLatch)); + Set nodesToRemoveFromVotingConfig = Collections.singleton(randomFrom("node1", "node6", "node11")); + decommissionController.excludeDecommissionedNodesFromVotingConfig(nodesToRemoveFromVotingConfig, new ActionListener() { + @Override + public void onResponse(Void unused) { + countDownLatch.countDown(); + } + + @Override + public void onFailure(Exception e) { + fail("unexpected failure occurred while removing node from voting config " + e); + } + }); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + clusterService.getClusterApplierService().state().getVotingConfigExclusions().forEach(vce -> { + assertTrue(nodesToRemoveFromVotingConfig.contains(vce.getNodeId())); + assertEquals(nodesToRemoveFromVotingConfig.size(), 1); + }); + } + + public void testClearVotingConfigExclusions() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + decommissionController.clearVotingConfigExclusion(new ActionListener() { + @Override + public void onResponse(Void unused) { + countDownLatch.countDown(); + } + + @Override + public void onFailure(Exception e) { + fail("unexpected failure occurred while clearing voting config exclusion" + e); + } + }, false); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + assertThat(clusterService.getClusterApplierService().state().getVotingConfigExclusions(), empty()); + } + + public void testNodesRemovedForDecommissionRequestSuccessfulResponse() throws InterruptedException { + final CountDownLatch 
countDownLatch = new CountDownLatch(1); + Set<DiscoveryNode> nodesToBeRemoved = new HashSet<>(); + nodesToBeRemoved.add(clusterService.state().nodes().get("node11")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node12")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node13")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node14")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node15")); + + decommissionController.removeDecommissionedNodes( + nodesToBeRemoved, + "unit-test", + TimeValue.timeValueSeconds(30L), + new ActionListener<Void>() { + @Override + public void onResponse(Void unused) { + countDownLatch.countDown(); + } + + @Override + public void onFailure(Exception e) { + fail("there shouldn't have been any failure"); + } + } + ); + + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + // test all 5 nodes removed and cluster has 10 nodes + Set<DiscoveryNode> nodes = StreamSupport.stream(clusterService.getClusterApplierService().state().nodes().spliterator(), false) + .collect(Collectors.toSet()); + assertEquals(10, nodes.size()); + // test no remaining node is part of zone_3 + for (DiscoveryNode node : nodes) { + assertNotEquals(node.getAttributes().get("zone"), "zone_3"); + } + } + + public void testTimesOut() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + Set<DiscoveryNode> nodesToBeRemoved = new HashSet<>(); + nodesToBeRemoved.add(clusterService.state().nodes().get("node11")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node12")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node13")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node14")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node15")); + decommissionController.removeDecommissionedNodes( + nodesToBeRemoved, + "unit-test-timeout", + TimeValue.timeValueMillis(2), + new ActionListener<Void>() { + @Override + public void onResponse(Void unused) { + fail("response shouldn't have been called"); + } + + @Override + public void onFailure(Exception e) { + assertThat(e, instanceOf(OpenSearchTimeoutException.class)); + assertThat(e.getMessage(), containsString("waiting for removal of decommissioned nodes")); + countDownLatch.countDown(); + } + } + ); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + } + + public void testSuccessfulDecommissionStatusMetadataUpdate() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + DecommissionAttributeMetadata oldMetadata = new DecommissionAttributeMetadata( + new DecommissionAttribute("zone", "zone-1"), + DecommissionStatus.IN_PROGRESS + ); + ClusterState state = clusterService.state(); + Metadata metadata = state.metadata(); + Metadata.Builder mdBuilder = Metadata.builder(metadata); + mdBuilder.decommissionAttributeMetadata(oldMetadata); + state = ClusterState.builder(state).metadata(mdBuilder).build(); + setState(clusterService, state); + + decommissionController.updateMetadataWithDecommissionStatus( + DecommissionStatus.SUCCESSFUL, + new ActionListener<DecommissionStatus>() { + @Override + public void onResponse(DecommissionStatus status) { + assertEquals(DecommissionStatus.SUCCESSFUL, status); + countDownLatch.countDown(); + } + + @Override + public void onFailure(Exception e) { + fail("decommission status update failed"); + } + } + ); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + ClusterState newState = clusterService.getClusterApplierService().state(); + DecommissionAttributeMetadata decommissionAttributeMetadata =
newState.metadata().decommissionAttributeMetadata(); + assertEquals(decommissionAttributeMetadata.status(), DecommissionStatus.SUCCESSFUL); + } + + private static class AdjustConfigurationForExclusions implements ClusterStateObserver.Listener { + + final CountDownLatch doneLatch; + + AdjustConfigurationForExclusions(CountDownLatch latch) { + this.doneLatch = latch; + } + + @Override + public void onNewClusterState(ClusterState state) { + clusterService.getClusterManagerService().submitStateUpdateTask("reconfiguration", new ClusterStateUpdateTask() { + @Override + public ClusterState execute(ClusterState currentState) { + assertThat(currentState, sameInstance(state)); + final Set votingNodeIds = new HashSet<>(); + currentState.nodes().forEach(n -> votingNodeIds.add(n.getId())); + currentState.getVotingConfigExclusions().forEach(t -> votingNodeIds.remove(t.getNodeId())); + final CoordinationMetadata.VotingConfiguration votingConfiguration = new CoordinationMetadata.VotingConfiguration( + votingNodeIds + ); + return builder(currentState).metadata( + Metadata.builder(currentState.metadata()) + .coordinationMetadata( + CoordinationMetadata.builder(currentState.coordinationMetadata()) + .lastAcceptedConfiguration(votingConfiguration) + .lastCommittedConfiguration(votingConfiguration) + .build() + ) + ).build(); + } + + @Override + public void onFailure(String source, Exception e) { + throw new AssertionError("unexpected failure", e); + } + + @Override + public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { + doneLatch.countDown(); + } + }); + } + + @Override + public void onClusterServiceClose() { + throw new AssertionError("unexpected close"); + } + + @Override + public void onTimeout(TimeValue timeout) { + throw new AssertionError("unexpected timeout"); + } + } + + private ClusterState addNodes(ClusterState clusterState, String zone, String... 
nodeIds) { + DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.nodes()); + org.opensearch.common.collect.List.of(nodeIds).forEach(nodeId -> nodeBuilder.add(newNode(nodeId, singletonMap("zone", zone)))); + clusterState = ClusterState.builder(clusterState).nodes(nodeBuilder).build(); + return clusterState; + } + + private ClusterState setLocalNodeAsClusterManagerNode(ClusterState clusterState, String nodeId) { + DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.nodes()); + nodeBuilder.localNodeId(nodeId); + nodeBuilder.clusterManagerNodeId(nodeId); + clusterState = ClusterState.builder(clusterState).nodes(nodeBuilder).build(); + return clusterState; + } + + private ClusterState setThreeNodesInVotingConfig(ClusterState clusterState) { + final CoordinationMetadata.VotingConfiguration votingConfiguration = CoordinationMetadata.VotingConfiguration.of( + clusterState.nodes().get("node1"), + clusterState.nodes().get("node6"), + clusterState.nodes().get("node11") + ); + + Metadata.Builder builder = Metadata.builder() + .coordinationMetadata( + CoordinationMetadata.builder() + .lastAcceptedConfiguration(votingConfiguration) + .lastCommittedConfiguration(votingConfiguration) + .build() + ); + clusterState = ClusterState.builder(clusterState).metadata(builder).build(); + return clusterState; + } + + private static DiscoveryNode newNode(String nodeId, Map attributes) { + return new DiscoveryNode(nodeId, nodeId, buildNewFakeTransportAddress(), attributes, CLUSTER_MANAGER_DATA_ROLE, Version.CURRENT); + } + + final private static Set CLUSTER_MANAGER_DATA_ROLE = Collections.unmodifiableSet( + new HashSet<>(Arrays.asList(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, DiscoveryNodeRole.DATA_ROLE)) + ); +} diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java new file mode 100644 index 0000000000000..7dee51b7713f9 --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java @@ -0,0 +1,347 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.cluster.decommission; + +import org.hamcrest.Matchers; +import org.junit.After; +import org.junit.Before; +import org.mockito.ArgumentCaptor; +import org.mockito.Mockito; +import org.opensearch.Version; +import org.opensearch.action.ActionListener; +import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateResponse; +import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionResponse; +import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsRequest; +import org.opensearch.cluster.ClusterName; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.coordination.CoordinationMetadata; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodeRole; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.allocation.AllocationService; +import org.opensearch.cluster.routing.allocation.decider.AwarenessAllocationDecider; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.test.transport.MockTransport; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.TransportResponseHandler; +import org.opensearch.transport.TransportService; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +import static java.util.Collections.emptySet; +import static java.util.Collections.singletonMap; +import static org.opensearch.cluster.ClusterState.builder; +import static org.opensearch.cluster.OpenSearchAllocationTestCase.createAllocationService; +import static org.opensearch.test.ClusterServiceUtils.createClusterService; +import static org.opensearch.test.ClusterServiceUtils.setState; + +public class DecommissionServiceTests extends OpenSearchTestCase { + + private ThreadPool threadPool; + private ClusterService clusterService; + private TransportService transportService; + private AllocationService allocationService; + private DecommissionService decommissionService; + private ClusterSettings clusterSettings; + + @Before + public void setUpService() { + threadPool = new TestThreadPool("test", Settings.EMPTY); + clusterService = createClusterService(threadPool); + allocationService = createAllocationService(); + ClusterState clusterState = ClusterState.builder(new ClusterName("test")).build(); + logger.info("--> adding cluster manager node on zone_1"); + clusterState = addClusterManagerNodes(clusterState, "zone_1", "node1"); + logger.info("--> adding cluster manager node on zone_2"); + clusterState = addClusterManagerNodes(clusterState, "zone_2", "node6"); + logger.info("--> adding cluster manager node on zone_3"); + clusterState = addClusterManagerNodes(clusterState, "zone_3", "node11"); + logger.info("--> adding four data nodes on zone_1"); + clusterState = addDataNodes(clusterState, "zone_1", "node2", "node3", "node4", "node5"); + logger.info("--> adding four data nodes on zone_2"); + clusterState = addDataNodes(clusterState, "zone_2", "node7", "node8", "node9", "node10"); + logger.info("--> adding four data nodes on zone_3"); + clusterState = 
addDataNodes(clusterState, "zone_3", "node12", "node13", "node14", "node15"); + clusterState = setLocalNodeAsClusterManagerNode(clusterState, "node1"); + clusterState = setNodesInVotingConfig( + clusterState, + clusterState.nodes().get("node1"), + clusterState.nodes().get("node6"), + clusterState.nodes().get("node11") + ); + final ClusterState.Builder builder = builder(clusterState); + setState(clusterService, builder); + final MockTransport transport = new MockTransport(); + transportService = transport.createTransportService( + Settings.EMPTY, + threadPool, + TransportService.NOOP_TRANSPORT_INTERCEPTOR, + boundTransportAddress -> clusterService.state().nodes().get("node1"), + null, + emptySet() + ); + + final Settings.Builder nodeSettingsBuilder = Settings.builder() + .put(AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING.getKey(), "zone") + .put("cluster.routing.allocation.awareness.force.zone.values", "zone_1,zone_2,zone_3"); + + clusterSettings = new ClusterSettings(nodeSettingsBuilder.build(), ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + transportService.start(); + transportService.acceptIncomingRequests(); + + this.decommissionService = new DecommissionService( + nodeSettingsBuilder.build(), + clusterSettings, + clusterService, + transportService, + threadPool, + allocationService + ); + } + + @After + public void shutdownThreadPoolAndClusterService() { + clusterService.stop(); + threadPool.shutdown(); + } + + @SuppressWarnings("unchecked") + public void testDecommissioningNotStartedForInvalidAttributeName() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("rack", "rack-a"); + ActionListener<DecommissionResponse> listener = new ActionListener<DecommissionResponse>() { + @Override + public void onResponse(DecommissionResponse decommissionResponse) { + fail("on response shouldn't have been called"); + } + + @Override + public void onFailure(Exception e) { + assertTrue(e instanceof DecommissioningFailedException); + assertThat(e.getMessage(), Matchers.endsWith("invalid awareness attribute requested for decommissioning")); + countDownLatch.countDown(); + } + }; + decommissionService.startDecommissionAction(decommissionAttribute, listener); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + } + + @SuppressWarnings("unchecked") + public void testDecommissioningNotStartedForInvalidAttributeValue() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "rack-a"); + ActionListener<DecommissionResponse> listener = new ActionListener<DecommissionResponse>() { + @Override + public void onResponse(DecommissionResponse decommissionResponse) { + fail("on response shouldn't have been called"); + } + + @Override + public void onFailure(Exception e) { + assertTrue(e instanceof DecommissioningFailedException); + assertThat( + e.getMessage(), + Matchers.endsWith( + "invalid awareness attribute value requested for decommissioning. " + "Set forced awareness values before to decommission" + ) + ); + countDownLatch.countDown(); + } + }; + decommissionService.startDecommissionAction(decommissionAttribute, listener); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + } + + @SuppressWarnings("unchecked") + public void testDecommissioningFailedWhenAnotherAttributeDecommissioningSuccessful() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + DecommissionStatus oldStatus = randomFrom(DecommissionStatus.SUCCESSFUL, DecommissionStatus.IN_PROGRESS, DecommissionStatus.INIT); + DecommissionAttributeMetadata oldMetadata = new DecommissionAttributeMetadata( + new DecommissionAttribute("zone", "zone_1"), + oldStatus + ); + final ClusterState.Builder builder = builder(clusterService.state()); + setState( + clusterService, + builder.metadata(Metadata.builder(clusterService.state().metadata()).decommissionAttributeMetadata(oldMetadata).build()) + ); + ActionListener<DecommissionResponse> listener = new ActionListener<DecommissionResponse>() { + @Override + public void onResponse(DecommissionResponse decommissionResponse) { + fail("on response shouldn't have been called"); + } + + @Override + public void onFailure(Exception e) { + assertTrue(e instanceof DecommissioningFailedException); + if (oldStatus.equals(DecommissionStatus.SUCCESSFUL)) { + assertThat( + e.getMessage(), + Matchers.endsWith("already successfully decommissioned, recommission before triggering another decommission") + ); + } else { + assertThat(e.getMessage(), Matchers.endsWith("is in progress, cannot process this request")); + } + countDownLatch.countDown(); + } + }; + decommissionService.startDecommissionAction(new DecommissionAttribute("zone", "zone_2"), listener); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + } + + public void testClearClusterDecommissionState() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "zone-2"); + DecommissionAttributeMetadata decommissionAttributeMetadata = new DecommissionAttributeMetadata( + decommissionAttribute, + DecommissionStatus.SUCCESSFUL + ); + ClusterState state = ClusterState.builder(new ClusterName("test")) + .metadata(Metadata.builder().putCustom(DecommissionAttributeMetadata.TYPE, decommissionAttributeMetadata).build()) + .build(); + // apply the decommissioned state to the cluster service so the delete below has metadata to remove + setState(clusterService, state); + + ActionListener<DeleteDecommissionStateResponse> listener = new ActionListener<>() { + @Override + public void onResponse(DeleteDecommissionStateResponse decommissionResponse) { + DecommissionAttributeMetadata metadata = clusterService.state().metadata().custom(DecommissionAttributeMetadata.TYPE); + assertNull(metadata); + countDownLatch.countDown(); + } + + @Override + public void onFailure(Exception e) { + fail("on failure shouldn't have been called"); + countDownLatch.countDown(); + } + }; + + this.decommissionService.deleteDecommissionState(listener); + + // Decommission Attribute should be removed. The await below blocks until the listener fires + // (and fails the test on timeout); the cluster-state assertion runs inside onResponse, so a + // successful await implies the metadata check has already executed.
+ assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + } + + public void testDeleteDecommissionAttributeClearVotingExclusion() { + TransportService mockTransportService = Mockito.mock(TransportService.class); + Mockito.when(mockTransportService.getLocalNode()).thenReturn(Mockito.mock(DiscoveryNode.class)); + DecommissionService decommissionService = new DecommissionService( + Settings.EMPTY, + clusterSettings, + clusterService, + mockTransportService, + threadPool, + allocationService + ); + decommissionService.startRecommissionAction(Mockito.mock(ActionListener.class)); + + ArgumentCaptor clearVotingConfigExclusionsRequestArgumentCaptor = ArgumentCaptor.forClass( + ClearVotingConfigExclusionsRequest.class + ); + Mockito.verify(mockTransportService) + .sendRequest( + Mockito.any(DiscoveryNode.class), + Mockito.anyString(), + clearVotingConfigExclusionsRequestArgumentCaptor.capture(), + Mockito.any(TransportResponseHandler.class) + ); + + ClearVotingConfigExclusionsRequest request = clearVotingConfigExclusionsRequestArgumentCaptor.getValue(); + assertFalse(request.getWaitForRemoval()); + } + + public void testClusterUpdateTaskForDeletingDecommission() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + ActionListener listener = new ActionListener<>() { + @Override + public void onResponse(DeleteDecommissionStateResponse response) { + assertTrue(response.isAcknowledged()); + assertNull(clusterService.state().metadata().decommissionAttributeMetadata()); + countDownLatch.countDown(); + } + + @Override + public void onFailure(Exception e) { + fail("On Failure shouldn't have been called"); + countDownLatch.countDown(); + } + }; + decommissionService.deleteDecommissionState(listener); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + } + + private ClusterState addDataNodes(ClusterState clusterState, String zone, String... nodeIds) { + DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.nodes()); + org.opensearch.common.collect.List.of(nodeIds).forEach(nodeId -> nodeBuilder.add(newDataNode(nodeId, singletonMap("zone", zone)))); + clusterState = ClusterState.builder(clusterState).nodes(nodeBuilder).build(); + return clusterState; + } + + private ClusterState addClusterManagerNodes(ClusterState clusterState, String zone, String... nodeIds) { + DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.nodes()); + org.opensearch.common.collect.List.of(nodeIds) + .forEach(nodeId -> nodeBuilder.add(newClusterManagerNode(nodeId, singletonMap("zone", zone)))); + clusterState = ClusterState.builder(clusterState).nodes(nodeBuilder).build(); + return clusterState; + } + + private ClusterState setLocalNodeAsClusterManagerNode(ClusterState clusterState, String nodeId) { + DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.nodes()); + nodeBuilder.localNodeId(nodeId); + nodeBuilder.clusterManagerNodeId(nodeId); + clusterState = ClusterState.builder(clusterState).nodes(nodeBuilder).build(); + return clusterState; + } + + private ClusterState setNodesInVotingConfig(ClusterState clusterState, DiscoveryNode... 
nodes) { + final CoordinationMetadata.VotingConfiguration votingConfiguration = CoordinationMetadata.VotingConfiguration.of(nodes); + + Metadata.Builder builder = Metadata.builder() + .coordinationMetadata( + CoordinationMetadata.builder() + .lastAcceptedConfiguration(votingConfiguration) + .lastCommittedConfiguration(votingConfiguration) + .build() + ); + clusterState = ClusterState.builder(clusterState).metadata(builder).build(); + return clusterState; + } + + private static DiscoveryNode newDataNode(String nodeId, Map attributes) { + return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), attributes, DATA_ROLE, Version.CURRENT); + } + + private static DiscoveryNode newClusterManagerNode(String nodeId, Map attributes) { + return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), attributes, CLUSTER_MANAGER_ROLE, Version.CURRENT); + } + + final private static Set CLUSTER_MANAGER_ROLE = Collections.unmodifiableSet( + new HashSet<>(Collections.singletonList(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE)) + ); + + final private static Set DATA_ROLE = Collections.unmodifiableSet( + new HashSet<>(Collections.singletonList(DiscoveryNodeRole.DATA_ROLE)) + ); + + private ClusterState removeNodes(ClusterState clusterState, String... nodeIds) { + DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.getNodes()); + org.opensearch.common.collect.List.of(nodeIds).forEach(nodeBuilder::remove); + return allocationService.disassociateDeadNodes(ClusterState.builder(clusterState).nodes(nodeBuilder).build(), false, "test"); + } +} diff --git a/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataSerializationTests.java b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataSerializationTests.java new file mode 100644 index 0000000000000..60b3a03848830 --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataSerializationTests.java @@ -0,0 +1,83 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.cluster.metadata; + +import org.opensearch.cluster.ClusterModule; +import org.opensearch.cluster.Diff; +import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; +import org.opensearch.cluster.decommission.DecommissionStatus; +import org.opensearch.common.io.stream.NamedWriteableRegistry; +import org.opensearch.common.io.stream.Writeable; +import org.opensearch.common.xcontent.XContentParser; +import org.opensearch.test.AbstractDiffableSerializationTestCase; + +import java.io.IOException; + +public class DecommissionAttributeMetadataSerializationTests extends AbstractDiffableSerializationTestCase<Metadata.Custom> { + + @Override + protected Writeable.Reader<Metadata.Custom> instanceReader() { + return DecommissionAttributeMetadata::new; + } + + @Override + protected Metadata.Custom createTestInstance() { + String attributeName = randomAlphaOfLength(6); + String attributeValue = randomAlphaOfLength(6); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute(attributeName, attributeValue); + DecommissionStatus decommissionStatus = randomFrom(DecommissionStatus.values()); + return new DecommissionAttributeMetadata(decommissionAttribute, decommissionStatus); + } + + @Override + protected Metadata.Custom mutateInstance(Metadata.Custom instance) { + return randomValueOtherThan(instance, this::createTestInstance); + } + + @Override + protected Metadata.Custom makeTestChanges(Metadata.Custom testInstance) { + DecommissionAttributeMetadata decommissionAttributeMetadata = (DecommissionAttributeMetadata) testInstance; + DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); + String attributeName = decommissionAttribute.attributeName(); + String attributeValue = decommissionAttribute.attributeValue(); + DecommissionStatus decommissionStatus = decommissionAttributeMetadata.status(); + if (randomBoolean()) { + decommissionStatus = randomFrom(DecommissionStatus.values()); + } + if (randomBoolean()) { + attributeName = randomAlphaOfLength(6); + } + if (randomBoolean()) { + attributeValue = randomAlphaOfLength(6); + } + return new DecommissionAttributeMetadata(new DecommissionAttribute(attributeName, attributeValue), decommissionStatus); + } + + @Override + protected Writeable.Reader<Diff<Metadata.Custom>> diffReader() { + return DecommissionAttributeMetadata::readDiffFrom; + } + + @Override + protected NamedWriteableRegistry getNamedWriteableRegistry() { + return new NamedWriteableRegistry(ClusterModule.getNamedWriteables()); + } + + @Override + protected Metadata.Custom doParseInstance(XContentParser parser) throws IOException { + assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken()); + DecommissionAttributeMetadata decommissionAttributeMetadata = DecommissionAttributeMetadata.fromXContent(parser); + assertEquals(XContentParser.Token.END_OBJECT, parser.currentToken()); + return new DecommissionAttributeMetadata( + decommissionAttributeMetadata.decommissionAttribute(), + decommissionAttributeMetadata.status() + ); + } } diff --git a/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataTests.java b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataTests.java new file mode 100644 index 0000000000000..746d4565b0db3 --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataTests.java @@ -0,0 +1,52 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch
Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.metadata; + +import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; +import org.opensearch.cluster.decommission.DecommissionStatus; +import org.opensearch.common.io.stream.NamedWriteableRegistry; +import org.opensearch.test.AbstractNamedWriteableTestCase; + +import java.io.IOException; +import java.util.Collections; + +public class DecommissionAttributeMetadataTests extends AbstractNamedWriteableTestCase { + @Override + protected DecommissionAttributeMetadata createTestInstance() { + String attributeName = randomAlphaOfLength(6); + String attributeValue = randomAlphaOfLength(6); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute(attributeName, attributeValue); + DecommissionStatus decommissionStatus = randomFrom(DecommissionStatus.values()); + return new DecommissionAttributeMetadata(decommissionAttribute, decommissionStatus); + } + + @Override + protected DecommissionAttributeMetadata mutateInstance(DecommissionAttributeMetadata instance) throws IOException { + return randomValueOtherThan(instance, this::createTestInstance); + } + + @Override + protected NamedWriteableRegistry getNamedWriteableRegistry() { + return new NamedWriteableRegistry( + Collections.singletonList( + new NamedWriteableRegistry.Entry( + DecommissionAttributeMetadata.class, + DecommissionAttributeMetadata.TYPE, + DecommissionAttributeMetadata::new + ) + ) + ); + } + + @Override + protected Class categoryClass() { + return DecommissionAttributeMetadata.class; + } +} diff --git a/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataXContentTests.java b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataXContentTests.java new file mode 100644 index 0000000000000..030946f4510a1 --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataXContentTests.java @@ -0,0 +1,38 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.cluster.metadata; + +import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; +import org.opensearch.cluster.decommission.DecommissionStatus; +import org.opensearch.common.xcontent.XContentParser; +import org.opensearch.test.AbstractXContentTestCase; + +import java.io.IOException; + +public class DecommissionAttributeMetadataXContentTests extends AbstractXContentTestCase { + @Override + protected DecommissionAttributeMetadata createTestInstance() { + String attributeName = randomAlphaOfLength(6); + String attributeValue = randomAlphaOfLength(6); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute(attributeName, attributeValue); + DecommissionStatus decommissionStatus = randomFrom(DecommissionStatus.values()); + return new DecommissionAttributeMetadata(decommissionAttribute, decommissionStatus); + } + + @Override + protected DecommissionAttributeMetadata doParseInstance(XContentParser parser) throws IOException { + return DecommissionAttributeMetadata.fromXContent(parser); + } + + @Override + protected boolean supportsUnknownFields() { + return false; + } +} diff --git a/server/src/test/java/org/opensearch/cluster/routing/AllocationIdTests.java b/server/src/test/java/org/opensearch/cluster/routing/AllocationIdTests.java index e266eacdc0320..fc008762edd35 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/AllocationIdTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/AllocationIdTests.java @@ -142,6 +142,24 @@ public void testMoveToUnassigned() { assertThat(shard.allocationId(), nullValue()); } + public void testMovePrimaryToReplica() { + logger.info("-- build started shard"); + ShardRouting shard = ShardRouting.newUnassigned( + new ShardId("test", "_na_", 0), + true, + ExistingStoreRecoverySource.INSTANCE, + new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null) + ); + shard = shard.initialize("node1", null, -1); + shard = shard.moveToStarted(); + AllocationId originalAllocationId = shard.allocationId(); + + logger.info("-- move to replica"); + shard = shard.moveActivePrimaryToReplica(); + assertNotNull(shard.allocationId()); + assertEquals(originalAllocationId, shard.allocationId()); + } + public void testSerialization() throws IOException { AllocationId allocationId = AllocationId.newInitializing(); if (randomBoolean()) { diff --git a/server/src/test/java/org/opensearch/cluster/routing/RoutingNodesTests.java b/server/src/test/java/org/opensearch/cluster/routing/RoutingNodesTests.java index 3e9088d63cfb4..73136a71bc12a 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/RoutingNodesTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/RoutingNodesTests.java @@ -33,6 +33,7 @@ package org.opensearch.cluster.routing; import org.junit.Before; +import org.mockito.Mockito; import org.opensearch.Version; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.OpenSearchAllocationTestCase; @@ -160,4 +161,40 @@ public void testInterleavedShardIterator() { } assert shardCount == this.totalNumberOfShards; } + + public void testSwapPrimaryWithReplica() { + // Initialize all the shards for test index 1 and 2 + initPrimaries(); + startInitializingShards(TEST_INDEX_1); + startInitializingShards(TEST_INDEX_1); + startInitializingShards(TEST_INDEX_2); + startInitializingShards(TEST_INDEX_2); + + // Create primary shard count imbalance between two nodes + final RoutingNodes routingNodes = 
this.clusterState.getRoutingNodes(); + final RoutingNode node0 = routingNodes.node("node0"); + final RoutingNode node1 = routingNodes.node("node1"); + final List shardRoutingList = node0.shardsWithState(TEST_INDEX_1, ShardRoutingState.STARTED); + final RoutingChangesObserver routingChangesObserver = Mockito.mock(RoutingChangesObserver.class); + int swaps = 0; + + for (ShardRouting routing : shardRoutingList) { + if (routing.primary()) { + ShardRouting swap = node1.getByShardId(routing.shardId()); + routingNodes.swapPrimaryWithReplica(logger, routing, swap, routingChangesObserver); + swaps++; + } + } + Mockito.verify(routingChangesObserver, Mockito.times(swaps)).replicaPromoted(Mockito.any()); + + final List shards = node1.shardsWithState(TEST_INDEX_1, ShardRoutingState.STARTED); + int shardCount = 0; + for (ShardRouting shard : shards) { + if (shard.primary()) { + shardCount++; + } + } + + assertTrue(shardCount >= swaps); + } } diff --git a/server/src/test/java/org/opensearch/cluster/routing/ShardRoutingTests.java b/server/src/test/java/org/opensearch/cluster/routing/ShardRoutingTests.java index 1dd27cb706c64..a914ef5da31ca 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/ShardRoutingTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/ShardRoutingTests.java @@ -301,6 +301,18 @@ public void testEqualsIgnoringVersion() { } } + public void testSwapPrimaryWithReplica() { + final ShardRouting unassignedShard0 = TestShardRouting.newShardRouting("test", 0, null, false, ShardRoutingState.UNASSIGNED); + assertThrows(AssertionError.class, unassignedShard0::moveActivePrimaryToReplica); + + final ShardRouting activeShard0 = TestShardRouting.newShardRouting("test", 0, "node-1", false, ShardRoutingState.STARTED); + assertThrows(IllegalShardRoutingStateException.class, activeShard0::moveActivePrimaryToReplica); + + final ShardRouting activeShard1 = TestShardRouting.newShardRouting("test", 0, "node-1", true, ShardRoutingState.STARTED); + final ShardRouting activeReplicaShard1 = activeShard1.moveActivePrimaryToReplica(); + assertFalse(activeReplicaShard1.primary()); + } + public void testExpectedSize() throws IOException { final int iters = randomIntBetween(10, 100); for (int i = 0; i < iters; i++) { diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteShardsAllocateUnassignedTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteShardsAllocateUnassignedTests.java new file mode 100644 index 0000000000000..7c45b20ecee1f --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteShardsAllocateUnassignedTests.java @@ -0,0 +1,123 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.routing.allocation; + +import com.carrotsearch.hppc.ObjectIntHashMap; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.routing.RoutingNode; +import org.opensearch.cluster.routing.RoutingNodes; +import org.opensearch.cluster.routing.RoutingPool; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.allocation.allocator.RemoteShardsBalancer; + +import java.util.Map; + +public class RemoteShardsAllocateUnassignedTests extends RemoteShardsBalancerBaseTestCase { + + /** + * Test Remote Shards Balancer initialization. 
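+     * Editorial note: concretely, this verifies that groupUnassignedShardsByIndex() buckets
+     * only the remote-pool indices (names prefixed with REMOTE_IDX_PREFIX) and that each
+     * bucket carries the expected primary and replica shard routings for its index.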
+ */ + public void testInit() { + int localOnlyNodes = 7; + int remoteCapableNodes = 3; + int localIndices = 10; + int remoteIndices = 15; + ClusterState clusterState = createInitialCluster(localOnlyNodes, remoteCapableNodes, localIndices, remoteIndices); + RoutingNodes routingNodes = new RoutingNodes(clusterState, false); + RoutingAllocation allocation = getRoutingAllocation(clusterState, routingNodes); + + RemoteShardsBalancer remoteShardsBalancer = new RemoteShardsBalancer(logger, allocation); + Map unassignedShardMap = remoteShardsBalancer.groupUnassignedShardsByIndex(); + + assertEquals(remoteIndices, unassignedShardMap.size()); + for (String index : unassignedShardMap.keySet()) { + assertTrue(index.startsWith(REMOTE_IDX_PREFIX)); + RemoteShardsBalancer.UnassignedIndexShards indexShards = unassignedShardMap.get(index); + assertEquals(5, indexShards.getPrimaries().size()); + for (ShardRouting shard : indexShards.getPrimaries()) { + assertTrue(shard.primary()); + assertEquals(shard.getIndexName(), index); + } + assertEquals(5, indexShards.getReplicas().size()); + for (ShardRouting shard : indexShards.getReplicas()) { + assertFalse(shard.primary()); + assertEquals(shard.getIndexName(), index); + } + } + } + + /** + * Test remote unassigned shard allocation for standard new cluster setup. + */ + public void testPrimaryAllocation() { + int localOnlyNodes = 7; + int remoteCapableNodes = 3; + int localIndices = 10; + int remoteIndices = 13; + ClusterState clusterState = createInitialCluster(localOnlyNodes, remoteCapableNodes, localIndices, remoteIndices); + AllocationService service = this.createRemoteCapableAllocationService(); + clusterState = allocateShardsAndBalance(clusterState, service); + RoutingNodes routingNodes = clusterState.getRoutingNodes(); + RoutingAllocation allocation = getRoutingAllocation(clusterState, routingNodes); + + assertEquals(0, routingNodes.unassigned().size()); + + ObjectIntHashMap nodePrimariesCounter = new ObjectIntHashMap<>(); + for (ShardRouting shard : clusterState.getRoutingTable().allShards()) { + assertFalse(shard.unassigned()); + RoutingNode node = routingNodes.node(shard.currentNodeId()); + RoutingPool nodePool = RoutingPool.getNodePool(node); + RoutingPool shardPool = RoutingPool.getShardPool(shard, allocation); + if (RoutingPool.REMOTE_CAPABLE.equals(shardPool)) { + assertEquals(nodePool, shardPool); + } + if (RoutingPool.getNodePool(node) == RoutingPool.REMOTE_CAPABLE + && RoutingPool.REMOTE_CAPABLE.equals(RoutingPool.getShardPool(shard, allocation)) + && shard.primary()) { + nodePrimariesCounter.putOrAdd(node.nodeId(), 1, 1); + } + } + final int indexShardLimit = (int) Math.ceil(totalPrimaries(remoteIndices) / (float) remoteCapableNodes); + for (int primaries : nodePrimariesCounter.values) { + assertTrue(primaries <= indexShardLimit); + } + } + + /** + * Test remote unassigned shard allocation when remote capable nodes fail to come up. 
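+     * Editorial note: with zero remote-capable nodes, every shard of a remote-pool index is
+     * expected to remain unassigned, while shards of local-pool indices are still allocated
+     * to the local-only nodes.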
+ */ + public void testAllocationRemoteCapableNodesUnavailable() { + int localOnlyNodes = 7; + int remoteCapableNodes = 0; + int localIndices = 10; + int remoteIndices = 13; + ClusterState clusterState = createInitialCluster(localOnlyNodes, remoteCapableNodes, localIndices, remoteIndices); + AllocationService service = this.createRemoteCapableAllocationService(); + clusterState = allocateShardsAndBalance(clusterState, service); + RoutingNodes routingNodes = clusterState.getRoutingNodes(); + RoutingAllocation allocation = getRoutingAllocation(clusterState, routingNodes); + + assertEquals(totalShards(remoteIndices), routingNodes.unassigned().size()); + + for (ShardRouting shard : clusterState.getRoutingTable().allShards()) { + if (RoutingPool.getShardPool(shard, allocation) == RoutingPool.REMOTE_CAPABLE) { + assertTrue(shard.unassigned()); + } else { + assertFalse(shard.unassigned()); + RoutingNode node = routingNodes.node(shard.currentNodeId()); + assertEquals(RoutingPool.LOCAL_ONLY, RoutingPool.getNodePool(node)); + } + } + for (RoutingNode node : routingNodes) { + if (RoutingPool.getNodePool(node) == RoutingPool.REMOTE_CAPABLE) { + assertEquals(0, node.size()); + } + } + } +} diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteShardsBalancerBaseTestCase.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteShardsBalancerBaseTestCase.java new file mode 100644 index 0000000000000..789de474d8ce5 --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteShardsBalancerBaseTestCase.java @@ -0,0 +1,302 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.cluster.routing.allocation; + +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.opensearch.Version; +import org.opensearch.cluster.ClusterInfo; +import org.opensearch.cluster.ClusterModule; +import org.opensearch.cluster.ClusterName; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.DiskUsage; +import org.opensearch.cluster.EmptyClusterInfoService; +import org.opensearch.cluster.OpenSearchAllocationTestCase; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodeRole; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.RoutingNodes; +import org.opensearch.cluster.routing.RoutingTable; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.UnassignedInfo; +import org.opensearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; +import org.opensearch.cluster.routing.allocation.allocator.ShardsAllocator; +import org.opensearch.cluster.routing.allocation.decider.AllocationDecider; +import org.opensearch.cluster.routing.allocation.decider.AllocationDeciders; +import org.opensearch.common.SuppressForbidden; +import org.opensearch.common.collect.ImmutableOpenMap; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.transport.TransportAddress; +import org.opensearch.common.util.FeatureFlags; +import org.opensearch.index.IndexModule; +import org.opensearch.test.gateway.TestGatewayAllocator; + +import java.net.Inet4Address; +import java.net.UnknownHostException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static org.opensearch.cluster.routing.ShardRoutingState.INITIALIZING; + +@SuppressForbidden(reason = "feature flag overrides") +public abstract class RemoteShardsBalancerBaseTestCase extends OpenSearchAllocationTestCase { + protected static final String LOCAL_NODE_PREFIX = "local-only-node"; + protected static final String REMOTE_NODE_PREFIX = "remote-capable-node"; + protected static final String LOCAL_IDX_PREFIX = "local-idx"; + protected static final String REMOTE_IDX_PREFIX = "remote-idx"; + protected static final Set<DiscoveryNodeRole> MANAGER_DATA_ROLES = Set.of( + DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, + DiscoveryNodeRole.DATA_ROLE + ); + protected static final Set<DiscoveryNodeRole> SEARCH_DATA_ROLES = Set.of( + DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, + DiscoveryNodeRole.DATA_ROLE, + DiscoveryNodeRole.SEARCH_ROLE + ); + + protected static final int PRIMARIES = 5; + protected static final int REPLICAS = 1; + private static final int MAX_REROUTE_ITERATIONS = 1000; + + protected ClusterSettings EMPTY_CLUSTER_SETTINGS = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + + @BeforeClass + public static void setup() { + System.setProperty(FeatureFlags.SEARCHABLE_SNAPSHOT, "true"); + } + + @AfterClass + public static void teardown() { + System.setProperty(FeatureFlags.SEARCHABLE_SNAPSHOT, "false"); + } + + public String getNodeId(int id, boolean isRemote, String prefix) { + if (isRemote) { + return REMOTE_NODE_PREFIX + "-" + prefix + "-" + id; + } + return LOCAL_NODE_PREFIX + "-" + prefix + "-" + id; + } + + public String getNodeId(int id, boolean isRemote) { + return getNodeId(id, isRemote,
""); + } + + public String getIndexName(int id, boolean isRemote, String prefix) { + if (isRemote) { + return REMOTE_IDX_PREFIX + "-" + prefix + "-" + id; + } + return LOCAL_IDX_PREFIX + "-" + prefix + "-" + id; + } + + public String getIndexName(int id, boolean isRemote) { + return getIndexName(id, isRemote, ""); + } + + public RoutingAllocation getRoutingAllocation(ClusterState clusterState, RoutingNodes routingNodes) { + return new RoutingAllocation( + randomAllocationDeciders(Settings.Builder.EMPTY_SETTINGS, EMPTY_CLUSTER_SETTINGS, random()), + routingNodes, + clusterState, + EmptyClusterInfoService.INSTANCE.getClusterInfo(), + null, + System.nanoTime() + ); + } + + private Map createNodeAttributes(String nodeId) { + Map attr = new HashMap<>(); + attr.put("name", nodeId); + attr.put("node_id", nodeId); + return attr; + } + + public ClusterState addNodes(ClusterState clusterState, int nodeCount, boolean isRemote) { + DiscoveryNodes.Builder nb = DiscoveryNodes.builder(clusterState.nodes()); + for (int i = 0; i < nodeCount; i++) { + String id = getNodeId(i, isRemote, "new"); + nb.add(newNode(id, id, isRemote ? SEARCH_DATA_ROLES : MANAGER_DATA_ROLES)); + } + return ClusterState.builder(clusterState).nodes(nb.build()).build(); + } + + public ClusterState addNodeWithIP(ClusterState clusterState, int nodeId, boolean isRemote, String IP) throws UnknownHostException { + TransportAddress ipAddress = new TransportAddress(Inet4Address.getByName(IP), 9200); + DiscoveryNodes.Builder nb = DiscoveryNodes.builder(clusterState.nodes()); + String id = getNodeId(nodeId, isRemote, "new"); + nb.add( + new DiscoveryNode( + id, + id, + ipAddress, + createNodeAttributes(id), + isRemote ? SEARCH_DATA_ROLES : MANAGER_DATA_ROLES, + Version.CURRENT + ) + ); + return ClusterState.builder(clusterState).nodes(nb.build()).build(); + } + + public ClusterState terminateNodes(ClusterState clusterState, AllocationService service, List nodesToTerminate) { + if (nodesToTerminate.isEmpty()) { + return clusterState; + } + logger.info("Terminating following nodes from cluster: [{}]", nodesToTerminate); + DiscoveryNodes.Builder nb = DiscoveryNodes.builder(clusterState.nodes()); + nodesToTerminate.forEach(nb::remove); + clusterState = ClusterState.builder(clusterState).nodes(nb.build()).build(); + clusterState = service.disassociateDeadNodes(clusterState, false, "nodes-terminated"); + return clusterState; + } + + public ClusterState createInitialCluster(int localOnlyNodes, int remoteCapableNodes, int localIndices, int remoteIndices) { + Metadata.Builder mb = Metadata.builder(); + for (int i = 0; i < localIndices; i++) { + mb.put( + IndexMetadata.builder(getIndexName(i, false)) + .settings(settings(Version.CURRENT).put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), "0")) + .numberOfShards(PRIMARIES) + .numberOfReplicas(REPLICAS) + ); + } + + for (int i = 0; i < remoteIndices; i++) { + mb.put( + IndexMetadata.builder(getIndexName(i, true)) + .settings( + settings(Version.CURRENT).put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), "0") + .put(IndexModule.INDEX_STORE_TYPE_SETTING.getKey(), IndexModule.Type.REMOTE_SNAPSHOT.getSettingsKey()) + ) + .numberOfShards(PRIMARIES) + .numberOfReplicas(REPLICAS) + ); + } + Metadata metadata = mb.build(); + + RoutingTable.Builder rb = RoutingTable.builder(); + for (int i = 0; i < localIndices; i++) { + rb.addAsNew(metadata.index(getIndexName(i, false))); + } + for (int i = 0; i < remoteIndices; i++) { + rb.addAsNew(metadata.index(getIndexName(i, true))); + } 
+ RoutingTable routingTable = rb.build(); + + DiscoveryNodes.Builder nb = DiscoveryNodes.builder(); + for (int i = 0; i < localOnlyNodes; i++) { + String name = getNodeId(i, false); + nb.add(newNode(name, name, MANAGER_DATA_ROLES)); + } + for (int i = 0; i < remoteCapableNodes; i++) { + String name = getNodeId(i, true); + nb.add(newNode(name, name, SEARCH_DATA_ROLES)); + } + DiscoveryNodes nodes = nb.build(); + return ClusterState.builder(ClusterName.DEFAULT).metadata(metadata).routingTable(routingTable).nodes(nodes).build(); + } + + protected ClusterState createRemoteIndex(ClusterState state, String indexName) { + Metadata metadata = Metadata.builder(state.metadata()) + .put( + IndexMetadata.builder(indexName) + .settings( + settings(Version.CURRENT).put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), "5m") + .put(IndexModule.INDEX_STORE_TYPE_SETTING.getKey(), IndexModule.Type.REMOTE_SNAPSHOT.getSettingsKey()) + ) + .numberOfShards(PRIMARIES) + .numberOfReplicas(REPLICAS) + ) + .build(); + RoutingTable routingTable = RoutingTable.builder(state.routingTable()).addAsNew(metadata.index(indexName)).build(); + return ClusterState.builder(state).metadata(metadata).routingTable(routingTable).build(); + } + + private AllocationDeciders remoteAllocationDeciders(Settings settings, ClusterSettings clusterSettings) { + List<AllocationDecider> deciders = new ArrayList<>( + ClusterModule.createAllocationDeciders(settings, clusterSettings, Collections.emptyList()) + ); + Collections.shuffle(deciders, random()); + return new AllocationDeciders(deciders); + } + + public AllocationService createRemoteCapableAllocationService() { + Settings settings = Settings.Builder.EMPTY_SETTINGS; + return new OpenSearchAllocationTestCase.MockAllocationService( + randomAllocationDeciders(settings, EMPTY_CLUSTER_SETTINGS, random()), + new TestGatewayAllocator(), + createShardAllocator(settings), + EmptyClusterInfoService.INSTANCE, + SNAPSHOT_INFO_SERVICE_WITH_NO_SHARD_SIZES + ); + } + + public AllocationService createRemoteCapableAllocationService(String excludeNodes) { + Settings settings = Settings.builder().put("cluster.routing.allocation.exclude.node_id", excludeNodes).build(); + return new MockAllocationService( + randomAllocationDeciders(settings, EMPTY_CLUSTER_SETTINGS, random()), + new TestGatewayAllocator(), + createShardAllocator(settings), + EmptyClusterInfoService.INSTANCE, + SNAPSHOT_INFO_SERVICE_WITH_NO_SHARD_SIZES + ); + } + + public AllocationDeciders createAllocationDeciders() { + Settings settings = Settings.Builder.EMPTY_SETTINGS; + return randomAllocationDeciders(settings, EMPTY_CLUSTER_SETTINGS, random()); + } + + public ClusterState allocateShardsAndBalance(ClusterState clusterState, AllocationService service) { + int iterations = 0; + do { + clusterState = service.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING)); + clusterState = service.reroute(clusterState, "reroute"); + iterations++; + } while (!clusterState.getRoutingNodes().shardsWithState(INITIALIZING).isEmpty() && iterations < MAX_REROUTE_ITERATIONS); + return clusterState; + } + + public int totalShards(int indices) { + return indices * PRIMARIES * (REPLICAS + 1); + } + + public int totalPrimaries(int indices) { + return indices * PRIMARIES; + } + + public ShardsAllocator createShardAllocator(Settings settings) { + ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + return new BalancedShardsAllocator(settings, clusterSettings); + } + + /**
+ * ClusterInfo that always reports /dev/null for the shards' data paths. + */ + public static class DevNullClusterInfo extends ClusterInfo { + public DevNullClusterInfo( + ImmutableOpenMap<String, DiskUsage> leastAvailableSpaceUsage, + ImmutableOpenMap<String, DiskUsage> mostAvailableSpaceUsage, + ImmutableOpenMap<String, Long> shardSizes + ) { + super(leastAvailableSpaceUsage, mostAvailableSpaceUsage, shardSizes, null, ImmutableOpenMap.of()); + } + + @Override + public String getDataPath(ShardRouting shardRouting) { + return "/dev/null"; + } + } +} diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteShardsMoveShardsTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteShardsMoveShardsTests.java new file mode 100644 index 0000000000000..f2e79b319d0dd --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteShardsMoveShardsTests.java @@ -0,0 +1,109 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.routing.allocation; + +import org.opensearch.Version; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.health.ClusterHealthStatus; +import org.opensearch.cluster.health.ClusterStateHealth; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.routing.RoutingNodes; +import org.opensearch.cluster.routing.RoutingTable; +import org.opensearch.cluster.routing.ShardRoutingState; +import org.opensearch.index.IndexModule; + +public class RemoteShardsMoveShardsTests extends RemoteShardsBalancerBaseTestCase { + + /** + * Test that reroute terminates gracefully if shards cannot move out of the excluded node. + */ + public void testExcludeNodeIdMoveBlocked() { + int localOnlyNodes = 7; + int remoteCapableNodes = 2; + int localIndices = 10; + int remoteIndices = 13; + ClusterState clusterState = createInitialCluster(localOnlyNodes, remoteCapableNodes, localIndices, remoteIndices); + AllocationService service = this.createRemoteCapableAllocationService(); + clusterState = allocateShardsAndBalance(clusterState, service); + assertEquals(ClusterHealthStatus.GREEN, (new ClusterStateHealth(clusterState)).getStatus()); + + // Exclude a node + final String excludedNodeID = getNodeId(0, true); + service = createRemoteCapableAllocationService(excludedNodeID); + clusterState = allocateShardsAndBalance(clusterState, service); + + RoutingNodes routingNodes = clusterState.getRoutingNodes(); + assertEquals(ClusterHealthStatus.GREEN, (new ClusterStateHealth(clusterState)).getStatus()); + assertEquals(0, routingNodes.unassigned().size()); + assertTrue(routingNodes.node(excludedNodeID).size() > 0); + } + + /** + * Test move operations for index level allocation settings. + * Supported for local indices; not supported for remote indices.
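+ * Local index shards must move off the excluded node, while remote index shards are expected to stay in place.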
+ */ + public void testIndexLevelExclusions() { + int localOnlyNodes = 7; + int remoteCapableNodes = 3; + int localIndices = 10; + int remoteIndices = 13; + ClusterState clusterState = createInitialCluster(localOnlyNodes, remoteCapableNodes, localIndices, remoteIndices); + AllocationService service = this.createRemoteCapableAllocationService(); + clusterState = allocateShardsAndBalance(clusterState, service); + RoutingNodes routingNodes = clusterState.getRoutingNodes(); + + assertEquals(ClusterHealthStatus.GREEN, (new ClusterStateHealth(clusterState)).getStatus()); + assertEquals(0, routingNodes.unassigned().size()); + + final String excludedLocalOnlyNode = getNodeId(0, false); + final String excludedRemoteCapableNode = getNodeId(0, true); + final String localIndex = routingNodes.node(excludedLocalOnlyNode).shardsWithState(ShardRoutingState.STARTED).get(0).getIndexName(); + final String remoteIndex = routingNodes.node(excludedRemoteCapableNode) + .shardsWithState(ShardRoutingState.STARTED) + .stream() + .filter(shardRouting -> shardRouting.getIndexName().startsWith(REMOTE_IDX_PREFIX)) + .findFirst() + .get() + .getIndexName(); + + Metadata.Builder mb = Metadata.builder(clusterState.metadata()); + mb.put( + IndexMetadata.builder(clusterState.metadata().index(localIndex)) + .settings( + settings(Version.CURRENT).put("index.number_of_shards", PRIMARIES) + .put("index.number_of_replicas", REPLICAS) + .put("index.routing.allocation.exclude._name", excludedLocalOnlyNode) + .build() + ) + ); + mb.put( + IndexMetadata.builder(clusterState.metadata().index(remoteIndex)) + .settings( + settings(Version.CURRENT).put("index.number_of_shards", PRIMARIES) + .put("index.number_of_replicas", REPLICAS) + .put(IndexModule.INDEX_STORE_TYPE_SETTING.getKey(), IndexModule.Type.REMOTE_SNAPSHOT.getSettingsKey()) + .put("index.routing.allocation.exclude._name", excludedRemoteCapableNode) + .build() + ) + ); + clusterState = ClusterState.builder(clusterState).metadata(mb.build()).build(); + + clusterState = allocateShardsAndBalance(clusterState, service); + assertEquals(ClusterHealthStatus.GREEN, (new ClusterStateHealth(clusterState)).getStatus()); + RoutingTable routingTable = clusterState.routingTable(); + + // No shard of the updated local index should remain on the excluded local-only node + assertTrue(routingTable.allShards(localIndex).stream().noneMatch(shard -> shard.currentNodeId().equals(excludedLocalOnlyNode))); + + // Since remote index shards are untouched, at least one shard should + // continue to stay on the excluded remote capable node + assertTrue(routingTable.allShards(remoteIndex).stream().anyMatch(shard -> shard.currentNodeId().equals(excludedRemoteCapableNode))); + } +} diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteShardsRebalanceShardsTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteShardsRebalanceShardsTests.java new file mode 100644 index 0000000000000..c4358aaf12ac2 --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteShardsRebalanceShardsTests.java @@ -0,0 +1,82 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license.
+ */ + +package org.opensearch.cluster.routing.allocation; + +import com.carrotsearch.hppc.ObjectIntHashMap; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.routing.RoutingNode; +import org.opensearch.cluster.routing.RoutingNodes; +import org.opensearch.cluster.routing.RoutingPool; +import org.opensearch.cluster.routing.ShardRouting; + +public class RemoteShardsRebalanceShardsTests extends RemoteShardsBalancerBaseTestCase { + + /** + * Test remote shard allocation and balancing for standard new cluster setup. + * + * Post rebalance, primaries should be balanced across all the nodes. + */ + public void testShardAllocationAndRebalance() { + int localOnlyNodes = 20; + int remoteCapableNodes = 40; + int localIndices = 40; + int remoteIndices = 80; + ClusterState clusterState = createInitialCluster(localOnlyNodes, remoteCapableNodes, localIndices, remoteIndices); + AllocationService service = this.createRemoteCapableAllocationService(); + clusterState = allocateShardsAndBalance(clusterState, service); + RoutingNodes routingNodes = clusterState.getRoutingNodes(); + RoutingAllocation allocation = getRoutingAllocation(clusterState, routingNodes); + + ObjectIntHashMap<String> nodePrimariesCounter = getShardCounterPerNodeForRemoteCapablePool(clusterState, allocation, true); + ObjectIntHashMap<String> nodeReplicaCounter = getShardCounterPerNodeForRemoteCapablePool(clusterState, allocation, false); + int avgPrimariesPerNode = getTotalShardCountAcrossNodes(nodePrimariesCounter) / remoteCapableNodes; + + // Primary and replica are balanced post first reroute + for (RoutingNode node : routingNodes) { + if (RoutingPool.REMOTE_CAPABLE.equals(RoutingPool.getNodePool(node))) { + assertInRange(nodePrimariesCounter.get(node.nodeId()), avgPrimariesPerNode, remoteCapableNodes - 1); + assertTrue(nodeReplicaCounter.get(node.nodeId()) >= 0); + } + } + } + + private ObjectIntHashMap<String> getShardCounterPerNodeForRemoteCapablePool( + ClusterState clusterState, + RoutingAllocation allocation, + boolean primary + ) { + ObjectIntHashMap<String> nodeShardCounter = new ObjectIntHashMap<>(); + for (ShardRouting shard : clusterState.getRoutingTable().allShards()) { + if (RoutingPool.REMOTE_CAPABLE.equals(RoutingPool.getShardPool(shard, allocation)) && shard.primary() == primary) { + nodeShardCounter.putOrAdd(shard.currentNodeId(), 1, 1); + } + } + return nodeShardCounter; + } + + private int getTotalShardCountAcrossNodes(ObjectIntHashMap<String> nodeShardCounter) { + int totalShardCount = 0; + for (int value : nodeShardCounter.values) { + totalShardCount += value; + } + return totalShardCount; + } + + /** + * Asserts that the actual value lies within the variance range around the expected mean. + * + * Used to assert the average number of shards per node. + * Variance is required when the mean is not exact; + * for example, when shards do not divide evenly across the remote capable nodes in a cluster.
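+ * A variance of (remote capable nodes - 1) absorbs the rounding of the integer-division average.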
+ */ + private void assertInRange(int actual, int expectedMean, int variance) { + assertTrue(actual >= expectedMean - variance); + assertTrue(actual <= expectedMean + variance); + } +} diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/decider/TargetPoolAllocationDeciderTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/decider/TargetPoolAllocationDeciderTests.java new file mode 100644 index 0000000000000..9a415ed0b339b --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/decider/TargetPoolAllocationDeciderTests.java @@ -0,0 +1,114 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.routing.allocation.decider; + +import org.opensearch.Version; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.routing.RoutingNode; +import org.opensearch.cluster.routing.RoutingNodes; +import org.opensearch.cluster.routing.RoutingTable; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.allocation.AllocationService; +import org.opensearch.cluster.routing.allocation.RemoteShardsBalancerBaseTestCase; +import org.opensearch.cluster.routing.allocation.RoutingAllocation; + +import java.util.Collections; +import java.util.stream.Collectors; + +public class TargetPoolAllocationDeciderTests extends RemoteShardsBalancerBaseTestCase { + public void testTargetPoolAllocationDecisions() { + ClusterState clusterState = createInitialCluster(3, 3, 2, 2); + AllocationService service = this.createRemoteCapableAllocationService(); + clusterState = allocateShardsAndBalance(clusterState, service); + + // Add an unassigned primary shard for force allocation checks + Metadata metadata = Metadata.builder(clusterState.metadata()) + .put(IndexMetadata.builder("test_local_unassigned").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1)) + .build(); + RoutingTable routingTable = RoutingTable.builder(clusterState.routingTable()) + .addAsNew(metadata.index("test_local_unassigned")) + .build(); + clusterState = ClusterState.builder(clusterState).metadata(metadata).routingTable(routingTable).build(); + + // Add remote index unassigned primary + clusterState = createRemoteIndex(clusterState, "test_remote_unassigned"); + + RoutingNodes defaultRoutingNodes = clusterState.getRoutingNodes(); + RoutingAllocation globalAllocation = getRoutingAllocation(clusterState, defaultRoutingNodes); + + ShardRouting localShard = clusterState.routingTable() + .allShards(getIndexName(0, false)) + .stream() + .filter(ShardRouting::primary) + .collect(Collectors.toList()) + .get(0); + ShardRouting remoteShard = clusterState.routingTable() + .allShards(getIndexName(0, true)) + .stream() + .filter(ShardRouting::primary) + .collect(Collectors.toList()) + .get(0); + ShardRouting unassignedLocalShard = clusterState.routingTable() + .allShards("test_local_unassigned") + .stream() + .filter(ShardRouting::primary) + .collect(Collectors.toList()) + .get(0); + ShardRouting unassignedRemoteShard = clusterState.routingTable() + .allShards("test_remote_unassigned") + .stream() + .filter(ShardRouting::primary) + .collect(Collectors.toList()) + .get(0); + IndexMetadata localIdx = 
globalAllocation.metadata().getIndexSafe(localShard.index()); + IndexMetadata remoteIdx = globalAllocation.metadata().getIndexSafe(remoteShard.index()); + String localNodeId = LOCAL_NODE_PREFIX; + for (RoutingNode routingNode : globalAllocation.routingNodes()) { + if (routingNode.nodeId().startsWith(LOCAL_NODE_PREFIX)) { + localNodeId = routingNode.nodeId(); + break; + } + } + String remoteNodeId = remoteShard.currentNodeId(); + RoutingNode localOnlyNode = defaultRoutingNodes.node(localNodeId); + RoutingNode remoteCapableNode = defaultRoutingNodes.node(remoteNodeId); + + AllocationDeciders deciders = new AllocationDeciders(Collections.singletonList(new TargetPoolAllocationDecider())); + + // Incompatible Pools + assertEquals(Decision.NO.type(), deciders.canAllocate(remoteShard, localOnlyNode, globalAllocation).type()); + assertEquals(Decision.NO.type(), deciders.canAllocate(remoteIdx, localOnlyNode, globalAllocation).type()); + assertEquals(Decision.NO.type(), deciders.canForceAllocatePrimary(unassignedRemoteShard, localOnlyNode, globalAllocation).type()); + + // Compatible Pools + assertEquals(Decision.YES.type(), deciders.canAllocate(localShard, remoteCapableNode, globalAllocation).type()); + assertEquals(Decision.YES.type(), deciders.canAllocate(localIdx, remoteCapableNode, globalAllocation).type()); + assertEquals(Decision.YES.type(), deciders.canAllocate(remoteShard, remoteCapableNode, globalAllocation).type()); + assertEquals(Decision.YES.type(), deciders.canAllocate(remoteIdx, remoteCapableNode, globalAllocation).type()); + assertEquals(Decision.YES.type(), deciders.canAllocate(localShard, localOnlyNode, globalAllocation).type()); + assertEquals(Decision.YES.type(), deciders.canAllocate(localIdx, localOnlyNode, globalAllocation).type()); + assertEquals( + Decision.YES.type(), + deciders.canForceAllocatePrimary(unassignedRemoteShard, remoteCapableNode, globalAllocation).type() + ); + assertEquals(Decision.YES.type(), deciders.canForceAllocatePrimary(unassignedLocalShard, localOnlyNode, globalAllocation).type()); + assertEquals( + Decision.YES.type(), + deciders.canForceAllocatePrimary(unassignedLocalShard, remoteCapableNode, globalAllocation).type() + ); + + // Verify only remote nodes are used for auto expand replica decision for remote index + assertEquals(Decision.YES.type(), deciders.shouldAutoExpandToNode(localIdx, remoteCapableNode.node(), globalAllocation).type()); + assertEquals(Decision.NO.type(), deciders.shouldAutoExpandToNode(remoteIdx, localOnlyNode.node(), globalAllocation).type()); + assertEquals(Decision.YES.type(), deciders.shouldAutoExpandToNode(localIdx, localOnlyNode.node(), globalAllocation).type()); + assertEquals(Decision.YES.type(), deciders.shouldAutoExpandToNode(remoteIdx, remoteCapableNode.node(), globalAllocation).type()); + } +} diff --git a/server/src/test/java/org/opensearch/common/lucene/store/ByteArrayIndexInputTests.java b/server/src/test/java/org/opensearch/common/lucene/store/ByteArrayIndexInputTests.java index bf331ca978009..827f9dd992294 100644 --- a/server/src/test/java/org/opensearch/common/lucene/store/ByteArrayIndexInputTests.java +++ b/server/src/test/java/org/opensearch/common/lucene/store/ByteArrayIndexInputTests.java @@ -32,8 +32,10 @@ package org.opensearch.common.lucene.store; +import java.io.EOFException; import java.io.IOException; import java.nio.charset.StandardCharsets; +import java.util.Arrays; import static org.hamcrest.Matchers.containsString; @@ -97,4 +99,58 @@ public void testSeekOverflow() throws IOException { } } + public 
void testRandomReadEdges() throws IOException { + final int size = 16; + byte[] input = Arrays.copyOfRange(randomUnicodeOfLength(size).getBytes(StandardCharsets.UTF_8), 0, size); + ByteArrayIndexInput indexInput = new ByteArrayIndexInput("test", input); + assertThrows(EOFException.class, () -> indexInput.readByte(-1)); + assertThrows(EOFException.class, () -> indexInput.readShort(-1)); + assertThrows(EOFException.class, () -> indexInput.readInt(-1)); + assertThrows(EOFException.class, () -> indexInput.readLong(-1)); + for (int i = 0; i < 10; i++) { + indexInput.readByte(size - Byte.BYTES); + indexInput.readShort(size - Short.BYTES); + indexInput.readInt(size - Integer.BYTES); + indexInput.readLong(size - Long.BYTES); + indexInput.readByte(0); + indexInput.readShort(0); + indexInput.readInt(0); + indexInput.readLong(0); + } + assertThrows(EOFException.class, () -> indexInput.readByte(size)); + assertThrows(EOFException.class, () -> indexInput.readShort(size - Short.BYTES + 1)); + assertThrows(EOFException.class, () -> indexInput.readInt(size - Integer.BYTES + 1)); + assertThrows(EOFException.class, () -> indexInput.readLong(size - Long.BYTES + 1)); + } + + public void testRandomAccessReads() throws IOException { + byte[] bytes = { + (byte) 85, // 01010101 + (byte) 46, // 00101110 + (byte) 177, // 10110001 + (byte) 36, // 00100100 + (byte) 231, // 11100111 + (byte) 48, // 00110000 + (byte) 137, // 10001001 + (byte) 37, // 00100101 + (byte) 137 // 10001001 + }; + ByteArrayIndexInput indexInput = new ByteArrayIndexInput("test", bytes); + // 00101110 01010101 + assertEquals(11861, indexInput.readShort(0)); + // 10110001 00101110 + assertEquals(-20178, indexInput.readShort(1)); + // 00100101 10001001 + assertEquals(9609, indexInput.readShort(6)); + // 00100100 10110001 00101110 01010101 + assertEquals(615591509, indexInput.readInt(0)); + // 00110000 11100111 00100100 10110001 + assertEquals(820454577, indexInput.readInt(2)); + // 00100101 10001001 00110000 11100111 + assertEquals(629747943, indexInput.readInt(4)); + // 00100101 10001001 00110000 11100111 00100100 10110001 00101110 01010101 + assertEquals(2704746820523863637L, indexInput.readLong(0)); + // 10001001 00100101 10001001 00110000 11100111 00100100 10110001 00101110 + assertEquals(-8564288273245753042L, indexInput.readLong(1)); + } } diff --git a/server/src/test/java/org/opensearch/discovery/PeerFinderTests.java b/server/src/test/java/org/opensearch/discovery/PeerFinderTests.java index 5e7dede0309c6..7e7bb2f0a2911 100644 --- a/server/src/test/java/org/opensearch/discovery/PeerFinderTests.java +++ b/server/src/test/java/org/opensearch/discovery/PeerFinderTests.java @@ -807,6 +807,42 @@ public void testReconnectsToDisconnectedNodes() { assertFoundPeers(rebootedOtherNode); } + public void testConnectionAttemptDuringDecommissioning() { + boolean localNodeCommissioned = randomBoolean(); + peerFinder.onNodeCommissionStatusChange(localNodeCommissioned); + + long findPeersInterval = peerFinder.getFindPeersInterval().millis(); + + final DiscoveryNode otherNode = newDiscoveryNode("node-1"); + providedAddresses.add(otherNode.getAddress()); + transportAddressConnector.addReachableNode(otherNode); + + peerFinder.activate(lastAcceptedNodes); + runAllRunnableTasks(); + assertFoundPeers(otherNode); + + transportAddressConnector.reachableNodes.clear(); + final DiscoveryNode newNode = new DiscoveryNode("new-node", otherNode.getAddress(), Version.CURRENT); + transportAddressConnector.addReachableNode(newNode); + + connectedNodes.remove(otherNode); + 
disconnectedNodes.add(otherNode); + + // peer discovery is delayed while the local node is decommissioned + if (localNodeCommissioned == false) { + deterministicTaskQueue.advanceTime(); + runAllRunnableTasks(); + assertPeersNotDiscovered(newNode); + } + + final long expectedTime = CONNECTION_TIMEOUT_MILLIS + findPeersInterval; + while (deterministicTaskQueue.getCurrentTimeMillis() < expectedTime) { + deterministicTaskQueue.advanceTime(); + runAllRunnableTasks(); + } + assertFoundPeers(newNode); + } + private void respondToRequests(Function<DiscoveryNode, PeersResponse> responseFactory) { final CapturedRequest[] capturedRequests = capturingTransport.getCapturedRequestsAndClear(); for (final CapturedRequest capturedRequest : capturedRequests) { @@ -828,6 +864,16 @@ private void assertFoundPeers(DiscoveryNode... expectedNodesArray) { assertNotifiedOfAllUpdates(); } + private void assertPeersNotDiscovered(DiscoveryNode... undiscoveredNodesArray) { + final Set<DiscoveryNode> undiscoveredNodes = Arrays.stream(undiscoveredNodesArray).collect(Collectors.toSet()); + final List<DiscoveryNode> actualNodesList = StreamSupport.stream(peerFinder.getFoundPeers().spliterator(), false) + .collect(Collectors.toList()); + final HashSet<DiscoveryNode> actualNodesSet = new HashSet<>(actualNodesList); + Set<DiscoveryNode> intersection = new HashSet<>(actualNodesSet); + intersection.retainAll(undiscoveredNodes); + assertEquals(0, intersection.size()); + } + private void assertNotifiedOfAllUpdates() { final Stream<DiscoveryNode> actualNodes = StreamSupport.stream(peerFinder.getFoundPeers().spliterator(), false); final Stream<DiscoveryNode> notifiedNodes = StreamSupport.stream(foundPeersFromNotification.spliterator(), false); diff --git a/server/src/test/java/org/opensearch/index/snapshots/blobstore/SlicedInputStreamTests.java b/server/src/test/java/org/opensearch/index/snapshots/blobstore/SlicedInputStreamTests.java index 3e337bbe3adae..76fb8f62b5468 100644 --- a/server/src/test/java/org/opensearch/index/snapshots/blobstore/SlicedInputStreamTests.java +++ b/server/src/test/java/org/opensearch/index/snapshots/blobstore/SlicedInputStreamTests.java @@ -32,6 +32,8 @@ package org.opensearch.index.snapshots.blobstore; import com.carrotsearch.randomizedtesting.generators.RandomNumbers; + +import org.hamcrest.MatcherAssert; import org.opensearch.test.OpenSearchTestCase; import java.io.ByteArrayInputStream; @@ -39,6 +41,9 @@ import java.io.FilterInputStream; import java.io.IOException; import java.io.InputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; import java.util.Random; import static org.hamcrest.Matchers.equalTo; @@ -86,11 +91,9 @@ protected InputStream openSlice(int slice) throws IOException { assertThat(random.nextInt(Byte.MAX_VALUE), equalTo(input.read())); break; default: - byte[] b = randomBytes(random); - byte[] buffer = new byte[b.length]; - int read = readFully(input, buffer); - assertThat(b.length, equalTo(read)); - assertArrayEquals(b, buffer); + byte[] expectedBytes = randomBytes(random); + byte[] actualBytes = input.readNBytes(expectedBytes.length); + assertArrayEquals(expectedBytes, actualBytes); break; } } @@ -107,19 +110,45 @@ protected InputStream openSlice(int slice) throws IOException { } - private int readFully(InputStream stream, byte[] buffer) throws IOException { - for (int i = 0; i < buffer.length;) { - int read = stream.read(buffer, i, buffer.length - i); - if (read == -1) { - if (i == 0) { - return -1; - } else { - return i; - } + public void testReadZeroLength() throws IOException { + try (InputStream input = createSingleByteStream()) { + final byte[] buffer = new byte[100]; + final int read =
input.read(buffer, 0, 0); + MatcherAssert.assertThat(read, equalTo(0)); + } + } + + public void testInvalidOffsetAndLength() throws IOException { + try (InputStream input = createSingleByteStream()) { + final byte[] buffer = new byte[100]; + expectThrows(NullPointerException.class, () -> input.read(null, 0, 10)); + expectThrows(IndexOutOfBoundsException.class, () -> input.read(buffer, -1, 10)); + expectThrows(IndexOutOfBoundsException.class, () -> input.read(buffer, 0, -1)); + expectThrows(IndexOutOfBoundsException.class, () -> input.read(buffer, 0, buffer.length + 1)); + } + } + + public void testReadAllBytes() throws IOException { + final byte[] expectedResults = randomByteArrayOfLength(50_000); + final int numSlices = 200; + final int sliceSize = expectedResults.length / numSlices; + + final List<byte[]> arraySlices = new ArrayList<>(numSlices); + for (int i = 0; i < numSlices; i++) { + final int offset = sliceSize * i; + arraySlices.add(Arrays.copyOfRange(expectedResults, offset, offset + sliceSize)); + } + // Create a SlicedInputStream that reassembles the expected result from the numSlices slices + final byte[] actualResults; + try (InputStream is = new SlicedInputStream(numSlices) { + @Override + protected InputStream openSlice(int slice) { + return new ByteArrayInputStream(arraySlices.get(slice)); } - i += read; + }) { + actualResults = is.readAllBytes(); } - return buffer.length; + assertArrayEquals(expectedResults, actualResults); } private byte[] randomBytes(Random random) { @@ -129,6 +158,15 @@ private byte[] randomBytes(Random random) { return data; } + private static InputStream createSingleByteStream() { + return new SlicedInputStream(1) { + @Override + protected InputStream openSlice(int slice) { + return new ByteArrayInputStream(new byte[] { 1 }); + } + }; + } + private static final class CheckClosedInputStream extends FilterInputStream { public boolean closed = false; diff --git a/server/src/test/java/org/opensearch/index/store/remote/file/OnDemandBlockSnapshotIndexInputTests.java b/server/src/test/java/org/opensearch/index/store/remote/file/OnDemandBlockSnapshotIndexInputTests.java new file mode 100644 index 0000000000000..65969cc65359e --- /dev/null +++ b/server/src/test/java/org/opensearch/index/store/remote/file/OnDemandBlockSnapshotIndexInputTests.java @@ -0,0 +1,495 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license.
+ */ + +package org.opensearch.index.store.remote.file; + +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.LockFactory; +import org.apache.lucene.store.MMapDirectory; +import org.apache.lucene.store.SimpleFSLockFactory; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.apache.lucene.util.Version; +import org.junit.After; +import org.junit.Before; +import org.opensearch.common.unit.ByteSizeValue; +import org.opensearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot; +import org.opensearch.index.store.StoreFileMetadata; +import org.opensearch.index.store.remote.utils.BlobFetchRequest; +import org.opensearch.index.store.remote.utils.TransferManager; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.EOFException; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Comparator; +import java.util.concurrent.CompletableFuture; + +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; + +public class OnDemandBlockSnapshotIndexInputTests extends OpenSearchTestCase { + // params shared by all test cases + private static final String RESOURCE_DESCRIPTION = "Test OnDemandBlockSnapshotIndexInput Block Size"; + private static final long BLOCK_SNAPSHOT_FILE_OFFSET = 0; + private static final String FILE_NAME = "File_Name"; + private static final String BLOCK_FILE_PREFIX = FILE_NAME; + private static final boolean IS_CLONE = false; + private static final ByteSizeValue BYTE_SIZE_VALUE = new ByteSizeValue(1L); + private TransferManager transferManager; + private LockFactory lockFactory; + private BlobStoreIndexShardSnapshot.FileInfo fileInfo; + private Path path; + + @Before + public void init() { + transferManager = mock(TransferManager.class); + lockFactory = SimpleFSLockFactory.INSTANCE; + path = LuceneTestCase.createTempDir("OnDemandBlockSnapshotIndexInputTests"); + } + + @After + public void clean() { + try { + cleanDirectory(path); + } catch (Exception e) { + fail(); + } + } + + public void testVariousBlockSize() throws Exception { + int fileSize = 29360128; + int blockSizeShift; + + // block size 8MB, default one + blockSizeShift = 23; + OnDemandBlockSnapshotIndexInput ondemandBlockSnapshotIndexInput_8MB = createOnDemandBlockSnapshotIndexInput( + blockSizeShift, + fileSize + ); + runAllTestsFor(ondemandBlockSnapshotIndexInput_8MB, 1 << blockSizeShift, fileSize); + cleanDirectory(path); + + // block size 4KB + blockSizeShift = 12; + OnDemandBlockSnapshotIndexInput ondemandBlockSnapshotIndexInput_4KB = createOnDemandBlockSnapshotIndexInput( + blockSizeShift, + fileSize + ); + runAllTestsFor(ondemandBlockSnapshotIndexInput_4KB, 1 << blockSizeShift, fileSize); + cleanDirectory(path); + + // block size 1MB + blockSizeShift = 20; + OnDemandBlockSnapshotIndexInput ondemandBlockSnapshotIndexInput_1MB = createOnDemandBlockSnapshotIndexInput( + blockSizeShift, + fileSize + ); + runAllTestsFor(ondemandBlockSnapshotIndexInput_1MB, 1 << blockSizeShift, fileSize); + cleanDirectory(path); + + // block size 4MB + blockSizeShift = 22; + OnDemandBlockSnapshotIndexInput ondemandBlockSnapshotIndexInput_4MB = createOnDemandBlockSnapshotIndexInput( + blockSizeShift, + fileSize + ); + runAllTestsFor(ondemandBlockSnapshotIndexInput_4MB, 1 << blockSizeShift, fileSize); + cleanDirectory(path); + } + + public void 
runAllTestsFor(OnDemandBlockSnapshotIndexInput blockedSnapshotFile, int blockSize, int fileSize) throws Exception { + TestGroup.testGetBlock(blockedSnapshotFile, blockSize, fileSize); + TestGroup.testGetBlockOffset(blockedSnapshotFile, blockSize, fileSize); + TestGroup.testGetBlockStart(blockedSnapshotFile, blockSize); + TestGroup.testCurrentBlockStart(blockedSnapshotFile, blockSize); + TestGroup.testCurrentBlockPosition(blockedSnapshotFile, blockSize); + TestGroup.testClone(blockedSnapshotFile, blockSize); + TestGroup.testSlice(blockedSnapshotFile, blockSize); + TestGroup.testGetFilePointer(blockedSnapshotFile, blockSize); + TestGroup.testReadByte(blockedSnapshotFile, blockSize); + TestGroup.testReadShort(blockedSnapshotFile, blockSize); + TestGroup.testReadInt(blockedSnapshotFile, blockSize); + TestGroup.testReadLong(blockedSnapshotFile, blockSize); + TestGroup.testReadVInt(blockedSnapshotFile, blockSize); + TestGroup.testSeek(blockedSnapshotFile, blockSize, fileSize); + TestGroup.testReadByteWithPos(blockedSnapshotFile, blockSize); + TestGroup.testReadShortWithPos(blockedSnapshotFile, blockSize); + TestGroup.testReadIntWithPos(blockedSnapshotFile, blockSize); + TestGroup.testReadLongWithPos(blockedSnapshotFile, blockSize); + TestGroup.testReadBytes(blockedSnapshotFile, blockSize); + } + + // create OnDemandBlockSnapshotIndexInput for each block size + private OnDemandBlockSnapshotIndexInput createOnDemandBlockSnapshotIndexInput(int blockSizeShift, long fileSize) { + + // file info should be initialized per test method since the file size needs to be calculated + fileInfo = new BlobStoreIndexShardSnapshot.FileInfo( + FILE_NAME, + new StoreFileMetadata(FILE_NAME, fileSize, "", Version.LATEST), + BYTE_SIZE_VALUE + ); + + int blockSize = 1 << blockSizeShift; + + doAnswer(invocation -> { + BlobFetchRequest blobFetchRequest = invocation.getArgument(0); + return CompletableFuture.completedFuture( + blobFetchRequest.getDirectory().openInput(blobFetchRequest.getFileName(), IOContext.READ) + ); + }).when(transferManager).asyncFetchBlob(any()); + + FSDirectory directory = null; + try { + cleanDirectory(path); + // use MMapDirectory to create the block files + directory = new MMapDirectory(path, lockFactory); + } catch (IOException e) { + fail("failed to create MMapDirectory: " + e.getMessage()); + } + + initBlockFiles(blockSize, fileSize, directory); + + return new OnDemandBlockSnapshotIndexInput( + OnDemandBlockIndexInput.builder() + .resourceDescription(RESOURCE_DESCRIPTION) + .offset(BLOCK_SNAPSHOT_FILE_OFFSET) + .length(fileSize) + .blockSizeShift(blockSizeShift) + .isClone(IS_CLONE), + fileInfo, + directory, + transferManager + ); + } + + private void initBlockFiles(int blockSize, long fileSize, FSDirectory fsDirectory) { + // for this test, it's safe to convert long to int + int intFileSize = (int) fileSize; + int numOfBlocks = intFileSize / blockSize; + + int sizeOfLastBlock = intFileSize % blockSize; + + try { + + // block size will always be an integer multiple of frame size + // write 48, -80 alternately + for (int i = 0; i < numOfBlocks; i++) { + // create normal blocks + String blockName = BLOCK_FILE_PREFIX + "."
+ i; + IndexOutput output = fsDirectory.createOutput(blockName, null); + // since block size is always an even number, safe to do division + for (int j = 0; j < blockSize / 2; j++) { + // byte 00110000 + output.writeByte((byte) 48); + // byte 10110000 + output.writeByte((byte) -80); + } + output.close(); + } + + if (numOfBlocks > 1 && sizeOfLastBlock != 0) { + // create last block + String lastBlockName = BLOCK_FILE_PREFIX + "." + numOfBlocks; + IndexOutput output = fsDirectory.createOutput(lastBlockName, null); + for (int i = 0; i < sizeOfLastBlock; i++) { + if ((i & 1) == 0) { + output.writeByte((byte) 48); + } else { + output.writeByte((byte) -80); + } + } + output.close(); + } + + } catch (IOException e) { + fail("failed to initialize block files: " + e.getMessage()); + } + + } + + private void cleanDirectory(Path path) throws IOException { + if (Files.exists(path)) { + Files.walk(path).sorted(Comparator.reverseOrder()).forEach(f -> { + try { + Files.delete(f); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } + } + + public static class TestGroup { + + public static void testGetBlock(OnDemandBlockSnapshotIndexInput blockedSnapshotFile, int blockSize, int fileSize) { + // block 0 + assertEquals(0, blockedSnapshotFile.getBlock(0L)); + + // block 1 + assertEquals(1, blockedSnapshotFile.getBlock(blockSize)); + + // end block + assertEquals((fileSize - 1) / blockSize, blockedSnapshotFile.getBlock(fileSize - 1)); + } + + public static void testGetBlockOffset(OnDemandBlockSnapshotIndexInput blockedSnapshotFile, int blockSize, int fileSize) { + // block 0 + assertEquals(1, blockedSnapshotFile.getBlockOffset(1)); + + // block 1 + assertEquals(0, blockedSnapshotFile.getBlockOffset(blockSize)); + + // end block + assertEquals((fileSize - 1) % blockSize, blockedSnapshotFile.getBlockOffset(fileSize - 1)); + } + + public static void testGetBlockStart(OnDemandBlockSnapshotIndexInput blockedSnapshotFile, int blockSize) { + // block 0 + assertEquals(0L, blockedSnapshotFile.getBlockStart(0)); + + // block 1 + assertEquals(blockSize, blockedSnapshotFile.getBlockStart(1)); + + // block 2 + assertEquals(blockSize * 2, blockedSnapshotFile.getBlockStart(2)); + } + + public static void testCurrentBlockStart(OnDemandBlockSnapshotIndexInput blockedSnapshotFile, int blockSize) throws IOException { + // block 0 + blockedSnapshotFile.seek(blockSize - 1); + assertEquals(0L, blockedSnapshotFile.currentBlockStart()); + + // block 1 + blockedSnapshotFile.seek(blockSize * 2 - 1); + assertEquals(blockSize, blockedSnapshotFile.currentBlockStart()); + + // block 2 + blockedSnapshotFile.seek(blockSize * 3 - 1); + assertEquals(blockSize * 2, blockedSnapshotFile.currentBlockStart()); + } + + public static void testCurrentBlockPosition(OnDemandBlockSnapshotIndexInput blockedSnapshotFile, int blockSize) throws IOException { + // block 0 + blockedSnapshotFile.seek(blockSize - 1); + assertEquals(blockSize - 1, blockedSnapshotFile.currentBlockPosition()); + + // block 1 + blockedSnapshotFile.seek(blockSize + 1); + assertEquals(1, blockedSnapshotFile.currentBlockPosition()); + + // block 2 + blockedSnapshotFile.seek(blockSize * 2 + 11); + assertEquals(11, blockedSnapshotFile.currentBlockPosition()); + } + + public static void testClone(OnDemandBlockSnapshotIndexInput blockedSnapshotFile, int blockSize) throws IOException { + blockedSnapshotFile.seek(blockSize + 1); + OnDemandBlockSnapshotIndexInput clonedFile = blockedSnapshotFile.clone(); + assertEquals(clonedFile.currentBlock.getFilePointer(),
blockedSnapshotFile.currentBlock.getFilePointer()); + assertEquals(clonedFile.getFilePointer(), blockedSnapshotFile.getFilePointer()); + clonedFile.seek(blockSize + 11); + assertNotEquals(clonedFile.currentBlock.getFilePointer(), blockedSnapshotFile.currentBlock.getFilePointer()); + } + + public static void testSlice(OnDemandBlockSnapshotIndexInput blockedSnapshotFile, int blockSize) throws IOException { + IndexInput slice = blockedSnapshotFile.slice("slice", blockSize - 11, 22); + OnDemandBlockSnapshotIndexInput newSlice = (OnDemandBlockSnapshotIndexInput) slice; + + assertTrue(newSlice.isClone); + assertEquals(newSlice.transferManager, blockedSnapshotFile.transferManager); + assertEquals(newSlice.fileName, blockedSnapshotFile.fileName); + assertEquals(newSlice.blockMask, blockedSnapshotFile.blockMask); + assertEquals(newSlice.blockSize, blockedSnapshotFile.blockSize); + assertEquals(newSlice.blockSizeShift, blockedSnapshotFile.blockSizeShift); + assertEquals(newSlice.directory, blockedSnapshotFile.directory); + assertNotEquals(newSlice.length, blockedSnapshotFile.length); + assertNotEquals(newSlice.offset, blockedSnapshotFile.offset); + + newSlice.seek(0); + assertEquals(0, newSlice.getFilePointer()); + assertEquals(0, newSlice.currentBlockId); + assertEquals(blockSize - 11, newSlice.currentBlock.getFilePointer()); + newSlice.seek(21); + assertEquals(21, newSlice.getFilePointer()); + assertEquals(1, newSlice.currentBlockId); + assertEquals(10, newSlice.currentBlock.getFilePointer()); + + try { + newSlice.seek(23); + } catch (EOFException e) { + return; + } + fail("Able to seek past file end"); + } + + public static void testGetFilePointer(OnDemandBlockSnapshotIndexInput blockedSnapshotFile, int blockSize) throws IOException { + blockedSnapshotFile.seek(blockSize - 11); + assertEquals(blockSize - 11, blockedSnapshotFile.currentBlock.getFilePointer()); + blockedSnapshotFile.seek(blockSize + 5); + assertEquals(5, blockedSnapshotFile.currentBlock.getFilePointer()); + blockedSnapshotFile.seek(blockSize * 2); + assertEquals(0, blockedSnapshotFile.currentBlock.getFilePointer()); + } + + public static void testReadByte(OnDemandBlockSnapshotIndexInput blockedSnapshotFile, int blockSize) throws IOException { + blockedSnapshotFile.seek(0); + assertEquals((byte) 48, blockedSnapshotFile.readByte()); + blockedSnapshotFile.seek(1); + assertEquals((byte) -80, blockedSnapshotFile.readByte()); + + blockedSnapshotFile.seek(blockSize - 1); + assertEquals((byte) -80, blockedSnapshotFile.readByte()); + blockedSnapshotFile.seek(blockSize); + assertEquals((byte) 48, blockedSnapshotFile.readByte()); + } + + public static void testReadShort(OnDemandBlockSnapshotIndexInput blockedSnapshotFile, int blockSize) throws IOException { + blockedSnapshotFile.seek(0); + assertEquals(-20432, blockedSnapshotFile.readShort()); + + blockedSnapshotFile.seek(blockSize); + assertEquals(-20432, blockedSnapshotFile.readShort()); + + // cross block 0 and block 1 + blockedSnapshotFile.seek(blockSize - 1); + assertEquals(12464, blockedSnapshotFile.readShort()); + } + + public static void testReadInt(OnDemandBlockSnapshotIndexInput blockedSnapshotFile, int blockSize) throws IOException { + blockedSnapshotFile.seek(0); + assertEquals(-1338986448, blockedSnapshotFile.readInt()); + + blockedSnapshotFile.seek(blockSize); + assertEquals(-1338986448, blockedSnapshotFile.readInt()); + + // 3 bytes in block 0, 1 byte in block 1 + blockedSnapshotFile.seek(blockSize - 3); + assertEquals(816853168, blockedSnapshotFile.readInt()); + // 2
bytes in block 0, 2 bytes in block 1 + blockedSnapshotFile.seek(blockSize - 2); + assertEquals(-1338986448, blockedSnapshotFile.readInt()); + // 1 byte in block 0, 3 bytes in block 1 + blockedSnapshotFile.seek(blockSize - 1); + assertEquals(816853168, blockedSnapshotFile.readInt()); + } + + public static void testReadLong(OnDemandBlockSnapshotIndexInput blockedSnapshotFile, int blockSize) throws IOException { + blockedSnapshotFile.seek(0); + assertEquals(-5750903000991223760L, blockedSnapshotFile.readLong()); + + // 7 bytes in block 0, 1 byte in block 1 + blockedSnapshotFile.seek(blockSize - 7); + assertEquals(3508357643010846896L, blockedSnapshotFile.readLong()); + + // 6 bytes in block 0, 2 bytes in block 1 + blockedSnapshotFile.seek(blockSize - 6); + assertEquals(-5750903000991223760L, blockedSnapshotFile.readLong()); + + // 5 bytes in block 0, 3 bytes in block 1 + blockedSnapshotFile.seek(blockSize - 5); + assertEquals(3508357643010846896L, blockedSnapshotFile.readLong()); + + // 4 bytes in block 0, 4 bytes in block 1 + blockedSnapshotFile.seek(blockSize - 4); + assertEquals(-5750903000991223760L, blockedSnapshotFile.readLong()); + } + + public static void testReadVInt(OnDemandBlockSnapshotIndexInput blockedSnapshotFile, int blockSize) throws IOException { + blockedSnapshotFile.seek(0); + assertEquals(48, blockedSnapshotFile.readVInt()); + + blockedSnapshotFile.seek(blockSize - 1); + assertEquals(6192, blockedSnapshotFile.readVInt()); + } + + public static void testReadVLong(OnDemandBlockSnapshotIndexInput blockedSnapshotFile) throws IOException { + blockedSnapshotFile.seek(0); + assertEquals(48, blockedSnapshotFile.readVLong()); + + blockedSnapshotFile.seek(1); + assertEquals(6192, blockedSnapshotFile.readVLong()); + } + + public static void testSeek(OnDemandBlockSnapshotIndexInput blockedSnapshotFile, int blockSize, int fileSize) throws IOException { + blockedSnapshotFile.seek(0); + assertEquals(0, blockedSnapshotFile.currentBlock.getFilePointer()); + + blockedSnapshotFile.seek(blockSize + 11); + assertEquals(11, blockedSnapshotFile.currentBlock.getFilePointer()); + + try { + blockedSnapshotFile.seek(fileSize + 1); + } catch (EOFException e) { + return; + } + fail("Able to seek past end"); + } + + public static void testReadByteWithPos(OnDemandBlockSnapshotIndexInput blockedSnapshotFile, int blockSize) throws IOException { + assertEquals(48, blockedSnapshotFile.readByte(0)); + assertEquals(-80, blockedSnapshotFile.readByte(1)); + + assertEquals(48, blockedSnapshotFile.readByte(blockSize)); + assertEquals(-80, blockedSnapshotFile.readByte(blockSize + 1)); + } + + public static void testReadShortWithPos(OnDemandBlockSnapshotIndexInput blockedSnapshotFile, int blockSize) throws IOException { + assertEquals(-20432, blockedSnapshotFile.readShort(0)); + assertEquals(12464, blockedSnapshotFile.readShort(1)); + + assertEquals(12464, blockedSnapshotFile.readShort(blockSize - 1)); + assertEquals(-20432, blockedSnapshotFile.readShort(blockSize)); + } + + public static void testReadIntWithPos(OnDemandBlockSnapshotIndexInput blockedSnapshotFile, int blockSize) throws IOException { + assertEquals(-1338986448, blockedSnapshotFile.readInt(0)); + assertEquals(-1338986448, blockedSnapshotFile.readInt(blockSize)); + + // 3 bytes in block 0, 1 byte in block 1 + assertEquals(816853168, blockedSnapshotFile.readInt(blockSize - 3)); + // 2 bytes in block 0, 2 bytes in block 1 + assertEquals(-1338986448, blockedSnapshotFile.readInt(blockSize - 2)); + // 1 byte in block 0, 3 bytes in block 1 + assertEquals(816853168,
blockedSnapshotFile.readInt(blockSize - 1)); + } + + public static void testReadLongWithPos(OnDemandBlockSnapshotIndexInput blockedSnapshotFile, int blockSize) throws IOException { + assertEquals(-5750903000991223760L, blockedSnapshotFile.readLong(0)); + + // 7 bytes in block 0, 1 byte in block 1 + assertEquals(3508357643010846896L, blockedSnapshotFile.readLong(blockSize - 7)); + + // 6 bytes in block 0, 2 bytes in block 1 + assertEquals(-5750903000991223760L, blockedSnapshotFile.readLong(blockSize - 6)); + + // 5 bytes in block 0, 3 bytes in block 1 + assertEquals(3508357643010846896L, blockedSnapshotFile.readLong(blockSize - 5)); + + // 4 bytes in block 0, 4 bytes in block 1 + assertEquals(-5750903000991223760L, blockedSnapshotFile.readLong(blockSize - 4)); + } + + public static void testReadBytes(OnDemandBlockSnapshotIndexInput blockedSnapshotFile, int blockSize) throws IOException { + byte[] byteArr = new byte[2]; + blockedSnapshotFile.seek(0); + blockedSnapshotFile.readBytes(byteArr, 0, 2); + assertEquals(48, byteArr[0]); + assertEquals(-80, byteArr[1]); + + blockedSnapshotFile.seek(blockSize - 1); + blockedSnapshotFile.readBytes(byteArr, 0, 2); + assertEquals(-80, byteArr[0]); + assertEquals(48, byteArr[1]); + } + } +} diff --git a/server/src/test/java/org/opensearch/index/store/remote/utils/ConcurrentInvocationLinearizerTests.java b/server/src/test/java/org/opensearch/index/store/remote/utils/ConcurrentInvocationLinearizerTests.java new file mode 100644 index 0000000000000..2d9729095dd77 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/store/remote/utils/ConcurrentInvocationLinearizerTests.java @@ -0,0 +1,74 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license.
diff --git a/server/src/test/java/org/opensearch/index/store/remote/utils/ConcurrentInvocationLinearizerTests.java b/server/src/test/java/org/opensearch/index/store/remote/utils/ConcurrentInvocationLinearizerTests.java
new file mode 100644
index 0000000000000..2d9729095dd77
--- /dev/null
+++ b/server/src/test/java/org/opensearch/index/store/remote/utils/ConcurrentInvocationLinearizerTests.java
@@ -0,0 +1,74 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.store.remote.utils;
+
+import org.junit.After;
+import org.junit.Before;
+import org.opensearch.test.OpenSearchTestCase;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.atomic.AtomicInteger;
+
+public class ConcurrentInvocationLinearizerTests extends OpenSearchTestCase {
+    private ExecutorService executorService;
+
+    @Before
+    public void setup() {
+        executorService = Executors.newFixedThreadPool(4);
+    }
+
+    public void testLinearizeShouldNotInvokeMethodMoreThanOnce() {
+        ConcurrentInvocationLinearizer<String, String> invocationLinearizer = new ConcurrentInvocationLinearizer<>(executorService);
+        List<Future<String>> futures = new ArrayList<>();
+        AtomicInteger invocationCount = new AtomicInteger(0);
+        for (int i = 0; i < 4; i++) {
+            int finalI = i;
+            futures.add(invocationLinearizer.linearize("input", (s) -> {
+                invocationCount.incrementAndGet();
+                try {
+                    Thread.sleep(1000L);
+                } catch (InterruptedException e) {
+                    throw new RuntimeException(e);
+                }
+                return "val" + finalI;
+            }));
+        }
+        futures.forEach(stringFuture -> {
+            try {
+                // make sure all futures are the same object, carrying the first value submitted
+                assertEquals("val0", stringFuture.get());
+            } catch (InterruptedException | ExecutionException e) {
+                throw new RuntimeException(e);
+            }
+        });
+        // make sure method calls got linearized
+        assertEquals(1, invocationCount.get());
+        // make sure the cache is cleaned up once the invocation completes
+        assertTrue(invocationLinearizer.getInvokeOnceCache().isEmpty());
+    }
+
+    public void testLinearizeShouldLeaveCacheEmptyEvenWhenFutureFail() throws Exception {
+        ConcurrentInvocationLinearizer<String, String> invocationLinearizer = new ConcurrentInvocationLinearizer<>(executorService);
+        Future<String> future = invocationLinearizer.linearize("input", s -> { throw new RuntimeException("exception"); });
+        assertThrows(ExecutionException.class, () -> future.get());
+        // make sure the cache is cleaned up even when the future fails
+        assertTrue(invocationLinearizer.getInvokeOnceCache().isEmpty());
+    }
+
+    @After
+    public void cleanUp() {
+        executorService.shutdownNow();
+        terminate(executorService);
+    }
+}
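These two tests pin down the linearizer's contract: concurrent calls with an equal key collapse into a single real invocation, every caller observes the same future, and the invoke-once cache is empty again once the future settles, even exceptionally. The implementation is not in this diff; the sketch below shows one conventional way to satisfy that contract (computeIfAbsent plus self-removal on completion). LinearizerSketch is an illustrative stand-in, not the actual ConcurrentInvocationLinearizer.

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.function.Function;

// Illustrative sketch only; not the OpenSearch implementation.
final class LinearizerSketch<K, V> {
    private final ConcurrentHashMap<K, CompletableFuture<V>> invokeOnce = new ConcurrentHashMap<>();
    private final ExecutorService executor;

    LinearizerSketch(ExecutorService executor) {
        this.executor = executor;
    }

    // Concurrent callers with an equal key share one future; only the winning
    // caller schedules the single real invocation.
    CompletableFuture<V> linearize(K key, Function<K, V> invocation) {
        return invokeOnce.computeIfAbsent(key, k -> {
            CompletableFuture<V> result = CompletableFuture.supplyAsync(() -> invocation.apply(k), executor);
            // Evict the entry once the call settles, normally or exceptionally,
            // which is what the isEmpty() assertions above observe.
            result.whenComplete((value, error) -> invokeOnce.remove(k, result));
            return result;
        });
    }
}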
diff --git a/server/src/test/java/org/opensearch/rest/action/admin/cluster/RestDecommissionActionTests.java b/server/src/test/java/org/opensearch/rest/action/admin/cluster/RestDecommissionActionTests.java
new file mode 100644
index 0000000000000..b724de0bd5cc6
--- /dev/null
+++ b/server/src/test/java/org/opensearch/rest/action/admin/cluster/RestDecommissionActionTests.java
@@ -0,0 +1,50 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.rest.action.admin.cluster;
+
+import org.junit.Before;
+import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionRequest;
+import org.opensearch.rest.RestRequest;
+import org.opensearch.test.rest.FakeRestRequest;
+import org.opensearch.test.rest.RestActionTestCase;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+public class RestDecommissionActionTests extends RestActionTestCase {
+
+    private RestDecommissionAction action;
+
+    @Before
+    public void setupAction() {
+        action = new RestDecommissionAction();
+        controller().registerHandler(action);
+    }
+
+    public void testCreateRequest() throws IOException {
+        Map<String, String> params = new HashMap<>();
+        params.put("awareness_attribute_name", "zone");
+        params.put("awareness_attribute_value", "zone-1");
+
+        RestRequest restRequest = buildRestRequest(params);
+
+        DecommissionRequest request = action.createRequest(restRequest);
+        assertEquals("zone", request.getDecommissionAttribute().attributeName());
+        assertEquals("zone-1", request.getDecommissionAttribute().attributeValue());
+        assertEquals(RestRequest.Method.PUT, restRequest.getHttpRequest().method());
+    }
+
+    private FakeRestRequest buildRestRequest(Map<String, String> params) {
+        return new FakeRestRequest.Builder(xContentRegistry()).withMethod(RestRequest.Method.PUT)
+            .withPath("/_cluster/decommission/awareness/{awareness_attribute_name}/{awareness_attribute_value}")
+            .withParams(params)
+            .build();
+    }
+}
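The test drives action.createRequest(...) with the two path parameters, but the action itself is not in this diff. Presumably it reads the template parameters straight off the RestRequest, roughly as sketched below; the constructor shapes of DecommissionRequest and DecommissionAttribute (and the DecommissionAttribute package) are assumptions here, not copied from the source.

import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionRequest;
import org.opensearch.cluster.decommission.DecommissionAttribute;
import org.opensearch.rest.RestRequest;

// Hypothetical sketch of the parameter handling exercised above;
// the real RestDecommissionAction.createRequest is not part of this diff.
final class CreateRequestSketch {
    static DecommissionRequest createRequest(RestRequest request) {
        // path parameters populated from the {awareness_attribute_name}/{awareness_attribute_value} template
        String attributeName = request.param("awareness_attribute_name");   // e.g. "zone"
        String attributeValue = request.param("awareness_attribute_value"); // e.g. "zone-1"
        return new DecommissionRequest(new DecommissionAttribute(attributeName, attributeValue));
    }
}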
diff --git a/server/src/test/java/org/opensearch/rest/action/admin/cluster/RestDeleteDecommissionStateActionTests.java b/server/src/test/java/org/opensearch/rest/action/admin/cluster/RestDeleteDecommissionStateActionTests.java
new file mode 100644
index 0000000000000..01f988efdf6eb
--- /dev/null
+++ b/server/src/test/java/org/opensearch/rest/action/admin/cluster/RestDeleteDecommissionStateActionTests.java
@@ -0,0 +1,40 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.rest.action.admin.cluster;
+
+import org.junit.Before;
+import org.opensearch.action.admin.cluster.decommission.awareness.delete.DeleteDecommissionStateRequest;
+import org.opensearch.rest.RestHandler;
+import org.opensearch.rest.RestRequest;
+import org.opensearch.test.rest.RestActionTestCase;
+
+import java.util.List;
+
+public class RestDeleteDecommissionStateActionTests extends RestActionTestCase {
+
+    private RestDeleteDecommissionStateAction action;
+
+    @Before
+    public void setupAction() {
+        action = new RestDeleteDecommissionStateAction();
+        controller().registerHandler(action);
+    }
+
+    public void testRoutes() {
+        List<RestHandler.Route> routes = action.routes();
+        RestHandler.Route route = routes.get(0);
+        assertEquals(RestRequest.Method.DELETE, route.getMethod());
+        assertEquals("/_cluster/decommission/awareness", route.getPath());
+    }
+
+    public void testCreateRequest() {
+        DeleteDecommissionStateRequest request = action.createRequest();
+        assertNotNull(request);
+    }
+}
diff --git a/server/src/test/java/org/opensearch/search/PitMultiNodeTests.java b/server/src/test/java/org/opensearch/search/PitMultiNodeTests.java
index a23e4141a78e4..b11a80b9d8726 100644
--- a/server/src/test/java/org/opensearch/search/PitMultiNodeTests.java
+++ b/server/src/test/java/org/opensearch/search/PitMultiNodeTests.java
@@ -98,7 +98,6 @@ public Settings onNodeStopped(String nodeName) throws Exception {
             ActionFuture<CreatePitResponse> execute = client().execute(CreatePitAction.INSTANCE, request);
             ExecutionException ex = expectThrows(ExecutionException.class, execute::get);
             assertTrue(ex.getMessage().contains("Failed to execute phase [create_pit]"));
-            assertTrue(ex.getMessage().contains("Partial shards failure"));
             validatePitStats("index", 0, 0);
             return super.onNodeStopped(nodeName);
         }
diff --git a/test/fixtures/hdfs-fixture/build.gradle b/test/fixtures/hdfs-fixture/build.gradle
index 4585db94139d1..4ce154c98d03a 100644
--- a/test/fixtures/hdfs-fixture/build.gradle
+++ b/test/fixtures/hdfs-fixture/build.gradle
@@ -49,7 +49,8 @@ dependencies {
   api "com.fasterxml.woodstox:woodstox-core:${versions.woodstox}"
   api 'net.minidev:json-smart:2.4.8'
   api "org.mockito:mockito-core:${versions.mockito}"
-  api "com.google.protobuf:protobuf-java:3.21.2"
+  api "com.google.protobuf:protobuf-java:3.21.8"
   api "org.jetbrains.kotlin:kotlin-stdlib:${versions.kotlin}"
   api 'org.eclipse.jetty:jetty-server:9.4.49.v20220914'
+  api 'org.apache.zookeeper:zookeeper:3.8.0'
 }
diff --git a/test/framework/src/main/java/org/opensearch/cluster/OpenSearchAllocationTestCase.java b/test/framework/src/main/java/org/opensearch/cluster/OpenSearchAllocationTestCase.java
index a13d337fa4d26..1d527140dc038 100644
--- a/test/framework/src/main/java/org/opensearch/cluster/OpenSearchAllocationTestCase.java
+++ b/test/framework/src/main/java/org/opensearch/cluster/OpenSearchAllocationTestCase.java
@@ -168,6 +168,10 @@ protected static DiscoveryNode newNode(String nodeId, Set<DiscoveryNodeRole> rol
         return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), emptyMap(), roles, Version.CURRENT);
     }
 
+    protected static DiscoveryNode newNode(String nodeName, String nodeId, Set<DiscoveryNodeRole> roles) {
+        return new DiscoveryNode(nodeName, nodeId, buildNewFakeTransportAddress(), emptyMap(), roles, Version.CURRENT);
+    }
+
     protected static DiscoveryNode newNode(String nodeId, Version version) {
         return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), emptyMap(), CLUSTER_MANAGER_DATA_ROLES, version);
     }
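The added newNode overload simply threads a human-readable node name through to DiscoveryNode, so allocation tests can label nodes (for example by zone) instead of reusing the id for both fields. A quick illustrative call, with the test method, name, id, and role set all made up for the example:

import java.util.Collections;

// Inside an OpenSearchAllocationTestCase subclass; values are illustrative.
public void testNamedSearchNode() {
    DiscoveryNode searchNode = newNode("search-node-zone-a", "s1", Collections.singleton(DiscoveryNodeRole.SEARCH_ROLE));
    assertEquals("search-node-zone-a", searchNode.getName());
}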
diff --git a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java
index 0a20654d1e441..122dadeb152bb 100644
--- a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java
+++ b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java
@@ -199,6 +199,11 @@ public final class InternalTestCluster extends TestCluster {
         nodeAndClient.node.settings()
     );
 
+    private static final Predicate<NodeAndClient> SEARCH_NODE_PREDICATE = nodeAndClient -> DiscoveryNode.hasRole(
+        nodeAndClient.node.settings(),
+        DiscoveryNodeRole.SEARCH_ROLE
+    );
+
     private static final Predicate<NodeAndClient> NO_DATA_NO_CLUSTER_MANAGER_PREDICATE = nodeAndClient -> DiscoveryNode
         .isClusterManagerNode(nodeAndClient.node.settings()) == false && DiscoveryNode.isDataNode(nodeAndClient.node.settings()) == false;
@@ -664,6 +669,24 @@ public synchronized void ensureAtLeastNumDataNodes(int n) {
         }
     }
 
+    /**
+     * Ensures that at least n search nodes are present in the cluster.
+     * If more nodes than n are present this method will not
+     * stop any of the running nodes.
+     */
+    public synchronized void ensureAtLeastNumSearchNodes(int n) {
+        int size = numSearchNodes();
+        if (size < n) {
+            logger.info("increasing cluster size from {} to {}", size, n);
+            if (numSharedDedicatedClusterManagerNodes > 0) {
+                startSearchOnlyNodes(n - size);
+            } else {
+                startNodes(n - size, Settings.builder().put(onlyRole(Settings.EMPTY, DiscoveryNodeRole.SEARCH_ROLE)).build());
+            }
+            validateClusterFormed();
+        }
+    }
+
     /**
      * Ensures that at most n are up and running.
     * If less nodes that n are running this method
@@ -2291,6 +2314,14 @@ public List<String> startDataOnlyNodes(int numNodes, Settings settings) {
         return startNodes(numNodes, Settings.builder().put(onlyRole(settings, DiscoveryNodeRole.DATA_ROLE)).build());
     }
 
+    public List<String> startSearchOnlyNodes(int numNodes) {
+        return startSearchOnlyNodes(numNodes, Settings.EMPTY);
+    }
+
+    public List<String> startSearchOnlyNodes(int numNodes, Settings settings) {
+        return startNodes(numNodes, Settings.builder().put(onlyRole(settings, DiscoveryNodeRole.SEARCH_ROLE)).build());
+    }
+
     /** calculates a min cluster-manager nodes value based on the given number of cluster-manager nodes */
     private static int getMinClusterManagerNodes(int eligibleClusterManagerNodes) {
         return eligibleClusterManagerNodes / 2 + 1;
@@ -2347,6 +2378,10 @@ public int numDataNodes() {
         return dataNodeAndClients().size();
     }
 
+    public int numSearchNodes() {
+        return searchNodeAndClients().size();
+    }
+
     @Override
     public int numDataAndClusterManagerNodes() {
         return filterNodes(nodes, DATA_NODE_PREDICATE.or(CLUSTER_MANAGER_NODE_PREDICATE)).size();
@@ -2406,6 +2441,10 @@ private Collection<NodeAndClient> dataNodeAndClients() {
         return filterNodes(nodes, DATA_NODE_PREDICATE);
     }
 
+    private Collection<NodeAndClient> searchNodeAndClients() {
+        return filterNodes(nodes, SEARCH_NODE_PREDICATE);
+    }
+
     private static Collection<NodeAndClient> filterNodes(
         Map<String, NodeAndClient> map,
         Predicate<NodeAndClient> predicate
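Taken together, the new predicate, the numSearchNodes/searchNodeAndClients counters, and the start/ensure helpers give integration tests the same ergonomics for search-only nodes that already exist for data nodes. A sketch of how a searchable-snapshot test might lean on them (the test name and body are illustrative, not taken from this PR):

// Inside an OpenSearchIntegTestCase subclass; the scenario is illustrative.
public void testDedicatedSearchTopology() {
    internalCluster().ensureAtLeastNumSearchNodes(2); // starts search-only nodes only if fewer exist
    assertTrue(internalCluster().numSearchNodes() >= 2);
}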
diff --git a/test/logger-usage/src/test/java/org/opensearch/test/loggerusage/OpenSearchLoggerUsageTests.java b/test/logger-usage/src/test/java/org/opensearch/test/loggerusage/OpenSearchLoggerUsageTests.java
index e9723c269763c..3bbc8100955f3 100644
--- a/test/logger-usage/src/test/java/org/opensearch/test/loggerusage/OpenSearchLoggerUsageTests.java
+++ b/test/logger-usage/src/test/java/org/opensearch/test/loggerusage/OpenSearchLoggerUsageTests.java
@@ -79,31 +79,32 @@ public void testLoggerUsageChecks() throws IOException {
         }
     }
 
-    public void testLoggerUsageCheckerCompatibilityWithLog4j2Logger() throws NoSuchMethodException {
+    public void testLoggerUsageCheckerCompatibilityWithLog4j2Logger() {
         for (Method method : Logger.class.getMethods()) {
             if (OpenSearchLoggerUsageChecker.LOGGER_METHODS.contains(method.getName())) {
-                assertThat(method.getParameterTypes().length, greaterThanOrEqualTo(1));
-                int markerOffset = method.getParameterTypes()[0].equals(Marker.class) ? 1 : 0;
-                int paramLength = method.getParameterTypes().length - markerOffset;
+                assertThat(method.getParameterCount(), greaterThanOrEqualTo(1));
+                final Class<?>[] parameterTypes = method.getParameterTypes();
+                int markerOffset = parameterTypes[0].equals(Marker.class) ? 1 : 0;
+                int paramLength = parameterTypes.length - markerOffset;
                 if (method.isVarArgs()) {
                     assertEquals(2, paramLength);
-                    assertEquals(String.class, method.getParameterTypes()[markerOffset]);
-                    assertThat(method.getParameterTypes()[markerOffset + 1], is(oneOf(Object[].class, Supplier[].class)));
+                    assertEquals(String.class, parameterTypes[markerOffset]);
+                    assertThat(parameterTypes[markerOffset + 1], is(oneOf(Object[].class, Supplier[].class)));
                 } else {
-                    assertThat(method.getParameterTypes()[markerOffset], is(oneOf(Message.class, MessageSupplier.class,
+                    assertThat(parameterTypes[markerOffset], is(oneOf(Message.class, MessageSupplier.class,
                         CharSequence.class, Object.class, String.class, Supplier.class)));
                     if (paramLength == 2) {
-                        assertThat(method.getParameterTypes()[markerOffset + 1], is(oneOf(Throwable.class, Object.class)));
-                        if (method.getParameterTypes()[markerOffset + 1].equals(Object.class)) {
-                            assertEquals(String.class, method.getParameterTypes()[markerOffset]);
+                        assertThat(parameterTypes[markerOffset + 1], is(oneOf(Throwable.class, Object.class)));
+                        if (parameterTypes[markerOffset + 1].equals(Object.class)) {
+                            assertEquals(String.class, parameterTypes[markerOffset]);
                         }
                     }
                     if (paramLength > 2) {
-                        assertEquals(String.class, method.getParameterTypes()[markerOffset]);
+                        assertEquals(String.class, parameterTypes[markerOffset]);
                         assertThat(paramLength, lessThanOrEqualTo(11));
                         for (int i = 1; i < paramLength; i++) {
-                            assertEquals(Object.class, method.getParameterTypes()[markerOffset + i]);
+                            assertEquals(Object.class, parameterTypes[markerOffset + i]);
                         }
                     }
                 }
@@ -115,16 +116,17 @@ public void testLoggerUsageCheckerCompatibilityWithLog4j2Logger() throws NoSuchM
         }
 
         for (Constructor<?> constructor : ParameterizedMessage.class.getConstructors()) {
-            assertThat(constructor.getParameterTypes().length, greaterThanOrEqualTo(2));
-            assertEquals(String.class, constructor.getParameterTypes()[0]);
-            assertThat(constructor.getParameterTypes()[1], is(oneOf(String[].class, Object[].class, Object.class)));
-
-            if (constructor.getParameterTypes().length > 2) {
-                assertEquals(3, constructor.getParameterTypes().length);
-                if (constructor.getParameterTypes()[1].equals(Object.class)) {
-                    assertEquals(Object.class, constructor.getParameterTypes()[2]);
+            assertThat(constructor.getParameterCount(), greaterThanOrEqualTo(2));
+            final Class<?>[] parameterTypes = constructor.getParameterTypes();
+            assertEquals(String.class, parameterTypes[0]);
+            assertThat(parameterTypes[1], is(oneOf(String[].class, Object[].class, Object.class)));
+
+            if (parameterTypes.length > 2) {
+                assertEquals(3, parameterTypes.length);
+                if (parameterTypes[1].equals(Object.class)) {
+                    assertEquals(Object.class, parameterTypes[2]);
                 } else {
-                    assertEquals(Throwable.class, constructor.getParameterTypes()[2]);
+                    assertEquals(Throwable.class, parameterTypes[2]);
                }
            }
        }
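Background on the refactor above: Method.getParameterTypes() returns a fresh defensive copy of the parameter array on every call, whereas getParameterCount() is a plain accessor, so hoisting the array into a local and using the count for size checks removes repeated allocations inside these reflection loops. A standalone illustration of the idiom (not OpenSearch code):

import java.lang.reflect.Method;

final class ParameterInspection {
    // getParameterTypes() clones its backing array on every call;
    // fetch it once and reuse, using getParameterCount() for cheap size checks.
    static void printParameterTypes(Method method) {
        if (method.getParameterCount() == 0) {
            return; // no array allocated at all
        }
        final Class<?>[] parameterTypes = method.getParameterTypes(); // single copy
        for (Class<?> type : parameterTypes) {
            System.out.println(type.getName());
        }
    }
}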