Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove loading on-disk cluster metadata from the manifest file #114698

Merged
merged 9 commits into from
Oct 24, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import org.elasticsearch.action.support.ActiveShardCount;
import org.elasticsearch.client.internal.Client;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.coordination.CoordinationMetadata;
import org.elasticsearch.cluster.metadata.IndexGraveyard;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.metadata.MappingMetadata;
Expand All @@ -27,14 +26,9 @@
import org.elasticsearch.cluster.routing.RoutingTable;
import org.elasticsearch.cluster.routing.ShardRoutingState;
import org.elasticsearch.cluster.routing.UnassignedInfo;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.Priority;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.core.CheckedConsumer;
import org.elasticsearch.core.IOUtils;
import org.elasticsearch.env.BuildVersion;
import org.elasticsearch.env.NodeEnvironment;
import org.elasticsearch.env.NodeMetadata;
import org.elasticsearch.index.IndexVersions;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.indices.IndexClosedException;
Expand All @@ -46,21 +40,15 @@
import org.elasticsearch.xcontent.XContentFactory;

import java.io.IOException;
import java.nio.file.Path;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static org.elasticsearch.action.support.WriteRequest.RefreshPolicy.IMMEDIATE;
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
import static org.elasticsearch.test.NodeRoles.nonDataNode;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.notNullValue;
Expand Down Expand Up @@ -545,52 +533,4 @@ public void testArchiveBrokenClusterSettings() throws Exception {
assertHitCount(prepareSearch().setQuery(matchAllQuery()), 1L);
}

/**
 * Checks that a node still starts, and eventually has no index folders left on disk, when its legacy on-disk state holds a
 * tombstone for an index whose metadata was never removed.
 * <p>
 * Background: a 6.x node may add a tombstone for an index without actually deleting the index metadata from disk, because that
 * deletion is slightly deferred and can race against the node shutting down. Upgrading to 7.x in that state used to prevent the
 * node from starting.
 */
public void testHalfDeletedIndexImport() throws Exception {
    final String node = internalCluster().startNode();
    createIndex("test", 1, 0);
    ensureGreen("test");

    // Capture everything we need from the running node before it is restarted.
    final Metadata originalMetadata = internalCluster().getInstance(ClusterService.class).state().metadata();
    final Path[] dataPaths = internalCluster().getInstance(NodeEnvironment.class).nodeDataPaths();
    final String nodeId = clusterAdmin().prepareNodesInfo(node).clear().get().getNodes().get(0).getNode().getId();

    writeBrokenMeta(env -> {
        // Drop the persisted cluster state directory from every data path.
        for (final Path dataPath : dataPaths) {
            IOUtils.rm(dataPath.resolve(PersistedClusterStateService.METADATA_DIRECTORY_NAME));
        }

        // With the manifest gone the term is reset, making this look like an upgrade from 6.x, so the term in the
        // coordination metadata has to be reset as well.
        final CoordinationMetadata resetTerm = CoordinationMetadata.builder(originalMetadata.coordinationMetadata()).term(0L).build();
        // Tombstone the index while leaving its metadata on disk.
        final IndexGraveyard graveyard = IndexGraveyard.builder().addTombstone(originalMetadata.index("test").getIndex()).build();
        final Metadata halfDeleted = Metadata.builder(originalMetadata)
            .coordinationMetadata(resetTerm)
            .putCustom(IndexGraveyard.TYPE, graveyard)
            .build();

        MetaStateWriterUtils.writeGlobalState(env, "test", halfDeleted);
        NodeMetadata.FORMAT.writeAndCleanup(
            new NodeMetadata(nodeId, BuildVersion.current(), originalMetadata.oldestIndexVersion()),
            dataPaths
        );
    });

    ensureGreen();

    // The half-deleted index folder should be cleaned up once the node is back.
    assertBusy(() -> assertThat(internalCluster().getInstance(NodeEnvironment.class).availableIndexFolders(), empty()));
}

/**
 * Performs a full cluster restart and hands each node's {@link NodeEnvironment} to {@code writer} from the
 * {@code onNodeStopped} restart callback, so that the caller can rewrite that node's on-disk state.
 *
 * @param writer action applied to the environment of every node; it may throw {@link IOException}
 * @throws Exception if resolving the environments or restarting the cluster fails
 */
private void writeBrokenMeta(CheckedConsumer<NodeEnvironment, IOException> writer) throws Exception {
    // Look the environments up before the restart begins and key them by node name.
    final String[] names = internalCluster().getNodeNames();
    final Map<String, NodeEnvironment> environmentsByNode = Stream.of(names)
        .collect(Collectors.toMap(Function.identity(), name -> internalCluster().getInstance(NodeEnvironment.class, name)));

    final RestartCallback rewriteOnStop = new RestartCallback() {
        @Override
        public Settings onNodeStopped(String nodeName) throws Exception {
            writer.accept(environmentsByNode.get(nodeName));
            return super.onNodeStopped(nodeName);
        }
    };
    internalCluster().fullRestart(rewriteOnStop);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.metadata.IndexMetadataVerifier;
import org.elasticsearch.cluster.metadata.IndexTemplateMetadata;
import org.elasticsearch.cluster.metadata.Manifest;
import org.elasticsearch.cluster.metadata.Metadata;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.service.ClusterService;
Expand All @@ -33,8 +32,6 @@
import org.elasticsearch.common.util.concurrent.EsExecutors;
import org.elasticsearch.common.util.concurrent.EsThreadPoolExecutor;
import org.elasticsearch.core.IOUtils;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.core.UpdateForV9;
import org.elasticsearch.env.BuildVersion;
import org.elasticsearch.env.NodeMetadata;
import org.elasticsearch.index.IndexVersions;
Expand Down Expand Up @@ -185,16 +182,6 @@ private PersistedState createOnDiskPersistedState(
long lastAcceptedVersion = onDiskState.lastAcceptedVersion;
long currentTerm = onDiskState.currentTerm;

if (onDiskState.empty()) {
arteam marked this conversation as resolved.
Show resolved Hide resolved
@UpdateForV9(owner = UpdateForV9.Owner.DISTRIBUTED_COORDINATION) // legacy metadata loader is not needed anymore from v9 onwards
final Tuple<Manifest, Metadata> legacyState = metaStateService.loadFullState();
if (legacyState.v1().isEmpty() == false) {
metadata = legacyState.v2();
lastAcceptedVersion = legacyState.v1().clusterStateVersion();
currentTerm = legacyState.v1().currentTerm();
}
}

PersistedState persistedState = null;
boolean success = false;
try {
Expand Down
119 changes: 1 addition & 118 deletions server/src/main/java/org/elasticsearch/gateway/MetaStateService.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,17 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexGraveyard;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.metadata.Manifest;
import org.elasticsearch.cluster.metadata.Metadata;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.core.UpdateForV9;
import org.elasticsearch.env.NodeEnvironment;
import org.elasticsearch.index.Index;
import org.elasticsearch.xcontent.NamedXContentRegistry;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Predicate;

/**
Expand All @@ -45,118 +40,6 @@ public MetaStateService(NodeEnvironment nodeEnv, NamedXContentRegistry namedXCon
this.namedXContentRegistry = namedXContentRegistry;
}

/**
 * Loads the full state: the global metadata together with the metadata of every index. <br>
 * The manifest file (represented by the {@link Manifest} class) determines which generations are loaded. <br>
 * If no manifest can be loaded, this method falls back to BWC mode, in which the latest generation of the global and index
 * metadata is loaded. There is currently no way to tell a removed manifest file apart from one that was never created, so a
 * missing manifest always results in BWC mode.
 *
 * @return tuple of {@link Manifest} and {@link Metadata} holding the global and index metadata. If there is no state on disk,
 *         a meta state with globalGeneration -1 and empty metadata is returned.
 * @throws IOException if loading a file fails, or if metadata referenced by the manifest file cannot be found.
 */
@UpdateForV9(owner = UpdateForV9.Owner.DISTRIBUTED_COORDINATION)
public Tuple<Manifest, Metadata> loadFullState() throws IOException {
    final Manifest manifest = Manifest.FORMAT.loadLatestState(logger, namedXContentRegistry, nodeEnv.nodeDataPaths());
    if (manifest == null) {
        return loadFullStateBWC();
    }

    final Metadata.Builder builder;
    if (manifest.isGlobalGenerationMissing() == false) {
        final Metadata global = Metadata.FORMAT.loadGeneration(
            logger,
            namedXContentRegistry,
            manifest.globalGeneration(),
            nodeEnv.nodeDataPaths()
        );
        // The manifest names a global generation, so it must be loadable.
        if (global == null) {
            throw new IOException("failed to find global metadata [generation: " + manifest.globalGeneration() + "]");
        }
        builder = Metadata.builder(global);
    } else {
        builder = Metadata.builder();
    }

    for (Map.Entry<Index, Long> indexGeneration : manifest.indexGenerations().entrySet()) {
        final Index index = indexGeneration.getKey();
        final long generation = indexGeneration.getValue();
        // Index folders are named after the index UUID.
        final String folderName = index.getUUID();
        final IndexMetadata loaded = IndexMetadata.FORMAT.loadGeneration(
            logger,
            namedXContentRegistry,
            generation,
            nodeEnv.resolveIndexFolder(folderName)
        );
        // Every index listed in the manifest must have its metadata on disk.
        if (loaded == null) {
            throw new IOException(
                "failed to find metadata for existing index "
                    + index.getName()
                    + " [location: "
                    + folderName
                    + ", generation: "
                    + generation
                    + "]"
            );
        }
        builder.put(loaded, false);
    }

    return new Tuple<>(manifest, builder.build());
}

/**
 * "Manifest-less" BWC variant of loading metadata from disk: takes the latest global state and the latest state found in each
 * available index folder, skipping indices that are listed in the index graveyard. See also {@link #loadFullState()}
 */
private Tuple<Manifest, Metadata> loadFullStateBWC() throws IOException {
    final Tuple<Metadata, Long> latestGlobal = Metadata.FORMAT.loadLatestStateWithGeneration(
        logger,
        namedXContentRegistry,
        nodeEnv.nodeDataPaths()
    );
    final Metadata global = latestGlobal.v1();
    final long globalGeneration = latestGlobal.v2();

    final Metadata.Builder builder;
    final IndexGraveyard graveyard;
    if (global == null) {
        // No global state on disk: start from empty metadata and an empty graveyard.
        builder = Metadata.builder();
        graveyard = IndexGraveyard.builder().build();
    } else {
        builder = Metadata.builder(global);
        graveyard = global.custom(IndexGraveyard.TYPE);
    }

    final Map<Index, Long> generationsByIndex = new HashMap<>();
    for (String folderName : nodeEnv.availableIndexFolders()) {
        final Tuple<IndexMetadata, Long> latestIndex = IndexMetadata.FORMAT.loadLatestStateWithGeneration(
            logger,
            namedXContentRegistry,
            nodeEnv.resolveIndexFolder(folderName)
        );
        final IndexMetadata indexMetadata = latestIndex.v1();
        final long generation = latestIndex.v2();

        if (indexMetadata == null) {
            logger.debug("[{}] failed to find metadata for existing index location", folderName);
            continue;
        }
        if (graveyard.containsIndex(indexMetadata.getIndex())) {
            logger.debug("[{}] found metadata for deleted index [{}]", folderName, indexMetadata.getIndex());
            // this index folder is cleared up when state is recovered
            continue;
        }
        generationsByIndex.put(indexMetadata.getIndex(), generation);
        builder.put(indexMetadata, false);
    }

    final Manifest manifest = Manifest.unknownCurrentTermAndVersion(globalGeneration, generationsByIndex);
    return new Tuple<>(manifest, builder.build());
}

/**
* Loads the index state for the provided index name, returning null if it doesn't exist.
*/
Expand Down Expand Up @@ -193,7 +76,7 @@ List<IndexMetadata> loadIndicesStates(Predicate<String> excludeIndexPathIdsPredi
}

/**
* Loads the global state, *without* index state, see {@link #loadFullState()} for that.
* Loads the global state, *without* index state
*/
Metadata loadGlobalState() throws IOException {
return Metadata.FORMAT.loadLatestState(logger, namedXContentRegistry, nodeEnv.nodeDataPaths());
Expand Down
1 change: 0 additions & 1 deletion server/src/main/java/org/elasticsearch/node/Node.java
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,6 @@ public Node start() throws NodeValidationException {
// TODO: Do not expect that the legacy metadata file is always present https://github.com/elastic/elasticsearch/issues/95211
if (Assertions.ENABLED && DiscoveryNode.isStateless(settings()) == false) {
try {
assert injector.getInstance(MetaStateService.class).loadFullState().v1().isEmpty();
final NodeMetadata nodeMetadata = NodeMetadata.FORMAT.loadLatestState(
logger,
NamedXContentRegistry.EMPTY,
Expand Down
Loading