From 706067211ae880bbe4669286ee976552e8a60446 Mon Sep 17 00:00:00 2001 From: etiennecarriere Date: Tue, 12 Sep 2017 01:09:27 +0200 Subject: [PATCH] Azure repository: Accelerate the listing of files (used in delete snapshot) (#25710) This commit reworks the azure listing of snapshot files to do a single listing, instead of once per blob. closes #25424 --- .../storage/AzureStorageServiceImpl.java | 17 ++-- .../azure/AzureSnapshotRestoreTests.java | 77 ++++++++++++------- 2 files changed, 55 insertions(+), 39 deletions(-) diff --git a/plugins/repository-azure/src/main/java/org/elasticsearch/cloud/azure/storage/AzureStorageServiceImpl.java b/plugins/repository-azure/src/main/java/org/elasticsearch/cloud/azure/storage/AzureStorageServiceImpl.java index c928d79c0c242..8268cba7f3e7f 100644 --- a/plugins/repository-azure/src/main/java/org/elasticsearch/cloud/azure/storage/AzureStorageServiceImpl.java +++ b/plugins/repository-azure/src/main/java/org/elasticsearch/cloud/azure/storage/AzureStorageServiceImpl.java @@ -24,6 +24,7 @@ import com.microsoft.azure.storage.RetryExponentialRetry; import com.microsoft.azure.storage.RetryPolicy; import com.microsoft.azure.storage.StorageException; +import com.microsoft.azure.storage.blob.BlobListingDetails; import com.microsoft.azure.storage.blob.BlobProperties; import com.microsoft.azure.storage.blob.CloudBlobClient; import com.microsoft.azure.storage.blob.CloudBlobContainer; @@ -45,6 +46,7 @@ import java.io.OutputStream; import java.net.URI; import java.net.URISyntaxException; +import java.util.EnumSet; import java.util.HashMap; import java.util.Map; @@ -291,33 +293,26 @@ public Map listBlobsByPrefix(String account, LocationMode logger.debug("listing container [{}], keyPath [{}], prefix [{}]", container, keyPath, prefix); MapBuilder blobsBuilder = MapBuilder.newMapBuilder(); + EnumSet enumBlobListingDetails = EnumSet.of(BlobListingDetails.METADATA); CloudBlobClient client = this.getSelectedClient(account, mode); CloudBlobContainer blobContainer = client.getContainerReference(container); - SocketAccess.doPrivilegedVoidException(() -> { if (blobContainer.exists()) { - for (ListBlobItem blobItem : blobContainer.listBlobs(keyPath + (prefix == null ? "" : prefix))) { + for (ListBlobItem blobItem : blobContainer.listBlobs(keyPath + (prefix == null ? "" : prefix), false, + enumBlobListingDetails, null, null)) { URI uri = blobItem.getUri(); logger.trace("blob url [{}]", uri); // uri.getPath is of the form /container/keyPath.* and we want to strip off the /container/ // this requires 1 + container.length() + 1, with each 1 corresponding to one of the / String blobPath = uri.getPath().substring(1 + container.length() + 1); - - CloudBlockBlob blob = blobContainer.getBlockBlobReference(blobPath); - - // fetch the blob attributes from Azure (getBlockBlobReference does not do this) - // this is needed to retrieve the blob length (among other metadata) from Azure Storage - blob.downloadAttributes(); - - BlobProperties properties = blob.getProperties(); + BlobProperties properties = ((CloudBlockBlob) blobItem).getProperties(); String name = blobPath.substring(keyPath.length()); logger.trace("blob url [{}], name [{}], size [{}]", uri, name, properties.getLength()); blobsBuilder.put(name, new PlainBlobMetaData(name, properties.getLength())); } } }); - return blobsBuilder.immutableMap(); } diff --git a/plugins/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureSnapshotRestoreTests.java b/plugins/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureSnapshotRestoreTests.java index aea47f38ef3ef..7eb808e7c956e 100644 --- a/plugins/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureSnapshotRestoreTests.java +++ b/plugins/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureSnapshotRestoreTests.java @@ -36,18 +36,24 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.ByteSizeUnit; +import org.elasticsearch.plugin.repository.azure.AzureRepositoryPlugin; +import org.elasticsearch.plugins.Plugin; import org.elasticsearch.repositories.RepositoryMissingException; import org.elasticsearch.repositories.RepositoryVerificationException; import org.elasticsearch.repositories.azure.AzureRepository.Repository; import org.elasticsearch.snapshots.SnapshotMissingException; +import org.elasticsearch.snapshots.SnapshotRestoreException; import org.elasticsearch.snapshots.SnapshotState; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.ESIntegTestCase.ClusterScope; import org.elasticsearch.test.store.MockFSDirectoryService; +import org.elasticsearch.test.store.MockFSIndexStore; import org.junit.After; import org.junit.Before; import java.net.URISyntaxException; +import java.util.Arrays; +import java.util.Collection; import java.util.Locale; import java.util.concurrent.TimeUnit; @@ -65,13 +71,24 @@ supportsDedicatedMasters = false, numDataNodes = 1, transportClientRatio = 0.0) public class AzureSnapshotRestoreTests extends AbstractAzureWithThirdPartyIntegTestCase { + + @Override + protected Collection> nodePlugins() { + return Arrays.asList(AzureRepositoryPlugin.class, MockFSIndexStore.TestPlugin.class); + } + private String getRepositoryPath() { String testName = "it-" + getTestName(); return testName.contains(" ") ? Strings.split(testName, " ")[0] : testName; } public static String getContainerName() { - String testName = "snapshot-itest-".concat(RandomizedTest.getContext().getRunnerSeedAsString().toLowerCase(Locale.ROOT)); + /* Have a different name per test so that there is no possible race condition. As the long can be negative, + * there mustn't be a hyphen between the 2 concatenated numbers + * (can't have 2 consecutives hyphens on Azure containers) + */ + String testName = "snapshot-itest-" + .concat(RandomizedTest.getContext().getRunnerSeedAsString().toLowerCase(Locale.ROOT)); return testName.contains(" ") ? Strings.split(testName, " ")[0] : testName; } @@ -95,9 +112,10 @@ public final void wipeAzureRepositories() throws StorageException, URISyntaxExce } public void testSimpleWorkflow() { + String repo_name = "test-repo-simple"; Client client = client(); logger.info("--> creating azure repository with path [{}]", getRepositoryPath()); - PutRepositoryResponse putRepositoryResponse = client.admin().cluster().preparePutRepository("test-repo") + PutRepositoryResponse putRepositoryResponse = client.admin().cluster().preparePutRepository(repo_name) .setType("azure").setSettings(Settings.builder() .put(Repository.CONTAINER_SETTING.getKey(), getContainerName()) .put(Repository.BASE_PATH_SETTING.getKey(), getRepositoryPath()) @@ -120,13 +138,13 @@ public void testSimpleWorkflow() { assertThat(client.prepareSearch("test-idx-3").setSize(0).get().getHits().getTotalHits(), equalTo(100L)); logger.info("--> snapshot"); - CreateSnapshotResponse createSnapshotResponse = client.admin().cluster().prepareCreateSnapshot("test-repo", "test-snap") + CreateSnapshotResponse createSnapshotResponse = client.admin().cluster().prepareCreateSnapshot(repo_name, "test-snap") .setWaitForCompletion(true).setIndices("test-idx-*", "-test-idx-3").get(); assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), greaterThan(0)); assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), equalTo(createSnapshotResponse.getSnapshotInfo().totalShards())); - assertThat(client.admin().cluster().prepareGetSnapshots("test-repo").setSnapshots("test-snap").get().getSnapshots() + assertThat(client.admin().cluster().prepareGetSnapshots(repo_name).setSnapshots("test-snap").get().getSnapshots() .get(0).state(), equalTo(SnapshotState.SUCCESS)); logger.info("--> delete some data"); @@ -148,7 +166,7 @@ public void testSimpleWorkflow() { client.admin().indices().prepareClose("test-idx-1", "test-idx-2").get(); logger.info("--> restore all indices from the snapshot"); - RestoreSnapshotResponse restoreSnapshotResponse = client.admin().cluster().prepareRestoreSnapshot("test-repo", "test-snap") + RestoreSnapshotResponse restoreSnapshotResponse = client.admin().cluster().prepareRestoreSnapshot(repo_name, "test-snap") .setWaitForCompletion(true).get(); assertThat(restoreSnapshotResponse.getRestoreInfo().totalShards(), greaterThan(0)); @@ -161,7 +179,7 @@ public void testSimpleWorkflow() { logger.info("--> delete indices"); cluster().wipeIndices("test-idx-1", "test-idx-2"); logger.info("--> restore one index after deletion"); - restoreSnapshotResponse = client.admin().cluster().prepareRestoreSnapshot("test-repo", "test-snap").setWaitForCompletion(true) + restoreSnapshotResponse = client.admin().cluster().prepareRestoreSnapshot(repo_name, "test-snap").setWaitForCompletion(true) .setIndices("test-idx-*", "-test-idx-2").get(); assertThat(restoreSnapshotResponse.getRestoreInfo().totalShards(), greaterThan(0)); ensureGreen(); @@ -177,7 +195,7 @@ public void testSimpleWorkflow() { public void testMultipleSnapshots() throws URISyntaxException, StorageException { final String indexName = "test-idx-1"; final String typeName = "doc"; - final String repositoryName = "test-repo"; + final String repositoryName = "test-repo-multiple-snapshot"; final String snapshot1Name = "test-snap-1"; final String snapshot2Name = "test-snap-2"; @@ -314,6 +332,7 @@ public void testMultipleRepositories() { * For issue #26: https://github.com/elastic/elasticsearch-cloud-azure/issues/26 */ public void testListBlobs_26() throws StorageException, URISyntaxException { + final String repositoryName="test-repo-26"; createIndex("test-idx-1", "test-idx-2", "test-idx-3"); ensureGreen(); @@ -327,29 +346,29 @@ public void testListBlobs_26() throws StorageException, URISyntaxException { ClusterAdminClient client = client().admin().cluster(); logger.info("--> creating azure repository without any path"); - PutRepositoryResponse putRepositoryResponse = client.preparePutRepository("test-repo").setType("azure") + PutRepositoryResponse putRepositoryResponse = client.preparePutRepository(repositoryName).setType("azure") .setSettings(Settings.builder() .put(Repository.CONTAINER_SETTING.getKey(), getContainerName()) ).get(); assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true)); // Get all snapshots - should be empty - assertThat(client.prepareGetSnapshots("test-repo").get().getSnapshots().size(), equalTo(0)); + assertThat(client.prepareGetSnapshots(repositoryName).get().getSnapshots().size(), equalTo(0)); logger.info("--> snapshot"); - CreateSnapshotResponse createSnapshotResponse = client.prepareCreateSnapshot("test-repo", "test-snap-26") + CreateSnapshotResponse createSnapshotResponse = client.prepareCreateSnapshot(repositoryName, "test-snap-26") .setWaitForCompletion(true).setIndices("test-idx-*").get(); assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), greaterThan(0)); // Get all snapshots - should have one - assertThat(client.prepareGetSnapshots("test-repo").get().getSnapshots().size(), equalTo(1)); + assertThat(client.prepareGetSnapshots(repositoryName).get().getSnapshots().size(), equalTo(1)); // Clean the snapshot - client.prepareDeleteSnapshot("test-repo", "test-snap-26").get(); - client.prepareDeleteRepository("test-repo").get(); + client.prepareDeleteSnapshot(repositoryName, "test-snap-26").get(); + client.prepareDeleteRepository(repositoryName).get(); logger.info("--> creating azure repository path [{}]", getRepositoryPath()); - putRepositoryResponse = client.preparePutRepository("test-repo").setType("azure") + putRepositoryResponse = client.preparePutRepository(repositoryName).setType("azure") .setSettings(Settings.builder() .put(Repository.CONTAINER_SETTING.getKey(), getContainerName()) .put(Repository.BASE_PATH_SETTING.getKey(), getRepositoryPath()) @@ -357,15 +376,15 @@ public void testListBlobs_26() throws StorageException, URISyntaxException { assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true)); // Get all snapshots - should be empty - assertThat(client.prepareGetSnapshots("test-repo").get().getSnapshots().size(), equalTo(0)); + assertThat(client.prepareGetSnapshots(repositoryName).get().getSnapshots().size(), equalTo(0)); logger.info("--> snapshot"); - createSnapshotResponse = client.prepareCreateSnapshot("test-repo", "test-snap-26").setWaitForCompletion(true) + createSnapshotResponse = client.prepareCreateSnapshot(repositoryName, "test-snap-26").setWaitForCompletion(true) .setIndices("test-idx-*").get(); assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), greaterThan(0)); // Get all snapshots - should have one - assertThat(client.prepareGetSnapshots("test-repo").get().getSnapshots().size(), equalTo(1)); + assertThat(client.prepareGetSnapshots(repositoryName).get().getSnapshots().size(), equalTo(1)); } @@ -374,23 +393,24 @@ public void testListBlobs_26() throws StorageException, URISyntaxException { * For issue #28: https://github.com/elastic/elasticsearch-cloud-azure/issues/28 */ public void testGetDeleteNonExistingSnapshot_28() throws StorageException, URISyntaxException { + final String repositoryName="test-repo-28"; ClusterAdminClient client = client().admin().cluster(); logger.info("--> creating azure repository without any path"); - PutRepositoryResponse putRepositoryResponse = client.preparePutRepository("test-repo").setType("azure") + PutRepositoryResponse putRepositoryResponse = client.preparePutRepository(repositoryName).setType("azure") .setSettings(Settings.builder() .put(Repository.CONTAINER_SETTING.getKey(), getContainerName()) ).get(); assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true)); try { - client.prepareGetSnapshots("test-repo").addSnapshots("nonexistingsnapshotname").get(); + client.prepareGetSnapshots(repositoryName).addSnapshots("nonexistingsnapshotname").get(); fail("Shouldn't be here"); } catch (SnapshotMissingException ex) { // Expected } try { - client.prepareDeleteSnapshot("test-repo", "nonexistingsnapshotname").get(); + client.prepareDeleteSnapshot(repositoryName, "nonexistingsnapshotname").get(); fail("Shouldn't be here"); } catch (SnapshotMissingException ex) { // Expected @@ -419,18 +439,19 @@ public void testForbiddenContainerName() throws Exception { * @param correct Is this container name correct */ private void checkContainerName(final String container, final boolean correct) throws Exception { + String repositoryName = "test-repo-checkContainerName"; logger.info("--> creating azure repository with container name [{}]", container); // It could happen that we just removed from a previous test the same container so // we can not create it yet. assertBusy(() -> { try { - PutRepositoryResponse putRepositoryResponse = client().admin().cluster().preparePutRepository("test-repo") + PutRepositoryResponse putRepositoryResponse = client().admin().cluster().preparePutRepository(repositoryName) .setType("azure").setSettings(Settings.builder() .put(Repository.CONTAINER_SETTING.getKey(), container) .put(Repository.BASE_PATH_SETTING.getKey(), getRepositoryPath()) .put(Repository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(1000, 10000), ByteSizeUnit.BYTES) ).get(); - client().admin().cluster().prepareDeleteRepository("test-repo").get(); + client().admin().cluster().prepareDeleteRepository(repositoryName).get(); try { logger.info("--> remove container [{}]", container); cleanRepositoryFiles(container); @@ -451,9 +472,10 @@ private void checkContainerName(final String container, final boolean correct) t * Test case for issue #23: https://github.com/elastic/elasticsearch-cloud-azure/issues/23 */ public void testNonExistingRepo_23() { + final String repositoryName = "test-repo-test23"; Client client = client(); logger.info("--> creating azure repository with path [{}]", getRepositoryPath()); - PutRepositoryResponse putRepositoryResponse = client.admin().cluster().preparePutRepository("test-repo") + PutRepositoryResponse putRepositoryResponse = client.admin().cluster().preparePutRepository(repositoryName) .setType("azure").setSettings(Settings.builder() .put(Repository.CONTAINER_SETTING.getKey(), getContainerName()) .put(Repository.BASE_PATH_SETTING.getKey(), getRepositoryPath()) @@ -463,9 +485,9 @@ public void testNonExistingRepo_23() { logger.info("--> restore non existing snapshot"); try { - client.admin().cluster().prepareRestoreSnapshot("test-repo", "no-existing-snapshot").setWaitForCompletion(true).get(); + client.admin().cluster().prepareRestoreSnapshot(repositoryName, "no-existing-snapshot").setWaitForCompletion(true).get(); fail("Shouldn't be here"); - } catch (SnapshotMissingException ex) { + } catch (SnapshotRestoreException ex) { // Expected } } @@ -475,9 +497,8 @@ public void testNonExistingRepo_23() { */ public void testRemoveAndCreateContainer() throws Exception { final String container = getContainerName().concat("-testremove"); - final AzureStorageService storageService = new AzureStorageServiceImpl(internalCluster().getDefaultSettings(), - AzureStorageSettings.load(internalCluster().getDefaultSettings())); - + final AzureStorageService storageService = new AzureStorageServiceImpl(nodeSettings(0),AzureStorageSettings.load(nodeSettings(0))); + // It could happen that we run this test really close to a previous one // so we might need some time to be able to create the container assertBusy(() -> {