diff --git a/docs/en/seatunnel-engine/deployment.md b/docs/en/seatunnel-engine/deployment.md index c07cd45d6b1a..d058d50bb520 100644 --- a/docs/en/seatunnel-engine/deployment.md +++ b/docs/en/seatunnel-engine/deployment.md @@ -222,6 +222,30 @@ map: fs.oss.credentials.provider: org.apache.hadoop.fs.aliyun.oss.AliyunCredentialsProvider ``` +If you use Kafka, the Kafka cluster must support creating compacted topics. You can configure it like this: + +```yaml +map: + engine*: + map-store: + enabled: true + initial-mode: EAGER + factory-class-name: org.apache.seatunnel.engine.server.persistence.FileMapStoreFactory + properties: + type: kafka + bootstrap.servers: localhost:9092 + storage.compact.topic.prefix: imap- + storage.compact.topic.replication.factor: 3 + consumer.override.auto.offset.reset: earliest + producer.override.acks: all + +``` + ++ Configuration entries with the prefix 'consumer.override.' override the configuration of the Kafka consumer ++ Configuration entries with the prefix 'producer.override.' override the configuration of the Kafka producer ++ Configuration entries with the prefix 'admin.override.' override the configuration of the Kafka admin client ++ Configuration entries with the prefix 'topic.override.' override the configuration of the storage topic + ## 6. Config SeaTunnel Engine Client All SeaTunnel Engine Client config in `hazelcast-client.yaml`.
diff --git a/pom.xml b/pom.xml index 51b03a26d5bb..bb6f7d3966a5 100644 --- a/pom.xml +++ b/pom.xml @@ -146,6 +146,7 @@ 2.4.7 3.1.4 4.1.60.Final + <kafka.version>3.4.1</kafka.version> @@ -485,6 +486,12 @@ ${netty-buffer.version} + + <dependency> + <groupId>org.apache.kafka</groupId> + <artifactId>kafka-clients</artifactId> + <version>${kafka.version}</version> + </dependency> +
diff --git a/seatunnel-dist/release-docs/LICENSE b/seatunnel-dist/release-docs/LICENSE index 5d19a35a8740..2db949ba5a09 100644 --- a/seatunnel-dist/release-docs/LICENSE +++ b/seatunnel-dist/release-docs/LICENSE @@ -275,6 +275,8 @@ The text of each license is the standard Apache 2.0 license. (Apache-2.0) listenablefuture (com.google.guava:listenablefuture:9999.0-empty-to-avoid-conflict-with-guava https://mvnrepository.com/artifact/com.google.guava/listenablefuture/9999.0-empty-to-avoid-conflict-with-guava) (Apache-2.0) accessors-smart (com.google.guava:accessors-smart:2.4.7 - https://mvnrepository.com/artifact/net.minidev/accessors-smart) (Apache-2.0) json-smart (net.minidev:json-smart:2.4.7 - https://mvnrepository.com/artifact/net.minidev/json-smart) + (Apache-2.0) kafka-clients (org.apache.kafka:kafka-clients:3.4.1 - https://mvnrepository.com/artifact/org.apache.kafka/kafka-clients) + (Apache-2.0) lz4-java (org.lz4:lz4-java:1.8.0 - https://mvnrepository.com/artifact/org.lz4/lz4-java) ======================================================================== MOZILLA PUBLIC LICENSE License ======================================================================== @@ -294,6 +296,7 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
(New BSD license) Protocol Buffer Java API (com.google.protobuf:protobuf-java:2.5.0 - http://code.google.com/p/protobuf) (BSD 3-Clause) Scala Library (org.scala-lang:scala-library:2.11.12 - http://www.scala-lang.org/) (BSD 3-Clause) Scala Library (org.ow2.asm:asm:9.1 - https://mvnrepository.com/artifact/org.ow2.asm/asm/) + (BSD 2-Clause) zstd-jni (com.github.luben:zstd-jni:1.5.2-1 - https://mvnrepository.com/artifact/com.github.luben/zstd-jni) ======================================================================== CDDL License ========================================================================
diff --git a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/pom.xml b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/pom.xml index a1315565349f..5883b0ee3388 100644 --- a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/pom.xml +++ b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/pom.xml @@ -58,6 +58,13 @@ ${project.version} test + <dependency> + <groupId>org.apache.seatunnel</groupId> + <artifactId>imap-storage-kafka</artifactId> + <version>${project.version}</version> + <scope>test</scope> + </dependency> org.apache.seatunnel seatunnel-hadoop3-3.1.4-uber
diff --git a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/ClusterFaultToleranceIT.java b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/ClusterFaultToleranceIT.java index f7571968e8ff..eff1297e24fc 100644 --- a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/ClusterFaultToleranceIT.java +++ b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/ClusterFaultToleranceIT.java @@ -1154,4 +1154,213 @@ public void testStreamJobRestoreFromOssInAllNodeDown() } } }
+ + @SuppressWarnings("checkstyle:RegexpSingleline") + @Test + @Disabled("Requires a running Kafka broker at localhost:9092") + public void testStreamJobRestoreFromKafkaInAllNodeDown() + throws ExecutionException, InterruptedException { + String BOOTSTRAP_SERVERS = "localhost:9092"; + String TOPIC_PREFIX = "imap-"; + Integer TOPIC_REPLICATION_FACTOR = 1; + + String testCaseName = "testStreamJobRestoreFromKafkaInAllNodeDown"; + String testClusterName = + "ClusterFaultToleranceIT_testStreamJobRestoreFromKafkaInAllNodeDown_" + + System.currentTimeMillis(); + int testRowNumber = 1000; + int testParallelism = 6; + HazelcastInstanceImpl node1 = null; + HazelcastInstanceImpl node2 = null; + SeaTunnelClient engineClient = null;
+ + try { + String yaml = + "hazelcast:\n" + + " cluster-name: seatunnel\n" + + " network:\n" + + " rest-api:\n" + + " enabled: true\n" + + " endpoint-groups:\n" + + " CLUSTER_WRITE:\n" + + " enabled: true\n" + + " join:\n" + + " tcp-ip:\n" + + " enabled: true\n" + + " member-list:\n" + + " - localhost\n" + + " port:\n" + + " auto-increment: true\n" + + " port-count: 100\n" + + " port: 5801\n" + + " map:\n" + + " engine*:\n" + + " map-store:\n" + + " enabled: true\n" + + " initial-mode: EAGER\n" + + " factory-class-name: org.apache.seatunnel.engine.server.persistence.FileMapStoreFactory\n" + + " properties:\n" + + " type: kafka\n" + + " bootstrap.servers: " + + BOOTSTRAP_SERVERS + + "\n" + + " storage.compact.topic.prefix: " + + TOPIC_PREFIX + + "\n" + + " storage.compact.topic.replication.factor: " + + TOPIC_REPLICATION_FACTOR + + "\n" + + " properties:\n" + + " hazelcast.invocation.max.retry.count: 200\n" + + " hazelcast.tcp.join.port.try.count: 30\n" + + " hazelcast.invocation.retry.pause.millis: 2000\n" + + " hazelcast.slow.operation.detector.stacktrace.logging.enabled: true\n" + + " hazelcast.logging.type: log4j2\n" + + " hazelcast.operation.generic.thread.count: 200\n";
+ + Config hazelcastConfig = Config.loadFromString(yaml); + hazelcastConfig.setClusterName(TestUtils.getClusterName(testClusterName)); + SeaTunnelConfig seaTunnelConfig = ConfigProvider.locateAndGetSeaTunnelConfig(); + seaTunnelConfig.setHazelcastConfig(hazelcastConfig); + node1 = SeaTunnelServerStarter.createHazelcastInstance(seaTunnelConfig); + + node2 = SeaTunnelServerStarter.createHazelcastInstance(seaTunnelConfig); + + // wait for all nodes to join the cluster + HazelcastInstanceImpl finalNode = node1; + Awaitility.await() + .atMost(10000, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> + Assertions.assertEquals( + 2, finalNode.getCluster().getMembers().size()));
+ + Common.setDeployMode(DeployMode.CLIENT); + ImmutablePair<String, String> testResources = + createTestResources( + testCaseName, JobMode.STREAMING, testRowNumber, testParallelism); + JobConfig jobConfig = new JobConfig(); + jobConfig.setName(testCaseName); + + ClientConfig clientConfig = ConfigProvider.locateAndGetClientConfig(); + clientConfig.setClusterName(TestUtils.getClusterName(testClusterName)); + engineClient = new SeaTunnelClient(clientConfig); + JobExecutionEnvironment jobExecutionEnv = + engineClient.createExecutionContext(testResources.getRight(), jobConfig); + ClientJobProxy clientJobProxy = jobExecutionEnv.execute(); + Long jobId = clientJobProxy.getJobId();
+ + ClientJobProxy finalClientJobProxy = clientJobProxy; + Awaitility.await() + .atMost(600000, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> { + // Wait until some tasks have committed, so we can read rows from the + // sink target dir + Thread.sleep(2000); + System.out.println( + "\n=================================" + + FileUtils.getFileLineNumberFromDir( + testResources.getLeft()) + + "=================================\n"); + Assertions.assertTrue( + JobStatus.RUNNING.equals(finalClientJobProxy.getJobStatus()) + && FileUtils.getFileLineNumberFromDir( + testResources.getLeft()) + > 1); });
+ + Thread.sleep(5000); + // shut down all nodes + node1.shutdown(); + node2.shutdown(); + + log.info( + "==========================================All nodes are shut down========================================"); + Thread.sleep(10000); + + node1 = SeaTunnelServerStarter.createHazelcastInstance(seaTunnelConfig); + + node2 = SeaTunnelServerStarter.createHazelcastInstance(seaTunnelConfig); + + log.info( + "==========================================All nodes are started, begin checking cluster size========================================"); + // wait for all nodes to join the cluster + HazelcastInstanceImpl restoreFinalNode = node1; + Awaitility.await() + .atMost(60000, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> + Assertions.assertEquals( + 2, restoreFinalNode.getCluster().getMembers().size()));
+ + log.info( + "==========================================All nodes are running========================================"); + engineClient = new SeaTunnelClient(clientConfig); + ClientJobProxy newClientJobProxy = engineClient.createJobClient().getJobProxy(jobId); + CompletableFuture<JobStatus> waitForJobCompleteFuture = + CompletableFuture.supplyAsync(newClientJobProxy::waitForJobComplete); + + Thread.sleep(10000); + + Awaitility.await() + .atMost(100000, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> { + // Wait for the job to write all rows to the file + Thread.sleep(2000); + System.out.println( + "\n=================================" + + FileUtils.getFileLineNumberFromDir( + testResources.getLeft()) + + "=================================\n"); + JobStatus jobStatus = null; + try { + jobStatus = newClientJobProxy.getJobStatus(); + } catch (Exception e) { + log.error(ExceptionUtils.getMessage(e)); + } + + Assertions.assertTrue( + JobStatus.RUNNING.equals(jobStatus) + && testRowNumber * testParallelism + == FileUtils.getFileLineNumberFromDir( + testResources.getLeft())); });
+ + // Sleep 10s and expect the job not to write any more rows. + Thread.sleep(10000); + log.info( + "==========================================Cancel Job========================================"); + newClientJobProxy.cancelJob(); + + Awaitility.await() + .atMost(600000, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> + Assertions.assertTrue( + waitForJobCompleteFuture.isDone() + && JobStatus.CANCELED.equals( + waitForJobCompleteFuture.get()))); + // prove that the job was restarted + Long fileLineNumberFromDir = + FileUtils.getFileLineNumberFromDir(testResources.getLeft()); + Assertions.assertEquals(testRowNumber * testParallelism, fileLineNumberFromDir); + + } finally { + log.info( + "==========================================Clean test resources========================================"); + if (engineClient != null) { + engineClient.shutdown(); + } + + if (node1 != null) { + node1.shutdown(); + } + + if (node2 != null) { + node2.shutdown(); + } + } + } }
diff --git a/seatunnel-engine/seatunnel-engine-server/pom.xml b/seatunnel-engine/seatunnel-engine-server/pom.xml index b31756674bf8..bbf68bb49b09 100644 --- a/seatunnel-engine/seatunnel-engine-server/pom.xml +++ b/seatunnel-engine/seatunnel-engine-server/pom.xml @@ -43,6 +43,11 @@ imap-storage-file ${project.version} + <dependency> + <groupId>org.apache.seatunnel</groupId> + <artifactId>imap-storage-kafka</artifactId> + <version>${project.version}</version> + </dependency> com.hazelcast hazelcast
diff --git a/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-kafka/pom.xml b/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-kafka/pom.xml new file mode 100644 index 000000000000..cd8dd5b146f0 --- /dev/null +++ b/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-kafka/pom.xml @@ -0,0 +1,57 @@ +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>org.apache.seatunnel</groupId> + <artifactId>imap-storage-plugins</artifactId> + <version>${revision}</version> + </parent> + + <groupId>org.apache.seatunnel</groupId> + <artifactId>imap-storage-kafka</artifactId> + <name>SeaTunnel : Engine : Storage : IMap Storage Plugins : Kafka</name> + + <properties> + <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> + </properties> + + <dependencies> + <dependency> + <groupId>org.apache.seatunnel</groupId> + <artifactId>serializer-protobuf</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>org.apache.kafka</groupId> + <artifactId>kafka-clients</artifactId> + </dependency> + <dependency> + <groupId>org.awaitility</groupId> + <artifactId>awaitility</artifactId> + </dependency> + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-lang3</artifactId> + </dependency> + </dependencies> +</project>
diff --git a/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-kafka/src/main/java/org/apache/seatunnel/engine/imap/storage/kafka/IMapKafkaStorage.java b/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-kafka/src/main/java/org/apache/seatunnel/engine/imap/storage/kafka/IMapKafkaStorage.java new file mode 100644 index 000000000000..0c7b81861db7 --- /dev/null +++ b/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-kafka/src/main/java/org/apache/seatunnel/engine/imap/storage/kafka/IMapKafkaStorage.java @@ -0,0 +1,393 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */
+ +package org.apache.seatunnel.engine.imap.storage.kafka;
+ +import org.apache.seatunnel.engine.imap.storage.api.IMapStorage; +import org.apache.seatunnel.engine.imap.storage.kafka.bean.IMapDataStruct; +import org.apache.seatunnel.engine.imap.storage.kafka.config.KafkaConfiguration; +import org.apache.seatunnel.engine.imap.storage.kafka.config.KafkaConfigurationConstants; +import org.apache.seatunnel.engine.imap.storage.kafka.utils.TopicAdmin; +import org.apache.seatunnel.engine.serializer.api.Serializer; +import org.apache.seatunnel.engine.serializer.protobuf.ProtoStuffSerializer;
+ +import org.apache.commons.lang3.ClassUtils; +import org.apache.kafka.clients.consumer.Consumer; +import org.apache.kafka.clients.consumer.ConsumerConfig; +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.apache.kafka.clients.consumer.ConsumerRecords; +import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.Producer; +import org.apache.kafka.clients.producer.ProducerConfig; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.clients.producer.RecordMetadata; +import org.apache.kafka.common.KafkaException; +import org.apache.kafka.common.PartitionInfo; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.errors.WakeupException; +import org.apache.kafka.common.serialization.ByteArrayDeserializer; +import org.apache.kafka.common.serialization.ByteArraySerializer;
+ +import com.google.common.collect.Maps; +import lombok.extern.slf4j.Slf4j;
+ +import java.io.IOException; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future;
+ +import static org.apache.seatunnel.engine.imap.storage.kafka.config.KafkaConfigurationConstants.BUSINESS_KEY;
+ +@Slf4j +public class IMapKafkaStorage implements IMapStorage { + private KafkaConfiguration kafkaConfiguration; + + private Consumer<byte[], byte[]> consumer; + private Producer<byte[], byte[]> producer; + + private Serializer serializer; + + private List<TopicPartition> partitions = new ArrayList<>(); + + private String businessName;
+ + @Override + public void initialize(Map<String, Object> config) { + String bootstrapServers = + config.get(KafkaConfigurationConstants.KAFKA_BOOTSTRAP_SERVERS).toString(); + String compactTopicPrefix = + config.get(KafkaConfigurationConstants.KAFKA_STORAGE_COMPACT_TOPIC_PREFIX) + .toString(); + this.businessName = (String) config.get(BUSINESS_KEY); + String compactTopic = compactTopicPrefix.concat(businessName); + Integer topicReplicationFactor = + Integer.parseInt( + config.getOrDefault( + KafkaConfigurationConstants + .KAFKA_STORAGE_COMPACT_TOPIC_REPLICATION_FACTOR, + 3) + .toString()); + Integer topicPartition = + Integer.parseInt( + config.getOrDefault( + KafkaConfigurationConstants + .KAFKA_STORAGE_COMPACT_TOPIC_PARTITION, + 1) + .toString());
+ + kafkaConfiguration = + KafkaConfiguration.builder() + .bootstrapServers(bootstrapServers) + .storageTopic(compactTopic) + .storageTopicPartition(topicPartition) + .storageTopicReplicationFactor(topicReplicationFactor) + .consumerConfigs( + KafkaConfiguration.setExtraConfiguration( + config, + KafkaConfigurationConstants.KAFKA_CONSUMER_CONFIGS_PREFIX)) + .producerConfigs( + KafkaConfiguration.setExtraConfiguration( + config, + KafkaConfigurationConstants.KAFKA_PRODUCER_CONFIGS_PREFIX)) + .adminConfigs( + KafkaConfiguration.setExtraConfiguration( + config, + KafkaConfigurationConstants.KAFKA_ADMIN_CONFIGS_PREFIX)) + .topicConfigs( + KafkaConfiguration.setExtraConfiguration( + config, + KafkaConfigurationConstants.KAFKA_TOPIC_CONFIGS_PREFIX)) + .build();
+ + // Init serializer, default ProtoStuffSerializer + this.serializer = new ProtoStuffSerializer(); + maybeCreateTopicAndValidateCompactConfig(); + this.consumer = createConsumer(); + this.producer = createProducer(); + }
+ + private void maybeCreateTopicAndValidateCompactConfig() { + // create admin client + TopicAdmin topicAdmin = new TopicAdmin(kafkaConfiguration); + try { + // The storage topic must be a compacted topic + topicAdmin.maybeCreateTopic(kafkaConfiguration.getStorageTopic()); + topicAdmin.verifyTopicCleanupPolicyOnlyCompact(kafkaConfiguration.getStorageTopic()); + + } catch (Exception e) { + throw new RuntimeException(e); + } finally { + try { + topicAdmin.close(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + }
+ + /** Create the Kafka producer. */ + private Producer<byte[], byte[]> createProducer() { + Map<String, Object> producerConfigs = kafkaConfiguration.getProducerConfigs(); + producerConfigs.put( + ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, kafkaConfiguration.getBootstrapServers()); + producerConfigs.put(ProducerConfig.ACKS_CONFIG, "all"); + producerConfigs.put(ProducerConfig.MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION, 1); + producerConfigs.put( + ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName()); + producerConfigs.put( + ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName()); + return new KafkaProducer<>(producerConfigs); + }
+ + /** Create the Kafka consumer and assign it all partitions of the storage topic. */ + private Consumer<byte[], byte[]> createConsumer() { + Map<String, Object> consumerConfigs = kafkaConfiguration.getConsumerConfigs(); + consumerConfigs.put( + ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, kafkaConfiguration.getBootstrapServers()); + consumerConfigs.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); + consumerConfigs.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false); + consumerConfigs.put( + ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, + ByteArrayDeserializer.class.getName()); + consumerConfigs.put( + ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, + ByteArrayDeserializer.class.getName()); + Consumer<byte[], byte[]> consumer = new KafkaConsumer<>(consumerConfigs); + List<PartitionInfo> partitionInfos = + consumer.partitionsFor(kafkaConfiguration.getStorageTopic()); + if (partitionInfos == null) { + throw new RuntimeException( + "Could not look up partition metadata for the IMap storage topic in the" + + " allotted period. This could indicate a connectivity issue, unavailable topic partitions, or," + + " if this is your first use of the topic, it may have taken too long to create."); + } + for (PartitionInfo partition : partitionInfos) { + this.partitions.add(new TopicPartition(partition.topic(), partition.partition())); + } + consumer.assign(this.partitions); + return consumer; + }
+ + @Override + public boolean store(Object key, Object value) { + try { + Map.Entry<byte[], byte[]> data = convertToMapEntry(key, value); + Future<RecordMetadata> future = + producer.send( + new ProducerRecord<>( + kafkaConfiguration.getStorageTopic(), + data.getKey(), + data.getValue())); + return Objects.nonNull(future.get()); + } catch (IOException | InterruptedException | ExecutionException e) { + throw new RuntimeException(e); + } + }
+ + @Override + public Set<Object> storeAll(Map<Object, Object> all) { + for (Map.Entry<Object, Object> item : all.entrySet()) { + store(item.getKey(), item.getValue()); + } + return all.keySet(); + }
+ + @Override + public boolean delete(Object key) { + try { + byte[] bKey = convertToBytes(key); + // Send a tombstone message; the key is removed during topic compaction + Future<RecordMetadata> future = + producer.send( + new ProducerRecord<>(kafkaConfiguration.getStorageTopic(), bKey, null)); + return Objects.nonNull(future.get()); + } catch (IOException | InterruptedException | ExecutionException e) { + throw new RuntimeException(e); + } + }
+ + @Override + public Set<Object> deleteAll(Collection<Object> keys) { + for (Object key : keys) { + delete(key); + } + return new HashSet<>(keys); + }
+ + @Override + public Map<Object, Object> loadAll() throws IOException { + + Map<Object, Object> result = Maps.newConcurrentMap(); + + // Always consume from the beginning of all partitions. + consumer.seekToBeginning(partitions); + // Start to read data + Set<TopicPartition> assignment = consumer.assignment(); + Map<TopicPartition, Long> endOffsets = consumer.endOffsets(assignment); + log.info("Reading to end of log offsets {}", endOffsets); + while (!endOffsets.isEmpty()) { + Iterator<Map.Entry<TopicPartition, Long>> it = endOffsets.entrySet().iterator(); + while (it.hasNext()) { + Map.Entry<TopicPartition, Long> entry = it.next(); + TopicPartition topicPartition = entry.getKey(); + long endOffset = entry.getValue(); + long lastConsumedOffset = consumer.position(topicPartition); + if (lastConsumedOffset >= endOffset) { + log.info("Read to end offset {} for {}", endOffset, topicPartition); + it.remove(); + } else { + log.info( + "Behind end offset {} for {}; last-read offset is {}", + endOffset, + topicPartition, + lastConsumedOffset); + poll( + Integer.MAX_VALUE, + new java.util.function.Consumer<ConsumerRecord<byte[], byte[]>>() { + @Override + public void accept(ConsumerRecord<byte[], byte[]> record) { + try { + IMapDataStruct key = + serializer.deserialize( + record.key(), IMapDataStruct.class); + Class<?> keyClazz = ClassUtils.getClass(key.getClassName()); + Object originalKey = + serializer.deserialize(key.getValue(), keyClazz); + + if (record.value() == null) { + result.remove(originalKey); + } else { + IMapDataStruct value = + serializer.deserialize( + record.value(), IMapDataStruct.class); + Class<?> valueClazz = + ClassUtils.getClass(value.getClassName()); + Object originalValue = + serializer.deserialize( + value.getValue(), valueClazz); + result.put(originalKey, originalValue); + } + + } catch (IOException | ClassNotFoundException e) { + throw new RuntimeException(e); + } + } + }); + break; + } + } + } + return result; + }
+ + private void poll( + long timeoutMs, java.util.function.Consumer<ConsumerRecord<byte[], byte[]>> accepter) { + try { + ConsumerRecords<byte[], byte[]> records = consumer.poll(Duration.ofMillis(timeoutMs)); + for (ConsumerRecord<byte[], byte[]> record : records) { + accepter.accept(record); + } + } catch (WakeupException e) { + // Expected on get() or stop(). The calling code should handle this + throw e; + } catch (KafkaException e) { + log.error("Error polling", e); + throw e; + } + }
+ + @Override + public Set<Object> loadAllKeys() { + try { + return loadAll().keySet(); + } catch (IOException e) { + throw new RuntimeException(e); + } + }
+ + @Override + public void destroy(boolean deleteAllFileFlag) { + log.info("start destroy IMapKafkaStorage, businessName is {}", businessName); + if (deleteAllFileFlag) { + // Delete the compacted topic + TopicAdmin admin = new TopicAdmin(kafkaConfiguration); + try { + admin.deleteTopic(kafkaConfiguration.getStorageTopic()); + } catch (ExecutionException | InterruptedException e) { + throw new RuntimeException(e); + } finally { + try { + admin.close(); + } catch (Exception e) { + // Do nothing + } + } + } + }
+ + public Map.Entry<byte[], byte[]> convertToMapEntry(Object key, Object value) + throws IOException { + byte[] bKey = convertToBytes(key); + byte[] bValue = convertToBytes(value); + return new Map.Entry<byte[], byte[]>() { + @Override + public byte[] getKey() { + return bKey; + } + + @Override + public byte[] getValue() { + return bValue; + } + + @Override + public byte[] setValue(byte[] value) { + return new byte[0]; + } + }; + }
+ + private byte[] convertToBytes(Object data) throws IOException { + IMapDataStruct struct = + IMapDataStruct.builder() + .value(serializer.serialize(data)) + .className(data.getClass().getName()) + .build(); + return serializer.serialize(struct); + } +}
diff --git a/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-kafka/src/main/java/org/apache/seatunnel/engine/imap/storage/kafka/IMapKafkaStorageFactory.java b/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-kafka/src/main/java/org/apache/seatunnel/engine/imap/storage/kafka/IMapKafkaStorageFactory.java new file mode 100644 index 000000000000..a668cb1ad77e --- /dev/null +++ b/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-kafka/src/main/java/org/apache/seatunnel/engine/imap/storage/kafka/IMapKafkaStorageFactory.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */
+ +package org.apache.seatunnel.engine.imap.storage.kafka;
+ +import org.apache.seatunnel.engine.imap.storage.api.IMapStorage; +import org.apache.seatunnel.engine.imap.storage.api.IMapStorageFactory; +import org.apache.seatunnel.engine.imap.storage.api.exception.IMapStorageException; +import org.apache.seatunnel.engine.imap.storage.kafka.config.KafkaConfigurationConstants;
+ +import com.google.auto.service.AutoService;
+ +import java.util.Map;
+ +@AutoService(IMapStorageFactory.class) +public class IMapKafkaStorageFactory implements IMapStorageFactory { + @Override + public String factoryIdentifier() { + return "kafka"; + } + + @Override + public IMapStorage create(Map<String, Object> configuration) throws IMapStorageException { + checkConfigNull(configuration); + IMapKafkaStorage iMapKafkaStorage = new IMapKafkaStorage(); + iMapKafkaStorage.initialize(configuration); + return iMapKafkaStorage; + } + + private void checkConfigNull(Map<String, Object> configuration) { + if (!configuration.containsKey(KafkaConfigurationConstants.KAFKA_BOOTSTRAP_SERVERS)) { + throw new IllegalArgumentException( + KafkaConfigurationConstants.KAFKA_BOOTSTRAP_SERVERS + " is required"); + } + } +}
diff --git a/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-kafka/src/main/java/org/apache/seatunnel/engine/imap/storage/kafka/bean/IMapDataStruct.java b/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-kafka/src/main/java/org/apache/seatunnel/engine/imap/storage/kafka/bean/IMapDataStruct.java new file mode 100644 index 000000000000..275485826fee --- /dev/null +++ b/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-kafka/src/main/java/org/apache/seatunnel/engine/imap/storage/kafka/bean/IMapDataStruct.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */
+ +package org.apache.seatunnel.engine.imap.storage.kafka.bean;
+ +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor;
+ +import java.io.Serializable;
+ +@Data +@Builder +@AllArgsConstructor +@NoArgsConstructor +public class IMapDataStruct implements Serializable { + private byte[] value; + private String className; +}
diff --git a/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-kafka/src/main/java/org/apache/seatunnel/engine/imap/storage/kafka/config/KafkaConfiguration.java b/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-kafka/src/main/java/org/apache/seatunnel/engine/imap/storage/kafka/config/KafkaConfiguration.java new file mode 100644 index 000000000000..2744e2991fa3 --- /dev/null +++ b/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-kafka/src/main/java/org/apache/seatunnel/engine/imap/storage/kafka/config/KafkaConfiguration.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */
+ +package org.apache.seatunnel.engine.imap.storage.kafka.config;
+ +import com.google.common.collect.Maps; +import lombok.Builder; +import lombok.Data;
+ +import java.io.Serializable; +import java.util.Map;
+ +@Builder +@Data +public class KafkaConfiguration implements Serializable { + private String bootstrapServers; + private String storageTopic; + private Integer storageTopicReplicationFactor; + private Integer storageTopicPartition; + private Map<String, Object> producerConfigs; + private Map<String, Object> consumerConfigs; + private Map<String, Object> adminConfigs; + private Map<String, Object> topicConfigs; + + public static Map<String, Object> setExtraConfiguration( + Map<String, Object> config, String prefix) { + Map<String, Object> extraConfigs = Maps.newConcurrentMap(); + config.forEach( + (k, v) -> { + if (k.startsWith(prefix)) { + extraConfigs.put(k.replace(prefix, ""), v); + } + }); + return extraConfigs; + } +}
diff --git a/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-kafka/src/main/java/org/apache/seatunnel/engine/imap/storage/kafka/config/KafkaConfigurationConstants.java b/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-kafka/src/main/java/org/apache/seatunnel/engine/imap/storage/kafka/config/KafkaConfigurationConstants.java new file mode 100644 index 000000000000..4b60d7e0523d --- /dev/null +++ b/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-kafka/src/main/java/org/apache/seatunnel/engine/imap/storage/kafka/config/KafkaConfigurationConstants.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +package org.apache.seatunnel.engine.imap.storage.kafka.config; + +public class KafkaConfigurationConstants { + public static final String BUSINESS_KEY = "businessName"; + public static final String KAFKA_BOOTSTRAP_SERVERS = "bootstrap.servers"; + public static final String KAFKA_STORAGE_COMPACT_TOPIC_PREFIX = "storage.compact.topic.prefix"; + public static final String KAFKA_STORAGE_COMPACT_TOPIC_REPLICATION_FACTOR = + "storage.compact.topic.replication.factor"; + public static final String KAFKA_STORAGE_COMPACT_TOPIC_PARTITION = + "storage.compact.topic.partition"; + + public static final String KAFKA_CONSUMER_CONFIGS_PREFIX = "consumer.override."; + public static final String KAFKA_PRODUCER_CONFIGS_PREFIX = "producer.override."; + public static final String KAFKA_ADMIN_CONFIGS_PREFIX = "admin.override."; + public static final String KAFKA_TOPIC_CONFIGS_PREFIX = "topic.override."; +} diff --git a/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-kafka/src/main/java/org/apache/seatunnel/engine/imap/storage/kafka/utils/NewTopicBuilder.java b/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-kafka/src/main/java/org/apache/seatunnel/engine/imap/storage/kafka/utils/NewTopicBuilder.java new file mode 100644 index 000000000000..8aa09120141a --- /dev/null +++ b/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-kafka/src/main/java/org/apache/seatunnel/engine/imap/storage/kafka/utils/NewTopicBuilder.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + */
+ +package org.apache.seatunnel.engine.imap.storage.kafka.utils;
+ +import org.apache.kafka.clients.admin.NewTopic; +import org.apache.kafka.common.config.TopicConfig;
+ +import java.util.HashMap; +import java.util.Map; +import java.util.Optional;
+ +import static org.apache.kafka.common.config.TopicConfig.CLEANUP_POLICY_COMPACT; +import static org.apache.kafka.common.config.TopicConfig.CLEANUP_POLICY_CONFIG; +import static org.apache.kafka.common.config.TopicConfig.UNCLEAN_LEADER_ELECTION_ENABLE_CONFIG;
+ +public class NewTopicBuilder { + + public static NewTopicBuilder defineTopic(String topicName) { + return new NewTopicBuilder(topicName); + } + + private final String name; + private int numPartitions; + private short replicationFactor; + private final Map<String, String> configs = new HashMap<>(); + + NewTopicBuilder(String name) { + this.name = name; + } + + public NewTopicBuilder partitions(int numPartitions) { + this.numPartitions = numPartitions; + return this; + } + + public NewTopicBuilder replicationFactor(short replicationFactor) { + this.replicationFactor = replicationFactor; + return this; + } + + public NewTopicBuilder compacted() { + this.configs.put(CLEANUP_POLICY_CONFIG, CLEANUP_POLICY_COMPACT); + return this; + } + + public NewTopicBuilder minInSyncReplicas(short minInSyncReplicas) { + this.configs.put( + TopicConfig.MIN_IN_SYNC_REPLICAS_CONFIG, Short.toString(minInSyncReplicas)); + return this; + } + + public NewTopicBuilder uncleanLeaderElection(boolean allow) { + this.configs.put(UNCLEAN_LEADER_ELECTION_ENABLE_CONFIG, Boolean.toString(allow)); + return this; + } + + public NewTopicBuilder config(Map<String, Object> configs) { + if (configs != null) { + for (Map.Entry<String, Object> entry : configs.entrySet()) { + Object value = entry.getValue(); + this.configs.put(entry.getKey(), value != null ? value.toString() : null); + } + } else { + // clear config + this.configs.clear(); + } + return this; + } + + public NewTopic build() { + return new NewTopic(name, Optional.of(numPartitions), Optional.of(replicationFactor)) + .configs(configs); + } +}
diff --git a/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-kafka/src/main/java/org/apache/seatunnel/engine/imap/storage/kafka/utils/TopicAdmin.java b/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-kafka/src/main/java/org/apache/seatunnel/engine/imap/storage/kafka/utils/TopicAdmin.java new file mode 100644 index 000000000000..c8d542ce2b90 --- /dev/null +++ b/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-kafka/src/main/java/org/apache/seatunnel/engine/imap/storage/kafka/utils/TopicAdmin.java @@ -0,0 +1,394 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */
+ +package org.apache.seatunnel.engine.imap.storage.kafka.utils;
+ +import org.apache.seatunnel.engine.imap.storage.kafka.config.KafkaConfiguration;
+ +import org.apache.kafka.clients.admin.Admin; +import org.apache.kafka.clients.admin.AdminClientConfig; +import org.apache.kafka.clients.admin.Config; +import org.apache.kafka.clients.admin.ConfigEntry; +import org.apache.kafka.clients.admin.CreateTopicsOptions; +import org.apache.kafka.clients.admin.DeleteTopicsOptions; +import org.apache.kafka.clients.admin.DescribeConfigsOptions; +import org.apache.kafka.clients.admin.DescribeTopicsOptions; +import org.apache.kafka.clients.admin.NewTopic; +import org.apache.kafka.clients.admin.TopicDescription; +import org.apache.kafka.common.KafkaFuture; +import org.apache.kafka.common.config.ConfigException; +import org.apache.kafka.common.config.ConfigResource; +import org.apache.kafka.common.config.TopicConfig; +import org.apache.kafka.common.errors.ClusterAuthorizationException; +import org.apache.kafka.common.errors.InvalidConfigurationException; +import org.apache.kafka.common.errors.TopicAuthorizationException; +import org.apache.kafka.common.errors.TopicExistsException; +import org.apache.kafka.common.errors.UnknownTopicOrPartitionException; +import org.apache.kafka.common.errors.UnsupportedVersionException; +import org.apache.kafka.common.utils.Utils;
+ +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; +import lombok.extern.slf4j.Slf4j;
+ +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.ExecutionException; +import java.util.stream.Collectors;
+ +import static org.apache.kafka.common.config.TopicConfig.CLEANUP_POLICY_CONFIG;
+ +@Slf4j +public class TopicAdmin implements AutoCloseable { + private final KafkaConfiguration kafkaConfiguration; + private final Admin admin; + + public TopicAdmin(KafkaConfiguration config) { + this.kafkaConfiguration = config; + this.admin = createKafkaAdminClient(); + } + + private Admin createKafkaAdminClient() { + Map<String, Object> configs = kafkaConfiguration.getAdminConfigs(); + configs.put( + AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, + kafkaConfiguration.getBootstrapServers()); + return Admin.create(configs); + }
+ + public void maybeCreateTopic(String topic) { + Map<String, TopicDescription> existing = describeTopics(topic); + if (!existing.isEmpty()) { + log.info("Topic '{}' already exists; skipping creation", topic); + return; + } + log.info("Creating topic '{}'", topic); + NewTopicBuilder newTopicBuilder = + NewTopicBuilder.defineTopic(topic) + .config(kafkaConfiguration.getTopicConfigs()) + .compacted() + .partitions(kafkaConfiguration.getStorageTopicPartition()) + .replicationFactor( + kafkaConfiguration.getStorageTopicReplicationFactor().shortValue()); + NewTopic newTopic = newTopicBuilder.build(); + // Create the topic if it doesn't exist + Set<String> newTopics = createTopics(newTopic); + if (!newTopics.contains(topic)) { + log.info( + "Using the admin client to verify that the cleanup policy of topic '{}' is '{}'", + topic, + TopicConfig.CLEANUP_POLICY_COMPACT); + verifyTopicCleanupPolicyOnlyCompact(topic); + } + }
+ + /** + * Create the given topics. + * + * @param topics the topics to create + * @return the names of the topics that were created or already existed + */ + public Set<String> createTopics(NewTopic... topics) { + Map<String, NewTopic> topicsByName = new HashMap<>(); + if (topics != null) { + for (NewTopic topic : topics) { + if (topic != null) { + topicsByName.put(topic.name(), topic); + } + } + } + if (topicsByName.isEmpty()) { + return Collections.emptySet(); + } + String bootstrapServers = kafkaConfiguration.getBootstrapServers(); + String topicNameList = Utils.join(topicsByName.keySet(), "', '"); + + Map<String, KafkaFuture<Void>> results = + admin.createTopics( + topicsByName.values(), + new CreateTopicsOptions().validateOnly(false)) + .values(); + + Set<String> createdTopicNames = new HashSet<>(); + for (Map.Entry<String, KafkaFuture<Void>> entry : results.entrySet()) { + String topic = entry.getKey(); + try { + entry.getValue().get(); + createdTopicNames.add(topic); + } catch (ExecutionException e) { + Throwable cause = e.getCause(); + if (cause instanceof TopicExistsException) { + log.info( + "Found existing topic '{}' on the brokers at {}", + topic, + bootstrapServers); + createdTopicNames.add(topic); + continue; + } + if (cause instanceof UnsupportedVersionException) { + log.error( + "Unable to create topic(s) '{}' since the brokers at {} do not support the CreateTopics API.", + topicNameList, + bootstrapServers, + cause); + return Collections.emptySet(); + } + if (cause instanceof ClusterAuthorizationException + || cause instanceof TopicAuthorizationException) { + log.info( + "Not authorized to create topic(s) '{}' upon the brokers {}.", + topicNameList, + bootstrapServers); + return Collections.emptySet(); + } + if (cause instanceof InvalidConfigurationException) { + throw new RuntimeException( + "Unable to create topic(s) '" + + topicNameList + + "': " + + cause.getMessage(), + cause); + } + throw new RuntimeException( + "Error while attempting to create/find topic(s) '" + topicNameList + "'", + e); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException( + "Interrupted while attempting to create/find topic(s) '" + + topicNameList + + "'", + e); + } + } + return createdTopicNames; + }
+ + /** + * Verify that the cleanup policy of the given topic is compact-only. + * + * @param topic the topic to verify + */ + public void verifyTopicCleanupPolicyOnlyCompact(String topic) { + Set<String> cleanupPolicies = topicCleanupPolicy(topic); + if (cleanupPolicies.isEmpty()) { + log.info( + "Unable to use the admin client to verify that the cleanup policy of topic '{}' " + + "is '{}', possibly because the client does not have the required permission to " + + "describe topic configurations. Please ensure that the topic's cleanup.policy is compact", + topic, + TopicConfig.CLEANUP_POLICY_COMPACT); + return; + } + Set<String> expectedPolicies = Collections.singleton(TopicConfig.CLEANUP_POLICY_COMPACT); + if (!cleanupPolicies.equals(expectedPolicies)) { + String expectedPolicyStr = String.join(",", expectedPolicies); + String cleanupPolicyStr = String.join(",", cleanupPolicies); + String msg = + String.format( + "The '%s' configuration for Topic '%s' must be '%s', but the " + + "current configuration is '%s', which may cause data loss", + TopicConfig.CLEANUP_POLICY_CONFIG, + topic, + expectedPolicyStr, + cleanupPolicyStr); + throw new ConfigException(msg); + } + }
+ + public Set<String> topicCleanupPolicy(String topic) { + Config topicConfig = describeTopicConfigs(topic).get(topic); + if (topicConfig == null) { + log.warn("Unable to find topic '{}' when getting cleanup policy", topic); + return Collections.emptySet(); + } + ConfigEntry entry = topicConfig.get(CLEANUP_POLICY_CONFIG); + if (entry != null && entry.value() != null) { + String policy = entry.value(); + log.info("Found cleanup.policy = {} for topic '{}'", policy, topic); + return Arrays.stream(policy.split(",")) + .map(String::trim) + .filter(s -> !s.isEmpty()) + .map(String::toLowerCase) + .collect(Collectors.toSet()); + } + log.debug("Found no cleanup.policy for topic '{}'", topic); + return Collections.emptySet(); + }
+ + public Map<String, Config> describeTopicConfigs(String... topicNames) { + if (topicNames == null) { + return Collections.emptyMap(); + } + Collection<String> topics = + Arrays.stream(topicNames) + .filter(Objects::nonNull) + .map(String::trim) + .filter(s -> !s.isEmpty()) + .collect(Collectors.toList()); + if (topics.isEmpty()) { + return Collections.emptyMap(); + } + String bootstrapServers = kafkaConfiguration.getBootstrapServers(); + String topicNameList = topics.stream().collect(Collectors.joining(", ")); + Collection<ConfigResource> resources = + topics.stream() + .map(t -> new ConfigResource(ConfigResource.Type.TOPIC, t)) + .collect(Collectors.toList()); + + Map<ConfigResource, KafkaFuture<Config>> topicConfigs = + admin.describeConfigs(resources, new DescribeConfigsOptions()).values(); + + Map<String, Config> result = new HashMap<>(); + for (Map.Entry<ConfigResource, KafkaFuture<Config>> entry : topicConfigs.entrySet()) { + ConfigResource resource = entry.getKey(); + KafkaFuture<Config> configs = entry.getValue(); + String topic = resource.name(); + try { + result.put(topic, configs.get()); + } catch (ExecutionException e) { + Throwable cause = e.getCause(); + if (cause instanceof UnknownTopicOrPartitionException) { + log.info( + "Topic '{}' does not exist on the brokers at {}", + topic, + bootstrapServers); + result.put(topic, null); + } else if (cause instanceof ClusterAuthorizationException + || cause instanceof TopicAuthorizationException) { + log.warn( + "Not authorized to describe topic config for topic '{}' on brokers at {}", + topic, + bootstrapServers); + } else if (cause instanceof UnsupportedVersionException) { + log.warn( + "API to describe topic config for topic '{}' is unsupported on brokers at {}", + topic, + bootstrapServers); + + } else { + String msg = + String.format( + "Error while attempting to describe topic config for topic '%s' on brokers at %s", + topic, bootstrapServers); + throw new RuntimeException(msg, e); + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + String msg = + String.format( + "Interrupted while attempting to describe topic configs '%s'", + topicNameList); + throw new RuntimeException(msg, e); + } + } + return result; + }
+ + public Map<String, TopicDescription> describeTopics(String... topics) { + if (topics == null) { + return Collections.emptyMap(); + } + String bootstrapServers = kafkaConfiguration.getBootstrapServers(); + String topicNames = String.join(", ", topics); + + Map<String, KafkaFuture<TopicDescription>> newResults = + admin.describeTopics(Arrays.asList(topics), new DescribeTopicsOptions()).values(); + + Map<String, TopicDescription> existingTopics = new HashMap<>(); + newResults.forEach( + (topic, desc) -> { + try { + existingTopics.put(topic, desc.get()); + } catch (ExecutionException e) { + Throwable cause = e.getCause(); + if (cause instanceof UnknownTopicOrPartitionException) { + log.debug( + "Topic '{}' does not exist on the brokers at {}", + topic, + bootstrapServers); + return; + } + if (cause instanceof ClusterAuthorizationException + || cause instanceof TopicAuthorizationException) { + String msg = + String.format( + "Not authorized to describe topic(s) '%s' on the brokers %s", + topicNames, bootstrapServers); + throw new RuntimeException(msg, cause); + } + if (cause instanceof UnsupportedVersionException) { + String msg = + String.format( + "Unable to describe topic(s) '%s' since the brokers " + + "at %s do not support the DescribeTopics API.", + topicNames, bootstrapServers); + throw new RuntimeException(msg, cause); + } + throw new RuntimeException( + "Error while attempting to describe topics '" + topicNames + "'", + e); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException( + "Interrupted while attempting to describe topics '" + + topicNames + + "'", + e); + } + }); + return existingTopics; + }
+ + public boolean deleteTopic(String topic) throws ExecutionException, InterruptedException { + Set<String> deleteTopicsResult = deleteTopics(topic); + return deleteTopicsResult.contains(topic); + } + + public Set<String> deleteTopics(String... topics) + throws ExecutionException, InterruptedException { + Map<String, KafkaFuture<Void>> topicsResult = + admin.deleteTopics(Lists.newArrayList(topics), new DeleteTopicsOptions()) + .topicNameValues(); + Set<String> deleteTopics = Sets.newHashSet(); + for (Map.Entry<String, KafkaFuture<Void>> entry : topicsResult.entrySet()) { + entry.getValue().get(); + deleteTopics.add(entry.getKey()); + } + return deleteTopics; + } + + @Override + public void close() throws Exception { + this.admin.close(); + } +}
diff --git a/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-kafka/src/test/java/org/apache/seatunnel/engine/imap/storage/kafka/IMapKafkaStorageTest.java b/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-kafka/src/test/java/org/apache/seatunnel/engine/imap/storage/kafka/IMapKafkaStorageTest.java new file mode 100644 index 000000000000..ae2da6433d39 --- /dev/null +++ b/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-kafka/src/test/java/org/apache/seatunnel/engine/imap/storage/kafka/IMapKafkaStorageTest.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */
+ +package org.apache.seatunnel.engine.imap.storage.kafka;
+ +import org.apache.seatunnel.engine.imap.storage.kafka.config.KafkaConfigurationConstants;
+ +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.condition.EnabledOnOs;
+ +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger;
+ +import static org.awaitility.Awaitility.await; +import static org.junit.jupiter.api.condition.OS.LINUX; +import static org.junit.jupiter.api.condition.OS.MAC;
+ +@EnabledOnOs({LINUX, MAC}) +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +@Disabled("Requires a running Kafka broker at localhost:9092") +public class IMapKafkaStorageTest { + static String KAFKA_BOOTSTRAP_SERVERS = "localhost:9092"; + private static final Map<String, Object> config; + private static final IMapKafkaStorage storage; + + static { + config = new HashMap<>(); + config.put(KafkaConfigurationConstants.KAFKA_BOOTSTRAP_SERVERS, KAFKA_BOOTSTRAP_SERVERS); + config.put(KafkaConfigurationConstants.BUSINESS_KEY, "storage-test"); + config.put(KafkaConfigurationConstants.KAFKA_STORAGE_COMPACT_TOPIC_PREFIX, "imap-"); + config.put(KafkaConfigurationConstants.KAFKA_STORAGE_COMPACT_TOPIC_REPLICATION_FACTOR, 1); + storage = new IMapKafkaStorage(); + storage.initialize(config); + }
+ + @Test + void testAll() throws IOException { + + List<String> keys = new ArrayList<>(); + String key1Index = "key1"; + String key2Index = "key2"; + String key50Index = "key50"; + + AtomicInteger dataSize = new AtomicInteger(); + Long keyValue = 123456789L; + for (int i = 0; i < 100; i++) { + String key = "key" + i; + Long value = System.currentTimeMillis(); + + if (i == 50) { + // delete + storage.delete(key1Index); + keys.remove(key1Index); + // update + storage.store(key2Index, keyValue); + keys.add(key2Index); + value = keyValue; + new Thread( + () -> { + try { + dataSize.set(storage.loadAll().size()); + } catch (IOException e) { + throw new RuntimeException(e); + } + }) + .start(); + } + storage.store(key, value); + keys.add(key); + storage.delete(key1Index); + keys.remove(key1Index); + } + + await().atMost(1, TimeUnit.SECONDS).until(dataSize::get, size -> size > 0); + Map<Object, Object> loadAllData = storage.loadAll(); + Assertions.assertTrue(dataSize.get() >= 50); + Assertions.assertEquals(keyValue, loadAllData.get(key50Index)); + Assertions.assertEquals(keyValue, loadAllData.get(key2Index)); + Assertions.assertNull(loadAllData.get(key1Index)); + + storage.deleteAll(keys); + } + + @AfterAll + public void destroy() { + this.storage.destroy(true); + } +}
diff --git a/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/pom.xml b/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/pom.xml index ae4fc608732a..87b3fdbd5830 100644 --- a/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/pom.xml +++ b/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/pom.xml @@ -33,6 +33,7 @@ <module>imap-storage-file</module> + <module>imap-storage-kafka</module>
diff --git a/tools/dependencies/known-dependencies.txt b/tools/dependencies/known-dependencies.txt index 3a1e736b68b5..0bd24638c7a6 100755 --- a/tools/dependencies/known-dependencies.txt +++ b/tools/dependencies/known-dependencies.txt @@ -38,3
+38,7 @@ listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar accessors-smart-2.4.7.jar asm-9.1.jar json-smart-2.4.7.jar +kafka-clients-3.4.1.jar +lz4-java-1.8.0.jar +snappy-java-1.1.8.4.jar +zstd-jni-1.5.2-1.jar
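
For reviewers who want to exercise the new plugin outside the engine, here is a minimal usage sketch against the API introduced in this diff. It is illustrative only: it assumes a Kafka broker at `localhost:9092`, the class name `IMapKafkaStorageUsageSketch` is hypothetical, and the replication factor of 1 is only appropriate for a single-broker test environment. The configuration keys are the ones defined in `KafkaConfigurationConstants`.

```java
import org.apache.seatunnel.engine.imap.storage.api.IMapStorage;
import org.apache.seatunnel.engine.imap.storage.kafka.IMapKafkaStorageFactory;

import java.util.HashMap;
import java.util.Map;

public class IMapKafkaStorageUsageSketch {
    public static void main(String[] args) throws Exception {
        Map<String, Object> config = new HashMap<>();
        // Assumed local broker; in a cluster, point this at the real bootstrap servers.
        config.put("bootstrap.servers", "localhost:9092");
        config.put("businessName", "usage-sketch");
        config.put("storage.compact.topic.prefix", "imap-");
        config.put("storage.compact.topic.replication.factor", 1);

        // The factory creates the compacted topic "imap-usage-sketch" if it does not exist
        // and verifies that its cleanup.policy is compact.
        IMapStorage storage = new IMapKafkaStorageFactory().create(config);
        try {
            storage.store("key1", 42L); // producer.send(...).get(): blocks until acked
            Map<Object, Object> all = storage.loadAll(); // replays the topic from the beginning
            System.out.println(all.get("key1")); // 42
            storage.delete("key1"); // writes a tombstone; compaction removes the key
        } finally {
            storage.destroy(true); // deletes the compacted topic
        }
    }
}
```

Note that `loadAll()` seeks to the beginning of every partition and replays the whole compacted topic on each call, so it is suited to restoring IMap state rather than hot-path reads.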