Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Improve][Fake-Connector-V2] Support user-defined schema and random data for fake-table #2406

Merged
merged 23 commits into from
Aug 23, 2022
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package org.apache.seatunnel.connectors.seatunnel.common.schema;

import java.io.Serializable;
import org.apache.seatunnel.api.table.type.ArrayType;
import org.apache.seatunnel.api.table.type.BasicType;
import org.apache.seatunnel.api.table.type.DecimalType;
Expand All @@ -33,8 +34,9 @@
import org.apache.seatunnel.shade.com.typesafe.config.ConfigRenderOptions;

import java.util.Map;
import java.io.Serializable;

public class SeatunnelSchema {
public class SeatunnelSchema implements Serializable {
public static final String SCHEMA = "schema";
private static final String FIELD_KEY = "fields";
private static final String SIMPLE_SCHEMA_FILED = "content";
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.seatunnel.connectors.seatunnel.fake.source;

import static org.apache.seatunnel.api.table.type.BasicType.BOOLEAN_TYPE;
import static org.apache.seatunnel.api.table.type.BasicType.BYTE_TYPE;
import static org.apache.seatunnel.api.table.type.BasicType.DOUBLE_TYPE;
import static org.apache.seatunnel.api.table.type.BasicType.FLOAT_TYPE;
import static org.apache.seatunnel.api.table.type.BasicType.INT_TYPE;
import static org.apache.seatunnel.api.table.type.BasicType.LONG_TYPE;
import static org.apache.seatunnel.api.table.type.BasicType.SHORT_TYPE;
import static org.apache.seatunnel.api.table.type.BasicType.STRING_TYPE;
import static org.apache.seatunnel.api.table.type.BasicType.VOID_TYPE;

import org.apache.seatunnel.api.table.type.ArrayType;
import org.apache.seatunnel.api.table.type.BasicType;
import org.apache.seatunnel.api.table.type.DecimalType;
import org.apache.seatunnel.api.table.type.LocalTimeType;
import org.apache.seatunnel.api.table.type.MapType;
import org.apache.seatunnel.api.table.type.PrimitiveByteArrayType;
import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
import org.apache.seatunnel.api.table.type.SeaTunnelRow;
import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
import org.apache.seatunnel.connectors.seatunnel.common.schema.SeatunnelSchema;

import org.apache.commons.lang3.RandomStringUtils;
import org.apache.commons.lang3.RandomUtils;

import java.lang.reflect.Array;
import java.math.BigDecimal;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

/**
 * Produces rows of random values whose column types follow a user-defined
 * {@link SeatunnelSchema}.
 */
public class FakeRandomData {

    private final SeatunnelSchema schema;

    /**
     * @param schema schema whose row type drives the generated columns
     */
    public FakeRandomData(SeatunnelSchema schema) {
        this.schema = schema;
    }

    /**
     * Builds one row containing a random value for every field type of the schema,
     * in schema order.
     *
     * @return a new row with one value per schema field
     * @throws UnsupportedOperationException if a field type has no random generator
     */
    public SeaTunnelRow randomRow() {
        SeaTunnelRowType seaTunnelRowType = schema.getSeaTunnelRowType();
        SeaTunnelDataType<?>[] fieldTypes = seaTunnelRowType.getFieldTypes();
        List<Object> randomRow = new ArrayList<>(fieldTypes.length);
        for (SeaTunnelDataType<?> fieldType : fieldTypes) {
            randomRow.add(randomColumnValue(fieldType));
        }
        return new SeaTunnelRow(randomRow.toArray());
    }

    /**
     * Generates a random value for a single column type. Array and map types recurse
     * into their element / key / value types and always contain exactly one entry.
     *
     * @param fieldType type to generate a value for; {@code null} is treated like the void type
     * @return the random value, or {@link Void#TYPE} for the void type / {@code null}
     * @throws UnsupportedOperationException if the type is not handled
     */
    @SuppressWarnings("magicnumber")
    private Object randomColumnValue(SeaTunnelDataType<?> fieldType) {
        if (BOOLEAN_TYPE.equals(fieldType)) {
            return RandomUtils.nextInt(0, 2) == 1;
        } else if (BYTE_TYPE.equals(fieldType)) {
            // RandomUtils.nextInt rejects negative bounds, so draw from [0, 256) and let the
            // narrowing cast wrap 128..255 onto the negative half of the byte range.
            return (byte) RandomUtils.nextInt(0, Byte.MAX_VALUE - Byte.MIN_VALUE + 1);
        } else if (SHORT_TYPE.equals(fieldType)) {
            return (short) RandomUtils.nextInt(Byte.MAX_VALUE, Short.MAX_VALUE);
        } else if (INT_TYPE.equals(fieldType)) {
            return RandomUtils.nextInt(Short.MAX_VALUE, Integer.MAX_VALUE);
        } else if (LONG_TYPE.equals(fieldType)) {
            return RandomUtils.nextLong(Integer.MAX_VALUE, Long.MAX_VALUE);
        } else if (FLOAT_TYPE.equals(fieldType)) {
            return RandomUtils.nextFloat(Float.MIN_VALUE, Float.MAX_VALUE);
        } else if (DOUBLE_TYPE.equals(fieldType)) {
            return RandomUtils.nextDouble(Float.MAX_VALUE, Double.MAX_VALUE);
        } else if (STRING_TYPE.equals(fieldType)) {
            return RandomStringUtils.randomAlphabetic(10);
        } else if (LocalTimeType.LOCAL_DATE_TYPE.equals(fieldType)) {
            return randomLocalDateTime().toLocalDate();
        } else if (LocalTimeType.LOCAL_TIME_TYPE.equals(fieldType)) {
            return randomLocalDateTime().toLocalTime();
        } else if (LocalTimeType.LOCAL_DATE_TIME_TYPE.equals(fieldType)) {
            return randomLocalDateTime();
        } else if (fieldType instanceof DecimalType) {
            DecimalType decimalType = (DecimalType) fieldType;
            // (precision - scale) integer digits followed by scale fraction digits.
            String integerDigits = RandomStringUtils.randomNumeric(decimalType.getPrecision() - decimalType.getScale());
            String fractionDigits = RandomStringUtils.randomNumeric(decimalType.getScale());
            return new BigDecimal(integerDigits + "." + fractionDigits);
        } else if (fieldType instanceof ArrayType) {
            ArrayType<?, ?> arrayType = (ArrayType<?, ?>) fieldType;
            BasicType<?> elementType = arrayType.getElementType();
            Object value = randomColumnValue(elementType);
            // Reflective creation keeps the runtime component type equal to the element class.
            Object arr = Array.newInstance(elementType.getTypeClass(), 1);
            Array.set(arr, 0, value);
            return arr;
        } else if (fieldType instanceof MapType) {
            MapType<?, ?> mapType = (MapType<?, ?>) fieldType;
            Object key = randomColumnValue(mapType.getKeyType());
            Object value = randomColumnValue(mapType.getValueType());
            HashMap<Object, Object> singleEntryMap = new HashMap<>();
            singleEntryMap.put(key, value);
            return singleEntryMap;
        } else if (fieldType instanceof PrimitiveByteArrayType) {
            return RandomUtils.nextBytes(10);
        } else if (VOID_TYPE.equals(fieldType) || fieldType == null) {
            return Void.TYPE;
        } else {
            throw new UnsupportedOperationException("Unexpected value: " + fieldType);
        }
    }

    /**
     * Picks a random date-time within the current year. The day is bounded by the length
     * of the randomly chosen month so the combination is always a valid calendar date.
     */
    @SuppressWarnings("magicnumber")
    private LocalDateTime randomLocalDateTime() {
        int year = LocalDateTime.now().getYear();
        // Upper bounds of RandomUtils.nextInt are exclusive.
        int month = RandomUtils.nextInt(1, 13);
        int daysInMonth = LocalDateTime.of(year, month, 1, 0, 0).toLocalDate().lengthOfMonth();
        return LocalDateTime.of(
            year,
            month,
            RandomUtils.nextInt(1, daysInMonth + 1),
            RandomUtils.nextInt(0, 24),
            RandomUtils.nextInt(0, 60)
        );
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,10 @@
import org.apache.seatunnel.api.common.SeaTunnelContext;
import org.apache.seatunnel.api.source.Boundedness;
import org.apache.seatunnel.api.source.SeaTunnelSource;
import org.apache.seatunnel.api.table.type.BasicType;
import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
import org.apache.seatunnel.api.table.type.SeaTunnelRow;
import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
import org.apache.seatunnel.common.constants.JobMode;
import org.apache.seatunnel.connectors.seatunnel.common.schema.SeatunnelSchema;
import org.apache.seatunnel.connectors.seatunnel.common.source.AbstractSingleSplitReader;
import org.apache.seatunnel.connectors.seatunnel.common.source.AbstractSingleSplitSource;
import org.apache.seatunnel.connectors.seatunnel.common.source.SingleSplitReaderContext;
Expand All @@ -38,6 +37,7 @@ public class FakeSource extends AbstractSingleSplitSource<SeaTunnelRow> {

private Config pluginConfig;
private SeaTunnelContext seaTunnelContext;
private SeatunnelSchema schema;

@Override
public Boundedness getBoundedness() {
Expand All @@ -46,14 +46,12 @@ public Boundedness getBoundedness() {

@Override
public SeaTunnelRowType getProducedType() {
return new SeaTunnelRowType(
new String[]{"name", "age", "timestamp"},
new SeaTunnelDataType<?>[]{BasicType.STRING_TYPE, BasicType.INT_TYPE, BasicType.LONG_TYPE});
return schema.getSeaTunnelRowType();
}

@Override
public AbstractSingleSplitReader<SeaTunnelRow> createReader(SingleSplitReaderContext readerContext) throws Exception {
return new FakeSourceReader(readerContext);
return new FakeSourceReader(readerContext, new FakeRandomData(schema));
}

@Override
Expand All @@ -64,6 +62,7 @@ public String getPluginName() {
@Override
public void prepare(Config pluginConfig) {
// Keep the raw plugin configuration on the source instance.
this.pluginConfig = pluginConfig;
// Build the user-defined schema once; getProducedType() and createReader() both read this field.
// NOTE(review): FakeRandomDataTest hands buildWithConfig the sub-config obtained via
// config.getConfig("schema"), whereas the whole plugin config is passed here — confirm which
// level buildWithConfig expects.
this.schema = SeatunnelSchema.buildWithConfig(pluginConfig);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,17 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Random;
import java.util.concurrent.ThreadLocalRandom;

public class FakeSourceReader extends AbstractSingleSplitReader<SeaTunnelRow> {

private static final Logger LOGGER = LoggerFactory.getLogger(FakeSourceReader.class);

private final SingleSplitReaderContext context;

private final String[] names = {"Wenjun", "Fanjia", "Zongwen", "CalvinKirs"};
private final int[] ages = {11, 22, 33, 44};
private final FakeRandomData fakeRandomData;

public FakeSourceReader(SingleSplitReaderContext context) {
public FakeSourceReader(SingleSplitReaderContext context, FakeRandomData randomData) {
this.context = context;
this.fakeRandomData = randomData;
}

@Override
Expand All @@ -56,11 +53,8 @@ public void close() {
@SuppressWarnings("magicnumber")
public void pollNext(Collector<SeaTunnelRow> output) throws InterruptedException {
// Generate a random number of rows to emit.
Random random = ThreadLocalRandom.current();
int size = random.nextInt(10) + 1;
for (int i = 0; i < size; i++) {
int randomIndex = random.nextInt(names.length);
SeaTunnelRow seaTunnelRow = new SeaTunnelRow(new Object[]{names[randomIndex], ages[randomIndex], System.currentTimeMillis()});
for (int i = 0; i < 10; i++) {
SeaTunnelRow seaTunnelRow = fakeRandomData.randomRow();
output.collect(seaTunnelRow);
}
if (Boundedness.BOUNDED.equals(context.getBoundedness())) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.seatunnel.connectors.seatunnel.fake.source;

import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
import org.apache.seatunnel.api.table.type.SeaTunnelRow;
import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
import org.apache.seatunnel.api.table.type.SqlType;
import org.apache.seatunnel.connectors.seatunnel.common.schema.SeatunnelSchema;

import org.apache.seatunnel.shade.com.typesafe.config.Config;
import org.apache.seatunnel.shade.com.typesafe.config.ConfigFactory;

import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;

import java.io.File;
import java.io.FileNotFoundException;
import java.lang.reflect.Array;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Paths;
import java.util.Map;

/**
 * Verifies that {@link FakeRandomData} produces a populated row for every schema
 * fixture on the test classpath.
 */
public class FakeRandomDataTest {

    /**
     * Builds a schema from the given fixture, generates one random row and checks each
     * column against its declared type.
     *
     * @param conf classpath name of the schema fixture
     */
    @ParameterizedTest
    @ValueSource(strings = {"complex.schema.conf", "simple.schema.conf"})
    public void testComplexSchemaParse(String conf) throws FileNotFoundException, URISyntaxException {
        Config testConfigFile = getTestConfigFile(conf);
        SeatunnelSchema seatunnelSchema = SeatunnelSchema.buildWithConfig(testConfigFile);
        FakeRandomData fakeRandomData = new FakeRandomData(seatunnelSchema);

        SeaTunnelRow seaTunnelRow = fakeRandomData.randomRow();
        Assertions.assertNotNull(seaTunnelRow);
        Object[] fields = seaTunnelRow.getFields();
        Assertions.assertNotNull(fields);

        SeaTunnelRowType seaTunnelRowType = seatunnelSchema.getSeaTunnelRowType();
        SeaTunnelDataType<?>[] fieldTypes = seaTunnelRowType.getFieldTypes();
        // The row must carry exactly one value per schema field, otherwise the loop below
        // would fail with an unhelpful index error.
        Assertions.assertEquals(fieldTypes.length, fields.length);

        for (int i = 0; i < fieldTypes.length; i++) {
            SqlType sqlType = fieldTypes[i].getSqlType();
            Object field = fields[i];
            if (sqlType == SqlType.NULL) {
                // NULL-typed columns are represented by the Void.TYPE sentinel.
                Assertions.assertSame(Void.TYPE, field);
            } else {
                Assertions.assertNotNull(field);
            }
            if (sqlType == SqlType.MAP) {
                Assertions.assertTrue(field instanceof Map);
                Map<?, ?> entries = (Map<?, ?>) field;
                entries.forEach((k, v) -> {
                    Assertions.assertNotNull(k);
                    Assertions.assertNotNull(v);
                });
            }
            if (sqlType == SqlType.ARRAY) {
                Assertions.assertTrue(field.getClass().isArray());
                Assertions.assertNotNull(Array.get(field, 0));
            }
        }
    }

    /**
     * Loads a fixture from the test classpath and returns its {@code schema} section.
     *
     * @param configFile resource name, with or without a leading slash
     * @return the config found under the {@code schema} path
     * @throws FileNotFoundException if the resource is not on the classpath
     */
    private Config getTestConfigFile(String configFile) throws FileNotFoundException, URISyntaxException {
        if (!configFile.startsWith("/")) {
            configFile = "/" + configFile;
        }
        URL resource = FakeRandomDataTest.class.getResource(configFile);
        if (resource == null) {
            throw new FileNotFoundException("Can't find config file: " + configFile);
        }
        File file = Paths.get(resource.toURI()).toFile();
        Config config = ConfigFactory.parseFile(file);
        // Use a JUnit assertion rather than the `assert` keyword, which is skipped when the
        // JVM runs without -ea.
        Assertions.assertTrue(config.hasPath("schema"), "Missing 'schema' section in " + configFile);
        return config.getConfig("schema");
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Schema fixture declaring one field per type; each field name mirrors the type it declares.
# Presumably one of the fixtures loaded by FakeRandomDataTest — confirm against the file name.
schema {
fields {
# Nested container types: map values may themselves be maps or arrays.
map = "map<string, map<string, string>>"
map_array = "map<string, map<string, array<int>>>"
array = "array<tinyint>"
# Scalar types.
string = string
boolean = boolean
tinyint = tinyint
smallint = smallint
int = int
bigint = bigint
float = float
double = double
# Decimal declared with precision 30 and scale 8.
decimal = "decimal(30, 8)"
null = "null"
bytes = bytes
# Temporal types.
date = date
time = time
timestamp = timestamp
}
}
Loading